[RFC,v2,06/11] KVM: x86: Advertise ARCH_CAP_VIRTUAL_ENUM support
Commit Message
From: Zhang Chen <chen.zhang@intel.com>
Bit 63 of the IA32_ARCH_CAPABILITIES MSR indicates the availability of the
VIRTUAL_ENUMERATION_MSR (index 0x50000000), which enumerates features such
as mitigation enumeration, used by the guest to hint to VMMs which
software mitigations are in use.
Advertise ARCH_CAP_VIRTUAL_ENUM support for VMX and emulate read/write
of the VIRTUAL_ENUMERATION_MSR. For now, VIRTUAL_ENUMERATION_MSR is always 0.
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
Co-developed-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
Tested-by: Jiaan Lu <jiaan.lu@intel.com>
---
arch/x86/kvm/svm/svm.c | 1 +
arch/x86/kvm/vmx/vmx.c | 19 +++++++++++++++++++
arch/x86/kvm/vmx/vmx.h | 1 +
arch/x86/kvm/x86.c | 16 +++++++++++++++-
4 files changed, 36 insertions(+), 1 deletion(-)
Comments
On 4/14/2023 2:25 PM, Chao Gao wrote:
> From: Zhang Chen <chen.zhang@intel.com>
>
> Bit 63 of IA32_ARCH_CAPABILITIES MSR indicates availablility of the
> VIRTUAL_ENUMERATION_MSR (index 0x50000000) that enumerates features like
> e.g., mitigation enumeration which is used for guest to hint VMMs the
> software mitigations in use.
>
> Advertise ARCH_CAP_VIRTUAL_ENUM support for VMX and emulate read/write
> of the VIRTUAL_ENUMERATION_MSR. Now VIRTUAL_ENUMERATION_MSR is always 0.
>
> Signed-off-by: Zhang Chen <chen.zhang@intel.com>
> Co-developed-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Tested-by: Jiaan Lu <jiaan.lu@intel.com>
> ---
> arch/x86/kvm/svm/svm.c | 1 +
> arch/x86/kvm/vmx/vmx.c | 19 +++++++++++++++++++
> arch/x86/kvm/vmx/vmx.h | 1 +
> arch/x86/kvm/x86.c | 16 +++++++++++++++-
> 4 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 57f241c5a371..195d0cf9309a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4093,6 +4093,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
> {
> switch (index) {
> case MSR_IA32_MCG_EXT_CTL:
> + case MSR_VIRTUAL_ENUMERATION:
> case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
> return false;
> case MSR_IA32_SMBASE:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9f6919bec2b3..85419137decb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1943,6 +1943,8 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
> return !(msr->data & ~valid_bits);
> }
>
> +#define VIRTUAL_ENUMERATION_VALID_BITS 0ULL
> +
> static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
> {
> switch (msr->index) {
> @@ -1950,6 +1952,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
> if (!nested)
> return 1;
> return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
> + case MSR_VIRTUAL_ENUMERATION:
> + msr->data = VIRTUAL_ENUMERATION_VALID_BITS;
> + return 0;
> default:
> return KVM_MSR_RET_INVALID;
> }
> @@ -2096,6 +2101,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_IA32_DEBUGCTLMSR:
> msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> break;
> + case MSR_VIRTUAL_ENUMERATION:
> + if (!msr_info->host_initiated &&
> + !(vcpu->arch.arch_capabilities & ARCH_CAP_VIRTUAL_ENUM))
> + return 1;
> + msr_info->data = vmx->msr_virtual_enumeration;
> + break;
> default:
> find_uret_msr:
> msr = vmx_find_uret_msr(vmx, msr_info->index);
> @@ -2437,6 +2448,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> }
> ret = kvm_set_msr_common(vcpu, msr_info);
> break;
> + case MSR_VIRTUAL_ENUMERATION:
> + if (!msr_info->host_initiated)
> + return 1;
> + if (data & ~VIRTUAL_ENUMERATION_VALID_BITS)
> + return 1;
> +
> + vmx->msr_virtual_enumeration = data;
> + break;
>
> default:
> find_uret_msr:
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 021d86b52e18..a7faaf9fdc26 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -292,6 +292,7 @@ struct vcpu_vmx {
>
> u64 spec_ctrl;
> u64 guest_spec_ctrl;
> + u64 msr_virtual_enumeration;
> u32 msr_ia32_umwait_control;
>
> /*
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3c58dbae7b4c..a1bc52bebdcc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1537,6 +1537,7 @@ static const u32 emulated_msrs_all[] = {
>
> MSR_K7_HWCR,
> MSR_KVM_POLL_CONTROL,
> + MSR_VIRTUAL_ENUMERATION,
> };
>
> static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
> @@ -1570,6 +1571,7 @@ static const u32 msr_based_features_all[] = {
> MSR_IA32_UCODE_REV,
> MSR_IA32_ARCH_CAPABILITIES,
> MSR_IA32_PERF_CAPABILITIES,
> + MSR_VIRTUAL_ENUMERATION,
> };
>
> static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
> @@ -1591,7 +1593,8 @@ static unsigned int num_msr_based_features;
> ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
> ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
> ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
> - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
> + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | \
> + ARCH_CAP_VIRTUAL_ENUM)
We cannot do this.
Otherwise, an AMD L1 with X86_FEATURE_ARCH_CAPABILITIES configured could
expose MSR_VIRTUAL_ENUMERATION to L2 without actually supporting it.
>
> static u64 kvm_get_arch_capabilities(void)
> {
> @@ -1610,6 +1613,17 @@ static u64 kvm_get_arch_capabilities(void)
> */
> data |= ARCH_CAP_PSCHANGE_MC_NO;
>
> + /*
> + * Virtual enumeration is a paravirt feature. The only usage for now
> + * is to bridge the gap caused by microarchitecture changes between
> + * different Intel processors. And its usage is linked to "virtualize
> + * IA32_SPEC_CTRL" which is a VMX feature. Whether AMD SVM can benefit
> + * from the same usage and how to implement it is still unclear. Limit
> + * virtual enumeration to VMX.
> + */
> + if (static_call(kvm_x86_has_emulated_msr)(NULL, MSR_VIRTUAL_ENUMERATION))
> + data |= ARCH_CAP_VIRTUAL_ENUM;
> +
> /*
> * If we're doing cache flushes (either "always" or "cond")
> * we will do one whenever the guest does a vmlaunch/vmresume.
On Thu, May 18, 2023 at 06:14:40PM +0800, Xiaoyao Li wrote:
>> static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
>> @@ -1591,7 +1593,8 @@ static unsigned int num_msr_based_features;
>> ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
>> ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
>> ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
>> - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
>> + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | \
>> + ARCH_CAP_VIRTUAL_ENUM)
>
>We cannot do it.
>
>Otherwise, an AMD L1 with X86_FEATURE_ARCH_CAPABILITIES configured is
>possible to expose MSR_VIRTUAL_ENUMERATION to L2 while no support for it.
How would an AMD L1 see the ARCH_CAP_VIRTUAL_ENUM feature in the first
place? Because ...
>
>> static u64 kvm_get_arch_capabilities(void)
>> {
>> @@ -1610,6 +1613,17 @@ static u64 kvm_get_arch_capabilities(void)
>> */
>> data |= ARCH_CAP_PSCHANGE_MC_NO;
>> + /*
>> + * Virtual enumeration is a paravirt feature. The only usage for now
>> + * is to bridge the gap caused by microarchitecture changes between
>> + * different Intel processors. And its usage is linked to "virtualize
>> + * IA32_SPEC_CTRL" which is a VMX feature. Whether AMD SVM can benefit
>> + * from the same usage and how to implement it is still unclear. Limit
>> + * virtual enumeration to VMX.
>> + */
>> + if (static_call(kvm_x86_has_emulated_msr)(NULL, MSR_VIRTUAL_ENUMERATION))
>> + data |= ARCH_CAP_VIRTUAL_ENUM;
the feature is exposed on Intel CPUs only.
Do you mean an AMD L1 created on an Intel L0, where the Intel L0 even emulates
nested (SVM) support for the L1? This sounds like a very contrived case.
>> +
>> /*
>> * If we're doing cache flushes (either "always" or "cond")
>> * we will do one whenever the guest does a vmlaunch/vmresume.
>
On 5/19/2023 5:57 PM, Chao Gao wrote:
> On Thu, May 18, 2023 at 06:14:40PM +0800, Xiaoyao Li wrote:
>>> static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
>>> @@ -1591,7 +1593,8 @@ static unsigned int num_msr_based_features;
>>> ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
>>> ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
>>> ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
>>> - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
>>> + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | \
>>> + ARCH_CAP_VIRTUAL_ENUM)
>>
>> We cannot do it.
>>
>> Otherwise, an AMD L1 with X86_FEATURE_ARCH_CAPABILITIES configured is
>> possible to expose MSR_VIRTUAL_ENUMERATION to L2 while no support for it.
>
> How does AMD L1 see the ARCH_CAP_VIRTUAL_ENUM feature in the first
> place? because ...
>
>>
>>> static u64 kvm_get_arch_capabilities(void)
>>> {
>>> @@ -1610,6 +1613,17 @@ static u64 kvm_get_arch_capabilities(void)
>>> */
>>> data |= ARCH_CAP_PSCHANGE_MC_NO;
>>> + /*
>>> + * Virtual enumeration is a paravirt feature. The only usage for now
>>> + * is to bridge the gap caused by microarchitecture changes between
>>> + * different Intel processors. And its usage is linked to "virtualize
>>> + * IA32_SPEC_CTRL" which is a VMX feature. Whether AMD SVM can benefit
>>> + * from the same usage and how to implement it is still unclear. Limit
>>> + * virtual enumeration to VMX.
>>> + */
>>> + if (static_call(kvm_x86_has_emulated_msr)(NULL, MSR_VIRTUAL_ENUMERATION))
>>> + data |= ARCH_CAP_VIRTUAL_ENUM;
>
> the feature is exposed on Intel CPUs only.
>
> Do you mean AMD L1 created on Intel L0? and Intel L0 even emulates
> nested (SVM) support for the L1? This sounds a very contrived case.
You are right. I was thinking of a rare case but overlooked the fact that
VMX doesn't support nested SVM.
Sorry about that.
>>> +
>>> /*
>>> * If we're doing cache flushes (either "always" or "cond")
>>> * we will do one whenever the guest does a vmlaunch/vmresume.
>>
@@ -4093,6 +4093,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
+ case MSR_VIRTUAL_ENUMERATION:
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
case MSR_IA32_SMBASE:
@@ -1943,6 +1943,8 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
return !(msr->data & ~valid_bits);
}
+#define VIRTUAL_ENUMERATION_VALID_BITS 0ULL
+
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
@@ -1950,6 +1952,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
if (!nested)
return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+ case MSR_VIRTUAL_ENUMERATION:
+ msr->data = VIRTUAL_ENUMERATION_VALID_BITS;
+ return 0;
default:
return KVM_MSR_RET_INVALID;
}
@@ -2096,6 +2101,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
break;
+ case MSR_VIRTUAL_ENUMERATION:
+ if (!msr_info->host_initiated &&
+ !(vcpu->arch.arch_capabilities & ARCH_CAP_VIRTUAL_ENUM))
+ return 1;
+ msr_info->data = vmx->msr_virtual_enumeration;
+ break;
default:
find_uret_msr:
msr = vmx_find_uret_msr(vmx, msr_info->index);
@@ -2437,6 +2448,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
ret = kvm_set_msr_common(vcpu, msr_info);
break;
+ case MSR_VIRTUAL_ENUMERATION:
+ if (!msr_info->host_initiated)
+ return 1;
+ if (data & ~VIRTUAL_ENUMERATION_VALID_BITS)
+ return 1;
+
+ vmx->msr_virtual_enumeration = data;
+ break;
default:
find_uret_msr:
@@ -292,6 +292,7 @@ struct vcpu_vmx {
u64 spec_ctrl;
u64 guest_spec_ctrl;
+ u64 msr_virtual_enumeration;
u32 msr_ia32_umwait_control;
/*
@@ -1537,6 +1537,7 @@ static const u32 emulated_msrs_all[] = {
MSR_K7_HWCR,
MSR_KVM_POLL_CONTROL,
+ MSR_VIRTUAL_ENUMERATION,
};
static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
@@ -1570,6 +1571,7 @@ static const u32 msr_based_features_all[] = {
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
+ MSR_VIRTUAL_ENUMERATION,
};
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
@@ -1591,7 +1593,8 @@ static unsigned int num_msr_based_features;
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
- ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | \
+ ARCH_CAP_VIRTUAL_ENUM)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1610,6 +1613,17 @@ static u64 kvm_get_arch_capabilities(void)
*/
data |= ARCH_CAP_PSCHANGE_MC_NO;
+ /*
+ * Virtual enumeration is a paravirt feature. The only usage for now
+ * is to bridge the gap caused by microarchitecture changes between
+ * different Intel processors. And its usage is linked to "virtualize
+ * IA32_SPEC_CTRL" which is a VMX feature. Whether AMD SVM can benefit
+ * from the same usage and how to implement it is still unclear. Limit
+ * virtual enumeration to VMX.
+ */
+ if (static_call(kvm_x86_has_emulated_msr)(NULL, MSR_VIRTUAL_ENUMERATION))
+ data |= ARCH_CAP_VIRTUAL_ENUM;
+
/*
* If we're doing cache flushes (either "always" or "cond")
* we will do one whenever the guest does a vmlaunch/vmresume.