[v3,20/21] KVM:x86: Enable kernel IBT support for guest
Commit Message
Enable MSR_IA32_S_CET access for guest kernel IBT.
The mainline Linux kernel now supports supervisor IBT for kernel code.
To make s-IBT work in a guest (or nested guest), pass through
MSR_IA32_S_CET to the guest (nested guest) if the host kernel and KVM
have IBT enabled.
Note, s-IBT can work independently of host XSAVES support because guest
MSR_IA32_S_CET is {stored to|loaded from} the VMCS GUEST_S_CET field.
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
arch/x86/kvm/vmx/nested.c | 3 +++
arch/x86/kvm/vmx/vmx.c | 39 ++++++++++++++++++++++++++++++++++-----
arch/x86/kvm/x86.c | 7 ++++++-
3 files changed, 43 insertions(+), 6 deletions(-)
Comments
On Thu, May 11, 2023, Yang Weijiang wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index a2494156902d..1d0151f9e575 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
> return true;
> case MSR_IA32_U_CET:
> case MSR_IA32_PL3_SSP:
> + case MSR_IA32_S_CET:
> return true;
> }
>
> @@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> break;
> case MSR_IA32_U_CET:
> + case MSR_IA32_S_CET:
> case MSR_IA32_PL3_SSP:
> case MSR_KVM_GUEST_SSP:
> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
> return 1;
> - if (msr_info->index == MSR_KVM_GUEST_SSP)
> + if (msr_info->index == MSR_KVM_GUEST_SSP) {
Unnecessary curly braces.
> msr_info->data = vmcs_readl(GUEST_SSP);
> - else
> + } else if (msr_info->index == MSR_IA32_S_CET) {
> + msr_info->data = vmcs_readl(GUEST_S_CET);
> + } else {
> kvm_get_xsave_msr(msr_info);
> + }
> break;
> case MSR_IA32_DEBUGCTLMSR:
> msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> @@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> vmx->pt_desc.guest.addr_a[index / 2] = data;
> break;
> case MSR_IA32_U_CET:
> + case MSR_IA32_S_CET:
> case MSR_IA32_PL3_SSP:
> case MSR_KVM_GUEST_SSP:
> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
> @@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> if ((msr_index == MSR_IA32_PL3_SSP ||
> msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
> return 1;
> - if (msr_index == MSR_KVM_GUEST_SSP)
> + if (msr_index == MSR_KVM_GUEST_SSP) {
> vmcs_writel(GUEST_SSP, data);
> - else
> + } else if (msr_index == MSR_IA32_S_CET) {
> + vmcs_writel(GUEST_S_CET, data);
> + } else {
Same here.
> kvm_set_xsave_msr(msr_info);
> + }
> break;
> case MSR_IA32_PERF_CAPABILITIES:
> if (data && !vcpu_to_pmu(vcpu)->version)
> @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> kvm_wait_lapic_expire(vcpu);
>
> + /*
> + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
> + * No need to save the other two vmcs fields as supervisor SHSTK
> + * are not enabled on Intel platform now.
> + */
> + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
> + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
> + u64 msr;
> +
> + rdmsrl(MSR_IA32_S_CET, msr);
Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
context.
But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
i.e. is static post boot, which means it can be read once at KVM load time, e.g.
just like host_efer.
The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
vCPU across a BIOS call then we've got bigger issues.
> + vmcs_writel(HOST_S_CET, msr);
> + }
> +
> /* The actual VMENTER/EXIT is in the .noinstr.text section. */
> vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
>
> @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
>
> incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
> +
> + /*
> + * If IBT is available to guest, then passthrough S_CET MSR too since
> + * kernel IBT is already in mainline kernel tree.
> + */
> + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
> + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
> }
>
> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> /* Refresh #PF interception to account for MAXPHYADDR changes. */
> vmx_update_exception_bitmap(vcpu);
>
> - if (kvm_cet_user_supported())
> + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
Yeah, kvm_cet_user_supported() simply looks wrong.
On 6/24/2023 8:03 AM, Sean Christopherson wrote:
> On Thu, May 11, 2023, Yang Weijiang wrote:
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index a2494156902d..1d0151f9e575 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
>> return true;
>> case MSR_IA32_U_CET:
>> case MSR_IA32_PL3_SSP:
>> + case MSR_IA32_S_CET:
>> return true;
>> }
>>
>> @@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>> break;
>> case MSR_IA32_U_CET:
>> + case MSR_IA32_S_CET:
>> case MSR_IA32_PL3_SSP:
>> case MSR_KVM_GUEST_SSP:
>> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
>> return 1;
>> - if (msr_info->index == MSR_KVM_GUEST_SSP)
>> + if (msr_info->index == MSR_KVM_GUEST_SSP) {
> Unnecessary curly braces.
Something in my mind must be wrong :-); I'll remove them.
>
>> msr_info->data = vmcs_readl(GUEST_SSP);
>> - else
>> + } else if (msr_info->index == MSR_IA32_S_CET) {
>> + msr_info->data = vmcs_readl(GUEST_S_CET);
>> + } else {
>> kvm_get_xsave_msr(msr_info);
>> + }
>> break;
>> case MSR_IA32_DEBUGCTLMSR:
>> msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>> @@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> vmx->pt_desc.guest.addr_a[index / 2] = data;
>> break;
>> case MSR_IA32_U_CET:
>> + case MSR_IA32_S_CET:
>> case MSR_IA32_PL3_SSP:
>> case MSR_KVM_GUEST_SSP:
>> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
>> @@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> if ((msr_index == MSR_IA32_PL3_SSP ||
>> msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
>> return 1;
>> - if (msr_index == MSR_KVM_GUEST_SSP)
>> + if (msr_index == MSR_KVM_GUEST_SSP) {
>> vmcs_writel(GUEST_SSP, data);
>> - else
>> + } else if (msr_index == MSR_IA32_S_CET) {
>> + vmcs_writel(GUEST_S_CET, data);
>> + } else {
> Same here.
>
>> kvm_set_xsave_msr(msr_info);
>> + }
>> break;
>> case MSR_IA32_PERF_CAPABILITIES:
>> if (data && !vcpu_to_pmu(vcpu)->version)
>> @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>
>> kvm_wait_lapic_expire(vcpu);
>>
>> + /*
>> + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
>> + * No need to save the other two vmcs fields as supervisor SHSTK
>> + * are not enabled on Intel platform now.
>> + */
>> + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
>> + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
>> + u64 msr;
>> +
>> + rdmsrl(MSR_IA32_S_CET, msr);
> Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
> minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
> from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
> be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
> context.
Agree with you.
>
> But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
> i.e. is static post boot, which means it can be read once at KVM load time, e.g.
> just like host_efer.
I think handling S_CET like host_efer from a usage perspective is
possible given that currently only kernel IBT is enabled in the kernel;
I'll remove the code and initialize the VMCS field once, like host_efer.
>
> The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
> vCPU across a BIOS call then we've got bigger issues.
What's the problem you're referring to?
>
>> + vmcs_writel(HOST_S_CET, msr);
>> + }
>> +
>> /* The actual VMENTER/EXIT is in the .noinstr.text section. */
>> vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
>>
>> @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
>>
>> incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
>> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
>> +
>> + /*
>> + * If IBT is available to guest, then passthrough S_CET MSR too since
>> + * kernel IBT is already in mainline kernel tree.
>> + */
>> + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
>> + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
>> }
>>
>> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>> @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>> /* Refresh #PF interception to account for MAXPHYADDR changes. */
>> vmx_update_exception_bitmap(vcpu);
>>
>> - if (kvm_cet_user_supported())
>> + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
> Yeah, kvm_cet_user_supported() simply looks wrong.
These are preconditions for setting up CET MSRs for the guest; in
vmx_update_intercept_for_cet_msr(), the actual MSR interception control
is based on the guest_cpuid_has() results.
On Mon, Jun 26, 2023, Weijiang Yang wrote:
>
> On 6/24/2023 8:03 AM, Sean Christopherson wrote:
> > > @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > > kvm_wait_lapic_expire(vcpu);
> > > + /*
> > > + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
> > > + * No need to save the other two vmcs fields as supervisor SHSTK
> > > + * are not enabled on Intel platform now.
> > > + */
> > > + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
> > > + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
> > > + u64 msr;
> > > +
> > > + rdmsrl(MSR_IA32_S_CET, msr);
> > Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
> > minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
> > from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
> > be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
> > context.
>
> Agree with you.
>
> >
> > But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
> > i.e. is static post boot, which means it can be read once at KVM load time, e.g.
> > just like host_efer.
>
> I think handling S_CET like host_efer from usage perspective is possible
> given currently only
>
> kernel IBT is enabled in kernel, I'll remove the code and initialize the
> vmcs field once like host_efer.
>
> >
> > The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
> > vCPU across a BIOS call then we've got bigger issues.
>
> What's the problem you're referring to?
I was pointing out that S_CET isn't strictly constant, as it's saved/modified/restored
by ibt_save() + ibt_restore(). But KVM should never run between those paired
functions, so from KVM's perspective the host value is effectively constant.
> > > + vmcs_writel(HOST_S_CET, msr);
> > > + }
> > > +
> > > /* The actual VMENTER/EXIT is in the .noinstr.text section. */
> > > vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
> > > @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
> > > incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
> > > vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
> > > +
> > > + /*
> > > + * If IBT is available to guest, then passthrough S_CET MSR too since
> > > + * kernel IBT is already in mainline kernel tree.
> > > + */
> > > + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
> > > + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
> > > }
> > > static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > > @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > > /* Refresh #PF interception to account for MAXPHYADDR changes. */
> > > vmx_update_exception_bitmap(vcpu);
> > > - if (kvm_cet_user_supported())
> > > + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
> > Yeah, kvm_cet_user_supported() simply looks wrong.
>
> These are preconditions to set up CET MSRs for guest, in
> vmx_update_intercept_for_cet_msr(),
>
> the actual MSR control is based on guest_cpuid_has() results.
I know. My point is that with the below combination,
kvm_cet_user_supported() = true
kvm_cpu_cap_has(X86_FEATURE_IBT) = false
guest_cpuid_has(vcpu, X86_FEATURE_IBT) = true
KVM will passthrough MSR_IA32_S_CET for guest IBT even though IBT isn't supported
on the host.
incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
So either KVM is broken and is passing through S_CET when it shouldn't, or the
check on kvm_cet_user_supported() is redundant, i.e. the above combination is
impossible.
Either way, the code *looks* wrong, which is almost as bad as it being functionally
wrong.
On 6/27/2023 4:50 AM, Sean Christopherson wrote:
> On Mon, Jun 26, 2023, Weijiang Yang wrote:
>> On 6/24/2023 8:03 AM, Sean Christopherson wrote:
>>>> @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>>> kvm_wait_lapic_expire(vcpu);
>>>> + /*
>>>> + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
>>>> + * No need to save the other two vmcs fields as supervisor SHSTK
>>>> + * are not enabled on Intel platform now.
>>>> + */
>>>> + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
>>>> + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
>>>> + u64 msr;
>>>> +
>>>> + rdmsrl(MSR_IA32_S_CET, msr);
>>> Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
>>> minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
>>> from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
>>> be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
>>> context.
>> Agree with you.
>>
>>> But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
>>> i.e. is static post boot, which means it can be read once at KVM load time, e.g.
>>> just like host_efer.
>> I think handling S_CET like host_efer from usage perspective is possible
>> given currently only
>>
>> kernel IBT is enabled in kernel, I'll remove the code and initialize the
>> vmcs field once like host_efer.
>>
>>> The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
>>> vCPU across a BIOS call then we've got bigger issues.
>> What's the problem you're referring to?
> I was pointing out that S_CET isn't strictly constant, as it's saved/modified/restored
> by ibt_save() + ibt_restore(). But KVM should never run between those paired
> functions, so from KVM's perspective the host value is effectively constant.
Yeah, so I think the host S_CET setup can be handled like host_efer, thanks.
>
>>>> + vmcs_writel(HOST_S_CET, msr);
>>>> + }
>>>> +
>>>> /* The actual VMENTER/EXIT is in the .noinstr.text section. */
>>>> vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
>>>> @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
>>>> incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
>>>> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
>>>> +
>>>> + /*
>>>> + * If IBT is available to guest, then passthrough S_CET MSR too since
>>>> + * kernel IBT is already in mainline kernel tree.
>>>> + */
>>>> + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
>>>> + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
>>>> }
>>>> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>>>> @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>>>> /* Refresh #PF interception to account for MAXPHYADDR changes. */
>>>> vmx_update_exception_bitmap(vcpu);
>>>> - if (kvm_cet_user_supported())
>>>> + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
>>> Yeah, kvm_cet_user_supported() simply looks wrong.
>> These are preconditions to set up CET MSRs for guest, in
>> vmx_update_intercept_for_cet_msr(),
>>
>> the actual MSR control is based on guest_cpuid_has() results.
> I know. My point is that with the below combination,
>
> kvm_cet_user_supported() = true
> kvm_cpu_cap_has(X86_FEATURE_IBT) = false
> guest_cpuid_has(vcpu, X86_FEATURE_IBT) = true
>
> KVM will passthrough MSR_IA32_S_CET for guest IBT even though IBT isn't supported
> on the host.
>
> incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
> vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
>
> So either KVM is broken and is passing through S_CET when it shouldn't, or the
> check on kvm_cet_user_supported() is redundant, i.e. the above combination is
> impossible.
>
> Either way, the code *looks* wrong, which is almost as bad as it being functionally
> wrong.
Got your point; I'll refine the related code to make the handling reasonable.
@@ -664,6 +664,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_U_CET, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_S_CET, MSR_TYPE_RW);
+
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PL3_SSP, MSR_TYPE_RW);
@@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
return true;
case MSR_IA32_U_CET:
case MSR_IA32_PL3_SSP:
+ case MSR_IA32_S_CET:
return true;
}
@@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
case MSR_IA32_PL3_SSP:
case MSR_KVM_GUEST_SSP:
if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
return 1;
- if (msr_info->index == MSR_KVM_GUEST_SSP)
+ if (msr_info->index == MSR_KVM_GUEST_SSP) {
msr_info->data = vmcs_readl(GUEST_SSP);
- else
+ } else if (msr_info->index == MSR_IA32_S_CET) {
+ msr_info->data = vmcs_readl(GUEST_S_CET);
+ } else {
kvm_get_xsave_msr(msr_info);
+ }
break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
case MSR_IA32_PL3_SSP:
case MSR_KVM_GUEST_SSP:
if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
@@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr_index == MSR_IA32_PL3_SSP ||
msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
return 1;
- if (msr_index == MSR_KVM_GUEST_SSP)
+ if (msr_index == MSR_KVM_GUEST_SSP) {
vmcs_writel(GUEST_SSP, data);
- else
+ } else if (msr_index == MSR_IA32_S_CET) {
+ vmcs_writel(GUEST_S_CET, data);
+ } else {
kvm_set_xsave_msr(msr_info);
+ }
break;
case MSR_IA32_PERF_CAPABILITIES:
if (data && !vcpu_to_pmu(vcpu)->version)
@@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_wait_lapic_expire(vcpu);
+ /*
+ * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
+ * No need to save the other two vmcs fields as supervisor SHSTK
+ * are not enabled on Intel platform now.
+ */
+ if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+ (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
+ u64 msr;
+
+ rdmsrl(MSR_IA32_S_CET, msr);
+ vmcs_writel(HOST_S_CET, msr);
+ }
+
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
@@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
+
+ /*
+ * If IBT is available to guest, then passthrough S_CET MSR too since
+ * kernel IBT is already in mainline kernel tree.
+ */
+ incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
}
static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
@@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);
- if (kvm_cet_user_supported())
+ if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
vmx_update_intercept_for_cet_msr(vcpu);
}
@@ -1471,6 +1471,7 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
MSR_IA32_XSS,
MSR_IA32_U_CET, MSR_IA32_PL3_SSP, MSR_KVM_GUEST_SSP,
+ MSR_IA32_S_CET,
};
static const u32 msrs_to_save_pmu[] = {
@@ -13652,7 +13653,8 @@ EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
- if (!kvm_cet_user_supported())
+ if (!kvm_cet_user_supported() &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
return false;
if (msr->host_initiated)
@@ -13666,6 +13668,9 @@ bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (msr->index == MSR_KVM_GUEST_SSP)
return false;
+ if (msr->index == MSR_IA32_S_CET)
+ return guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+
if (msr->index == MSR_IA32_PL3_SSP &&
!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
return false;