[v3,20/21] KVM:x86: Enable kernel IBT support for guest
Commit Message
Enable MSR_IA32_S_CET access for guest kernel IBT.
The mainline Linux kernel now supports supervisor IBT for kernel code.
To make s-IBT work in a guest (or nested guest), pass through
MSR_IA32_S_CET to the guest (nested guest) if the host kernel and KVM
have IBT enabled.
Note, s-IBT can work independently of host XSAVES support because guest
MSR_IA32_S_CET is {stored to|loaded from} the VMCS GUEST_S_CET field.
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
arch/x86/kvm/vmx/nested.c | 3 +++
arch/x86/kvm/vmx/vmx.c | 39 ++++++++++++++++++++++++++++++++++-----
arch/x86/kvm/x86.c | 7 ++++++-
3 files changed, 43 insertions(+), 6 deletions(-)
Comments
On Thu, May 11, 2023, Yang Weijiang wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index a2494156902d..1d0151f9e575 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
> return true;
> case MSR_IA32_U_CET:
> case MSR_IA32_PL3_SSP:
> + case MSR_IA32_S_CET:
> return true;
> }
>
> @@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> break;
> case MSR_IA32_U_CET:
> + case MSR_IA32_S_CET:
> case MSR_IA32_PL3_SSP:
> case MSR_KVM_GUEST_SSP:
> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
> return 1;
> - if (msr_info->index == MSR_KVM_GUEST_SSP)
> + if (msr_info->index == MSR_KVM_GUEST_SSP) {
Unnecessary curly braces.
> msr_info->data = vmcs_readl(GUEST_SSP);
> - else
> + } else if (msr_info->index == MSR_IA32_S_CET) {
> + msr_info->data = vmcs_readl(GUEST_S_CET);
> + } else {
> kvm_get_xsave_msr(msr_info);
> + }
> break;
> case MSR_IA32_DEBUGCTLMSR:
> msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> @@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> vmx->pt_desc.guest.addr_a[index / 2] = data;
> break;
> case MSR_IA32_U_CET:
> + case MSR_IA32_S_CET:
> case MSR_IA32_PL3_SSP:
> case MSR_KVM_GUEST_SSP:
> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
> @@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> if ((msr_index == MSR_IA32_PL3_SSP ||
> msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
> return 1;
> - if (msr_index == MSR_KVM_GUEST_SSP)
> + if (msr_index == MSR_KVM_GUEST_SSP) {
> vmcs_writel(GUEST_SSP, data);
> - else
> + } else if (msr_index == MSR_IA32_S_CET) {
> + vmcs_writel(GUEST_S_CET, data);
> + } else {
Same here.
> kvm_set_xsave_msr(msr_info);
> + }
> break;
> case MSR_IA32_PERF_CAPABILITIES:
> if (data && !vcpu_to_pmu(vcpu)->version)
> @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> kvm_wait_lapic_expire(vcpu);
>
> + /*
> + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
> + * No need to save the other two vmcs fields as supervisor SHSTK
> + * are not enabled on Intel platform now.
> + */
> + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
> + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
> + u64 msr;
> +
> + rdmsrl(MSR_IA32_S_CET, msr);
Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
context.
But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
i.e. is static post boot, which means it can be read once at KVM load time, e.g.
just like host_efer.
The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
vCPU across a BIOS call then we've got bigger issues.
> + vmcs_writel(HOST_S_CET, msr);
> + }
> +
> /* The actual VMENTER/EXIT is in the .noinstr.text section. */
> vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
>
> @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
>
> incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
> +
> + /*
> + * If IBT is available to guest, then passthrough S_CET MSR too since
> + * kernel IBT is already in mainline kernel tree.
> + */
> + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
> + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
> }
>
> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> /* Refresh #PF interception to account for MAXPHYADDR changes. */
> vmx_update_exception_bitmap(vcpu);
>
> - if (kvm_cet_user_supported())
> + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
Yeah, kvm_cet_user_supported() simply looks wrong.
On 6/24/2023 8:03 AM, Sean Christopherson wrote:
> On Thu, May 11, 2023, Yang Weijiang wrote:
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index a2494156902d..1d0151f9e575 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
>> return true;
>> case MSR_IA32_U_CET:
>> case MSR_IA32_PL3_SSP:
>> + case MSR_IA32_S_CET:
>> return true;
>> }
>>
>> @@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>> break;
>> case MSR_IA32_U_CET:
>> + case MSR_IA32_S_CET:
>> case MSR_IA32_PL3_SSP:
>> case MSR_KVM_GUEST_SSP:
>> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
>> return 1;
>> - if (msr_info->index == MSR_KVM_GUEST_SSP)
>> + if (msr_info->index == MSR_KVM_GUEST_SSP) {
> Unnecessary curly braces.
Something in my mind must be wrong :-); I'll remove them.
>
>> msr_info->data = vmcs_readl(GUEST_SSP);
>> - else
>> + } else if (msr_info->index == MSR_IA32_S_CET) {
>> + msr_info->data = vmcs_readl(GUEST_S_CET);
>> + } else {
>> kvm_get_xsave_msr(msr_info);
>> + }
>> break;
>> case MSR_IA32_DEBUGCTLMSR:
>> msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>> @@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> vmx->pt_desc.guest.addr_a[index / 2] = data;
>> break;
>> case MSR_IA32_U_CET:
>> + case MSR_IA32_S_CET:
>> case MSR_IA32_PL3_SSP:
>> case MSR_KVM_GUEST_SSP:
>> if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
>> @@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> if ((msr_index == MSR_IA32_PL3_SSP ||
>> msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
>> return 1;
>> - if (msr_index == MSR_KVM_GUEST_SSP)
>> + if (msr_index == MSR_KVM_GUEST_SSP) {
>> vmcs_writel(GUEST_SSP, data);
>> - else
>> + } else if (msr_index == MSR_IA32_S_CET) {
>> + vmcs_writel(GUEST_S_CET, data);
>> + } else {
> Same here.
>
>> kvm_set_xsave_msr(msr_info);
>> + }
>> break;
>> case MSR_IA32_PERF_CAPABILITIES:
>> if (data && !vcpu_to_pmu(vcpu)->version)
>> @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>
>> kvm_wait_lapic_expire(vcpu);
>>
>> + /*
>> + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
>> + * No need to save the other two vmcs fields as supervisor SHSTK
>> + * are not enabled on Intel platform now.
>> + */
>> + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
>> + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
>> + u64 msr;
>> +
>> + rdmsrl(MSR_IA32_S_CET, msr);
> Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
> minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
> from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
> be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
> context.
Agree with you.
>
> But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
> i.e. is static post boot, which means it can be read once at KVM load time, e.g.
> just like host_efer.
I think handling S_CET like host_efer from a usage perspective is
possible given that currently only kernel IBT is enabled in the kernel;
I'll remove the code and initialize the VMCS field once, like host_efer.
>
> The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
> vCPU across a BIOS call then we've got bigger issues.
What's the problem you're referring to?
>
>> + vmcs_writel(HOST_S_CET, msr);
>> + }
>> +
>> /* The actual VMENTER/EXIT is in the .noinstr.text section. */
>> vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
>>
>> @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
>>
>> incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
>> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
>> +
>> + /*
>> + * If IBT is available to guest, then passthrough S_CET MSR too since
>> + * kernel IBT is already in mainline kernel tree.
>> + */
>> + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
>> + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
>> }
>>
>> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>> @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>> /* Refresh #PF interception to account for MAXPHYADDR changes. */
>> vmx_update_exception_bitmap(vcpu);
>>
>> - if (kvm_cet_user_supported())
>> + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
> Yeah, kvm_cet_user_supported() simply looks wrong.
These are preconditions for setting up CET MSRs for the guest; in
vmx_update_intercept_for_cet_msr(), the actual MSR interception control
is based on the guest_cpuid_has() results.
On Mon, Jun 26, 2023, Weijiang Yang wrote:
>
> On 6/24/2023 8:03 AM, Sean Christopherson wrote:
> > > @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > > kvm_wait_lapic_expire(vcpu);
> > > + /*
> > > + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
> > > + * No need to save the other two vmcs fields as supervisor SHSTK
> > > + * are not enabled on Intel platform now.
> > > + */
> > > + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
> > > + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
> > > + u64 msr;
> > > +
> > > + rdmsrl(MSR_IA32_S_CET, msr);
> > Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
> > minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
> > from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
> > be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
> > context.
>
> Agree with you.
>
> >
> > But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
> > i.e. is static post boot, which means it can be read once at KVM load time, e.g.
> > just like host_efer.
>
> I think handling S_CET like host_efer from usage perspective is possible
> given currently only
>
> kernel IBT is enabled in kernel, I'll remove the code and initialize the
> vmcs field once like host_efer.
>
> >
> > The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
> > vCPU across a BIOS call then we've got bigger issues.
>
> What's the problem you're referring to?
I was pointing out that S_CET isn't strictly constant, as it's saved/modified/restored
by ibt_save() + ibt_restore(). But KVM should never run between those paired
functions, so from KVM's perspective the host value is effectively constant.
> > > + vmcs_writel(HOST_S_CET, msr);
> > > + }
> > > +
> > > /* The actual VMENTER/EXIT is in the .noinstr.text section. */
> > > vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
> > > @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
> > > incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
> > > vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
> > > +
> > > + /*
> > > + * If IBT is available to guest, then passthrough S_CET MSR too since
> > > + * kernel IBT is already in mainline kernel tree.
> > > + */
> > > + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
> > > + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
> > > }
> > > static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > > @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > > /* Refresh #PF interception to account for MAXPHYADDR changes. */
> > > vmx_update_exception_bitmap(vcpu);
> > > - if (kvm_cet_user_supported())
> > > + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
> > Yeah, kvm_cet_user_supported() simply looks wrong.
>
> These are preconditions to set up CET MSRs for guest, in
> vmx_update_intercept_for_cet_msr(),
>
> the actual MSR control is based on guest_cpuid_has() results.
I know. My point is that with the below combination,
kvm_cet_user_supported() = true
kvm_cpu_cap_has(X86_FEATURE_IBT) = false
guest_cpuid_has(vcpu, X86_FEATURE_IBT) = true
KVM will passthrough MSR_IA32_S_CET for guest IBT even though IBT isn't supported
on the host.
incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
So either KVM is broken and is passing through S_CET when it shouldn't, or the
check on kvm_cet_user_supported() is redundant, i.e. the above combination is
impossible.
Either way, the code *looks* wrong, which is almost as bad as it being functionally
wrong.
On 6/27/2023 4:50 AM, Sean Christopherson wrote:
> On Mon, Jun 26, 2023, Weijiang Yang wrote:
>> On 6/24/2023 8:03 AM, Sean Christopherson wrote:
>>>> @@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>>> kvm_wait_lapic_expire(vcpu);
>>>> + /*
>>>> + * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
>>>> + * No need to save the other two vmcs fields as supervisor SHSTK
>>>> + * are not enabled on Intel platform now.
>>>> + */
>>>> + if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
>>>> + (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
>>>> + u64 msr;
>>>> +
>>>> + rdmsrl(MSR_IA32_S_CET, msr);
>>> Reading the MSR on every VM-Enter can't possibly be necessary. At the absolute
>>> minimum, this could be moved outside of the fastpath; if the kernel modifies S_CET
>>> from NMI context, KVM is hosed. And *if* S_CET isn't static post-boot, this can
>>> be done in .prepare_switch_to_guest() so long as S_CET isn't modified from IRQ
>>> context.
>> Agree with you.
>>
>>> But unless mine eyes deceive me, S_CET is only truly modified during setup_cet(),
>>> i.e. is static post boot, which means it can be read once at KVM load time, e.g.
>>> just like host_efer.
>> I think handling S_CET like host_efer from usage perspective is possible
>> given currently only
>>
>> kernel IBT is enabled in kernel, I'll remove the code and initialize the
>> vmcs field once like host_efer.
>>
>>> The kernel does save/restore IBT when making BIOS calls, but if KVM is running a
>>> vCPU across a BIOS call then we've got bigger issues.
>> What's the problem you're referring to?
> I was pointing out that S_CET isn't strictly constant, as it's saved/modified/restored
> by ibt_save() + ibt_restore(). But KVM should never run between those paired
> functions, so from KVM's perspective the host value is effectively constant.
Yeah, so I think the host S_CET setup can be handled like host_efer, thanks.
>
>>>> + vmcs_writel(HOST_S_CET, msr);
>>>> + }
>>>> +
>>>> /* The actual VMENTER/EXIT is in the .noinstr.text section. */
>>>> vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
>>>> @@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
>>>> incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
>>>> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
>>>> +
>>>> + /*
>>>> + * If IBT is available to guest, then passthrough S_CET MSR too since
>>>> + * kernel IBT is already in mainline kernel tree.
>>>> + */
>>>> + incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
>>>> + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
>>>> }
>>>> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>>>> @@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>>>> /* Refresh #PF interception to account for MAXPHYADDR changes. */
>>>> vmx_update_exception_bitmap(vcpu);
>>>> - if (kvm_cet_user_supported())
>>>> + if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
>>> Yeah, kvm_cet_user_supported() simply looks wrong.
>> These are preconditions to set up CET MSRs for guest, in
>> vmx_update_intercept_for_cet_msr(),
>>
>> the actual MSR control is based on guest_cpuid_has() results.
> I know. My point is that with the below combination,
>
> kvm_cet_user_supported() = true
> kvm_cpu_cap_has(X86_FEATURE_IBT) = false
> guest_cpuid_has(vcpu, X86_FEATURE_IBT) = true
>
> KVM will passthrough MSR_IA32_S_CET for guest IBT even though IBT isn't supported
> on the host.
>
> incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
> vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
>
> So either KVM is broken and is passing through S_CET when it shouldn't, or the
> check on kvm_cet_user_supported() is redundant, i.e. the above combination is
> impossible.
>
> Either way, the code *looks* wrong, which is almost as bad as it being functionally
> wrong.
Got your point; I'll refine the related code to make the handling reasonable.
@@ -664,6 +664,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_U_CET, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_S_CET, MSR_TYPE_RW);
+
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PL3_SSP, MSR_TYPE_RW);
@@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
return true;
case MSR_IA32_U_CET:
case MSR_IA32_PL3_SSP:
+ case MSR_IA32_S_CET:
return true;
}
@@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
case MSR_IA32_PL3_SSP:
case MSR_KVM_GUEST_SSP:
if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
return 1;
- if (msr_info->index == MSR_KVM_GUEST_SSP)
+ if (msr_info->index == MSR_KVM_GUEST_SSP) {
msr_info->data = vmcs_readl(GUEST_SSP);
- else
+ } else if (msr_info->index == MSR_IA32_S_CET) {
+ msr_info->data = vmcs_readl(GUEST_S_CET);
+ } else {
kvm_get_xsave_msr(msr_info);
+ }
break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
case MSR_IA32_PL3_SSP:
case MSR_KVM_GUEST_SSP:
if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
@@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr_index == MSR_IA32_PL3_SSP ||
msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
return 1;
- if (msr_index == MSR_KVM_GUEST_SSP)
+ if (msr_index == MSR_KVM_GUEST_SSP) {
vmcs_writel(GUEST_SSP, data);
- else
+ } else if (msr_index == MSR_IA32_S_CET) {
+ vmcs_writel(GUEST_S_CET, data);
+ } else {
kvm_set_xsave_msr(msr_info);
+ }
break;
case MSR_IA32_PERF_CAPABILITIES:
if (data && !vcpu_to_pmu(vcpu)->version)
@@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_wait_lapic_expire(vcpu);
+ /*
+ * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
+ * No need to save the other two vmcs fields as supervisor SHSTK
+ * are not enabled on Intel platform now.
+ */
+ if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+ (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
+ u64 msr;
+
+ rdmsrl(MSR_IA32_S_CET, msr);
+ vmcs_writel(HOST_S_CET, msr);
+ }
+
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
@@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
+
+ /*
+ * If IBT is available to guest, then passthrough S_CET MSR too since
+ * kernel IBT is already in mainline kernel tree.
+ */
+ incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
}
static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
@@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);
- if (kvm_cet_user_supported())
+ if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
vmx_update_intercept_for_cet_msr(vcpu);
}
@@ -1471,6 +1471,7 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
MSR_IA32_XSS,
MSR_IA32_U_CET, MSR_IA32_PL3_SSP, MSR_KVM_GUEST_SSP,
+ MSR_IA32_S_CET,
};
static const u32 msrs_to_save_pmu[] = {
@@ -13652,7 +13653,8 @@ EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
- if (!kvm_cet_user_supported())
+ if (!kvm_cet_user_supported() &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
return false;
if (msr->host_initiated)
@@ -13666,6 +13668,9 @@ bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (msr->index == MSR_KVM_GUEST_SSP)
return false;
+ if (msr->index == MSR_IA32_S_CET)
+ return guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+
if (msr->index == MSR_IA32_PL3_SSP &&
!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
return false;