[3/9] KVM: x86: SVM: Pass through shadow stack MSRs

Message ID 20231010200220.897953-4-john.allen@amd.com
State New
Series: SVM guest shadow stack support

Commit Message

John Allen Oct. 10, 2023, 8:02 p.m. UTC
  If kvm supports shadow stack, pass through shadow stack MSRs to improve
guest performance.

Signed-off-by: John Allen <john.allen@amd.com>
---
 arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.h |  2 +-
 2 files changed, 27 insertions(+), 1 deletion(-)
  

Comments

Nikunj A. Dadhania Oct. 12, 2023, 9:01 a.m. UTC | #1
On 10/11/2023 1:32 AM, John Allen wrote:
> If kvm supports shadow stack, pass through shadow stack MSRs to improve
> guest performance.
> 
> Signed-off-by: John Allen <john.allen@amd.com>
> ---
>  arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++
>  arch/x86/kvm/svm/svm.h |  2 +-
>  2 files changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index e435e4fbadda..984e89d7a734 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -139,6 +139,13 @@ static const struct svm_direct_access_msrs {
>  	{ .index = X2APIC_MSR(APIC_TMICT),		.always = false },
>  	{ .index = X2APIC_MSR(APIC_TMCCT),		.always = false },
>  	{ .index = X2APIC_MSR(APIC_TDCR),		.always = false },
> +	{ .index = MSR_IA32_U_CET,                      .always = false },
> +	{ .index = MSR_IA32_S_CET,                      .always = false },
> +	{ .index = MSR_IA32_INT_SSP_TAB,                .always = false },
> +	{ .index = MSR_IA32_PL0_SSP,                    .always = false },
> +	{ .index = MSR_IA32_PL1_SSP,                    .always = false },
> +	{ .index = MSR_IA32_PL2_SSP,                    .always = false },
> +	{ .index = MSR_IA32_PL3_SSP,                    .always = false },

The first three MSRs are emulated in patch 1; any specific reason for skipping MSR_IA32_PL[0-3]_SSP?

Regards,
Nikunj
  
John Allen Oct. 17, 2023, 6:17 p.m. UTC | #2
On Thu, Oct 12, 2023 at 02:31:19PM +0530, Nikunj A. Dadhania wrote:
> On 10/11/2023 1:32 AM, John Allen wrote:
> > If kvm supports shadow stack, pass through shadow stack MSRs to improve
> > guest performance.
> > 
> > Signed-off-by: John Allen <john.allen@amd.com>
> > ---
> >  arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++
> >  arch/x86/kvm/svm/svm.h |  2 +-
> >  2 files changed, 27 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > index e435e4fbadda..984e89d7a734 100644
> > --- a/arch/x86/kvm/svm/svm.c
> > +++ b/arch/x86/kvm/svm/svm.c
> > @@ -139,6 +139,13 @@ static const struct svm_direct_access_msrs {
> >  	{ .index = X2APIC_MSR(APIC_TMICT),		.always = false },
> >  	{ .index = X2APIC_MSR(APIC_TMCCT),		.always = false },
> >  	{ .index = X2APIC_MSR(APIC_TDCR),		.always = false },
> > +	{ .index = MSR_IA32_U_CET,                      .always = false },
> > +	{ .index = MSR_IA32_S_CET,                      .always = false },
> > +	{ .index = MSR_IA32_INT_SSP_TAB,                .always = false },
> > +	{ .index = MSR_IA32_PL0_SSP,                    .always = false },
> > +	{ .index = MSR_IA32_PL1_SSP,                    .always = false },
> > +	{ .index = MSR_IA32_PL2_SSP,                    .always = false },
> > +	{ .index = MSR_IA32_PL3_SSP,                    .always = false },
> 
> First three MSRs are emulated in the patch 1, any specific reason for skipping MSR_IA32_PL[0-3]_SSP ?

I'm not sure what you mean. The PLx_SSP MSRs should be getting passed
through here unless I'm misunderstanding something.

Thanks,
John
  
Nikunj A. Dadhania Oct. 18, 2023, 11:27 a.m. UTC | #3
On 10/17/2023 11:47 PM, John Allen wrote:
> On Thu, Oct 12, 2023 at 02:31:19PM +0530, Nikunj A. Dadhania wrote:
>> On 10/11/2023 1:32 AM, John Allen wrote:
>>> If kvm supports shadow stack, pass through shadow stack MSRs to improve
>>> guest performance.
>>>
>>> Signed-off-by: John Allen <john.allen@amd.com>
>>> ---
>>>  arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++
>>>  arch/x86/kvm/svm/svm.h |  2 +-
>>>  2 files changed, 27 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>>> index e435e4fbadda..984e89d7a734 100644
>>> --- a/arch/x86/kvm/svm/svm.c
>>> +++ b/arch/x86/kvm/svm/svm.c
>>> @@ -139,6 +139,13 @@ static const struct svm_direct_access_msrs {
>>>  	{ .index = X2APIC_MSR(APIC_TMICT),		.always = false },
>>>  	{ .index = X2APIC_MSR(APIC_TMCCT),		.always = false },
>>>  	{ .index = X2APIC_MSR(APIC_TDCR),		.always = false },
>>> +	{ .index = MSR_IA32_U_CET,                      .always = false },
>>> +	{ .index = MSR_IA32_S_CET,                      .always = false },
>>> +	{ .index = MSR_IA32_INT_SSP_TAB,                .always = false },
>>> +	{ .index = MSR_IA32_PL0_SSP,                    .always = false },
>>> +	{ .index = MSR_IA32_PL1_SSP,                    .always = false },
>>> +	{ .index = MSR_IA32_PL2_SSP,                    .always = false },
>>> +	{ .index = MSR_IA32_PL3_SSP,                    .always = false },
>>
>> First three MSRs are emulated in the patch 1, any specific reason for skipping MSR_IA32_PL[0-3]_SSP ?
> 
> I'm not sure what you mean. 

MSR_IA32_U_CET, MSR_IA32_S_CET and MSR_IA32_INT_SSP_TAB are selectively emulated, and there is no good explanation of why MSR_IA32_PL[0-3]_SSP do not need emulation. Moreover, MSR interception is initially set (i.e. .always = false) for MSR_IA32_PL[0-3]_SSP.

> The PLx_SSP MSRS should be getting passed
> through here unless I'm misunderstanding something.

In that case, the intercept should be cleared from the very beginning.

+	{ .index = MSR_IA32_PL0_SSP,                    .always = true },
+	{ .index = MSR_IA32_PL1_SSP,                    .always = true },
+	{ .index = MSR_IA32_PL2_SSP,                    .always = true },
+	{ .index = MSR_IA32_PL3_SSP,                    .always = true },

Regards
Nikunj
  
Maxim Levitsky Nov. 2, 2023, 6:05 p.m. UTC | #4
On Wed, 2023-10-18 at 16:57 +0530, Nikunj A. Dadhania wrote:
> On 10/17/2023 11:47 PM, John Allen wrote:
> > On Thu, Oct 12, 2023 at 02:31:19PM +0530, Nikunj A. Dadhania wrote:
> > > On 10/11/2023 1:32 AM, John Allen wrote:
> > > > If kvm supports shadow stack, pass through shadow stack MSRs to improve
> > > > guest performance.
> > > > 
> > > > Signed-off-by: John Allen <john.allen@amd.com>
> > > > ---
> > > >  arch/x86/kvm/svm/svm.c | 26 ++++++++++++++++++++++++++
> > > >  arch/x86/kvm/svm/svm.h |  2 +-
> > > >  2 files changed, 27 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > > > index e435e4fbadda..984e89d7a734 100644
> > > > --- a/arch/x86/kvm/svm/svm.c
> > > > +++ b/arch/x86/kvm/svm/svm.c
> > > > @@ -139,6 +139,13 @@ static const struct svm_direct_access_msrs {
> > > >  	{ .index = X2APIC_MSR(APIC_TMICT),		.always = false },
> > > >  	{ .index = X2APIC_MSR(APIC_TMCCT),		.always = false },
> > > >  	{ .index = X2APIC_MSR(APIC_TDCR),		.always = false },
> > > > +	{ .index = MSR_IA32_U_CET,                      .always = false },
> > > > +	{ .index = MSR_IA32_S_CET,                      .always = false },
> > > > +	{ .index = MSR_IA32_INT_SSP_TAB,                .always = false },
> > > > +	{ .index = MSR_IA32_PL0_SSP,                    .always = false },
> > > > +	{ .index = MSR_IA32_PL1_SSP,                    .always = false },
> > > > +	{ .index = MSR_IA32_PL2_SSP,                    .always = false },
> > > > +	{ .index = MSR_IA32_PL3_SSP,                    .always = false },
> > > 
> > > First three MSRs are emulated in the patch 1, any specific reason for skipping MSR_IA32_PL[0-3]_SSP ?
> > 
> > I'm not sure what you mean. 
> 
> MSR_IA32_U_CET, MSR_IA32_S_CET and MSR_IA32_INT_SSP_TAB are selectively emulated and there is no good explanation why MSR_IA32_PL[0-3]_SSP do not need emulation. Moreover, MSR interception is initially set (i.e. always=false) for MSR_IA32_PL[0-3]_SSP.
> 

Let me explain:

Passing through an MSR and having code for reading/writing it in svm_set_msr/svm_get_msr are two different things:

Passing through an MSR means that the guest can read/write the MSR freely (assuming
this can't cause harm to the host), while either KVM or the hardware usually swaps
the guest MSR value with the host value on VM entry/exit.

Therefore, the function of svm_set_msr/svm_get_msr is to access the saved guest MSR
value while the guest is not running, for various use cases (for example, migration).

In the case of MSR_IA32_U_CET, MSR_IA32_S_CET and MSR_IA32_INT_SSP_TAB, the hardware
itself loads the guest values from the VMCB on VM entry and saves the modified guest
values to the VMCB on VM exit, thus the correct way of reading/writing the guest
value is to read/write the corresponding VMCB field.

Now, why are the other CET MSRs not handled by svm_set_msr/svm_get_msr?
The answer is that those MSRs are not saved/restored by SVM microcode; instead,
their guest values remain in the registers across VM exit.

The common CET code, which I finally finished reviewing last night, context switches
them together with the rest of the guest FPU state using the XSAVES/XRSTORS
instructions, and if KVM wants to access these MSRs, the common code first loads the
guest FPU state and then reads/writes them using regular RDMSR/WRMSR.
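
To make this concrete, here is a minimal sketch of the two access paths. The VMCB
field (save.s_cet) and the FPU helpers (kvm_fpu_get/kvm_fpu_put) are assumed names
for illustration, not necessarily what the actual patches use:

static int svm_get_cet_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	switch (msr_info->index) {
	case MSR_IA32_S_CET:
		/*
		 * Hardware context switches this value via the VMCB, so the
		 * saved guest value lives in the VMCB save area (field name
		 * assumed here).
		 */
		msr_info->data = svm->vmcb->save.s_cet;
		break;
	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
		/*
		 * Not saved/restored by SVM microcode; the guest value is
		 * part of the guest FPU (XSAVES) state, so load that state
		 * and read the MSR directly (helper names assumed).
		 */
		kvm_fpu_get();
		rdmsrl(msr_info->index, msr_info->data);
		kvm_fpu_put();
		break;
	default:
		return 1;
	}
	return 0;
}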


> > The PLx_SSP MSRS should be getting passed
> > through here unless I'm misunderstanding something.
> 
> In that case, intercept should be cleared from the very beginning.
> 
> +	{ .index = MSR_IA32_PL0_SSP,                    .always = true },
> +	{ .index = MSR_IA32_PL1_SSP,                    .always = true },
> +	{ .index = MSR_IA32_PL2_SSP,                    .always = true },
> +	{ .index = MSR_IA32_PL3_SSP,                    .always = true },

.always is only true when an MSR is *always* passed through. CET MSRs are only passed through when CET is supported.

Therefore I don't expect that we'll ever add another MSR to this list with .always = true.

In fact, .always = true for the x86_64 arch MSRs like MSR_GS_BASE/MSR_FS_BASE is not
100% correct either - when we start a VM that doesn't have the CPUID bit
X86_FEATURE_LM, these MSRs should not exist. I think we have a kvm-unit-test that
fails because of this on 32-bit, but I haven't bothered to fix it yet.

.always probably needs to be dropped completely.
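
For reference, this is roughly how .always is consumed at vCPU init (a sketch
modeled on svm_vcpu_init_msrpm() in svm.c; details may differ by kernel version):

static void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
{
	int i;

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		if (!direct_access_msrs[i].always)
			continue;

		/* 1, 1 == allow non-intercepted read and write */
		set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
	}
}

Everything else (non-.always MSRs, like the CET ones here) only gets its intercept
cleared later, e.g. from init_vmcb_after_set_cpuid() as this patch does.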


So IMHO this patch is correct (I might have missed something though):

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky

> 
> Regards
> Nikunj
> 
>
  
Sean Christopherson Nov. 6, 2023, 4:45 p.m. UTC | #5
On Thu, Nov 02, 2023, Maxim Levitsky wrote:
> On Wed, 2023-10-18 at 16:57 +0530, Nikunj A. Dadhania wrote:
> > On 10/17/2023 11:47 PM, John Allen wrote:
> > In that case, intercept should be cleared from the very beginning.
> > 
> > +	{ .index = MSR_IA32_PL0_SSP,                    .always = true },
> > +	{ .index = MSR_IA32_PL1_SSP,                    .always = true },
> > +	{ .index = MSR_IA32_PL2_SSP,                    .always = true },
> > +	{ .index = MSR_IA32_PL3_SSP,                    .always = true },
> 
> .always is only true when a MSR is *always* passed through. CET msrs are only
> passed through when CET is supported.
> 
> Therefore I don't expect that we ever add another msr to this list which has
> .always = true.
> 
> In fact the .always = True for X86_64 arch msrs like MSR_GS_BASE/MSR_FS_BASE
> and such is not 100% correct too - when we start a VM which doesn't have
> cpuid bit X86_FEATURE_LM, these msrs should not exist and I think that we
> have a kvm unit test that fails because of this on 32 bit but I didn't bother
> yet to fix it.
> 
> .always probably needs to be dropped completely.

FWIW, I have a half-baked series to clean up SVM's MSR interception code and
converge the SVM and VMX APIs.  E.g. set_msr_interception_bitmap()'s inverted
polarity confuses me every time I look at its usage.
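
As a concrete example of the trap (same calling convention the patch below uses),
the 1/0 read/write arguments mean "allow direct access", not "intercept":

/* Despite the name, non-zero read/write arguments *disable* interception. */
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_U_CET, 1, 1);	/* pass through */
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_U_CET, 0, 0);	/* intercept */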

I can hunt down the branch if someone plans on tackling this code.
  
Maxim Levitsky Nov. 7, 2023, 6:20 p.m. UTC | #6
On Mon, 2023-11-06 at 08:45 -0800, Sean Christopherson wrote:
> On Thu, Nov 02, 2023, Maxim Levitsky wrote:
> > On Wed, 2023-10-18 at 16:57 +0530, Nikunj A. Dadhania wrote:
> > > On 10/17/2023 11:47 PM, John Allen wrote:
> > > In that case, intercept should be cleared from the very beginning.
> > > 
> > > +	{ .index = MSR_IA32_PL0_SSP,                    .always = true },
> > > +	{ .index = MSR_IA32_PL1_SSP,                    .always = true },
> > > +	{ .index = MSR_IA32_PL2_SSP,                    .always = true },
> > > +	{ .index = MSR_IA32_PL3_SSP,                    .always = true },
> > 
> > .always is only true when a MSR is *always* passed through. CET msrs are only
> > passed through when CET is supported.
> > 
> > Therefore I don't expect that we ever add another msr to this list which has
> > .always = true.
> > 
> > In fact the .always = True for X86_64 arch msrs like MSR_GS_BASE/MSR_FS_BASE
> > and such is not 100% correct too - when we start a VM which doesn't have
> > cpuid bit X86_FEATURE_LM, these msrs should not exist and I think that we
> > have a kvm unit test that fails because of this on 32 bit but I didn't bother
> > yet to fix it.
> > 
> > .always probably needs to be dropped completely.
> 
> FWIW, I have a half-baked series to clean up SVM's MSR interception code and
> converge the SVM and VMX APIs.  E.g. set_msr_interception_bitmap()'s inverted
> polarity confuses me every time I look at its usage.

100% agree. Any refactoring here is very welcome!

> 
> I can hunt down the branch if someone plans on tackling this code.

One of the things I don't like that much in the SVM MSR bitmap code
is that it uses L1's MSR bitmap when the guest's MSR interception is disabled,
and the combined MSR bitmap otherwise.

This is very fragile and one mistake away from a CVE.
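
Roughly, the combined bitmap has to intercept whenever either L0 or L1 wants the
intercept, i.e. an OR of the two bitmaps (a sketch in the spirit of
nested_svm_vmrun_msrpm(); the variable names are made up):

static void merge_msrpm(u32 *merged, const u32 *l0_msrpm, const u32 *l1_msrpm)
{
	int i;

	/* A set bit means "intercept": intercept if L0 *or* L1 asks for it. */
	for (i = 0; i < MSRPM_SIZE / sizeof(u32); i++)
		merged[i] = l0_msrpm[i] | l1_msrpm[i];
}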

Since no sane L1 hypervisor will ever allow L2 access to all of its MSRs,
it might make sense to always use a dedicated MSR bitmap for L2.

Also, the fact that all sane L1 hypervisors do use an MSR bitmap means that
the dedicated code path that doesn't use one is not well tested.

On VMX, if I am not mistaken, this is not an issue because either all
MSRs are intercepted or a bitmap is used.

> 

Best regards,
	Maxim Levitsky
  
Sean Christopherson Nov. 7, 2023, 11:10 p.m. UTC | #7
On Tue, Nov 07, 2023, Maxim Levitsky wrote:
> Since no sane L1 hypervisor will ever allow access to all its msrs from L2,
> it might make sense to always use a dedicated MSR bitmap for L2.

Hmm, there might be a full passthrough use case out there, but in general, yeah,
I agree.  I think even kernel hardening use cases where the "hypervisor" is just
a lowvisor would utilize MSR bitmaps to prevent the de-privileged kernel
from modifying select MSRs.

> Also since all sane L1 hypervisors do use a msr bitmap means that
> dedicated code path that doesn't use it is not well tested.
> 
> On VMX if I am not mistaken, this is not an issue because either all
> MSRS are intercepted or a bitmap is used.

Yep, if the MSR bitmaps aren't used then all MSR accesses are intercepted.
  

Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e435e4fbadda..984e89d7a734 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -139,6 +139,13 @@  static const struct svm_direct_access_msrs {
 	{ .index = X2APIC_MSR(APIC_TMICT),		.always = false },
 	{ .index = X2APIC_MSR(APIC_TMCCT),		.always = false },
 	{ .index = X2APIC_MSR(APIC_TDCR),		.always = false },
+	{ .index = MSR_IA32_U_CET,                      .always = false },
+	{ .index = MSR_IA32_S_CET,                      .always = false },
+	{ .index = MSR_IA32_INT_SSP_TAB,                .always = false },
+	{ .index = MSR_IA32_PL0_SSP,                    .always = false },
+	{ .index = MSR_IA32_PL1_SSP,                    .always = false },
+	{ .index = MSR_IA32_PL2_SSP,                    .always = false },
+	{ .index = MSR_IA32_PL3_SSP,                    .always = false },
 	{ .index = MSR_INVALID,				.always = false },
 };
 
@@ -1225,6 +1232,25 @@  static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
 		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
 	}
+
+	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+		bool shstk_enabled = guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
+
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_U_CET,
+				     shstk_enabled, shstk_enabled);
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_S_CET,
+				     shstk_enabled, shstk_enabled);
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_INT_SSP_TAB,
+				     shstk_enabled, shstk_enabled);
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PL0_SSP,
+				     shstk_enabled, shstk_enabled);
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PL1_SSP,
+				     shstk_enabled, shstk_enabled);
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PL2_SSP,
+				     shstk_enabled, shstk_enabled);
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PL3_SSP,
+				     shstk_enabled, shstk_enabled);
+	}
 }
 
 static void init_vmcb(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f41253958357..bdc39003b955 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@ 
 #define	IOPM_SIZE PAGE_SIZE * 3
 #define	MSRPM_SIZE PAGE_SIZE * 2
 
-#define MAX_DIRECT_ACCESS_MSRS	46
+#define MAX_DIRECT_ACCESS_MSRS	53
 #define MSRPM_OFFSETS	32
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;