[RFC,v1,2/6] x86/sev: Add support for NestedVirtSnpMsr

Message ID 20230123165128.28185-3-jpiotrowski@linux.microsoft.com
State: New
Series: Support nested SNP KVM guests on Hyper-V

Commit Message

Jeremi Piotrowski Jan. 23, 2023, 4:51 p.m. UTC
  The rmpupdate and psmash instructions, which are used in AMD's SEV-SNP
to update the RMP (Reverse Map) table, can't be trapped. For nested
scenarios, AMD defined MSR versions of these instructions which can be
emulated by the top-level hypervisor. One instance where these MSRs are
used are Hyper-V VMs which expose SNP isolation features to the guest.

The MSRs are defined in "AMD64 Architecture Programmer’s Manual, Volume 2:
System Programming", section 15.36.19.

Signed-off-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/msr-index.h   |  2 +
 arch/x86/kernel/sev.c              | 62 +++++++++++++++++++++++++-----
 3 files changed, 55 insertions(+), 10 deletions(-)
  

Comments

Michael Kelley (LINUX) Jan. 28, 2023, 7:48 p.m. UTC | #1
From: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> Sent: Monday, January 23, 2023 8:51 AM
> 
> The rmpupdate and psmash instructions, which are used in AMD's SEV-SNP
> to update the RMP (Reverse Map) table, can't be trapped. For nested
> scenarios, AMD defined MSR versions of these instructions which can be

s/can be/must be/  ??

> emulated by the top-level hypervisor. One instance where these MSRs are

And by "top-level", I think you are referring the hypervisor running at L1, right?
Using the L0/L1/L2 terminology would probably help make the description
more precise.

> used are Hyper-V VMs which expose SNP isolation features to the guest.
> 
> The MSRs are defined in "AMD64 Architecture Programmer’s Manual, Volume 2:
> System Programming", section 15.36.19.
> 
> Signed-off-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
> ---
>  arch/x86/include/asm/cpufeatures.h |  1 +
>  arch/x86/include/asm/msr-index.h   |  2 +
>  arch/x86/kernel/sev.c              | 62 +++++++++++++++++++++++++-----
>  3 files changed, 55 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 480b4eaef310..e6e2e824f67b 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -423,6 +423,7 @@
>  #define X86_FEATURE_SEV_SNP		(19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
>  #define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* "" Virtual TSC_AUX */
>  #define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
> +#define X86_FEATURE_NESTED_VIRT_SNP_MSR	(19*32+29) /* Virtualizable RMPUPDATE and PSMASH MSR available */
> 
>  /*
>   * BUG word(s)
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 35100c630617..d6103e607896 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -567,6 +567,8 @@
>  #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
>  #define MSR_AMD64_RMP_BASE		0xc0010132
>  #define MSR_AMD64_RMP_END		0xc0010133
> +#define MSR_AMD64_VIRT_RMPUPDATE	0xc001f001
> +#define MSR_AMD64_VIRT_PSMASH		0xc001f002
> 
>  #define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
> 
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index 7fa39dc17edd..95404c7e5150 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -2566,6 +2566,24 @@ int snp_lookup_rmpentry(u64 pfn, int *level)
>  }
>  EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
> 
> +static bool virt_snp_msr(void)
> +{
> +	return boot_cpu_has(X86_FEATURE_NESTED_VIRT_SNP_MSR);
> +}
> +
> +static u64 virt_psmash(u64 paddr)
> +{
> +	int ret;
> +
> +	asm volatile(
> +		"wrmsr\n\t"
> +		: "=a"(ret)
> +		: "a"(paddr), "c"(MSR_AMD64_VIRT_PSMASH)
> +		: "memory", "cc"
> +	);
> +	return ret;
> +}

From checking the AMD spec, I can see that the above use
of wrmsr is non-conventional.  Could you capture the basics
of the usage paradigm in a comment?  I.e., the expected
inputs and outputs, and the core assumption that the
MSR isn't implemented in hardware, but must trap
to the hypervisor.

> +
>  /*
>   * psmash is used to smash a 2MB aligned page into 4K
>   * pages while preserving the Validated bit in the RMP.
> @@ -2581,11 +2599,15 @@ int psmash(u64 pfn)
>  	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
>  		return -ENXIO;
> 
> -	/* Binutils version 2.36 supports the PSMASH mnemonic. */
> -	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
> -		      : "=a"(ret)
> -		      : "a"(paddr)
> -		      : "memory", "cc");
> +	if (virt_snp_msr()) {
> +		ret = virt_psmash(paddr);
> +	} else {
> +		/* Binutils version 2.36 supports the PSMASH mnemonic. */
> +		asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
> +			      : "=a"(ret)
> +			      : "a"(paddr)
> +			      : "memory", "cc");
> +	}
> 
>  	return ret;
>  }
> @@ -2601,6 +2623,21 @@ static int invalidate_direct_map(unsigned long pfn, int npages)
>  	return set_memory_np((unsigned long)pfn_to_kaddr(pfn), npages);
>  }
> 
> +static u64 virt_rmpupdate(unsigned long paddr, struct rmp_state *val)
> +{
> +	int ret;
> +	register u64 hi asm("r8") = ((u64 *)val)[1];
> +	register u64 lo asm("rdx") = ((u64 *)val)[0];
> +
> +	asm volatile(
> +		"wrmsr\n\t"
> +		: "=a"(ret)
> +		: "a"(paddr), "c"(MSR_AMD64_VIRT_RMPUPDATE), "r"(lo), "r"(hi)
> +		: "memory", "cc"
> +	);
> +	return ret;
> +}

Same here about a comment capturing the expected inputs
and outputs.

> +
>  static int rmpupdate(u64 pfn, struct rmp_state *val)
>  {
>  	unsigned long paddr = pfn << PAGE_SHIFT;
> @@ -2626,11 +2663,16 @@ static int rmpupdate(u64 pfn, struct rmp_state *val)
>  	}
> 
>  retry:
> -	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
> -	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
> -		     : "=a"(ret)
> -		     : "a"(paddr), "c"((unsigned long)val)
> -		     : "memory", "cc");
> +
> +	if (virt_snp_msr()) {
> +		ret = virt_rmpupdate(paddr, val);
> +	} else {
> +		/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
> +		asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
> +			     : "=a"(ret)
> +			     : "a"(paddr), "c"((unsigned long)val)
> +			     : "memory", "cc");
> +	}
> 
>  	if (ret) {
>  		if (!retries) {
> --
> 2.25.1
  
Jeremi Piotrowski Jan. 30, 2023, 3:25 p.m. UTC | #2
On Sat, Jan 28, 2023 at 07:48:27PM +0000, Michael Kelley (LINUX) wrote:
> From: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> Sent: Monday, January 23, 2023 8:51 AM
> > 
> > The rmpupdate and psmash instructions, which are used in AMD's SEV-SNP
> > to update the RMP (Reverse Map) table, can't be trapped. For nested
> > scenarios, AMD defined MSR versions of these instructions which can be
> 
> s/can be/must be/  ??
> 

yes indeed

> > emulated by the top-level hypervisor. One instance where these MSRs are
> 
> And by "top-level", I think you are referring the hypervisor running at L1, right?
> Using the L0/L1/L2 terminology would probably help make the description
> more precise.

These instructions are called by the L1 hypervisor and are emulated by the L0
hypervisor which controls the actual rmp table. I'll rephrase the commit
message to make that clearer.

> 
> > used are Hyper-V VMs which expose SNP isolation features to the guest.
> > 
> > The MSRs are defined in "AMD64 Architecture Programmer’s Manual, Volume 2:
> > System Programming", section 15.36.19.
> > 
> > Signed-off-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
> > ---
> >  arch/x86/include/asm/cpufeatures.h |  1 +
> >  arch/x86/include/asm/msr-index.h   |  2 +
> >  arch/x86/kernel/sev.c              | 62 +++++++++++++++++++++++++-----
> >  3 files changed, 55 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> > index 480b4eaef310..e6e2e824f67b 100644
> > --- a/arch/x86/include/asm/cpufeatures.h
> > +++ b/arch/x86/include/asm/cpufeatures.h
> > @@ -423,6 +423,7 @@
> >  #define X86_FEATURE_SEV_SNP		(19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
> >  #define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* "" Virtual TSC_AUX */
> >  #define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
> > +#define X86_FEATURE_NESTED_VIRT_SNP_MSR	(19*32+29) /* Virtualizable RMPUPDATE and PSMASH MSR available */
> > 
> >  /*
> >   * BUG word(s)
> > diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> > index 35100c630617..d6103e607896 100644
> > --- a/arch/x86/include/asm/msr-index.h
> > +++ b/arch/x86/include/asm/msr-index.h
> > @@ -567,6 +567,8 @@
> >  #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
> >  #define MSR_AMD64_RMP_BASE		0xc0010132
> >  #define MSR_AMD64_RMP_END		0xc0010133
> > +#define MSR_AMD64_VIRT_RMPUPDATE	0xc001f001
> > +#define MSR_AMD64_VIRT_PSMASH		0xc001f002
> > 
> >  #define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
> > 
> > diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> > index 7fa39dc17edd..95404c7e5150 100644
> > --- a/arch/x86/kernel/sev.c
> > +++ b/arch/x86/kernel/sev.c
> > @@ -2566,6 +2566,24 @@ int snp_lookup_rmpentry(u64 pfn, int *level)
> >  }
> >  EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
> > 
> > +static bool virt_snp_msr(void)
> > +{
> > +	return boot_cpu_has(X86_FEATURE_NESTED_VIRT_SNP_MSR);
> > +}
> > +
> > +static u64 virt_psmash(u64 paddr)
> > +{
> > +	int ret;
> > +
> > +	asm volatile(
> > +		"wrmsr\n\t"
> > +		: "=a"(ret)
> > +		: "a"(paddr), "c"(MSR_AMD64_VIRT_PSMASH)
> > +		: "memory", "cc"
> > +	);
> > +	return ret;
> > +}
> 
> From checking the AMD spec, I can see that the above use
> of wrmsr is non-conventional.  Could you capture the basics
> of the usage paradigm in a comment?  I.e., the expected
> inputs and outputs, and the core assumption that the
> MSR isn't implemented in hardware, but must trap
> to the hypervisor.

ok, how does this sound:

/*
 * This version of rmpupdate is not implemented in hardware but always
 * traps to L0 hypervisor. It doesn't follow usual wrmsr conventions.
 * Inputs:
 *   rax: 4KB aligned GPA
 *   rdx: bytes 7:0 of new rmp entry
 *   r8:  bytes 15:8 of new rmp entry
 * Outputs:
 *   rax: rmpupdate return code
 */

and similar for psmash.
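
Concretely, something like this (a sketch, assuming the same trap-to-L0 behaviour and the register usage of virt_psmash() above, i.e. 2MB aligned GPA in rax, return code back in rax):

/*
 * This version of psmash is not implemented in hardware but always
 * traps to the L0 hypervisor. It doesn't follow usual wrmsr conventions.
 * Inputs:
 *   rax: 2MB aligned GPA
 * Outputs:
 *   rax: psmash return code
 */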

> 
> > +
> >  /*
> >   * psmash is used to smash a 2MB aligned page into 4K
> >   * pages while preserving the Validated bit in the RMP.
> > @@ -2581,11 +2599,15 @@ int psmash(u64 pfn)
> >  	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> >  		return -ENXIO;
> > 
> > -	/* Binutils version 2.36 supports the PSMASH mnemonic. */
> > -	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
> > -		      : "=a"(ret)
> > -		      : "a"(paddr)
> > -		      : "memory", "cc");
> > +	if (virt_snp_msr()) {
> > +		ret = virt_psmash(paddr);
> > +	} else {
> > +		/* Binutils version 2.36 supports the PSMASH mnemonic. */
> > +		asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
> > +			      : "=a"(ret)
> > +			      : "a"(paddr)
> > +			      : "memory", "cc");
> > +	}
> > 
> >  	return ret;
> >  }
> > @@ -2601,6 +2623,21 @@ static int invalidate_direct_map(unsigned long pfn, int npages)
> >  	return set_memory_np((unsigned long)pfn_to_kaddr(pfn), npages);
> >  }
> > 
> > +static u64 virt_rmpupdate(unsigned long paddr, struct rmp_state *val)
> > +{
> > +	int ret;
> > +	register u64 hi asm("r8") = ((u64 *)val)[1];
> > +	register u64 lo asm("rdx") = ((u64 *)val)[0];
> > +
> > +	asm volatile(
> > +		"wrmsr\n\t"
> > +		: "=a"(ret)
> > +		: "a"(paddr), "c"(MSR_AMD64_VIRT_RMPUPDATE), "r"(lo), "r"(hi)
> > +		: "memory", "cc"
> > +	);
> > +	return ret;
> > +}
> 
> Same here about a comment capturing the expected inputs
> and outputs.

ok

> 
> > +
> >  static int rmpupdate(u64 pfn, struct rmp_state *val)
> >  {
> >  	unsigned long paddr = pfn << PAGE_SHIFT;
> > @@ -2626,11 +2663,16 @@ static int rmpupdate(u64 pfn, struct rmp_state *val)
> >  	}
> > 
> >  retry:
> > -	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
> > -	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
> > -		     : "=a"(ret)
> > -		     : "a"(paddr), "c"((unsigned long)val)
> > -		     : "memory", "cc");
> > +
> > +	if (virt_snp_msr()) {
> > +		ret = virt_rmpupdate(paddr, val);
> > +	} else {
> > +		/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
> > +		asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
> > +			     : "=a"(ret)
> > +			     : "a"(paddr), "c"((unsigned long)val)
> > +			     : "memory", "cc");
> > +	}
> > 
> >  	if (ret) {
> >  		if (!retries) {
> > --
> > 2.25.1
>
  
Michael Kelley (LINUX) Jan. 30, 2023, 3:39 p.m. UTC | #3
From: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> Sent: Monday, January 30, 2023 7:26 AM
> 
> On Sat, Jan 28, 2023 at 07:48:27PM +0000, Michael Kelley (LINUX) wrote:
> > From: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com> Sent: Monday, January 23, 2023 8:51 AM
> > >
> > > The rmpupdate and psmash instructions, which are used in AMD's SEV-SNP
> > > to update the RMP (Reverse Map) table, can't be trapped. For nested
> > > scenarios, AMD defined MSR versions of these instructions which can be
> >
> > s/can be/must be/  ??
> >
> 
> yes indeed
> 
> > > emulated by the top-level hypervisor. One instance where these MSRs are
> >
> > And by "top-level", I think you are referring the hypervisor running at L1, right?
> > Using the L0/L1/L2 terminology would probably help make the description
> > more precise.
> 
> These instructions are called by the L1 hypervisor and are emulated by the L0
> hypervisor which controls the actual rmp table. I'll rephrase the commit
> message to make that clearer.
> 

[snip]

> > > +
> > > +static u64 virt_psmash(u64 paddr)
> > > +{
> > > +	int ret;
> > > +
> > > +	asm volatile(
> > > +		"wrmsr\n\t"
> > > +		: "=a"(ret)
> > > +		: "a"(paddr), "c"(MSR_AMD64_VIRT_PSMASH)
> > > +		: "memory", "cc"
> > > +	);
> > > +	return ret;
> > > +}
> >
> > From checking the AMD spec, I can see that the above use
> > of wrmsr is non-conventional.  Could you capture the basics
> > of the usage paradigm in a comment?  I.e., the expected
> > inputs and outputs, and the core assumption that the
> > MSR isn't implemented in hardware, but must trap
> > to the hypervisor.
> 
> ok, how does this sound:
> 
> /*
>  * This version of rmpupdate is not implemented in hardware but always
>  * traps to L0 hypervisor. It doesn't follow usual wrmsr conventions.
>  * Inputs:
>  *   rax: 4KB aligned GPA
>  *   rdx: bytes 7:0 of new rmp entry
>  *   r8:  bytes 15:8 of new rmp entry
>  * Outputs:
>  *   rax: rmpupdate return code
>  */
> 
> and similar for psmash.
> 

Yes, that works for me.

Michael
  

Patch

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 480b4eaef310..e6e2e824f67b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -423,6 +423,7 @@ 
 #define X86_FEATURE_SEV_SNP		(19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
 #define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* "" Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_NESTED_VIRT_SNP_MSR	(19*32+29) /* Virtualizable RMPUPDATE and PSMASH MSR available */
 
 /*
  * BUG word(s)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 35100c630617..d6103e607896 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -567,6 +567,8 @@ 
 #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
 #define MSR_AMD64_RMP_BASE		0xc0010132
 #define MSR_AMD64_RMP_END		0xc0010133
+#define MSR_AMD64_VIRT_RMPUPDATE	0xc001f001
+#define MSR_AMD64_VIRT_PSMASH		0xc001f002
 
 #define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
 
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 7fa39dc17edd..95404c7e5150 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2566,6 +2566,24 @@  int snp_lookup_rmpentry(u64 pfn, int *level)
 }
 EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
 
+static bool virt_snp_msr(void)
+{
+	return boot_cpu_has(X86_FEATURE_NESTED_VIRT_SNP_MSR);
+}
+
+static u64 virt_psmash(u64 paddr)
+{
+	int ret;
+
+	asm volatile(
+		"wrmsr\n\t"
+		: "=a"(ret)
+		: "a"(paddr), "c"(MSR_AMD64_VIRT_PSMASH)
+		: "memory", "cc"
+	);
+	return ret;
+}
+
 /*
  * psmash is used to smash a 2MB aligned page into 4K
  * pages while preserving the Validated bit in the RMP.
@@ -2581,11 +2599,15 @@  int psmash(u64 pfn)
 	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
 		return -ENXIO;
 
-	/* Binutils version 2.36 supports the PSMASH mnemonic. */
-	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
-		      : "=a"(ret)
-		      : "a"(paddr)
-		      : "memory", "cc");
+	if (virt_snp_msr()) {
+		ret = virt_psmash(paddr);
+	} else {
+		/* Binutils version 2.36 supports the PSMASH mnemonic. */
+		asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
+			      : "=a"(ret)
+			      : "a"(paddr)
+			      : "memory", "cc");
+	}
 
 	return ret;
 }
@@ -2601,6 +2623,21 @@  static int invalidate_direct_map(unsigned long pfn, int npages)
 	return set_memory_np((unsigned long)pfn_to_kaddr(pfn), npages);
 }
 
+static u64 virt_rmpupdate(unsigned long paddr, struct rmp_state *val)
+{
+	int ret;
+	register u64 hi asm("r8") = ((u64 *)val)[1];
+	register u64 lo asm("rdx") = ((u64 *)val)[0];
+
+	asm volatile(
+		"wrmsr\n\t"
+		: "=a"(ret)
+		: "a"(paddr), "c"(MSR_AMD64_VIRT_RMPUPDATE), "r"(lo), "r"(hi)
+		: "memory", "cc"
+	);
+	return ret;
+}
+
 static int rmpupdate(u64 pfn, struct rmp_state *val)
 {
 	unsigned long paddr = pfn << PAGE_SHIFT;
@@ -2626,11 +2663,16 @@  static int rmpupdate(u64 pfn, struct rmp_state *val)
 	}
 
 retry:
-	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
-	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
-		     : "=a"(ret)
-		     : "a"(paddr), "c"((unsigned long)val)
-		     : "memory", "cc");
+
+	if (virt_snp_msr()) {
+		ret = virt_rmpupdate(paddr, val);
+	} else {
+		/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
+		asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
+			     : "=a"(ret)
+			     : "a"(paddr), "c"((unsigned long)val)
+			     : "memory", "cc");
+	}
 
 	if (ret) {
 		if (!retries) {