[v6,01/21] x86/tdx: Use enum to define page level of TDX supported page sizes

Message ID 8a5b40d43f8b993a48b99d6647b16a82b433627c.1666824663.git.kai.huang@intel.com
State New
Headers
Series TDX host kernel support |

Commit Message

Kai Huang Oct. 26, 2022, 11:16 p.m. UTC
  TDX supports 4K, 2M and 1G page sizes.  When TDX guest accepts one page
via try_accept_one(), it passes the page size level to the TDX module.
Currently try_accept_one() uses hard-coded magic number for that.

Introduce a new enum type to represent the page level of TDX supported
page sizes to replace the hard-coded values.  Both initializing the TDX
module and KVM TDX support will need to use that too.

Also, currently try_accept_one() uses an open-coded switch statement to
get the TDX page level from the kernel page level.  As KVM will also
need to do the same thing, introduce a common helper to convert the
kernel page level to the TDX page level.

Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
---
 arch/x86/coco/tdx/tdx.c    | 20 ++++----------------
 arch/x86/include/asm/tdx.h | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 16 deletions(-)
  

Comments

Xiaoyao Li Oct. 27, 2022, 7:08 a.m. UTC | #1
On 10/27/2022 7:16 AM, Kai Huang wrote:
> TDX supports 4K, 2M and 1G page sizes.  When TDX guest accepts one page
> via try_accept_one(), it passes the page size level to the TDX module.
> Currently try_accept_one() uses hard-coded magic number for that.
> 
> Introduce a new enum type to represent the page level of TDX supported
> page sizes to replace the hard-coded values.  Both initializing the TDX
> module and KVM TDX support will need to use that too.
> 
> Also, currently try_accept_one() uses an open-coded switch statement to
> get the TDX page level from the kernel page level.  As KVM will also
> need to do the same thing, introduce a common helper to convert the
> kernel page level to the TDX page level.
> 
> Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Signed-off-by: Kai Huang <kai.huang@intel.com>
> ---
>   arch/x86/coco/tdx/tdx.c    | 20 ++++----------------
>   arch/x86/include/asm/tdx.h | 33 +++++++++++++++++++++++++++++++++
>   2 files changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 928dcf7a20d9..c5ff9647213d 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -655,7 +655,6 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
>   {
>   	unsigned long accept_size = page_level_size(pg_level);
>   	u64 tdcall_rcx;
> -	u8 page_size;
>   
>   	if (!IS_ALIGNED(*start, accept_size))
>   		return false;
> @@ -663,27 +662,16 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
>   	if (len < accept_size)
>   		return false;
>   
> +	/* TDX only supports 4K/2M/1G page sizes */

yes, a page can be mapped as 1G size to TD via secure/shared EPT. But 
for this particular TDX_ACCEPT_PAGE case, it only supports 4K and 2M 
currently, which is defined in TDX module spec.

This also implies one thing that can be improved in the current kernel:
the attempt in tdx_enc_status_changed() to accept a page starting from 1G
can be optimized to start from 2M. It can be changed to start from 1G once
TDX supports accepting 1G pages directly.

> +	if (pg_level < PG_LEVEL_4K || pg_level > PG_LEVEL_1G)
> +		return false;
>   	/*
>   	 * Pass the page physical address to the TDX module to accept the
>   	 * pending, private page.
>   	 *
>   	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.

Maybe the “page size” can be adjusted to “TDX page level” accordingly.

>   	 */
> -	switch (pg_level) {
> -	case PG_LEVEL_4K:
> -		page_size = 0;
> -		break;
> -	case PG_LEVEL_2M:
> -		page_size = 1;
> -		break;
> -	case PG_LEVEL_1G:
> -		page_size = 2;
> -		break;
> -	default:
> -		return false;
> -	}
> -
> -	tdcall_rcx = *start | page_size;
> +	tdcall_rcx = *start | to_tdx_pg_level(pg_level);
>   	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
>   		return false;
>   
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 020c81a7c729..1c166fb9c22f 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -20,6 +20,39 @@
>   
>   #ifndef __ASSEMBLY__
>   
> +#include <asm/pgtable_types.h>
> +
> +/*
> + * The page levels of TDX supported page sizes (4K/2M/1G).
> + *
> + * Those values are part of the TDX module ABI.  Do not change them.
> + */
> +enum tdx_pg_level {
> +	TDX_PG_LEVEL_4K,
> +	TDX_PG_LEVEL_2M,
> +	TDX_PG_LEVEL_1G,
> +	TDX_PG_LEVEL_NUM
> +};
> +
> +/*
> + * Get the TDX page level based on the kernel page level.  The caller
> + * to make sure only pass 4K/2M/1G kernel page level.
> + */
> +static inline enum tdx_pg_level to_tdx_pg_level(enum pg_level pglvl)
> +{
> +	switch (pglvl) {
> +	case PG_LEVEL_4K:
> +		return TDX_PG_LEVEL_4K;
> +	case PG_LEVEL_2M:
> +		return TDX_PG_LEVEL_2M;
> +	case PG_LEVEL_1G:
> +		return TDX_PG_LEVEL_1G;
> +	default:
> +		WARN_ON_ONCE(1);
> +	}
> +	return TDX_PG_LEVEL_NUM;
> +}
> +
>   /*
>    * Used to gather the output registers values of the TDCALL and SEAMCALL
>    * instructions when requesting services from the TDX module.
  
Kai Huang Oct. 27, 2022, 8:42 a.m. UTC | #2
On Thu, 2022-10-27 at 15:08 +0800, Li, Xiaoyao wrote:
> > @@ -663,27 +662,16 @@ static bool try_accept_one(phys_addr_t *start,
> > unsigned long len,
> >    	if (len < accept_size)
> >    		return false;
> >    
> > +	/* TDX only supports 4K/2M/1G page sizes */
> 
> yes, a page can be mapped as 1G size to TD via secure/shared EPT. But 
> for this particular TDX_ACCEPT_PAGE case, it only supports 4K and 2M 
> currently, which is defined in TDX module spec.

I checked the TDX module public spec, and it appears you are right.  But I am
not sure whether it will be changed in the future?

Anyway, this patch doesn't intend to bring any functional change (I should have
stated this in the changelog), so I think fixing this, if ever needed, should
be done in another patch.

Hi Isaku,

You suggested introducing a helper, but this makes me wonder: how is KVM going
to use this helper? KVM secure EPT can accept more levels than try_accept_one().

Perhaps I can just get rid of this helper? TDX host series only needs some
definitions to represent 4K/2M/1G page to get rid of using magic numbers.

> 
> This also implies one thing can be improved in current kernel that 
> trying accepting a page from 1G in tdx_enc_status_changed() can be 
> optimized to from 2M. It can be changed to start from 1G when TDX 
> supports accepting 1G page directly.

Ditto.

> 
> > +	if (pg_level < PG_LEVEL_4K || pg_level > PG_LEVEL_1G)
> > +		return false;
> >    	/*
> >    	 * Pass the page physical address to the TDX module to accept the
> >    	 * pending, private page.
> >    	 *
> >    	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
> 
> Maybe the “page size” can be adjusted to “TDX page level” accordingly.

Perhaps, but I think "page size" is also not that wrong, so I am reluctant to
change the existing comment.  And let's see Isaku's response to my above
question first.
  
Kirill A. Shutemov Oct. 27, 2022, 1:51 p.m. UTC | #3
On Thu, Oct 27, 2022 at 08:42:16AM +0000, Huang, Kai wrote:
> On Thu, 2022-10-27 at 15:08 +0800, Li, Xiaoyao wrote:
> > > @@ -663,27 +662,16 @@ static bool try_accept_one(phys_addr_t *start,
> > > unsigned long len,
> > >    	if (len < accept_size)
> > >    		return false;
> > >    
> > > +	/* TDX only supports 4K/2M/1G page sizes */
> > 
> > yes, a page can be mapped as 1G size to TD via secure/shared EPT. But 
> > for this particular TDX_ACCEPT_PAGE case, it only supports 4K and 2M 
> > currently, which is defined in TDX module spec.
> 
> I checked the TDX module public spec, and it appears you are right.  But I am
> not sure whether it will be changed in the future?

The spec says:

	Level of the Secure EPT entry that maps the private page to be
	accepted: either 0 (4KB) or 1 (2MB) – see 20.5.1

Yes, it is about 4K and 2M, but it also refers to 20.5.1, which lists sizes
up to 16PB.

Ultimately, it does not matter: if the TDX module doesn't support the size or
cannot accept the memory for some other reason, the guest kernel will fall back
to a smaller size.
  
Dave Hansen Oct. 27, 2022, 3:27 p.m. UTC | #4
On 10/26/22 16:16, Kai Huang wrote:
> +/*
> + * Get the TDX page level based on the kernel page level.  The caller
> + * to make sure only pass 4K/2M/1G kernel page level.
> + */
> +static inline enum tdx_pg_level to_tdx_pg_level(enum pg_level pglvl)
> +{
> +	switch (pglvl) {
> +	case PG_LEVEL_4K:
> +		return TDX_PG_LEVEL_4K;
> +	case PG_LEVEL_2M:
> +		return TDX_PG_LEVEL_2M;
> +	case PG_LEVEL_1G:
> +		return TDX_PG_LEVEL_1G;
> +	default:
> +		WARN_ON_ONCE(1);
> +	}
> +	return TDX_PG_LEVEL_NUM;
> +}

Is TDX_PG_LEVEL_NUM part of the ABI?  Or, is this going to accidentally
pass a whacky value to the SEAM module?

This needs something like this at the call-site:

	page_size = to_tdx_pg_level(pg_level);
	if (page_size >= TDX_PG_LEVEL_NUM)
		return false;
  
Isaku Yamahata Oct. 27, 2022, 10:28 p.m. UTC | #5
On Thu, Oct 27, 2022 at 08:42:16AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Thu, 2022-10-27 at 15:08 +0800, Li, Xiaoyao wrote:
> > > @@ -663,27 +662,16 @@ static bool try_accept_one(phys_addr_t *start,
> > > unsigned long len,
> > >    	if (len < accept_size)
> > >    		return false;
> > >    
> > > +	/* TDX only supports 4K/2M/1G page sizes */
> > 
> > yes, a page can be mapped as 1G size to TD via secure/shared EPT. But 
> > for this particular TDX_ACCEPT_PAGE case, it only supports 4K and 2M 
> > currently, which is defined in TDX module spec.
> 
> I checked the TDX module public spec, and it appears you are right.  But I am
> not sure whether it will be changed in the future?
> 
> Anyway this patch doesn't intend to bring any functional change (I should have
> stated this in the changelog), so I think fixing to this, if ever needed, should
> be another patch.
> 
> Hi Isaku,
> 
> You suggested to introduce a helper, but this reminds me how KVM is going to use
> this helper? KVM secure EPT can accept more levels than try_accept_one().
> 
> Perhaps I can just get rid of this helper? TDX host series only needs some
> definitions to represent 4K/2M/1G page to get rid of using magic numbers.

Ok, let's remove the helper function. The usage seems different from the KVM case.
On the KVM side, it can introduce its own helper.
  
Kai Huang Oct. 28, 2022, 12:10 a.m. UTC | #6
On Thu, 2022-10-27 at 08:27 -0700, Dave Hansen wrote:
> On 10/26/22 16:16, Kai Huang wrote:
> > +/*
> > + * Get the TDX page level based on the kernel page level.  The caller
> > + * to make sure only pass 4K/2M/1G kernel page level.
> > + */
> > +static inline enum tdx_pg_level to_tdx_pg_level(enum pg_level pglvl)
> > +{
> > +	switch (pglvl) {
> > +	case PG_LEVEL_4K:
> > +		return TDX_PG_LEVEL_4K;
> > +	case PG_LEVEL_2M:
> > +		return TDX_PG_LEVEL_2M;
> > +	case PG_LEVEL_1G:
> > +		return TDX_PG_LEVEL_1G;
> > +	default:
> > +		WARN_ON_ONCE(1);
> > +	}
> > +	return TDX_PG_LEVEL_NUM;
> > +}
> 
> Is TDX_PG_LEVEL_NUM part of the ABI?  Or, is this going to accidentally
> pass a whacky value to the SEAM module?

The intention was that TDX_PG_LEVEL_NUM is not part of the ABI, but it looks
like I was wrong.  KVM secure EPT can accept page levels larger than 1G, as
page table levels.

> This needs something like this at the call-site:
> 
> 	page_size = to_tdx_pg_level(pg_level);
> 	if (page_size >= TDX_PG_LEVEL_NUM)
> 		return false;

Yes.  Thanks for the time to review.  It's bad, and should go away.

This reminds me that I have mixed two things together: 1) leaf page sizes
(4K/2M/1G); 2) page table levels, which can have larger levels than 1G.

In fact, the TDX module spec has a separate definition for the leaf page sizes:

	Table 20.10: Page Size Definition

	PS_1G	1G	2
	PS_2M	2M	1
	PS_4K	4K	0

While the TDX guest and TDX host code only need leaf page sizes, KVM needs all
the page table levels, so it's not necessary to provide a common helper to get
the TDX page level from the kernel page level.

As Isaku also replied, I'll remove the helper.

Hi Kirill,

You suggested that perhaps we can use macro definitions instead of the enum type.
Does the below look good to you?

--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -671,13 +671,13 @@ static bool try_accept_one(phys_addr_t *start, unsigned
long len,
         */
        switch (pg_level) {
        case PG_LEVEL_4K:
-               page_size = 0;
+               page_size = TDX_PS_4K;
                break;
        case PG_LEVEL_2M:
-               page_size = 1;
+               page_size = TDX_PS_2M;
                break;
        case PG_LEVEL_1G:
-               page_size = 2;
+               page_size = TDX_PS_1G;
                break;
        default:
                return false;
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 020c81a7c729..74845d014d1c 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -20,6 +20,18 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * TDX supported page sizes (4K/2M/1G).
+ *
+ * Please refer to the TDX module 1.0 spec 20.4.1 Physical Page Size.
+ *
+ * Those values are part of the TDX module ABI (except TDX_PS_NUM).
+ */
+#define TDX_PS_4K      0
+#define TDX_PS_2M      1
+#define TDX_PS_1G      2
+#define TDX_PS_NUM     3
+

Btw, the TDX host patch will use them in the following way (please refer to
patch 14: x86/virt/tdx: Allocate and set up PAMTs for TDMRs):

	unsigned long pamt_size[TDX_PS_NUM];

	/*
	 * Calculate the PAMT size for each TDX supported page size
	 * and the total PAMT size.  TDX_PS_* are contiguous from 0 to 3.
	 */
	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NUM; pgsz++) {
		pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz);
		...
	}
  
Kai Huang Oct. 28, 2022, 12:47 a.m. UTC | #7
On Thu, 2022-10-27 at 16:51 +0300, Kirill A. Shutemov wrote:
> On Thu, Oct 27, 2022 at 08:42:16AM +0000, Huang, Kai wrote:
> > On Thu, 2022-10-27 at 15:08 +0800, Li, Xiaoyao wrote:
> > > > @@ -663,27 +662,16 @@ static bool try_accept_one(phys_addr_t *start,
> > > > unsigned long len,
> > > >    	if (len < accept_size)
> > > >    		return false;
> > > >    
> > > > +	/* TDX only supports 4K/2M/1G page sizes */
> > > 
> > > yes, a page can be mapped as 1G size to TD via secure/shared EPT. But 
> > > for this particular TDX_ACCEPT_PAGE case, it only supports 4K and 2M 
> > > currently, which is defined in TDX module spec.
> > 
> > I checked the TDX module public spec, and it appears you are right.  But I am
> > not sure whether it will be changed in the future?
> 
> The spec says:
> 
> 	Level of the Secure EPT entry that maps the private page to be
> 	accepted: either 0 (4KB) or 1 (2MB) – see 20.5.1
> 
> Yes, it is about 4k and 2M, but it also refers 20.5.1, which lists sizes
> up to 16PB.

Also, I think we are mixing two things: 1) leaf page sizes  (4K/2M/1G);  2) page
table levels.  The latter has more levels than the former.  For try_accept_one()
(and TDX host code), we actually care only about the former.

KVM needs the latter (or both), so it's better for KVM to handle it on its own.

> 
> Ultimately, it does not matter: if TDX module doesn't support the size or
> cannot accept the memory for other reason guest kernel will fallback to
> smaller size.
> 

Yes.
  

Patch

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 928dcf7a20d9..c5ff9647213d 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -655,7 +655,6 @@  static bool try_accept_one(phys_addr_t *start, unsigned long len,
 {
 	unsigned long accept_size = page_level_size(pg_level);
 	u64 tdcall_rcx;
-	u8 page_size;
 
 	if (!IS_ALIGNED(*start, accept_size))
 		return false;
@@ -663,27 +662,16 @@  static bool try_accept_one(phys_addr_t *start, unsigned long len,
 	if (len < accept_size)
 		return false;
 
+	/* TDX only supports 4K/2M/1G page sizes */
+	if (pg_level < PG_LEVEL_4K || pg_level > PG_LEVEL_1G)
+		return false;
 	/*
 	 * Pass the page physical address to the TDX module to accept the
 	 * pending, private page.
 	 *
 	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
 	 */
-	switch (pg_level) {
-	case PG_LEVEL_4K:
-		page_size = 0;
-		break;
-	case PG_LEVEL_2M:
-		page_size = 1;
-		break;
-	case PG_LEVEL_1G:
-		page_size = 2;
-		break;
-	default:
-		return false;
-	}
-
-	tdcall_rcx = *start | page_size;
+	tdcall_rcx = *start | to_tdx_pg_level(pg_level);
 	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
 		return false;
 
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 020c81a7c729..1c166fb9c22f 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -20,6 +20,39 @@ 
 
 #ifndef __ASSEMBLY__
 
+#include <asm/pgtable_types.h>
+
+/*
+ * The page levels of TDX supported page sizes (4K/2M/1G).
+ *
+ * Those values are part of the TDX module ABI.  Do not change them.
+ */
+enum tdx_pg_level {
+	TDX_PG_LEVEL_4K,
+	TDX_PG_LEVEL_2M,
+	TDX_PG_LEVEL_1G,
+	TDX_PG_LEVEL_NUM
+};
+
+/*
+ * Get the TDX page level based on the kernel page level.  The caller
+ * to make sure only pass 4K/2M/1G kernel page level.
+ */
+static inline enum tdx_pg_level to_tdx_pg_level(enum pg_level pglvl)
+{
+	switch (pglvl) {
+	case PG_LEVEL_4K:
+		return TDX_PG_LEVEL_4K;
+	case PG_LEVEL_2M:
+		return TDX_PG_LEVEL_2M;
+	case PG_LEVEL_1G:
+		return TDX_PG_LEVEL_1G;
+	default:
+		WARN_ON_ONCE(1);
+	}
+	return TDX_PG_LEVEL_NUM;
+}
+
 /*
  * Used to gather the output registers values of the TDCALL and SEAMCALL
  * instructions when requesting services from the TDX module.