[v4,2/6] x86/tdx: Support vmalloc() for tdx_enc_status_changed()

Message ID 20230408204759.14902-3-decui@microsoft.com
State New
Headers
Series Support TDX guests on Hyper-V |

Commit Message

Dexuan Cui April 8, 2023, 8:47 p.m. UTC
  When a TDX guest runs on Hyper-V, the hv_netvsc driver's netvsc_init_buf()
allocates buffers using vzalloc(), and needs to share the buffers with the
host OS by calling set_memory_decrypted(), which is not working for
vmalloc() yet. Add the support by handling the pages one by one.

Co-developed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 arch/x86/coco/tdx/tdx.c | 76 ++++++++++++++++++++++++++++-------------
 1 file changed, 52 insertions(+), 24 deletions(-)

Changes in v2:
  Changed tdx_enc_status_changed() in place.

Hi, Dave, I checked the huge vmalloc mapping code, but still don't know
how to get the underlying huge page info (if huge page is in use) and
try to use PG_LEVEL_2M/1G in try_accept_page() for vmalloc: I checked
is_vm_area_hugepages() and  __vfree() -> __vunmap(), and I think the
underlying page allocation info is internal to the mm code, and there
is no mm API to for me get the info in tdx_enc_status_changed().

Changes in v3:
  No change since v2.

Changes in v4:
  Added Kirill's Co-developed-by since Kirill helped to improve the
    code by adding tdx_enc_status_changed_phys().

  Thanks Kirill for the clarification on load_unaligned_zeropad()!

  The vzalloc() usage in drivers/net/hyperv/netvsc.c: netvsc_init_buf()
    remains the same. It may not worth it to "allocate a vmalloc region,
    allocate pages manually", because we have to consider the worst case
    where the system is sufferiing from severe memory fragmentation and
    we can only allocate multiple single pages. We may not want to
    complicate the code in netvsc_init_buf(). We'll support NIC SR-IOV
    for TDX VMs on Hyper-V, so the netvsc send/recv buffers won't be
    used when the VF NIC is up.
  

Comments

Michael Kelley (LINUX) April 11, 2023, 4:28 p.m. UTC | #1
From: Dexuan Cui <decui@microsoft.com> Sent: Saturday, April 8, 2023 1:48 PM
> 
> When a TDX guest runs on Hyper-V, the hv_netvsc driver's netvsc_init_buf()
> allocates buffers using vzalloc(), and needs to share the buffers with the
> host OS by calling set_memory_decrypted(), which is not working for
> vmalloc() yet. Add the support by handling the pages one by one.
> 
> Co-developed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  arch/x86/coco/tdx/tdx.c | 76 ++++++++++++++++++++++++++++-------------
>  1 file changed, 52 insertions(+), 24 deletions(-)
> 
> Changes in v2:
>   Changed tdx_enc_status_changed() in place.
> 
> Hi, Dave, I checked the huge vmalloc mapping code, but still don't know
> how to get the underlying huge page info (if huge page is in use) and
> try to use PG_LEVEL_2M/1G in try_accept_page() for vmalloc: I checked
> is_vm_area_hugepages() and  __vfree() -> __vunmap(), and I think the
> underlying page allocation info is internal to the mm code, and there
> is no mm API to for me get the info in tdx_enc_status_changed().
> 
> Changes in v3:
>   No change since v2.
> 
> Changes in v4:
>   Added Kirill's Co-developed-by since Kirill helped to improve the
>     code by adding tdx_enc_status_changed_phys().
> 
>   Thanks Kirill for the clarification on load_unaligned_zeropad()!
> 
>   The vzalloc() usage in drivers/net/hyperv/netvsc.c: netvsc_init_buf()
>     remains the same. It may not worth it to "allocate a vmalloc region,
>     allocate pages manually", because we have to consider the worst case
>     where the system is sufferiing from severe memory fragmentation and
>     we can only allocate multiple single pages. We may not want to
>     complicate the code in netvsc_init_buf(). We'll support NIC SR-IOV
>     for TDX VMs on Hyper-V, so the netvsc send/recv buffers won't be
>     used when the VF NIC is up.
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 5574c91541a2d..731be50b3d093 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -7,6 +7,7 @@
>  #include <linux/cpufeature.h>
>  #include <linux/export.h>
>  #include <linux/io.h>
> +#include <linux/mm.h>
>  #include <asm/coco.h>
>  #include <asm/tdx.h>
>  #include <asm/vmx.h>
> @@ -789,6 +790,34 @@ static bool try_accept_one(phys_addr_t *start, unsigned long
> len,
>  	return true;
>  }
> 
> +static bool try_accept_page(phys_addr_t start, phys_addr_t end)
> +{
> +	/*
> +	 * For shared->private conversion, accept the page using
> +	 * TDX_ACCEPT_PAGE TDX module call.
> +	 */
> +	while (start < end) {
> +		unsigned long len = end - start;
> +
> +		/*
> +		 * Try larger accepts first. It gives chance to VMM to keep
> +		 * 1G/2M SEPT entries where possible and speeds up process by
> +		 * cutting number of hypercalls (if successful).
> +		 */
> +
> +		if (try_accept_one(&start, len, PG_LEVEL_1G))
> +			continue;
> +
> +		if (try_accept_one(&start, len, PG_LEVEL_2M))
> +			continue;
> +
> +		if (!try_accept_one(&start, len, PG_LEVEL_4K))
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
>  /*
>   * Notify the VMM about page mapping conversion. More info about ABI
>   * can be found in TDX Guest-Host-Communication Interface (GHCI),
> @@ -838,6 +867,19 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t
> end, bool enc)
>  	return !ret;
>  }
> 
> +static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end,
> +					bool enc)
> +{
> +	if (!tdx_map_gpa(start, end, enc))
> +		return false;
> +
> +	/* private->shared conversion requires only MapGPA call */
> +	if (!enc)
> +		return true;
> +
> +	return try_accept_page(start, end);
> +}
> +
>  /*
>   * Inform the VMM of the guest's intent for this physical page: shared with
>   * the VMM or private to the guest. The VMM is expected to change its mapping
> @@ -845,37 +887,23 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t
> end, bool enc)
>   */
>  static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
>  {
> -	phys_addr_t start = __pa(vaddr);
> -	phys_addr_t end   = __pa(vaddr + numpages * PAGE_SIZE);
> +	unsigned long start = vaddr;
> +	unsigned long end = start + numpages * PAGE_SIZE;
> 
> -	if (!tdx_map_gpa(start, end, enc))
> +	if (offset_in_page(start) != 0)
>  		return false;
> 
> -	/* private->shared conversion  requires only MapGPA call */
> -	if (!enc)
> -		return true;
> +	if (!is_vmalloc_addr((void *)start))
> +		return tdx_enc_status_changed_phys(__pa(start), __pa(end), enc);
> 
> -	/*
> -	 * For shared->private conversion, accept the page using
> -	 * TDX_ACCEPT_PAGE TDX module call.
> -	 */
>  	while (start < end) {
> -		unsigned long len = end - start;
> +		phys_addr_t start_pa = slow_virt_to_phys((void *)start);

I've usually seen "page_to_phys(vmalloc_to_page(start))" for getting
the physical address corresponding to a vmalloc() address.  For example,
see virt_to_hvpfn(), kvm_kaddr_to_phys(), nx842_get_pa(),
rproc_va_to_pa(), etc.

Does anyone know if there is a reason to use one vs. the other?
slow_virt_to_phys() is x86-only, but that's not a problem here.

Otherwise,
Reviewed-by: Michael Kelley <mikelley@microsoft.com>

> +		phys_addr_t end_pa = start_pa + PAGE_SIZE;
> 
> -		/*
> -		 * Try larger accepts first. It gives chance to VMM to keep
> -		 * 1G/2M SEPT entries where possible and speeds up process by
> -		 * cutting number of hypercalls (if successful).
> -		 */
> -
> -		if (try_accept_one(&start, len, PG_LEVEL_1G))
> -			continue;
> -
> -		if (try_accept_one(&start, len, PG_LEVEL_2M))
> -			continue;
> -
> -		if (!try_accept_one(&start, len, PG_LEVEL_4K))
> +		if (!tdx_enc_status_changed_phys(start_pa, end_pa, enc))
>  			return false;
> +
> +		start += PAGE_SIZE;
>  	}
> 
>  	return true;
> --
> 2.25.1
  
Kirill A. Shutemov April 12, 2023, 3:19 p.m. UTC | #2
On Tue, Apr 11, 2023 at 04:28:20PM +0000, Michael Kelley (LINUX) wrote:
> From: Dexuan Cui <decui@microsoft.com> Sent: Saturday, April 8, 2023 1:48 PM
> > 
> > When a TDX guest runs on Hyper-V, the hv_netvsc driver's netvsc_init_buf()
> > allocates buffers using vzalloc(), and needs to share the buffers with the
> > host OS by calling set_memory_decrypted(), which is not working for
> > vmalloc() yet. Add the support by handling the pages one by one.
> > 
> > Co-developed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Co-developed-by requires[1] Signed-off-by. You can use mine, if you want.

[1] Documentation/process/submitting-patches.rst
  
Kirill A. Shutemov April 12, 2023, 3:20 p.m. UTC | #3
On Tue, Apr 11, 2023 at 04:28:20PM +0000, Michael Kelley (LINUX) wrote:
> Does anyone know if there is a reason to use one vs. the other?
> slow_virt_to_phys() is x86-only, but that's not a problem here.

slow_virt_to_phys() is more generic as it works for non-vmalloc addresses,
but generally they are equivalent for the use.
  
Dexuan Cui April 21, 2023, 3:14 a.m. UTC | #4
> From: kirill.shutemov@linux.intel.com <kirill.shutemov@linux.intel.com>
> Sent: Wednesday, April 12, 2023 8:20 AM
> ...
> > > Co-developed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> Co-developed-by requires[1] Signed-off-by. You can use mine, if you want.

Thanks, Kirill. I'll add your Signed-off-by in v5.
  

Patch

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 5574c91541a2d..731be50b3d093 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@ 
 #include <linux/cpufeature.h>
 #include <linux/export.h>
 #include <linux/io.h>
+#include <linux/mm.h>
 #include <asm/coco.h>
 #include <asm/tdx.h>
 #include <asm/vmx.h>
@@ -789,6 +790,34 @@  static bool try_accept_one(phys_addr_t *start, unsigned long len,
 	return true;
 }
 
+static bool try_accept_page(phys_addr_t start, phys_addr_t end)
+{
+	/*
+	 * For shared->private conversion, accept the page using
+	 * TDX_ACCEPT_PAGE TDX module call.
+	 */
+	while (start < end) {
+		unsigned long len = end - start;
+
+		/*
+		 * Try larger accepts first. It gives chance to VMM to keep
+		 * 1G/2M SEPT entries where possible and speeds up process by
+		 * cutting number of hypercalls (if successful).
+		 */
+
+		if (try_accept_one(&start, len, PG_LEVEL_1G))
+			continue;
+
+		if (try_accept_one(&start, len, PG_LEVEL_2M))
+			continue;
+
+		if (!try_accept_one(&start, len, PG_LEVEL_4K))
+			return false;
+	}
+
+	return true;
+}
+
 /*
  * Notify the VMM about page mapping conversion. More info about ABI
  * can be found in TDX Guest-Host-Communication Interface (GHCI),
@@ -838,6 +867,19 @@  static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
 	return !ret;
 }
 
+static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end,
+					bool enc)
+{
+	if (!tdx_map_gpa(start, end, enc))
+		return false;
+
+	/* private->shared conversion requires only MapGPA call */
+	if (!enc)
+		return true;
+
+	return try_accept_page(start, end);
+}
+
 /*
  * Inform the VMM of the guest's intent for this physical page: shared with
  * the VMM or private to the guest. The VMM is expected to change its mapping
@@ -845,37 +887,23 @@  static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
  */
 static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
 {
-	phys_addr_t start = __pa(vaddr);
-	phys_addr_t end   = __pa(vaddr + numpages * PAGE_SIZE);
+	unsigned long start = vaddr;
+	unsigned long end = start + numpages * PAGE_SIZE;
 
-	if (!tdx_map_gpa(start, end, enc))
+	if (offset_in_page(start) != 0)
 		return false;
 
-	/* private->shared conversion  requires only MapGPA call */
-	if (!enc)
-		return true;
+	if (!is_vmalloc_addr((void *)start))
+		return tdx_enc_status_changed_phys(__pa(start), __pa(end), enc);
 
-	/*
-	 * For shared->private conversion, accept the page using
-	 * TDX_ACCEPT_PAGE TDX module call.
-	 */
 	while (start < end) {
-		unsigned long len = end - start;
+		phys_addr_t start_pa = slow_virt_to_phys((void *)start);
+		phys_addr_t end_pa = start_pa + PAGE_SIZE;
 
-		/*
-		 * Try larger accepts first. It gives chance to VMM to keep
-		 * 1G/2M SEPT entries where possible and speeds up process by
-		 * cutting number of hypercalls (if successful).
-		 */
-
-		if (try_accept_one(&start, len, PG_LEVEL_1G))
-			continue;
-
-		if (try_accept_one(&start, len, PG_LEVEL_2M))
-			continue;
-
-		if (!try_accept_one(&start, len, PG_LEVEL_4K))
+		if (!tdx_enc_status_changed_phys(start_pa, end_pa, enc))
 			return false;
+
+		start += PAGE_SIZE;
 	}
 
 	return true;