[v2,5/6] x86/hyperv: Support hypercalls for TDX guests

Message ID 20221207003325.21503-6-decui@microsoft.com
State New
Headers
Series Support TDX guests on Hyper-V |

Commit Message

Dexuan Cui Dec. 7, 2022, 12:33 a.m. UTC
  A TDX guest uses the GHCI call rather than hv_hypercall_pg.

In hv_do_hypercall(), Hyper-V requires that the input/output addresses
must have the cc_mask.

Signed-off-by: Dexuan Cui <decui@microsoft.com>

---

Changes in v2:
  Implemented hv_tdx_hypercall() in C rather than in assembly code.
  Renamed the parameter names of hv_tdx_hypercall().
  Used cc_mkdec() directly in hv_do_hypercall().

 arch/x86/hyperv/hv_init.c       |  8 ++++++++
 arch/x86/hyperv/ivm.c           | 14 ++++++++++++++
 arch/x86/include/asm/mshyperv.h | 17 +++++++++++++++++
 3 files changed, 39 insertions(+)
  

Comments

Dexuan Cui Dec. 12, 2022, 7:10 p.m. UTC | #1
> From: Michael Kelley (LINUX) <mikelley@microsoft.com>
> Sent: Monday, December 12, 2022 8:39 AM
> > [...]
> > A TDX guest uses the GHCI call rather than hv_hypercall_pg.
> >
> > In hv_do_hypercall(), Hyper-V requires that the input/output addresses
> > must have the cc_mask.
> 
> Is it a requirement that the input/output addresses refer to guest memory
> pages that have been marked as shared/decrypted?  For example, I don't see

Yes. 

> any code to mark the hyperv_pcpu_input_arg page as shared/decrypted.
> Do the use cases for the hyperv_pcpu_input_arg page just not occur in a
> TDX VM?

I missed this when sending v2, and I realized this when testing DDA.
Will get this fixed in v3.

BTW, I noticed Tianyu posted a similar patch:
[RFC PATCH V2 10/18] drivers: hv: Decrypt percpu hvcall input arg page in sev-snp enlightened guest
  
Zhi Wang Jan. 6, 2023, 11:23 a.m. UTC | #2
On Tue,  6 Dec 2022 16:33:24 -0800
Dexuan Cui <decui@microsoft.com> wrote:

> A TDX guest uses the GHCI call rather than hv_hypercall_pg.
> 
> In hv_do_hypercall(), Hyper-V requires that the input/output addresses
> must have the cc_mask.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> 
> ---
> 
> Changes in v2:
>   Implemented hv_tdx_hypercall() in C rather than in assembly code.
>   Renamed the parameter names of hv_tdx_hypercall().
>   Used cc_mkdec() directly in hv_do_hypercall().
> 
>  arch/x86/hyperv/hv_init.c       |  8 ++++++++
>  arch/x86/hyperv/ivm.c           | 14 ++++++++++++++
>  arch/x86/include/asm/mshyperv.h | 17 +++++++++++++++++
>  3 files changed, 39 insertions(+)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index a823fde1ad7f..c0ba53ad8b8e 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -430,6 +430,10 @@ void __init hyperv_init(void)
>  	/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
>  	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
>  
> +	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg.
> */
> +	if (hv_isolation_type_tdx())
> +		goto skip_hypercall_pg_init;
> +
>  	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1,
> VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
>  			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> @@ -469,6 +473,7 @@ void __init hyperv_init(void)
>  		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
>  	}
>  
> +skip_hypercall_pg_init:
>  	/*
>  	 * hyperv_init() is called before LAPIC is initialized: see
>  	 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
> @@ -604,6 +609,9 @@ bool hv_is_hyperv_initialized(void)
>  	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
>  		return false;
>  
> +	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg.
> */
> +	if (hv_isolation_type_tdx())
> +		return true;
>  	/*
>  	 * Verify that earlier initialization succeeded by checking
>  	 * that the hypercall page is setup
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 13ccb52eecd7..07e4253b5809 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -276,6 +276,20 @@ bool hv_isolation_type_tdx(void)
>  {
>  	return static_branch_unlikely(&isolation_type_tdx);
>  }
> +
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +	struct tdx_hypercall_args args = { };
> +
> +	args.r10 = control;
> +	args.rdx = param1;
> +	args.r8  = param2;
> +
> +	(void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> +
> +	return args.r11;
> +}
> +EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
>  #endif
>  
>  /*
> diff --git a/arch/x86/include/asm/mshyperv.h
> b/arch/x86/include/asm/mshyperv.h index 8a2cafec4675..a4d665472d9e 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -10,6 +10,7 @@
>  #include <asm/nospec-branch.h>
>  #include <asm/paravirt.h>
>  #include <asm/mshyperv.h>
> +#include <asm/coco.h>
>  
>  union hv_ghcb;
>  
> @@ -39,6 +40,12 @@ int hv_call_deposit_pages(int node, u64 partition_id,
> u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32
> acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32
> vp_index, u32 flags); 
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +
> +/*
> + * If the hypercall involves no input or output parameters, the
> hypervisor
> + * ignores the corresponding GPA pointer.
> + */
>  static inline u64 hv_do_hypercall(u64 control, void *input, void
> *output) {
>  	u64 input_address = input ? virt_to_phys(input) : 0;
> @@ -46,6 +53,10 @@ static inline u64 hv_do_hypercall(u64 control, void
> *input, void *output) u64 hv_status;
>  
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control,
> +					cc_mkdec(input_address),
> +					cc_mkdec(output_address));
>  	if (!hv_hypercall_pg)
>  		return U64_MAX;
>  

> @@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64
> input1) u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>  
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input1, 0);
> +
>  	{
>  		__asm__ __volatile__(CALL_NOSPEC
>  				     : "=a" (hv_status),
> ASM_CALL_CONSTRAINT, @@ -114,6 +128,9 @@ static inline u64
> hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) u64 hv_status,
> control = (u64)code | HV_HYPERCALL_FAST_BIT; 
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input1, input2);
> +
In some paths, for example vmbus_set_event(), choosing the SNP-based or
generic hypercall happens in the caller, while now TDX-based hypercall is
embraced in the generic hypercall path, e.g. hv_do_fast_hypercall8(). Which
style will be chosen in the future? Seems the coding structure needs to be
aligned.

/*
 * vmbus_set_event - issue the HVCALL_SIGNAL_EVENT hypercall for a channel.
 * @channel: channel whose sig_event value is handed to the hypercall.
 *
 * Unless the channel has is_dedicated_interrupt set, vmbus_send_interrupt()
 * is called first with the channel's child_relid.  The channel's sig_events
 * counter is then incremented and the hypercall is issued.
 */
void vmbus_set_event(struct vmbus_channel *channel)
{
        u32 child_relid = channel->offermsg.child_relid;

        if (!channel->is_dedicated_interrupt)
                vmbus_send_interrupt(child_relid);

        ++channel->sig_events;

        /*
         * The choice of hypercall mechanism is made here in the caller:
         * SNP-isolated guests go through hv_ghcb_hypercall(), passing a
         * pointer to sig_event and its size; every other guest passes
         * sig_event by value to the generic fast-hypercall helper.
         */
        if (hv_isolation_type_snp())
                hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
                                NULL, sizeof(channel->sig_event));
        else
                hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT,
        channel->sig_event);
}


>  	{
>  		__asm__ __volatile__("mov %4, %%r8\n"
>  				     CALL_NOSPEC
  
Dexuan Cui Jan. 9, 2023, 7:27 a.m. UTC | #3
> From: Zhi Wang <zhi.wang.linux@gmail.com>
> Sent: Friday, January 6, 2023 3:24 AM
> > @@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64
> > input1) u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> >
> >  #ifdef CONFIG_X86_64
> > +	if (hv_isolation_type_tdx())
> > +		return hv_tdx_hypercall(control, input1, 0);
> > +
> >  	{
> >  		__asm__ __volatile__(CALL_NOSPEC
> >  				     : "=a" (hv_status),
> > ASM_CALL_CONSTRAINT, @@ -114,6 +128,9 @@ static inline u64
> > hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) u64 hv_status,
> > control = (u64)code | HV_HYPERCALL_FAST_BIT;
> >  #ifdef CONFIG_X86_64
> > +	if (hv_isolation_type_tdx())
> > +		return hv_tdx_hypercall(control, input1, input2);
> > +
> In some paths, for example vmbus_set_event(), choosing the SNP-based or

In an SNP guest with a paravisor on Hyper-V, hv_ghcb_hypercall() is called in
only two places: vmbus_set_event() and hv_post_message(), where the
hypercalls, which are done via GHCB, need to be handled by the Hyper-V
hypervisor directly; in other places, the hypercalls, which are done via the
hypercall page, need to be handled by the paravisor. That is to say, there
are 2 different kinds of hypercalls for an SNP guest with a paravisor on Hyper-V,
and hence we have to use 2 styles.

> generic hypercall happens in the caller, while now TDX-based hypercall is
> embraced in the generic hypercall path, e.g. hv_do_fast_hypercall8(). Which
> style will be chosen in the future? Seems the coding structure needs to be
> aligned.

For a TDX guest without a paravisor on Hyper-V, there is only one style of
hypercalls, so I make the change in hv_do_hypercall() and
hv_do_fast_hypercall*() directly.

I don't think we can make any clean-up changes right now. When we
support the TDX guest with a paravisor on Hyper-V, we'll figure out if we
can make any improvement.

> void vmbus_set_event(struct vmbus_channel *channel)
> {
>         u32 child_relid = channel->offermsg.child_relid;
> 
>         if (!channel->is_dedicated_interrupt)
>                 vmbus_send_interrupt(child_relid);
> 
>         ++channel->sig_events;
> 
>         if (hv_isolation_type_snp())
>                 hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT,
> &channel->sig_event,
>                                 NULL, sizeof(channel->sig_event));
>         else
>                 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT,
>         channel->sig_event);
> }
> 
> 
> >  	{
> >  		__asm__ __volatile__("mov %4, %%r8\n"
> >  				     CALL_NOSPEC
  

Patch

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a823fde1ad7f..c0ba53ad8b8e 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -430,6 +430,10 @@  void __init hyperv_init(void)
 	/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
 	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
 
+	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+	if (hv_isolation_type_tdx())
+		goto skip_hypercall_pg_init;
+
 	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
 			VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
 			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
@@ -469,6 +473,7 @@  void __init hyperv_init(void)
 		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 	}
 
+skip_hypercall_pg_init:
 	/*
 	 * hyperv_init() is called before LAPIC is initialized: see
 	 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
@@ -604,6 +609,9 @@  bool hv_is_hyperv_initialized(void)
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return false;
 
+	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+	if (hv_isolation_type_tdx())
+		return true;
 	/*
 	 * Verify that earlier initialization succeeded by checking
 	 * that the hypercall page is setup
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 13ccb52eecd7..07e4253b5809 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -276,6 +276,20 @@  bool hv_isolation_type_tdx(void)
 {
 	return static_branch_unlikely(&isolation_type_tdx);
 }
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+	struct tdx_hypercall_args args = { };
+
+	args.r10 = control;
+	args.rdx = param1;
+	args.r8  = param2;
+
+	(void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+	return args.r11;
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
 #endif
 
 /*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 8a2cafec4675..a4d665472d9e 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -10,6 +10,7 @@ 
 #include <asm/nospec-branch.h>
 #include <asm/paravirt.h>
 #include <asm/mshyperv.h>
+#include <asm/coco.h>
 
 union hv_ghcb;
 
@@ -39,6 +40,12 @@  int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
 
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
 	u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +53,10 @@  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx())
+		return hv_tdx_hypercall(control,
+					cc_mkdec(input_address),
+					cc_mkdec(output_address));
 	if (!hv_hypercall_pg)
 		return U64_MAX;
 
@@ -83,6 +94,9 @@  static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx())
+		return hv_tdx_hypercall(control, input1, 0);
+
 	{
 		__asm__ __volatile__(CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +128,9 @@  static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx())
+		return hv_tdx_hypercall(control, input1, input2);
+
 	{
 		__asm__ __volatile__("mov %4, %%r8\n"
 				     CALL_NOSPEC