[5/6] x86/hyperv: Support hypercalls for TDX guests

Message ID 20221121195151.21812-6-decui@microsoft.com
State New
Headers
Series Support TDX guests on Hyper-V |

Commit Message

Dexuan Cui Nov. 21, 2022, 7:51 p.m. UTC
  A TDX guest uses the GHCI call rather than hv_hypercall_pg.

In hv_do_hypercall(), Hyper-V requires that the input/output addresses
must have the vTOM bit set. With current Hyper-V, the bit for TDX is
bit 47, which is saved into ms_hyperv.shared_gpa_boundary() in
ms_hyperv_init_platform().

arch/x86/include/asm/mshyperv.h: hv_do_hypercall() needs
"struct ms_hyperv_info", which is defined in
include/asm-generic/mshyperv.h, which can't be included in
arch/x86/include/asm/mshyperv.h because include/asm-generic/mshyperv.h
has vmbus_signal_eom() -> hv_set_register(), which is defined in
arch/x86/include/asm/mshyperv.h.

Break this circular dependency by introducing a new header file
for "struct ms_hyperv_info".

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 MAINTAINERS                          |  1 +
 arch/x86/hyperv/hv_init.c            |  8 ++++++++
 arch/x86/include/asm/mshyperv.h      | 24 ++++++++++++++++++++++-
 arch/x86/kernel/cpu/mshyperv.c       |  2 ++
 include/asm-generic/ms_hyperv_info.h | 29 ++++++++++++++++++++++++++++
 include/asm-generic/mshyperv.h       | 24 +----------------------
 6 files changed, 64 insertions(+), 24 deletions(-)
 create mode 100644 include/asm-generic/ms_hyperv_info.h
  

Comments

Dave Hansen Nov. 21, 2022, 8:05 p.m. UTC | #1
>  #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> +	if (hv_isolation_type_tdx()) {

>  #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> +	if (hv_isolation_type_tdx())

>  #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> +	if (hv_isolation_type_tdx())
> +		return __tdx_ms_hv_hypercall(control, input2, input1);

See any common patterns there?

The "no #ifdef's in C files" rule would be good to apply here.  Please
do one #ifdef in a header.
  
Dexuan Cui Nov. 23, 2022, 2:14 a.m. UTC | #2
> From: Dave Hansen <dave.hansen@intel.com>
> Sent: Monday, November 21, 2022 12:05 PM
> [...]
> >  #ifdef CONFIG_X86_64
> > +#if CONFIG_INTEL_TDX_GUEST
> > +	if (hv_isolation_type_tdx()) {
> 
> >  #ifdef CONFIG_X86_64
> > +#if CONFIG_INTEL_TDX_GUEST
> > +	if (hv_isolation_type_tdx())
> 
> >  #ifdef CONFIG_X86_64
> > +#if CONFIG_INTEL_TDX_GUEST
> > +	if (hv_isolation_type_tdx())
> > +		return __tdx_ms_hv_hypercall(control, input2, input1);
> 
> See any common patterns there?
> 
> The "no #ifdef's in C files" rule would be good to apply here.  Please
> do one #ifdef in a header.

Sorry, I should use #ifdef rather than #if. I'll fix it like the below.

I don't think I can do one #ifdef, because, in the header file, there are
already 3 existing instances of 
#ifdef CONFIG_X86_64
#else
#endif
and I'm just adding a new block "#ifdef CONFIG_INTEL_TDX_GUEST ... #endif"
to the CONFIG_X86_64 case. FWIW, CONFIG_INTEL_TDX_GUEST already
depends on CONFIG_X86_64.

--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -48,7 +48,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        u64 hv_status;

 #ifdef CONFIG_X86_64
-#if CONFIG_INTEL_TDX_GUEST
+#ifdef CONFIG_INTEL_TDX_GUEST
        if (hv_isolation_type_tdx()) {
                if (input_address)
                        input_address += ms_hyperv.shared_gpa_boundary;
@@ -97,7 +97,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

 #ifdef CONFIG_X86_64
-#if CONFIG_INTEL_TDX_GUEST
+#ifdef CONFIG_INTEL_TDX_GUEST
        if (hv_isolation_type_tdx())
                return __tdx_ms_hv_hypercall(control, 0, input1);
 #endif
@@ -133,7 +133,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

 #ifdef CONFIG_X86_64
-#if CONFIG_INTEL_TDX_GUEST
+#ifdef CONFIG_INTEL_TDX_GUEST
        if (hv_isolation_type_tdx())
                return __tdx_ms_hv_hypercall(control, input2, input1);
 #endif
  
Michael Kelley (LINUX) Nov. 23, 2022, 2:45 p.m. UTC | #3
From: Dexuan Cui <decui@microsoft.com> Sent: Monday, November 21, 2022 11:52 AM
> 
> A TDX guest uses the GHCI call rather than hv_hypercall_pg.
> 
> In hv_do_hypercall(), Hyper-V requires that the input/output addresses
> must have the vTOM bit set. With current Hyper-V, the bit for TDX is
> bit 47, which is saved into ms_hyperv.shared_gpa_boundary() in
> ms_hyperv_init_platform().
> 
> arch/x86/include/asm/mshyperv.h: hv_do_hypercall() needs
> "struct ms_hyperv_info", which is defined in
> include/asm-generic/mshyperv.h, which can't be included in
> arch/x86/include/asm/mshyperv.h because include/asm-generic/mshyperv.h
> has vmbus_signal_eom() -> hv_set_register(), which is defined in
> arch/x86/include/asm/mshyperv.h.
> 
> Break this circular dependency by introducing a new header file
> for "struct ms_hyperv_info".
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  MAINTAINERS                          |  1 +
>  arch/x86/hyperv/hv_init.c            |  8 ++++++++
>  arch/x86/include/asm/mshyperv.h      | 24 ++++++++++++++++++++++-
>  arch/x86/kernel/cpu/mshyperv.c       |  2 ++
>  include/asm-generic/ms_hyperv_info.h | 29 ++++++++++++++++++++++++++++
>  include/asm-generic/mshyperv.h       | 24 +----------------------
>  6 files changed, 64 insertions(+), 24 deletions(-)
>  create mode 100644 include/asm-generic/ms_hyperv_info.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 256f03904987..455ecaf188fe 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9537,6 +9537,7 @@ F:	drivers/scsi/storvsc_drv.c
>  F:	drivers/uio/uio_hv_generic.c
>  F:	drivers/video/fbdev/hyperv_fb.c
>  F:	include/asm-generic/hyperv-tlfs.h
> +F:	include/asm-generic/ms_hyperv_info.h
>  F:	include/asm-generic/mshyperv.h
>  F:	include/clocksource/hyperv_timer.h
>  F:	include/linux/hyperv.h
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 89954490af93..05682c4e327f 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -432,6 +432,10 @@ void __init hyperv_init(void)
>  	/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
>  	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
> 
> +	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> +	if (hv_isolation_type_tdx())
> +		goto skip_hypercall_pg_init;
> +
>  	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
>  			VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
>  			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> @@ -471,6 +475,7 @@ void __init hyperv_init(void)
>  		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
>  	}
> 
> +skip_hypercall_pg_init:
>  	/*
>  	 * hyperv_init() is called before LAPIC is initialized: see
>  	 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
> @@ -606,6 +611,9 @@ bool hv_is_hyperv_initialized(void)
>  	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
>  		return false;
> 
> +	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> +	if (hv_isolation_type_tdx())
> +		return true;
>  	/*
>  	 * Verify that earlier initialization succeeded by checking
>  	 * that the hypercall page is setup
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 9d593ab2be26..650b4fae2fd8 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -9,7 +9,7 @@
>  #include <asm/hyperv-tlfs.h>
>  #include <asm/nospec-branch.h>
>  #include <asm/paravirt.h>
> -#include <asm/mshyperv.h>
> +#include <asm-generic/ms_hyperv_info.h>
> 
>  union hv_ghcb;
> 
> @@ -48,6 +48,18 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void
> *output)
>  	u64 hv_status;
> 
>  #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> +	if (hv_isolation_type_tdx()) {
> +		if (input_address)
> +			input_address += ms_hyperv.shared_gpa_boundary;
> +
> +		if (output_address)
> +			output_address += ms_hyperv.shared_gpa_boundary;
> +
> +		return __tdx_ms_hv_hypercall(control, output_address,
> +					     input_address);
> +	}
> +#endif

Two thoughts:

1)  The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed entirely
with a tweak.  hv_isolation_type_tdx() already doesn't need the #ifdef as there's
already a stub that returns 'false'.   Then you just need a way to handle
__tdx_ms_hv_hypercall(), or whatever it becomes based on the other discussion.
As long as you can provide a stub that does nothing, the #ifdef won't be needed.

2)  Assuming that we end up with some kind of Hyper-V specific version of
__tdx_hypercall(), and hopefully as a "C" function, could you move the handling
of  ms_hyperv.shared_gpa_boundary into that function?  Then you won't need
to break out a separate include file for struct ms_hyperv.  The Hyper-V TDX
hypercall function must handle both normal and "fast" hypercalls, and the
shared_gpa_boundary adjustment is needed only for normal hypercalls,
but you can check the "fast" bit in the control word to decide.

I haven't coded these ideas, so maybe there are snags I haven't thought of.
But I'm really hoping we can avoid having to create a separate include
file for struct ms_hyperv.

Michael

>  	if (!hv_hypercall_pg)
>  		return U64_MAX;
> 
> @@ -85,6 +97,11 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
>  	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> 
>  #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> +	if (hv_isolation_type_tdx())
> +		return __tdx_ms_hv_hypercall(control, 0, input1);
> +#endif
> +
>  	{
>  		__asm__ __volatile__(CALL_NOSPEC
>  				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -116,6 +133,11 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1,
> u64 input2)
>  	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> 
>  #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> +	if (hv_isolation_type_tdx())
> +		return __tdx_ms_hv_hypercall(control, input2, input1);
> +#endif
> +
>  	{
>  		__asm__ __volatile__("mov %4, %%r8\n"
>  				     CALL_NOSPEC
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 9ad0b0abf0e0..dddccdbc5526 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -349,6 +349,8 @@ static void __init ms_hyperv_init_platform(void)
> 
>  			case HV_ISOLATION_TYPE_TDX:
>  				static_branch_enable(&isolation_type_tdx);
> +
> +				ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
>  				break;
> 
>  			default:
> diff --git a/include/asm-generic/ms_hyperv_info.h b/include/asm-
> generic/ms_hyperv_info.h
> new file mode 100644
> index 000000000000..734583dfea99
> --- /dev/null
> +++ b/include/asm-generic/ms_hyperv_info.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _ASM_GENERIC_MS_HYPERV_INFO_H
> +#define _ASM_GENERIC_MS_HYPERV_INFO_H
> +
> +struct ms_hyperv_info {
> +	u32 features;
> +	u32 priv_high;
> +	u32 misc_features;
> +	u32 hints;
> +	u32 nested_features;
> +	u32 max_vp_index;
> +	u32 max_lp_index;
> +	u32 isolation_config_a;
> +	union {
> +		u32 isolation_config_b;
> +		struct {
> +			u32 cvm_type : 4;
> +			u32 reserved1 : 1;
> +			u32 shared_gpa_boundary_active : 1;
> +			u32 shared_gpa_boundary_bits : 6;
> +			u32 reserved2 : 20;
> +		};
> +	};
> +	u64 shared_gpa_boundary;
> +};
> +extern struct ms_hyperv_info ms_hyperv;
> +
> +#endif
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index bfb9eb9d7215..2ae3e4e4256b 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -25,29 +25,7 @@
>  #include <linux/nmi.h>
>  #include <asm/ptrace.h>
>  #include <asm/hyperv-tlfs.h>
> -
> -struct ms_hyperv_info {
> -	u32 features;
> -	u32 priv_high;
> -	u32 misc_features;
> -	u32 hints;
> -	u32 nested_features;
> -	u32 max_vp_index;
> -	u32 max_lp_index;
> -	u32 isolation_config_a;
> -	union {
> -		u32 isolation_config_b;
> -		struct {
> -			u32 cvm_type : 4;
> -			u32 reserved1 : 1;
> -			u32 shared_gpa_boundary_active : 1;
> -			u32 shared_gpa_boundary_bits : 6;
> -			u32 reserved2 : 20;
> -		};
> -	};
> -	u64 shared_gpa_boundary;
> -};
> -extern struct ms_hyperv_info ms_hyperv;
> +#include <asm-generic/ms_hyperv_info.h>
> 
>  extern void * __percpu *hyperv_pcpu_input_arg;
>  extern void * __percpu *hyperv_pcpu_output_arg;
> --
> 2.25.1
  
Kirill A. Shutemov Nov. 23, 2022, 2:47 p.m. UTC | #4
On Wed, Nov 23, 2022 at 02:14:58AM +0000, Dexuan Cui wrote:
> > From: Dave Hansen <dave.hansen@intel.com>
> > Sent: Monday, November 21, 2022 12:05 PM
> > [...]
> > >  #ifdef CONFIG_X86_64
> > > +#if CONFIG_INTEL_TDX_GUEST
> > > +	if (hv_isolation_type_tdx()) {
> > 
> > >  #ifdef CONFIG_X86_64
> > > +#if CONFIG_INTEL_TDX_GUEST
> > > +	if (hv_isolation_type_tdx())
> > 
> > >  #ifdef CONFIG_X86_64
> > > +#if CONFIG_INTEL_TDX_GUEST
> > > +	if (hv_isolation_type_tdx())
> > > +		return __tdx_ms_hv_hypercall(control, input2, input1);
> > 
> > See any common patterns there?
> > 
> > The "no #ifdef's in C files" rule would be good to apply here.  Please
> > do one #ifdef in a header.
> 
> Sorry, I should use #ifdef rather than #if. I'll fix it like the below.

No, can we hide preprocessor hell inside hv_isolation_type_tdx()?

Like make it return false for !CONFIG_INTEL_TDX_GUEST and avoid all
#if/#ifdefs in C file.
  
Dexuan Cui Nov. 23, 2022, 6:13 p.m. UTC | #5
> From: Kirill A. Shutemov <kirill@shutemov.name>
> [...]
> > >
> > > The "no #ifdef's in C files" rule would be good to apply here.  Please
> > > do one #ifdef in a header.
> >
> > Sorry, I should use #ifdef rather than #if. I'll fix it like the below.
> 
> No, can we hide preprocessor hell inside hv_isolation_type_tdx()?
> 
> Like make it return false for !CONFIG_INTEL_TDX_GUEST and avoid all
> #if/#ifdefs in C file.

Ok, let me try to apply Michael's good ideas he shared in another email.
  
Kuppuswamy Sathyanarayanan Nov. 23, 2022, 6:18 p.m. UTC | #6
On 11/23/22 6:47 AM, Kirill A. Shutemov wrote:
> On Wed, Nov 23, 2022 at 02:14:58AM +0000, Dexuan Cui wrote:
>>> From: Dave Hansen <dave.hansen@intel.com>
>>> Sent: Monday, November 21, 2022 12:05 PM
>>> [...]
>>>>  #ifdef CONFIG_X86_64
>>>> +#if CONFIG_INTEL_TDX_GUEST
>>>> +	if (hv_isolation_type_tdx()) {
>>>
>>>>  #ifdef CONFIG_X86_64
>>>> +#if CONFIG_INTEL_TDX_GUEST
>>>> +	if (hv_isolation_type_tdx())
>>>
>>>>  #ifdef CONFIG_X86_64
>>>> +#if CONFIG_INTEL_TDX_GUEST
>>>> +	if (hv_isolation_type_tdx())
>>>> +		return __tdx_ms_hv_hypercall(control, input2, input1);
>>>
>>> See any common patterns there?
>>>
>>> The "no #ifdef's in C files" rule would be good to apply here.  Please
>>> do one #ifdef in a header.
>>
>> Sorry, I should use #ifdef rather than #if. I'll fix it like the below.
> 
> No, can we hide preprocessor hell inside hv_isolation_type_tdx()?
> 
> Like make it return false for !CONFIG_INTEL_TDX_GUEST and avoid all
> #if/#ifdefs in C file.

There is a weak reference to hv_isolation_type_tdx() which returns false
by default. So defining the hv_isolation_type_tdx within
#ifdef CONFIG_INTEL_TDX_GUEST would be enough.

> 
>
  
Dexuan Cui Nov. 23, 2022, 7:07 p.m. UTC | #7
> From: Sathyanarayanan Kuppuswamy
> <sathyanarayanan.kuppuswamy@linux.intel.com>
> > [...]
> > Like make it return false for !CONFIG_INTEL_TDX_GUEST and avoid all
> > #if/#ifdefs in C file.
> 
> There is a weak reference to hv_isolation_type_tdx() which returns false
> by default. So defining the hv_isolation_type_tdx within
> #ifdef CONFIG_INTEL_TDX_GUEST would be enough.

Will do. Thanks!
  
Dexuan Cui Nov. 28, 2022, 12:58 a.m. UTC | #8
> From: Michael Kelley (LINUX) <mikelley@microsoft.com>
> Sent: Wednesday, November 23, 2022 6:45 AM
> To: Dexuan Cui <decui@microsoft.com>; ak@linux.intel.com; arnd@arndb.de;
> 
> Two thoughts:
> 
> 1)  The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed
> entirely
> with a tweak.  hv_isolation_type_tdx() already doesn't need the #ifdef as
> there's
> already a stub that returns 'false'.   Then you just need a way to handle
> __tdx_ms_hv_hypercall(), or whatever it becomes based on the other
> discussion.
> As long as you can provide a stub that does nothing, the #ifdef won't be
> needed.
> 
> 2)  Assuming that we end up with some kind of Hyper-V specific version of
> __tdx_hypercall(), and hopefully as a "C" function, could you move the
> handling
> of  ms_hyperv.shared_gpa_boundary into that function?  Then you won't
> need
> to break out a separate include file for struct ms_hyperv.  The Hyper-V TDX
> hypercall function must handle both normal and "fast" hypercalls, and the
> shared_gpa_boundary adjustment is needed only for normal hypercalls,
> but you can check the "fast" bit in the control word to decide.
> 
> I haven't coded these ideas, so maybe there are snags I haven't thought of.
> But I'm really hoping we can avoid having to create a separate include
> file for struct ms_hyperv.
> 
> Michael

Thanks for the great suggestions! Now the code looks like this:
(the full list of v2 patches are still WIP: 
 https://github.com/dcui/tdx/commits/decui/hyperv-next/2022-1121/v6.1-rc5/v2)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 13ccb52eecd7..00e5c84e380b 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -276,6 +276,27 @@ bool hv_isolation_type_tdx(void)
 {
 	return static_branch_unlikely(&isolation_type_tdx);
 }
+
+u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
+{
+	struct tdx_hypercall_args args = { };
+
+	if (!(control & HV_HYPERCALL_FAST_BIT)) {
+		if (input_addr)
+			input_addr += ms_hyperv.shared_gpa_boundary;
+
+		if (output_addr)
+			output_addr += ms_hyperv.shared_gpa_boundary;
+	}
+
+	args.r10 = control;
+	args.rdx = input_addr;
+	args.r8  = output_addr;
+
+	(void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+	return args.r11;
+}
 #endif
 
 /*

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 8a2cafec4675..1be7bcf0d7d1 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -39,6 +39,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
 
+u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr);
+
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
 	u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +48,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx())
+		return hv_tdx_hypercall(control, input_address, output_address);
+
 	if (!hv_hypercall_pg)
 		return U64_MAX;
 
@@ -83,6 +88,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx())
+		return hv_tdx_hypercall(control, input1, 0);
+
 	{
 		__asm__ __volatile__(CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +122,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
 
 #ifdef CONFIG_X86_64
+	if (hv_isolation_type_tdx())
+		return hv_tdx_hypercall(control, input1, input2);
+
 	{
 		__asm__ __volatile__("mov %4, %%r8\n"
 				     CALL_NOSPEC
  
Michael Kelley (LINUX) Nov. 28, 2022, 1:20 a.m. UTC | #9
From: Dexuan Cui <decui@microsoft.com> Sent: Sunday, November 27, 2022 4:59 PM
> 
> > From: Michael Kelley (LINUX) <mikelley@microsoft.com>
> > Sent: Wednesday, November 23, 2022 6:45 AM
> > To: Dexuan Cui <decui@microsoft.com>; ak@linux.intel.com; arnd@arndb.de;
> >
> > Two thoughts:
> >
> > 1)  The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed entirely
> > with a tweak.  hv_isolation_type_tdx() already doesn't need the #ifdef asthere's
> > already a stub that returns 'false'.   Then you just need a way to handle
> > __tdx_ms_hv_hypercall(), or whatever it becomes based on the other discussion.
> > As long as you can provide a stub that does nothing, the #ifdef won't be needed.
> >
> > 2)  Assuming that we end up with some kind of Hyper-V specific version of
> > __tdx_hypercall(), and hopefully as a "C" function, could you move the handling
> > of  ms_hyperv.shared_gpa_boundary into that function?  Then you won't need
> > to break out a separate include file for struct ms_hyperv.  The Hyper-V TDX
> > hypercall function must handle both normal and "fast" hypercalls, and the
> > shared_gpa_boundary adjustment is needed only for normal hypercalls,
> > but you can check the "fast" bit in the control word to decide.
> >
> > I haven't coded these ideas, so maybe there are snags I haven't thought of.
> > But I'm really hoping we can avoid having to create a separate include
> > file for struct ms_hyperv.
> >
> > Michael
> 
> Thanks for the great suggestions! Now the code looks like this:
> (the full list of v2 patches are still WIP:
> 
> https://github.com/dcui/tdx/commits/decui/hyperv-next/2022-1121/v6.1-rc5/v2 
> 
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 13ccb52eecd7..00e5c84e380b 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -276,6 +276,27 @@ bool hv_isolation_type_tdx(void)
>  {
>  	return static_branch_unlikely(&isolation_type_tdx);
>  }
> +
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> +{
> +	struct tdx_hypercall_args args = { };
> +
> +	if (!(control & HV_HYPERCALL_FAST_BIT)) {
> +		if (input_addr)
> +			input_addr += ms_hyperv.shared_gpa_boundary;

At one point when working with the vTOM code, I realized that or'ing in
the shared_gpa_boundary is probably safer than add'ing it, just in case
the physical address already has vTOM set.  I don't know if that possibility
exists here, but it's something to consider as being slightly more robust.

> +
> +		if (output_addr)
> +			output_addr += ms_hyperv.shared_gpa_boundary;

Same here.

> +	}
> +
> +	args.r10 = control;
> +	args.rdx = input_addr;
> +	args.r8  = output_addr;
> +
> +	(void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> +
> +	return args.r11;
> +}
>  #endif
> 
>  /*
> 
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 8a2cafec4675..1be7bcf0d7d1 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -39,6 +39,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32
> num_pages);
>  int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
>  int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> 
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr);
> +
>  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  {
>  	u64 input_address = input ? virt_to_phys(input) : 0;
> @@ -46,6 +48,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void
> *output)
>  	u64 hv_status;
> 
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input_address, output_address);
> +
>  	if (!hv_hypercall_pg)
>  		return U64_MAX;
> 
> @@ -83,6 +88,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
>  	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> 
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input1, 0);
> +
>  	{
>  		__asm__ __volatile__(CALL_NOSPEC
>  				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -114,6 +122,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1,
> u64 input2)
>  	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> 
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input1, input2);
> +
>  	{
>  		__asm__ __volatile__("mov %4, %%r8\n"
>  				     CALL_NOSPEC

Yes.  This new structure LGTM.

Michael
  
Kuppuswamy Sathyanarayanan Nov. 28, 2022, 1:21 a.m. UTC | #10
On 11/27/22 4:58 PM, Dexuan Cui wrote:
>> From: Michael Kelley (LINUX) <mikelley@microsoft.com>
>> Sent: Wednesday, November 23, 2022 6:45 AM
>> To: Dexuan Cui <decui@microsoft.com>; ak@linux.intel.com; arnd@arndb.de;
>>
>> Two thoughts:
>>
>> 1)  The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed
>> entirely
>> with a tweak.  hv_isolation_type_tdx() already doesn't need the #ifdef as
>> there's
>> already a stub that returns 'false'.   Then you just need a way to handle
>> __tdx_ms_hv_hypercall(), or whatever it becomes based on the other
>> discussion.
>> As long as you can provide a stub that does nothing, the #ifdef won't be
>> needed.
>>
>> 2)  Assuming that we end up with some kind of Hyper-V specific version of
>> __tdx_hypercall(), and hopefully as a "C" function, could you move the
>> handling
>> of  ms_hyperv.shared_gpa_boundary into that function?  Then you won't
>> need
>> to break out a separate include file for struct ms_hyperv.  The Hyper-V TDX
>> hypercall function must handle both normal and "fast" hypercalls, and the
>> shared_gpa_boundary adjustment is needed only for normal hypercalls,
>> but you can check the "fast" bit in the control word to decide.
>>
>> I haven't coded these ideas, so maybe there are snags I haven't thought of.
>> But I'm really hoping we can avoid having to create a separate include
>> file for struct ms_hyperv.
>>
>> Michael
> 
> Thanks for the great suggestions! Now the code looks like this:
> (the full list of v2 patches are still WIP: 
>  https://github.com/dcui/tdx/commits/decui/hyperv-next/2022-1121/v6.1-rc5/v2)
> 
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 13ccb52eecd7..00e5c84e380b 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -276,6 +276,27 @@ bool hv_isolation_type_tdx(void)
>  {
>  	return static_branch_unlikely(&isolation_type_tdx);
>  }
> +
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> +{
> +	struct tdx_hypercall_args args = { };

I think you mean initialize to 0.

> +
> +	if (!(control & HV_HYPERCALL_FAST_BIT)) {
> +		if (input_addr)
> +			input_addr += ms_hyperv.shared_gpa_boundary;
> +
> +		if (output_addr)
> +			output_addr += ms_hyperv.shared_gpa_boundary;
> +	}
> +
> +	args.r10 = control;
> +	args.rdx = input_addr;
> +	args.r8  = output_addr;
> +
> +	(void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> +
> +	return args.r11;
> +}
>  #endif
>  
>  /*
> 
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 8a2cafec4675..1be7bcf0d7d1 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -39,6 +39,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
>  int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
>  int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
>  
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr);
> +
>  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  {
>  	u64 input_address = input ? virt_to_phys(input) : 0;
> @@ -46,6 +48,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  	u64 hv_status;
>  
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input_address, output_address);
> +
>  	if (!hv_hypercall_pg)
>  		return U64_MAX;
>  
> @@ -83,6 +88,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
>  	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>  
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input1, 0);
> +
>  	{
>  		__asm__ __volatile__(CALL_NOSPEC
>  				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -114,6 +122,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
>  	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>  
>  #ifdef CONFIG_X86_64
> +	if (hv_isolation_type_tdx())
> +		return hv_tdx_hypercall(control, input1, input2);
> +
>  	{
>  		__asm__ __volatile__("mov %4, %%r8\n"
>  				     CALL_NOSPEC
  
Dexuan Cui Nov. 28, 2022, 1:36 a.m. UTC | #11
> From: Michael Kelley (LINUX) <mikelley@microsoft.com>
> Sent: Sunday, November 27, 2022 5:21 PM
> > [...]
> > +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> > +{
> > +	struct tdx_hypercall_args args = { };
> > +
> > +	if (!(control & HV_HYPERCALL_FAST_BIT)) {
> > +		if (input_addr)
> > +			input_addr += ms_hyperv.shared_gpa_boundary;
> 
> At one point when working with the vTOM code, I realized that or'ing in
> the shared_gpa_boundary is probably safer than add'ing it, just in case
> the physical address already has vTOM set.  I don't know if that possibility
> exists here, but it's something to consider as being slightly more robust.
> 
> > +
> > +		if (output_addr)
> > +			output_addr += ms_hyperv.shared_gpa_boundary;
> 
> Same here.

Ok, will use |= in all the reference.
  
Dexuan Cui Nov. 28, 2022, 1:55 a.m. UTC | #12
> From: Sathyanarayanan Kuppuswamy
> <sathyanarayanan.kuppuswamy@linux.intel.com>
> > +
> > +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> > +{
> > +	struct tdx_hypercall_args args = { };
> 
> I think you mean initialize to 0.
Yes, it's the same as "struct tdx_hypercall_args args = { 0 };"

I was educated to use "{ }", which is said to be more preferred, 
compared to "{ 0 } ":

https://lwn.net/ml/linux-kernel/YG1qF8lULn8lLJa/@unreal/
https://lwn.net/ml/linux-kernel/MW2PR2101MB0892AC106C360F2A209560A8BF759@MW2PR2101MB0892.namprd21.prod.outlook.com/
  
Dave Hansen Nov. 28, 2022, 3:22 p.m. UTC | #13
On 11/27/22 16:58, Dexuan Cui wrote:
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> +{
> +	struct tdx_hypercall_args args = { };
> +
> +	if (!(control & HV_HYPERCALL_FAST_BIT)) {
> +		if (input_addr)
> +			input_addr += ms_hyperv.shared_gpa_boundary;
> +
> +		if (output_addr)
> +			output_addr += ms_hyperv.shared_gpa_boundary;
> +	}

This:

>
https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface

makes it sound like HV_HYPERCALL_FAST_BIT says whether arguments go in
registers (fast) or memory (slow).  But this hv_tdx_hypercall() function
looks like it takes addresses only.

*Is* there a register based calling convention to make Hyper-V
hypercalls when running under TDX?

Also, is this the output address manipulation fundamentally different from:

	output_addr = cc_mkdec(output_addr);

?  Decrypted addresses are the shared ones, right?
  
Dexuan Cui Nov. 28, 2022, 7:03 p.m. UTC | #14
> From: Dave Hansen <dave.hansen@intel.com>
> Sent: Monday, November 28, 2022 7:22 AM
> [...]
> On 11/27/22 16:58, Dexuan Cui wrote:
> > +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> > +{
> > +	struct tdx_hypercall_args args = { };
> > +
> > +	if (!(control & HV_HYPERCALL_FAST_BIT)) {
> > +		if (input_addr)
> > +			input_addr += ms_hyperv.shared_gpa_boundary;
> > +
> > +		if (output_addr)
> > +			output_addr += ms_hyperv.shared_gpa_boundary;
> > +	}
> 
> This:
>  [...]
> makes it sound like HV_HYPERCALL_FAST_BIT says whether arguments go in
> registers (fast) or memory (slow).  But this hv_tdx_hypercall() function
> looks like it takes addresses only.

Good point! When hv_tdx_hypercall() is called from hv_do_fast_hypercall8()
and hv_do_fast_hypercall16(), actually the two parameters are not memory
addresses. I'll rename the parameters to param1 and param2. 

I also realized I need to export the function for drivers. 

> *Is* there a register based calling convention to make Hyper-V
> hypercalls when running under TDX?

When hv_tdx_hypercall() is called from hv_do_fast_hypercall8()
and hv_do_fast_hypercall16(), the params are indeed passed via registers
rather than memory.

> Also, is this the output address manipulation fundamentally different from:
> 
> 	output_addr = cc_mkdec(output_addr);
> 
> ?  Decrypted addresses are the shared ones, right?
Yes. 

Good point -- I'll use the updated version:

u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
        struct tdx_hypercall_args args = { };

        if (!(control & HV_HYPERCALL_FAST_BIT)) {
                if (param1)
                        param1 = cc_mkdec(param1);

                if (param2)
                        param2 = cc_mkdec(param2);
        }

        args.r10 = control;
        args.rdx = param1;
        args.r8  = param2;

        (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);

        return args.r11;
}
EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
  
Dave Hansen Nov. 28, 2022, 7:11 p.m. UTC | #15
On 11/28/22 11:03, Dexuan Cui wrote:
...
> u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
> {
>         struct tdx_hypercall_args args = { };
> 
>         if (!(control & HV_HYPERCALL_FAST_BIT)) {
>                 if (param1)
>                         param1 = cc_mkdec(param1);
> 
>                 if (param2)
>                         param2 = cc_mkdec(param2);
>         }
> 
>         args.r10 = control;
>         args.rdx = param1;
>         args.r8  = param2;
> 
>         (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> 
>         return args.r11;
> }

I still think this is problematic.

The cc_mkdec() should be done on the parameters when the code still
*knows* that they are addresses.

How do we know, for instance, that no hypercall using this interface
will *ever* take the 0x0 physical address as an argument?
  
Dexuan Cui Nov. 28, 2022, 7:37 p.m. UTC | #16
> From: Dave Hansen <dave.hansen@intel.com>
> Sent: Monday, November 28, 2022 11:11 AM
> On 11/28/22 11:03, Dexuan Cui wrote:
> ...
> > u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
> > {
> >         struct tdx_hypercall_args args = { };
> >
> >         if (!(control & HV_HYPERCALL_FAST_BIT)) {
> >                 if (param1)
> >                         param1 = cc_mkdec(param1);
> >
> >                 if (param2)
> >                         param2 = cc_mkdec(param2);
> >         }
> >
> >         args.r10 = control;
> >         args.rdx = param1;
> >         args.r8  = param2;
> >
> >         (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> >
> >         return args.r11;
> > }
> 
> I still think this is problematic.
> 
> The cc_mkdec() should be done on the parameters when the code still
> *knows* that they are addresses.

Makes sense.
 
> How do we know, for instance, that no hypercall using this interface
> will *ever* take the 0x0 physical address as an argument?

A 0x0 physical address as an argument still works: the 0 is passed
to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
an error (if the param is needed), and returns an "invalid parameter"
error code to the guest.

Here is the updated version:

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 70170049be2c..41395891d112 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -242,6 +242,20 @@ bool hv_isolation_type_tdx(void)
 {
        return static_branch_unlikely(&isolation_type_tdx);
 }
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+       struct tdx_hypercall_args args = { };
+
+       args.r10 = control;
+       args.rdx = param1;
+       args.r8  = param2;
+
+       (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+       return args.r11;
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
 #endif

 /*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 9cc6db45c3bc..ea053af067a2 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -10,6 +10,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/paravirt.h>
 #include <asm/mshyperv.h>
+#include <asm/coco.h>

 union hv_ghcb;

@@ -39,6 +40,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
        u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +49,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        u64 hv_status;

 #ifdef CONFIG_X86_64
+       if (hv_isolation_type_tdx()) {
+               u64 param1 = input_address  ? cc_mkdec(input_address)  : 0;
+               u64 param2 = output_address ? cc_mkdec(output_address) : 0;
+               return hv_tdx_hypercall(control, param1, param2);
+       }
+
        if (!hv_hypercall_pg)
                return U64_MAX;

@@ -83,6 +92,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

 #ifdef CONFIG_X86_64
+       if (hv_isolation_type_tdx())
+               return hv_tdx_hypercall(control, input1, 0);
+
        {
                __asm__ __volatile__(CALL_NOSPEC
                                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +126,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

 #ifdef CONFIG_X86_64
+       if (hv_isolation_type_tdx())
+               return hv_tdx_hypercall(control, input1, input2);
+
        {
                __asm__ __volatile__("mov %4, %%r8\n"
                                     CALL_NOSPEC
  
Dave Hansen Nov. 28, 2022, 7:48 p.m. UTC | #17
On 11/28/22 11:37, Dexuan Cui wrote:
>> From: Dave Hansen <dave.hansen@intel.com>
...
>> How do we know, for instance, that no hypercall using this interface
>> will *ever* take the 0x0 physical address as an argument?
> 
> A 0x0 physical address as an argument still works: the 0 is passed
> to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
> an error (if the param is needed), and returns an "invalid parameter"
> error code to the guest.

I don't see any data in the public documentation to support the claim
that 0x0 is a special argument for either the input or output GPA
parameters.

This is despite some actual discussion on things like their alignment
requirements[1] and interactions with overlay pages.

So, either you are mistaken about that behavior, or it looks like the
documentation needs updating.

1.
https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface
  
Dexuan Cui Nov. 28, 2022, 8:36 p.m. UTC | #18
> From: Dave Hansen <dave.hansen@intel.com>
> Sent: Monday, November 28, 2022 11:48 AM
> 
> On 11/28/22 11:37, Dexuan Cui wrote:
> >> From: Dave Hansen <dave.hansen@intel.com>
> ...
> >> How do we know, for instance, that no hypercall using this interface
> >> will *ever* take the 0x0 physical address as an argument?
> >
> > A 0x0 physical address as an argument still works: the 0 is passed
> > to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
> > an error (if the param is needed), and returns an "invalid parameter"
> > error code to the guest.
> 
> I don't see any data in the public documentation to support the claim
> that 0x0 is a special argument for either the input or output GPA
> parameters.

Sorry, I didn't make it clear. I meant: for some hypercalls, Hyper-V
doesn't really need an "input" param or an "output" param, so Linux
passes 0 for such a "not needed" param. Maybe Linux can pass any
value for such a "not needed" param, if Hyper-V just ignores the
"not needed" param. Some examples:

arch/x86/hyperv/hv_init.c: hv_get_partition_id():
    status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);

drivers/pci/controller/pci-hyperv.c:
    res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
                      params, NULL);


If a param is needed and is supposed to be a non-zero memory address,
Linux running as a TDX guest must pass "cc_mkdec(address)" rather than
"address", otherwise I suspect the result is undefined, e.g. Hyper-V might
return an error to the guest, or Hyper-V might just terminate the guest,
especially if Linux passes 0 or cc_mkdec(0).

Currently all the users of hv_do_hypercall() pass valid arguments.
 
> This is despite some actual discussion on things like their alignment
> requirements[1] and interactions with overlay pages.
> 
> So, either you are mistaken about that behavior, or it looks like the
> documentation needs updating.

The above is just my conjecture. I don't know how exactly Hyper-V works.
  
Dave Hansen Nov. 28, 2022, 9:15 p.m. UTC | #19
On 11/28/22 12:36, Dexuan Cui wrote:
>> From: Dave Hansen <dave.hansen@intel.com>
>> Sent: Monday, November 28, 2022 11:48 AM
>>
>> On 11/28/22 11:37, Dexuan Cui wrote:
>>>> From: Dave Hansen <dave.hansen@intel.com>
>> ...
>>>> How do we know, for instance, that no hypercall using this interface
>>>> will *ever* take the 0x0 physical address as an argument?
>>>
>>> A 0x0 physical address as an argument still works: the 0 is passed
>>> to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
>>> an error (if the param is needed), and returns an "invalid parameter"
>>> error code to the guest.
>>
>> I don't see any data in the public documentation to support the claim
>> that 0x0 is a special argument for either the input or output GPA
>> parameters.
> 
> Sorry, I didn't make it clear. I meant: for some hypercalls, Hyper-V
> doesn't really need an "input" param or an "output" param, so Linux
> passes 0 for such a "not needed" param. Maybe Linux can pass any
> value for such a "not needed" param, if Hyper-V just ignores the
> "not needed" param. Some examples:
> 
> arch/x86/hyperv/hv_init.c: hv_get_partition_id():
>     status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
> 
> drivers/pci/controller/pci-hyperv.c:
>     res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
>                       params, NULL);
> 
> 
> If a param is needed and is supposed to be a non-zero memory address,
> Linux running as a TDX guest must pass "cc_mkdec(address)" rather than
> "address", otherwise I suspect the result is undefined, e.g. Hyper-V might
> return an error to the guest, or Hyper-V might just terminate the guest,
> especially if Linux passes 0 or cc_mkdec(0).

This is the point where the maintainer gets a wee bit grumpy.  The page
I just pointed you to (twice) has this nice quote:

	If the hypercall involves no input or output parameters, the
	hypervisor ignores the corresponding GPA pointer.

So, bravo to your colleagues who nicely documented this.  But,
unfortunately, you didn't take advantage of their great documentation.
Instead, you made me search for it to provide actual facts to combat the
weak conjecture you offered above.

>> This is despite some actual discussion on things like their alignment
>> requirements[1] and interactions with overlay pages.
>>
>> So, either you are mistaken about that behavior, or it looks like the
>> documentation needs updating.
> 
> The above is just my conjecture. I don't know how exactly Hyper-V works.

I do!  I literally Googled "HV HYPERCALL FAST BIT" and the first page
that came up told me all I needed to know.

I could be merrily merging your patches.  But, instead, I'm reading
documentation that can be trivially found and repeatedly regurgitating it.

Please, slow down.  Take some time to answer emails and do it more
deliberately.  This isn't a race.
  
Dexuan Cui Nov. 28, 2022, 9:53 p.m. UTC | #20
> From: Dave Hansen <dave.hansen@intel.com>
> Sent: Monday, November 28, 2022 1:15 PM
> [...]
> This is the point where the maintainer gets a wee bit grumpy.  The page
> I just pointed you to (twice) has this nice quote:
> 
> 	If the hypercall involves no input or output parameters, the
> 	hypervisor ignores the corresponding GPA pointer.
> 
> So, bravo to your colleagues who nicely documented this.  But,
> unfortunately, you didn't take advantage of their great documentation.
> Instead, you made me search for it to provide actual facts to combat the
> weak conjecture you offered above.

Sorry, I should really have read the document before starting to conjecture...

> > The above is just my conjecture. I don't know how exactly Hyper-V works.
> 
> I do!  I literally Googled "HV HYPERCALL FAST BIT" and the first page
> that came up told me all I needed to know.
> 
> I could be merrily merging your patches.  But, instead, I'm reading
> documentation that can be trivially found and repeatedly regurgitating it.
> 
> Please, slow down.  Take some time to answer emails and do it more
> deliberately.  This isn't a race.

Thanks, I learn a lesson. Hopefully the below looks good enough. 

Compared to the earlier version, the only changes are: 1) simplified the
change in hv_do_hypercall(); 2) added a comment before the function.

BTW, I can't post the v2 patchset right now, as I'm waiting for Kirill to
expand __tdx_hypercall() first:
https://lwn.net/ml/linux-kernel/SA1PR21MB1335EEEC1DE4CB42F6477A5EBF0C9@SA1PR21MB1335.namprd21.prod.outlook.com/
I'm also thinking about how to properly address the vmalloc
issue with tdx_enc_status_changed().

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 70170049be2c..41395891d112 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -242,6 +242,20 @@ bool hv_isolation_type_tdx(void)
 {
        return static_branch_unlikely(&isolation_type_tdx);
 }
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+       struct tdx_hypercall_args args = { };
+
+       args.r10 = control;
+       args.rdx = param1;
+       args.r8  = param2;
+
+       (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+       return args.r11;
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
 #endif

 /*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 9cc6db45c3bc..055b6fb8941f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -10,6 +10,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/paravirt.h>
 #include <asm/mshyperv.h>
+#include <asm/coco.h>

 union hv_ghcb;

@@ -39,6 +40,12 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
        u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +53,10 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        u64 hv_status;

 #ifdef CONFIG_X86_64
+       if (hv_isolation_type_tdx())
+               return hv_tdx_hypercall(control,
+                                   cc_mkdec(input_address),
+                                   cc_mkdec(output_address));
        if (!hv_hypercall_pg)
                return U64_MAX;

@@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

 #ifdef CONFIG_X86_64
+       if (hv_isolation_type_tdx())
+               return hv_tdx_hypercall(control, input1, 0);
+
        {
                __asm__ __volatile__(CALL_NOSPEC
                                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +128,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

 #ifdef CONFIG_X86_64
+       if (hv_isolation_type_tdx())
+               return hv_tdx_hypercall(control, input1, input2);
+
        {
                __asm__ __volatile__("mov %4, %%r8\n"
                                     CALL_NOSPEC
  

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 256f03904987..455ecaf188fe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9537,6 +9537,7 @@  F:	drivers/scsi/storvsc_drv.c
 F:	drivers/uio/uio_hv_generic.c
 F:	drivers/video/fbdev/hyperv_fb.c
 F:	include/asm-generic/hyperv-tlfs.h
+F:	include/asm-generic/ms_hyperv_info.h
 F:	include/asm-generic/mshyperv.h
 F:	include/clocksource/hyperv_timer.h
 F:	include/linux/hyperv.h
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 89954490af93..05682c4e327f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -432,6 +432,10 @@  void __init hyperv_init(void)
 	/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
 	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
 
+	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+	if (hv_isolation_type_tdx())
+		goto skip_hypercall_pg_init;
+
 	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
 			VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
 			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
@@ -471,6 +475,7 @@  void __init hyperv_init(void)
 		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 	}
 
+skip_hypercall_pg_init:
 	/*
 	 * hyperv_init() is called before LAPIC is initialized: see
 	 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
@@ -606,6 +611,9 @@  bool hv_is_hyperv_initialized(void)
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return false;
 
+	/* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+	if (hv_isolation_type_tdx())
+		return true;
 	/*
 	 * Verify that earlier initialization succeeded by checking
 	 * that the hypercall page is setup
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 9d593ab2be26..650b4fae2fd8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -9,7 +9,7 @@ 
 #include <asm/hyperv-tlfs.h>
 #include <asm/nospec-branch.h>
 #include <asm/paravirt.h>
-#include <asm/mshyperv.h>
+#include <asm-generic/ms_hyperv_info.h>
 
 union hv_ghcb;
 
@@ -48,6 +48,18 @@  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
+#if CONFIG_INTEL_TDX_GUEST
+	if (hv_isolation_type_tdx()) {
+		if (input_address)
+			input_address += ms_hyperv.shared_gpa_boundary;
+
+		if (output_address)
+			output_address += ms_hyperv.shared_gpa_boundary;
+
+		return __tdx_ms_hv_hypercall(control, output_address,
+					     input_address);
+	}
+#endif
 	if (!hv_hypercall_pg)
 		return U64_MAX;
 
@@ -85,6 +97,11 @@  static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
 
 #ifdef CONFIG_X86_64
+#if CONFIG_INTEL_TDX_GUEST
+	if (hv_isolation_type_tdx())
+		return __tdx_ms_hv_hypercall(control, 0, input1);
+#endif
+
 	{
 		__asm__ __volatile__(CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -116,6 +133,11 @@  static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
 
 #ifdef CONFIG_X86_64
+#if CONFIG_INTEL_TDX_GUEST
+	if (hv_isolation_type_tdx())
+		return __tdx_ms_hv_hypercall(control, input2, input1);
+#endif
+
 	{
 		__asm__ __volatile__("mov %4, %%r8\n"
 				     CALL_NOSPEC
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9ad0b0abf0e0..dddccdbc5526 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -349,6 +349,8 @@  static void __init ms_hyperv_init_platform(void)
 
 			case HV_ISOLATION_TYPE_TDX:
 				static_branch_enable(&isolation_type_tdx);
+
+				ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
 				break;
 
 			default:
diff --git a/include/asm-generic/ms_hyperv_info.h b/include/asm-generic/ms_hyperv_info.h
new file mode 100644
index 000000000000..734583dfea99
--- /dev/null
+++ b/include/asm-generic/ms_hyperv_info.h
@@ -0,0 +1,29 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_GENERIC_MS_HYPERV_INFO_H
+#define _ASM_GENERIC_MS_HYPERV_INFO_H
+
+struct ms_hyperv_info {
+	u32 features;
+	u32 priv_high;
+	u32 misc_features;
+	u32 hints;
+	u32 nested_features;
+	u32 max_vp_index;
+	u32 max_lp_index;
+	u32 isolation_config_a;
+	union {
+		u32 isolation_config_b;
+		struct {
+			u32 cvm_type : 4;
+			u32 reserved1 : 1;
+			u32 shared_gpa_boundary_active : 1;
+			u32 shared_gpa_boundary_bits : 6;
+			u32 reserved2 : 20;
+		};
+	};
+	u64 shared_gpa_boundary;
+};
+extern struct ms_hyperv_info ms_hyperv;
+
+#endif
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..2ae3e4e4256b 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -25,29 +25,7 @@ 
 #include <linux/nmi.h>
 #include <asm/ptrace.h>
 #include <asm/hyperv-tlfs.h>
-
-struct ms_hyperv_info {
-	u32 features;
-	u32 priv_high;
-	u32 misc_features;
-	u32 hints;
-	u32 nested_features;
-	u32 max_vp_index;
-	u32 max_lp_index;
-	u32 isolation_config_a;
-	union {
-		u32 isolation_config_b;
-		struct {
-			u32 cvm_type : 4;
-			u32 reserved1 : 1;
-			u32 shared_gpa_boundary_active : 1;
-			u32 shared_gpa_boundary_bits : 6;
-			u32 reserved2 : 20;
-		};
-	};
-	u64 shared_gpa_boundary;
-};
-extern struct ms_hyperv_info ms_hyperv;
+#include <asm-generic/ms_hyperv_info.h>
 
 extern void * __percpu *hyperv_pcpu_input_arg;
 extern void * __percpu *hyperv_pcpu_output_arg;