[v2,5/6] x86/hyperv: Support hypercalls for TDX guests
Commit Message
A TDX guest uses the GHCI call rather than hv_hypercall_pg.
In hv_do_hypercall(), Hyper-V requires that the input/output addresses
have the cc_mask set.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
Changes in v2:
Implemented hv_tdx_hypercall() in C rather than in assembly code.
Renamed the parameter names of hv_tdx_hypercall().
Used cc_mkdec() directly in hv_do_hypercall().
arch/x86/hyperv/hv_init.c | 8 ++++++++
arch/x86/hyperv/ivm.c | 14 ++++++++++++++
arch/x86/include/asm/mshyperv.h | 17 +++++++++++++++++
3 files changed, 39 insertions(+)
Comments
> From: Michael Kelley (LINUX) <mikelley@microsoft.com>
> Sent: Monday, December 12, 2022 8:39 AM
> > [...]
> > A TDX guest uses the GHCI call rather than hv_hypercall_pg.
> >
> > In hv_do_hypercall(), Hyper-V requires that the input/output addresses
> > must have the cc_mask.
>
> Is it a requirement that the input/output addresses refer to guest memory
> pages that have been marked as shared/decrypted? For example, I don't see
Yes.
> any code to mark the hyperv_pcpu_input_arg page as shared/decrypted.
> Do the use cases for the hyperv_pcpu_input_arg page just not occur in a
> TDX VM?
I missed this when sending v2, and I realized this when testing DDA.
Will get this fixed in v3.
BTW, I noticed Tianyu posted a similar patch:
[RFC PATCH V2 10/18] drivers: hv: Decrypt percpu hvcall input arg page in sev-snp enlightened guest
On Tue, 6 Dec 2022 16:33:24 -0800
Dexuan Cui <decui@microsoft.com> wrote:
> A TDX guest uses the GHCI call rather than hv_hypercall_pg.
>
> In hv_do_hypercall(), Hyper-V requires that the input/output addresses
> must have the cc_mask.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
>
> ---
>
> Changes in v2:
> Implemented hv_tdx_hypercall() in C rather than in assembly code.
> Renamed the parameter names of hv_tdx_hypercall().
> Used cc_mkdec() directly in hv_do_hypercall().
>
> arch/x86/hyperv/hv_init.c | 8 ++++++++
> arch/x86/hyperv/ivm.c | 14 ++++++++++++++
> arch/x86/include/asm/mshyperv.h | 17 +++++++++++++++++
> 3 files changed, 39 insertions(+)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index a823fde1ad7f..c0ba53ad8b8e 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -430,6 +430,10 @@ void __init hyperv_init(void)
> /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
> hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
>
> + /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> + if (hv_isolation_type_tdx())
> + goto skip_hypercall_pg_init;
> +
> hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1,
> VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
> VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> @@ -469,6 +473,7 @@ void __init hyperv_init(void)
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> }
>
> +skip_hypercall_pg_init:
> /*
> * hyperv_init() is called before LAPIC is initialized: see
> * apic_intr_mode_init() -> x86_platform.apic_post_init() and
> @@ -604,6 +609,9 @@ bool hv_is_hyperv_initialized(void)
> if (x86_hyper_type != X86_HYPER_MS_HYPERV)
> return false;
>
> + /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> + if (hv_isolation_type_tdx())
> + return true;
> /*
> * Verify that earlier initialization succeeded by checking
> * that the hypercall page is setup
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 13ccb52eecd7..07e4253b5809 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -276,6 +276,20 @@ bool hv_isolation_type_tdx(void)
> {
> return static_branch_unlikely(&isolation_type_tdx);
> }
> +
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
> +{
> + struct tdx_hypercall_args args = { };
> +
> + args.r10 = control;
> + args.rdx = param1;
> + args.r8 = param2;
> +
> + (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> +
> + return args.r11;
> +}
> +EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
> #endif
>
> /*
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 8a2cafec4675..a4d665472d9e 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -10,6 +10,7 @@
> #include <asm/nospec-branch.h>
> #include <asm/paravirt.h>
> #include <asm/mshyperv.h>
> +#include <asm/coco.h>
>
> union hv_ghcb;
>
> @@ -39,6 +40,12 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
> int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
> int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +
> +/*
> + * If the hypercall involves no input or output parameters, the hypervisor
> + * ignores the corresponding GPA pointer.
> + */
> static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> {
> u64 input_address = input ? virt_to_phys(input) : 0;
> @@ -46,6 +53,10 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control,
> + cc_mkdec(input_address),
> + cc_mkdec(output_address));
> if (!hv_hypercall_pg)
> return U64_MAX;
>
> @@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input1, 0);
> +
> {
> __asm__ __volatile__(CALL_NOSPEC
> : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -114,6 +128,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input1, input2);
> +
In some paths, for example vmbus_set_event(), the choice between the SNP-based
and the generic hypercall is made in the caller, whereas the TDX-based hypercall
is now embedded in the generic hypercall path, e.g. hv_do_fast_hypercall8().
Which style will be chosen in the future? It seems the coding structure needs
to be aligned.
void vmbus_set_event(struct vmbus_channel *channel)
{
u32 child_relid = channel->offermsg.child_relid;
if (!channel->is_dedicated_interrupt)
vmbus_send_interrupt(child_relid);
++channel->sig_events;
if (hv_isolation_type_snp())
hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
NULL, sizeof(channel->sig_event));
else
hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT,
channel->sig_event);
}
> {
> __asm__ __volatile__("mov %4, %%r8\n"
> CALL_NOSPEC
> From: Zhi Wang <zhi.wang.linux@gmail.com>
> Sent: Friday, January 6, 2023 3:24 AM
> > @@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> > u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> >
> > #ifdef CONFIG_X86_64
> > + if (hv_isolation_type_tdx())
> > + return hv_tdx_hypercall(control, input1, 0);
> > +
> > {
> > __asm__ __volatile__(CALL_NOSPEC
> > : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> > @@ -114,6 +128,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> > u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> > #ifdef CONFIG_X86_64
> > + if (hv_isolation_type_tdx())
> > + return hv_tdx_hypercall(control, input1, input2);
> > +
> In some paths, for example vmbus_set_event(), choosing the SNP-based or
In an SNP guest with paravisor on Hyper-V, hv_ghcb_hypercall() is called in
only two places: vmbus_set_event() and hv_post_message(), where the
hypercalls, which are done via GHCB, need to be handled by the Hyper-V
hypervisor directly; in other places, the hypercalls, which are done via the
hypercall page, need to be handled by the paravisor. That is to say, there
are 2 different kinds of hypercalls for an SNP guest with paravisor on Hyper-V,
and hence we have to use 2 styles.
> generic hypercall happens in the caller, while now TDX-based hypercall is
> embraced in the generic hypercall path, e.g. hv_do_fast_hypercall8(). Which
> style will be chosen in the future? Seems the coding structure needs to be
> aligned.
For a TDX guest without paravisor on Hyper-V, there is only one style of
hypercalls, so I make the change in hv_do_hypercall() and
hv_do_fast_hypercall*() directly.
I don't think we can make any clean-up changes right now. When we
support the TDX guest with paravisor on Hyper-V, we'll figure out if we
can make any improvement.
> void vmbus_set_event(struct vmbus_channel *channel)
> {
> u32 child_relid = channel->offermsg.child_relid;
>
> if (!channel->is_dedicated_interrupt)
> vmbus_send_interrupt(child_relid);
>
> ++channel->sig_events;
>
> if (hv_isolation_type_snp())
> hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT,
> &channel->sig_event,
> NULL, sizeof(channel->sig_event));
> else
> hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT,
> channel->sig_event);
> }
>
>
> > {
> > __asm__ __volatile__("mov %4, %%r8\n"
> > CALL_NOSPEC
@@ -430,6 +430,10 @@ void __init hyperv_init(void)
/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
+ /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+ if (hv_isolation_type_tdx())
+ goto skip_hypercall_pg_init;
+
hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
@@ -469,6 +473,7 @@ void __init hyperv_init(void)
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
}
+skip_hypercall_pg_init:
/*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
@@ -604,6 +609,9 @@ bool hv_is_hyperv_initialized(void)
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return false;
+ /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+ if (hv_isolation_type_tdx())
+ return true;
/*
* Verify that earlier initialization succeeded by checking
* that the hypercall page is setup
@@ -276,6 +276,20 @@ bool hv_isolation_type_tdx(void)
{
return static_branch_unlikely(&isolation_type_tdx);
}
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+ struct tdx_hypercall_args args = { };
+
+ args.r10 = control;
+ args.rdx = param1;
+ args.r8 = param2;
+
+ (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+ return args.r11;
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
#endif
/*
@@ -10,6 +10,7 @@
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/mshyperv.h>
+#include <asm/coco.h>
union hv_ghcb;
@@ -39,6 +40,12 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +53,10 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;
#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control,
+ cc_mkdec(input_address),
+ cc_mkdec(output_address));
if (!hv_hypercall_pg)
return U64_MAX;
@@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, 0);
+
{
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +128,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, input2);
+
{
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC