[6/6] Drivers: hv: vmbus: Support TDX guests
Commit Message
Intel folks added the generic code to support a TDX guest in April, 2022.
This commit and some earlier commits from me add the Hyper-V specific
code so that a TDX guest can run on Hyper-V.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
arch/x86/hyperv/hv_init.c | 19 +++++++++++++++----
arch/x86/kernel/cpu/mshyperv.c | 10 ++++++++++
arch/x86/mm/pat/set_memory.c | 2 +-
drivers/hv/connection.c | 4 +++-
drivers/hv/hv.c | 25 +++++++++++++++++++++++++
drivers/hv/ring_buffer.c | 2 +-
6 files changed, 55 insertions(+), 7 deletions(-)
Comments
On Mon, 21 Nov 2022 11:51:51 -0800
Dexuan Cui <decui@microsoft.com> wrote:
> Intel folks added the generic code to support a TDX guest in April, 2022.
> This commit and some earlier commits from me add the Hyper-V specific
> code so that a TDX guest can run on Hyper-V.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> arch/x86/hyperv/hv_init.c | 19 +++++++++++++++----
> arch/x86/kernel/cpu/mshyperv.c | 10 ++++++++++
> arch/x86/mm/pat/set_memory.c | 2 +-
> drivers/hv/connection.c | 4 +++-
> drivers/hv/hv.c | 25 +++++++++++++++++++++++++
> drivers/hv/ring_buffer.c | 2 +-
> 6 files changed, 55 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 05682c4e327f..694f7fb04e5d 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
> static int hv_cpu_init(unsigned int cpu)
> {
> union hv_vp_assist_msr_contents msr = { 0 };
> - struct hv_vp_assist_page **hvp =
> &hv_vp_assist_page[smp_processor_id()];
> + struct hv_vp_assist_page **hvp;
> int ret;
>
> ret = hv_common_cpu_init(cpu);
> @@ -87,6 +87,7 @@ static int hv_cpu_init(unsigned int cpu)
> if (!hv_vp_assist_page)
> return 0;
>
> + hvp = &hv_vp_assist_page[smp_processor_id()];
> if (!*hvp) {
> if (hv_root_partition) {
> /*
> @@ -398,11 +399,21 @@ void __init hyperv_init(void)
> if (hv_common_init())
> return;
>
> - hv_vp_assist_page = kcalloc(num_possible_cpus(),
> - sizeof(*hv_vp_assist_page),
> GFP_KERNEL);
> + /*
> + * The VP assist page is useless to a TDX guest: the only use we
> + * would have for it is lazy EOI, which can not be used with
> TDX.
> + */
> + if (hv_isolation_type_tdx())
> + hv_vp_assist_page = NULL;
> + else
> + hv_vp_assist_page = kcalloc(num_possible_cpus(),
> + sizeof(*hv_vp_assist_page),
> + GFP_KERNEL);
> if (!hv_vp_assist_page) {
> ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
> - goto common_free;
> +
> + if (!hv_isolation_type_tdx())
> + goto common_free;
> }
>
> if (hv_isolation_type_snp()) {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c
> b/arch/x86/kernel/cpu/mshyperv.c index dddccdbc5526..6f597b23ad3e 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -350,7 +350,17 @@ static void __init ms_hyperv_init_platform(void)
> case HV_ISOLATION_TYPE_TDX:
> static_branch_enable(&isolation_type_tdx);
>
> + cc_set_vendor(CC_VENDOR_INTEL);
> +
> ms_hyperv.shared_gpa_boundary =
> cc_mkdec(0); +
> + /* Don't use the unsafe Hyper-V TSC
> page */
> + ms_hyperv.features &=
> + ~HV_MSR_REFERENCE_TSC_AVAILABLE;
> +
> + /* HV_REGISTER_CRASH_CTL is unsupported
> */
> + ms_hyperv.misc_features &=
> +
> ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; break;
>
> default:
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 2e5a045731de..bb44aaddb230 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned long
> addr, int numpages, bool enc)
> static int __set_memory_enc_dec(unsigned long addr, int numpages, bool
> enc) {
> - if (hv_is_isolation_supported())
> + if (hv_is_isolation_supported() && !hv_isolation_type_tdx())
> return hv_set_mem_host_visibility(addr, numpages, !enc);
>
> if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
Let's say there will be four cases:
----
case a. SEV-SNP guest with paravisor
In the code, this case is represented by:
hv_is_isolation_supported() && hv_isolation_type_snp()
hv_is_isolation_supported() && !hv_isolation_type_tdx()
case b. TDX guest with paravisor
?
case c. SEV-SNP guest *without* paravisor
?
case d. TDX guest *without* paravisor
In the code, this case is represented by:
hv_is_isolation_supported() && hv_isolation_type_tdx()
----
1. It would be better to use "hv_is_isolation_supported() &&
hv_isolation_type_snp()" to represent case a to avoid confusion in the
above patch.
2. For now, hv_is_isolation_supported() only shows if the guest is a CC
guest or not. hv_isolation_type_*() only represent SNP or TDX but
not "w/ or w/o paravisor".
How are you going to represent case b and c in __set_memory_enc_dec()?
I think you are looking for something to show if the guest is running
with a paravisor or not here:
if (hv_is_isolation_supported() && hv_is_isolation_with_paravisor())
...
Thanks,
Zhi.
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 9dc27e5d367a..1ecc3c29e3f7 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -250,12 +250,14 @@ int vmbus_connect(void)
> * Isolation VM with AMD SNP needs to access monitor
> page via
> * address space above shared gpa boundary.
> */
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_isolation_type_tdx())
> { vmbus_connection.monitor_pages_pa[0] +=
> ms_hyperv.shared_gpa_boundary;
> vmbus_connection.monitor_pages_pa[1] +=
> ms_hyperv.shared_gpa_boundary;
> + }
>
> + if (hv_isolation_type_snp()) {
> vmbus_connection.monitor_pages[0]
> =
> memremap(vmbus_connection.monitor_pages_pa[0], HV_HYP_PAGE_SIZE,
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..03b3257bc1ab 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -18,6 +18,7 @@
> #include <linux/clockchips.h>
> #include <linux/delay.h>
> #include <linux/interrupt.h>
> +#include <linux/set_memory.h>
> #include <clocksource/hyperv_timer.h>
> #include <asm/mshyperv.h>
> #include "hyperv_vmbus.h"
> @@ -119,6 +120,7 @@ int hv_synic_alloc(void)
> {
> int cpu;
> struct hv_per_cpu_context *hv_cpu;
> + int ret;
>
> /*
> * First, zero all per-cpu memory areas so hv_synic_free() can
> @@ -168,6 +170,21 @@ int hv_synic_alloc(void)
> pr_err("Unable to allocate post msg page\n");
> goto err;
> }
> +
> +
> + if (hv_isolation_type_tdx()) {
> + ret = set_memory_decrypted(
> + (unsigned
> long)hv_cpu->synic_message_page, 1);
> + BUG_ON(ret);
> +
> + ret = set_memory_decrypted(
> + (unsigned
> long)hv_cpu->synic_event_page, 1);
> + BUG_ON(ret);
> +
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->post_msg_page,
> 1);
> + BUG_ON(ret);
> + }
> }
>
> return 0;
> @@ -225,6 +242,10 @@ void hv_synic_enable_regs(unsigned int cpu)
> } else {
> simp.base_simp_gpa =
> virt_to_phys(hv_cpu->synic_message_page)
> >> HV_HYP_PAGE_SHIFT;
> +
> + if (hv_isolation_type_tdx())
> + simp.base_simp_gpa +=
> ms_hyperv.shared_gpa_boundary
> + >> HV_HYP_PAGE_SHIFT;
> }
>
> hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> @@ -243,6 +264,10 @@ void hv_synic_enable_regs(unsigned int cpu)
> } else {
> siefp.base_siefp_gpa =
> virt_to_phys(hv_cpu->synic_event_page)
> >> HV_HYP_PAGE_SHIFT;
> +
> + if (hv_isolation_type_tdx())
> + siefp.base_siefp_gpa +=
> ms_hyperv.shared_gpa_boundary
> + >> HV_HYP_PAGE_SHIFT;
> }
>
> hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
> index c6692fd5ab15..a51da82316ce 100644
> --- a/drivers/hv/ring_buffer.c
> +++ b/drivers/hv/ring_buffer.c
> @@ -233,7 +233,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info
> *ring_info,
> ring_info->ring_buffer = (struct hv_ring_buffer *)
> vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
> - PAGE_KERNEL);
> + pgprot_decrypted(PAGE_KERNEL_NOENC));
>
> kfree(pages_wraparound);
> if (!ring_info->ring_buffer)
> From: Zhi Wang <zhi.wang.linux@gmail.com>
> Sent: Friday, January 6, 2023 3:00 AM
> > diff --git a/arch/x86/mm/pat/set_memory.c
> > @@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned
> long
> > addr, int numpages, bool enc)
> > static int __set_memory_enc_dec(unsigned long addr, int numpages, bool
> > enc) {
> > - if (hv_is_isolation_supported())
> > + if (hv_is_isolation_supported() && !hv_isolation_type_tdx())
> > return hv_set_mem_host_visibility(addr, numpages, !enc);
> >
> > if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
The change here is kind of a hack to avoid calling hv_set_mem_host_visibility()
for TDX guests on Hyper-V. To me, the original change was also a hack: it
calls hv_set_mem_host_visibility() for SNP guests with a paravisor on Hyper-V.
> Let's say there will be four cases:
> ----
> case a. SEV-SNP guest with paravisor
>
> In the code, this case is represented by:
>
> hv_is_isolation_supported() && hv_isolation_type_snp()
> hv_is_isolation_supported() && !hv_isolation_type_tdx()
These look bad to me...
> case b. TDX guest with paravisor
> ?
As of now, this is not supported yet. I'll need to figure out what exactly
this scenario will look like.
> case c. SEV-SNP guest *without* paravisor
> ?
Tianyu Lan is working on this:
https://lwn.net/ml/linux-kernel/20221119034633.1728632-1-ltykernel@gmail.com/
set_memory_decrypted() calls __set_memory_enc_dec() directly. This
is the same as a SNP guest running on KVM.
> case d. TDX guest *without* paravisor
>
> In the code, this case is represented by:
>
> hv_is_isolation_supported() && hv_isolation_type_tdx()
This looks bad to me...
> ----
>
> 1. It would be better to use "hv_is_isolation_supported() &&
> hv_isolation_type_snp()" to represent case a to avoid confusion in the
> above patch.
>
> 2. For now, hv_is_isolation_supported() only shows if the guest is a CC
> guest or not. hv_isolation_type_*() only represent SNP or TDX but
> not "w/ or w/o paravisor".
>
> How are you going to represent case b and c in __set_memory_enc_dec()?
>
> I think you are looking for something to show if the guest is running
> with a paravisor or not here:
>
> if (hv_is_isolation_supported() && hv_is_isolation_with_paravisor())
> ...
>
> Thanks,
> Zhi.
Michael's patchset removes the special path for SNP with paravisor on Hyper-V:
https://lwn.net/ml/linux-kernel/1669951831-4180-7-git-send-email-mikelley%40microsoft.com/
With Michael's patchset, I don't need the change to __set_memory_enc_dec()
at all. The plan was that Michael's patchset would be merged into the upstream
first and I would rebase my TDX patchset accordingly, but Michael's patchset
has been pending for almost 2 months...
So I probably need to post v3 with the version below, which looks a little
better to me because it hides the Hyper-V specific logic in a Hyper-V specific
file arch/x86/hyperv/ivm.c, and if necessary we can change the implementation
of hv_set_memory_enc_dec_needed() in the future, e.g. Tianyu can change
hv_set_memory_enc_dec_needed() to distinguish between SNP with paravisor
and SNP without paravisor. Of course, I still hope Michael's patchset will
be merged soon so I can avoid this kind of mess...
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 07e4253b5809..4398042f10d5 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -258,6 +258,11 @@ bool hv_is_isolation_supported(void)
return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}
+bool hv_set_memory_enc_dec_needed(void)
+{
+ return hv_is_isolation_supported() && !hv_isolation_type_tdx();
+}
+
DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
/*
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2e5a045731de..5892196f8ade 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
- if (hv_is_isolation_supported())
+ if (hv_set_memory_enc_dec_needed())
return hv_set_mem_host_visibility(addr, numpages, !enc);
if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index a9a03ab04b97..192dcf295dfc 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -262,6 +262,12 @@ bool __weak hv_is_isolation_supported(void)
}
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
+bool __weak hv_set_memory_enc_dec_needed(void)
+{
+ return false;
+}
+EXPORT_SYMBOL_GPL(hv_set_memory_enc_dec_needed);
+
bool __weak hv_isolation_type_snp(void)
{
return false;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..b7b1b18c9854 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -262,6 +262,7 @@ bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
enum hv_isolation_type hv_get_isolation_type(void);
bool hv_is_isolation_supported(void);
+bool hv_set_memory_enc_dec_needed(void);
bool hv_isolation_type_snp(void);
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
void hyperv_cleanup(void);
@@ -274,6 +275,7 @@ static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline bool hv_is_isolation_supported(void) { return false; }
+static inline bool hv_set_memory_enc_dec_needed(void) { return false; }
static inline enum hv_isolation_type hv_get_isolation_type(void)
{
return HV_ISOLATION_TYPE_NONE;
@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
static int hv_cpu_init(unsigned int cpu)
{
union hv_vp_assist_msr_contents msr = { 0 };
- struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+ struct hv_vp_assist_page **hvp;
int ret;
ret = hv_common_cpu_init(cpu);
@@ -87,6 +87,7 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;
+ hvp = &hv_vp_assist_page[smp_processor_id()];
if (!*hvp) {
if (hv_root_partition) {
/*
@@ -398,11 +399,21 @@ void __init hyperv_init(void)
if (hv_common_init())
return;
- hv_vp_assist_page = kcalloc(num_possible_cpus(),
- sizeof(*hv_vp_assist_page), GFP_KERNEL);
+ /*
+ * The VP assist page is useless to a TDX guest: the only use we
+ * would have for it is lazy EOI, which can not be used with TDX.
+ */
+ if (hv_isolation_type_tdx())
+ hv_vp_assist_page = NULL;
+ else
+ hv_vp_assist_page = kcalloc(num_possible_cpus(),
+ sizeof(*hv_vp_assist_page),
+ GFP_KERNEL);
if (!hv_vp_assist_page) {
ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
- goto common_free;
+
+ if (!hv_isolation_type_tdx())
+ goto common_free;
}
if (hv_isolation_type_snp()) {
@@ -350,7 +350,17 @@ static void __init ms_hyperv_init_platform(void)
case HV_ISOLATION_TYPE_TDX:
static_branch_enable(&isolation_type_tdx);
+ cc_set_vendor(CC_VENDOR_INTEL);
+
ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
+
+ /* Don't use the unsafe Hyper-V TSC page */
+ ms_hyperv.features &=
+ ~HV_MSR_REFERENCE_TSC_AVAILABLE;
+
+ /* HV_REGISTER_CRASH_CTL is unsupported */
+ ms_hyperv.misc_features &=
+ ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
break;
default:
@@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
- if (hv_is_isolation_supported())
+ if (hv_is_isolation_supported() && !hv_isolation_type_tdx())
return hv_set_mem_host_visibility(addr, numpages, !enc);
if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
@@ -250,12 +250,14 @@ int vmbus_connect(void)
* Isolation VM with AMD SNP needs to access monitor page via
* address space above shared gpa boundary.
*/
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_isolation_type_tdx()) {
vmbus_connection.monitor_pages_pa[0] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages_pa[1] +=
ms_hyperv.shared_gpa_boundary;
+ }
+ if (hv_isolation_type_snp()) {
vmbus_connection.monitor_pages[0]
= memremap(vmbus_connection.monitor_pages_pa[0],
HV_HYP_PAGE_SIZE,
@@ -18,6 +18,7 @@
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/set_memory.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -119,6 +120,7 @@ int hv_synic_alloc(void)
{
int cpu;
struct hv_per_cpu_context *hv_cpu;
+ int ret;
/*
* First, zero all per-cpu memory areas so hv_synic_free() can
@@ -168,6 +170,21 @@ int hv_synic_alloc(void)
pr_err("Unable to allocate post msg page\n");
goto err;
}
+
+
+ if (hv_isolation_type_tdx()) {
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_message_page, 1);
+ BUG_ON(ret);
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_event_page, 1);
+ BUG_ON(ret);
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->post_msg_page, 1);
+ BUG_ON(ret);
+ }
}
return 0;
@@ -225,6 +242,10 @@ void hv_synic_enable_regs(unsigned int cpu)
} else {
simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> HV_HYP_PAGE_SHIFT;
+
+ if (hv_isolation_type_tdx())
+ simp.base_simp_gpa += ms_hyperv.shared_gpa_boundary
+ >> HV_HYP_PAGE_SHIFT;
}
hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
@@ -243,6 +264,10 @@ void hv_synic_enable_regs(unsigned int cpu)
} else {
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> HV_HYP_PAGE_SHIFT;
+
+ if (hv_isolation_type_tdx())
+ siefp.base_siefp_gpa += ms_hyperv.shared_gpa_boundary
+ >> HV_HYP_PAGE_SHIFT;
}
hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
@@ -233,7 +233,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
- PAGE_KERNEL);
+ pgprot_decrypted(PAGE_KERNEL_NOENC));
kfree(pages_wraparound);
if (!ring_info->ring_buffer)