[RFC,V3,10/16] x86/hyperv: Add smp support for sev-snp guest
Commit Message
From: Tianyu Lan <tiala@microsoft.com>
The wakeup_secondary_cpu callback was populated with
wakeup_cpu_via_vmgexit(), which doesn't work for Hyper-V. Override it
with a Hyper-V specific hook that uses the HVCALL_START_VIRTUAL_PROCESSOR
hypercall to start the AP with a VMSA data structure.
Signed-off-by: Tianyu Lan <tiala@microsoft.com>
---
Change since RFC v2:
* Add helper function to initialize segment
* Fix some coding style
---
arch/x86/include/asm/mshyperv.h | 2 +
arch/x86/include/asm/sev.h | 13 ++++
arch/x86/include/asm/svm.h | 47 +++++++++++++
arch/x86/kernel/cpu/mshyperv.c | 112 ++++++++++++++++++++++++++++--
include/asm-generic/hyperv-tlfs.h | 19 +++++
5 files changed, 189 insertions(+), 4 deletions(-)
Comments
On 1/21/23 20:46, Tianyu Lan wrote:
> From: Tianyu Lan <tiala@microsoft.com>
>
> The wakeup_secondary_cpu callback was populated with wakeup_
> cpu_via_vmgexit() which doesn't work for Hyper-V. Override it
An explanation as to why it doesn't work would be nice here.
> with Hyper-V specific hook which uses HVCALL_START_VIRTUAL_
> PROCESSOR hvcall to start AP with vmsa data structure.
>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> Change since RFC v2:
> * Add helper function to initialize segment
> * Fix some coding style
> ---
> arch/x86/include/asm/mshyperv.h | 2 +
> arch/x86/include/asm/sev.h | 13 ++++
> arch/x86/include/asm/svm.h | 47 +++++++++++++
> arch/x86/kernel/cpu/mshyperv.c | 112 ++++++++++++++++++++++++++++--
> include/asm-generic/hyperv-tlfs.h | 19 +++++
> 5 files changed, 189 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 7266d71d30d6..c69051eec0e1 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -203,6 +203,8 @@ struct irq_domain *hv_create_pci_msi_domain(void);
> int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
> struct hv_interrupt_entry *entry);
> int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
> +int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
> +int hv_snp_boot_ap(int cpu, unsigned long start_ip);
>
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> void hv_ghcb_msr_write(u64 msr, u64 value);
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index ebc271bb6d8e..e34aaf730220 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -86,6 +86,19 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>
> #define RMPADJUST_VMSA_PAGE_BIT BIT(16)
>
> +union sev_rmp_adjust {
> + u64 as_uint64;
> + struct {
> + unsigned long target_vmpl : 8;
> + unsigned long enable_read : 1;
> + unsigned long enable_write : 1;
> + unsigned long enable_user_execute : 1;
> + unsigned long enable_kernel_execute : 1;
> + unsigned long reserved1 : 4;
> + unsigned long vmsa : 1;
> + };
> +};
> +
> /* SNP Guest message request */
> struct snp_req_data {
> unsigned long req_gpa;
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index cb1ee53ad3b1..f8b321a11ee4 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -336,6 +336,53 @@ struct vmcb_save_area {
Please don't update the vmcb_save_area, you should be using/updating the
sev_es_save_area structure for SNP.
> u64 last_excp_to;
> u8 reserved_0x298[72];
> u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
> + u8 reserved_7b[4];
> + u32 pkru;
> + u8 reserved_7a[20];
> + u64 reserved_8; /* rax already available at 0x01f8 */
> + u64 rcx;
> + u64 rdx;
> + u64 rbx;
> + u64 reserved_9; /* rsp already available at 0x01d8 */
> + u64 rbp;
> + u64 rsi;
> + u64 rdi;
> + u64 r8;
> + u64 r9;
> + u64 r10;
> + u64 r11;
> + u64 r12;
> + u64 r13;
> + u64 r14;
> + u64 r15;
> + u8 reserved_10[16];
> + u64 sw_exit_code;
> + u64 sw_exit_info_1;
> + u64 sw_exit_info_2;
> + u64 sw_scratch;
> + union {
> + u64 sev_features;
> + struct {
> + u64 sev_feature_snp : 1;
> + u64 sev_feature_vtom : 1;
> + u64 sev_feature_reflectvc : 1;
> + u64 sev_feature_restrict_injection : 1;
> + u64 sev_feature_alternate_injection : 1;
> + u64 sev_feature_full_debug : 1;
> + u64 sev_feature_reserved1 : 1;
> + u64 sev_feature_snpbtb_isolation : 1;
> + u64 sev_feature_resrved2 : 56;
For the bits definition, use:
u64 sev_feature_snp : 1,
sev_feature_vtom : 1,
sev_feature_reflectvc : 1,
...
Thanks,
Tom
> + };
> + };
> + u64 vintr_ctrl;
> + u64 guest_error_code;
> + u64 virtual_tom;
> + u64 tlb_id;
> + u64 pcpu_id;
> + u64 event_inject;
> + u64 xcr0;
> + u8 valid_bitmap[16];
> + u64 x87_state_gpa;
> } __packed;
>
> /* Save area definition for SEV-ES and SEV-SNP guests */
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 197c8f2ec4eb..9d547751a1a7 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -39,6 +39,13 @@
> #include <asm/realmode.h>
> #include <asm/e820/api.h>
>
> +/*
> + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
> + * to start AP in enlightened SEV guest.
> + */
> +#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
> +#define HV_AP_SEGMENT_LIMIT 0xffffffff
> +
> /* Is Linux running as the root partition? */
> bool hv_root_partition;
> struct ms_hyperv_info ms_hyperv;
> @@ -230,6 +237,94 @@ static void __init hv_smp_prepare_boot_cpu(void)
> #endif
> }
>
> +static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
> +static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
> +
> +#define hv_populate_vmcb_seg(seg, gdtr_base) \
> +do { \
> + if (seg.selector) { \
> + seg.base = 0; \
> + seg.limit = HV_AP_SEGMENT_LIMIT; \
> + seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \
> + seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
> + } \
> +} while (0) \
> +
> +int hv_snp_boot_ap(int cpu, unsigned long start_ip)
> +{
> + struct vmcb_save_area *vmsa = (struct vmcb_save_area *)
> + __get_free_page(GFP_KERNEL | __GFP_ZERO);
> + struct desc_ptr gdtr;
> + u64 ret, retry = 5;
> + struct hv_start_virtual_processor_input *start_vp_input;
> + union sev_rmp_adjust rmp_adjust;
> + unsigned long flags;
> +
> + native_store_gdt(&gdtr);
> +
> + vmsa->gdtr.base = gdtr.address;
> + vmsa->gdtr.limit = gdtr.size;
> +
> + asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
> + hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
> +
> + asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
> + hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
> +
> + asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
> + hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
> +
> + asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
> + hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
> +
> + vmsa->efer = native_read_msr(MSR_EFER);
> +
> + asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
> + asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
> + asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));
> +
> + vmsa->xcr0 = 1;
> + vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
> + vmsa->rip = (u64)secondary_startup_64_no_verify;
> + vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];
> +
> + vmsa->sev_feature_snp = 1;
> + vmsa->sev_feature_restrict_injection = 1;
> +
> + rmp_adjust.as_uint64 = 0;
> + rmp_adjust.target_vmpl = 1;
> + rmp_adjust.vmsa = 1;
> + ret = rmpadjust((unsigned long)vmsa, RMP_PG_SIZE_4K,
> + rmp_adjust.as_uint64);
> + if (ret != 0) {
> + pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
> + return ret;
> + }
> +
> + local_irq_save(flags);
> + start_vp_input =
> + (struct hv_start_virtual_processor_input *)ap_start_input_arg;
> + memset(start_vp_input, 0, sizeof(*start_vp_input));
> + start_vp_input->partitionid = -1;
> + start_vp_input->vpindex = cpu;
> + start_vp_input->targetvtl = ms_hyperv.vtl;
> + *(u64 *)&start_vp_input->context[0] = __pa(vmsa) | 1;
> +
> + do {
> + ret = hv_do_hypercall(HVCALL_START_VIRTUAL_PROCESSOR,
> + start_vp_input, NULL);
> + } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);
> +
> + if (!hv_result_success(ret)) {
> + pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
> + goto done;
> + }
> +
> +done:
> + local_irq_restore(flags);
> + return ret;
> +}
> +
> static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
> {
> #ifdef CONFIG_X86_64
> @@ -239,6 +334,16 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
>
> native_smp_prepare_cpus(max_cpus);
>
> + /*
> + * Override wakeup_secondary_cpu callback for SEV-SNP
> + * enlightened guest.
> + */
> + if (hv_isolation_type_en_snp())
> + apic->wakeup_secondary_cpu = hv_snp_boot_ap;
> +
> + if (!hv_root_partition)
> + return;
> +
> #ifdef CONFIG_X86_64
> for_each_present_cpu(i) {
> if (i == 0)
> @@ -475,8 +580,7 @@ static void __init ms_hyperv_init_platform(void)
>
> # ifdef CONFIG_SMP
> smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
> - if (hv_root_partition)
> - smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
> + smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
> # endif
>
> /*
> @@ -501,7 +605,7 @@ static void __init ms_hyperv_init_platform(void)
> if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
> mark_tsc_unstable("running on Hyper-V");
>
> - if (isolation_type_en_snp()) {
> + if (hv_isolation_type_en_snp()) {
> /*
> * Hyper-V enlightened snp guest boots kernel
> * directly without bootloader and so roms,
> @@ -511,7 +615,7 @@ static void __init ms_hyperv_init_platform(void)
> x86_platform.legacy.rtc = 0;
> x86_platform.set_wallclock = set_rtc_noop;
> x86_platform.get_wallclock = get_rtc_noop;
> - x86_platform.legacy.reserve_bios_regions = x86_init_noop;
> + x86_platform.legacy.reserve_bios_regions = 0;
> x86_init.resources.probe_roms = x86_init_noop;
> x86_init.resources.reserve_resources = x86_init_noop;
> x86_init.mpparse.find_smp_config = x86_init_noop;
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index c1cc3ec36ad5..3d7c67be9f56 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -148,6 +148,7 @@ union hv_reference_tsc_msr {
> #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
> #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
> #define HVCALL_SEND_IPI 0x000b
> +#define HVCALL_ENABLE_VP_VTL 0x000f
> #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
> #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
> #define HVCALL_SEND_IPI_EX 0x0015
> @@ -165,6 +166,7 @@ union hv_reference_tsc_msr {
> #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
> #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
> #define HVCALL_RETARGET_INTERRUPT 0x007e
> +#define HVCALL_START_VIRTUAL_PROCESSOR 0x0099
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
> @@ -219,6 +221,7 @@ enum HV_GENERIC_SET_FORMAT {
> #define HV_STATUS_INVALID_PORT_ID 17
> #define HV_STATUS_INVALID_CONNECTION_ID 18
> #define HV_STATUS_INSUFFICIENT_BUFFERS 19
> +#define HV_STATUS_TIME_OUT 0x78
>
> /*
> * The Hyper-V TimeRefCount register and the TSC
> @@ -778,6 +781,22 @@ struct hv_input_unmap_device_interrupt {
> struct hv_interrupt_entry interrupt_entry;
> } __packed;
>
> +struct hv_enable_vp_vtl_input {
> + u64 partitionid;
> + u32 vpindex;
> + u8 targetvtl;
> + u8 padding[3];
> + u8 context[0xe0];
> +} __packed;
> +
> +struct hv_start_virtual_processor_input {
> + u64 partitionid;
> + u32 vpindex;
> + u8 targetvtl;
> + u8 padding[3];
> + u8 context[0xe0];
> +} __packed;
> +
> #define HV_SOURCE_SHADOW_NONE 0x0
> #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1
>
From: Tianyu Lan <ltykernel@gmail.com> Sent: Saturday, January 21, 2023 6:46 PM
>
> The wakeup_secondary_cpu callback was populated with wakeup_
> cpu_via_vmgexit() which doesn't work for Hyper-V. Override it
> with Hyper-V specific hook which uses HVCALL_START_VIRTUAL_
> PROCESSOR hvcall to start AP with vmsa data structure.
>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> Change since RFC v2:
> * Add helper function to initialize segment
> * Fix some coding style
> ---
> arch/x86/include/asm/mshyperv.h | 2 +
> arch/x86/include/asm/sev.h | 13 ++++
> arch/x86/include/asm/svm.h | 47 +++++++++++++
> arch/x86/kernel/cpu/mshyperv.c | 112 ++++++++++++++++++++++++++++--
> include/asm-generic/hyperv-tlfs.h | 19 +++++
> 5 files changed, 189 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 7266d71d30d6..c69051eec0e1 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -203,6 +203,8 @@ struct irq_domain *hv_create_pci_msi_domain(void);
> int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
> struct hv_interrupt_entry *entry);
> int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
> +int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
> +int hv_snp_boot_ap(int cpu, unsigned long start_ip);
>
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> void hv_ghcb_msr_write(u64 msr, u64 value);
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index ebc271bb6d8e..e34aaf730220 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -86,6 +86,19 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>
> #define RMPADJUST_VMSA_PAGE_BIT BIT(16)
>
> +union sev_rmp_adjust {
> + u64 as_uint64;
> + struct {
> + unsigned long target_vmpl : 8;
> + unsigned long enable_read : 1;
> + unsigned long enable_write : 1;
> + unsigned long enable_user_execute : 1;
> + unsigned long enable_kernel_execute : 1;
> + unsigned long reserved1 : 4;
> + unsigned long vmsa : 1;
> + };
> +};
> +
> /* SNP Guest message request */
> struct snp_req_data {
> unsigned long req_gpa;
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index cb1ee53ad3b1..f8b321a11ee4 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -336,6 +336,53 @@ struct vmcb_save_area {
> u64 last_excp_to;
> u8 reserved_0x298[72];
> u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
> + u8 reserved_7b[4];
> + u32 pkru;
> + u8 reserved_7a[20];
> + u64 reserved_8; /* rax already available at 0x01f8 */
> + u64 rcx;
> + u64 rdx;
> + u64 rbx;
> + u64 reserved_9; /* rsp already available at 0x01d8 */
> + u64 rbp;
> + u64 rsi;
> + u64 rdi;
> + u64 r8;
> + u64 r9;
> + u64 r10;
> + u64 r11;
> + u64 r12;
> + u64 r13;
> + u64 r14;
> + u64 r15;
> + u8 reserved_10[16];
> + u64 sw_exit_code;
> + u64 sw_exit_info_1;
> + u64 sw_exit_info_2;
> + u64 sw_scratch;
> + union {
> + u64 sev_features;
> + struct {
> + u64 sev_feature_snp : 1;
> + u64 sev_feature_vtom : 1;
> + u64 sev_feature_reflectvc : 1;
> + u64 sev_feature_restrict_injection : 1;
> + u64 sev_feature_alternate_injection : 1;
> + u64 sev_feature_full_debug : 1;
> + u64 sev_feature_reserved1 : 1;
> + u64 sev_feature_snpbtb_isolation : 1;
> + u64 sev_feature_resrved2 : 56;
> + };
> + };
> + u64 vintr_ctrl;
> + u64 guest_error_code;
> + u64 virtual_tom;
> + u64 tlb_id;
> + u64 pcpu_id;
> + u64 event_inject;
> + u64 xcr0;
> + u8 valid_bitmap[16];
> + u64 x87_state_gpa;
> } __packed;
>
> /* Save area definition for SEV-ES and SEV-SNP guests */
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 197c8f2ec4eb..9d547751a1a7 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -39,6 +39,13 @@
> #include <asm/realmode.h>
> #include <asm/e820/api.h>
>
> +/*
> + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
> + * to start AP in enlightened SEV guest.
> + */
> +#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
> +#define HV_AP_SEGMENT_LIMIT 0xffffffff
If these values are defined by Hyper-V, they should probably go in
hyperv-tlfs.h.
> +
> /* Is Linux running as the root partition? */
> bool hv_root_partition;
> struct ms_hyperv_info ms_hyperv;
> @@ -230,6 +237,94 @@ static void __init hv_smp_prepare_boot_cpu(void)
> #endif
> }
>
> +static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
> +static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
> +
> +#define hv_populate_vmcb_seg(seg, gdtr_base) \
> +do { \
> + if (seg.selector) { \
> + seg.base = 0; \
> + seg.limit = HV_AP_SEGMENT_LIMIT; \
> + seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \
> + seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
> + } \
> +} while (0) \
> +
> +int hv_snp_boot_ap(int cpu, unsigned long start_ip)
> +{
> + struct vmcb_save_area *vmsa = (struct vmcb_save_area *)
> + __get_free_page(GFP_KERNEL | __GFP_ZERO);
> + struct desc_ptr gdtr;
> + u64 ret, retry = 5;
> + struct hv_start_virtual_processor_input *start_vp_input;
> + union sev_rmp_adjust rmp_adjust;
> + unsigned long flags;
> +
> + native_store_gdt(&gdtr);
> +
> + vmsa->gdtr.base = gdtr.address;
> + vmsa->gdtr.limit = gdtr.size;
> +
> + asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
> + hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
> +
> + asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
> + hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
> +
> + asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
> + hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
> +
> + asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
> + hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
> +
> + vmsa->efer = native_read_msr(MSR_EFER);
> +
> + asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
> + asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
> + asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));
> +
> + vmsa->xcr0 = 1;
> + vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
> + vmsa->rip = (u64)secondary_startup_64_no_verify;
> + vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];
> +
> + vmsa->sev_feature_snp = 1;
> + vmsa->sev_feature_restrict_injection = 1;
> +
> + rmp_adjust.as_uint64 = 0;
> + rmp_adjust.target_vmpl = 1;
> + rmp_adjust.vmsa = 1;
> + ret = rmpadjust((unsigned long)vmsa, RMP_PG_SIZE_4K,
> + rmp_adjust.as_uint64);
> + if (ret != 0) {
> + pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
> + return ret;
> + }
> +
> + local_irq_save(flags);
> + start_vp_input =
> + (struct hv_start_virtual_processor_input *)ap_start_input_arg;
> + memset(start_vp_input, 0, sizeof(*start_vp_input));
> + start_vp_input->partitionid = -1;
> + start_vp_input->vpindex = cpu;
> + start_vp_input->targetvtl = ms_hyperv.vtl;
> + *(u64 *)&start_vp_input->context[0] = __pa(vmsa) | 1;
> +
> + do {
> + ret = hv_do_hypercall(HVCALL_START_VIRTUAL_PROCESSOR,
> + start_vp_input, NULL);
> + } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);
> +
> + if (!hv_result_success(ret)) {
> + pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
> + goto done;
> + }
> +
> +done:
> + local_irq_restore(flags);
> + return ret;
> +}
> +
Like a comment in an earlier patch, I'm wondering if the bulk of
this code could move to ivm.c, to avoid overloading mshyperv.c.
> static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
> {
> #ifdef CONFIG_X86_64
> @@ -239,6 +334,16 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
>
> native_smp_prepare_cpus(max_cpus);
>
> + /*
> + * Override wakeup_secondary_cpu callback for SEV-SNP
> + * enlightened guest.
> + */
> + if (hv_isolation_type_en_snp())
> + apic->wakeup_secondary_cpu = hv_snp_boot_ap;
> +
> + if (!hv_root_partition)
> + return;
> +
> #ifdef CONFIG_X86_64
> for_each_present_cpu(i) {
> if (i == 0)
> @@ -475,8 +580,7 @@ static void __init ms_hyperv_init_platform(void)
>
> # ifdef CONFIG_SMP
> smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
> - if (hv_root_partition)
> - smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
> + smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
> # endif
>
> /*
> @@ -501,7 +605,7 @@ static void __init ms_hyperv_init_platform(void)
> if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
> mark_tsc_unstable("running on Hyper-V");
>
> - if (isolation_type_en_snp()) {
> + if (hv_isolation_type_en_snp()) {
Also a bug fix to an earlier patch in this series.
> /*
> * Hyper-V enlightened snp guest boots kernel
> * directly without bootloader and so roms,
> @@ -511,7 +615,7 @@ static void __init ms_hyperv_init_platform(void)
> x86_platform.legacy.rtc = 0;
> x86_platform.set_wallclock = set_rtc_noop;
> x86_platform.get_wallclock = get_rtc_noop;
> - x86_platform.legacy.reserve_bios_regions = x86_init_noop;
> + x86_platform.legacy.reserve_bios_regions = 0;
This looks like a bug fix to Patch 8 of the series. It should be fixed
in patch 8.
> x86_init.resources.probe_roms = x86_init_noop;
> x86_init.resources.reserve_resources = x86_init_noop;
> x86_init.mpparse.find_smp_config = x86_init_noop;
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index c1cc3ec36ad5..3d7c67be9f56 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -148,6 +148,7 @@ union hv_reference_tsc_msr {
> #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
> #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
> #define HVCALL_SEND_IPI 0x000b
> +#define HVCALL_ENABLE_VP_VTL 0x000f
> #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
> #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
> #define HVCALL_SEND_IPI_EX 0x0015
> @@ -165,6 +166,7 @@ union hv_reference_tsc_msr {
> #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
> #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
> #define HVCALL_RETARGET_INTERRUPT 0x007e
> +#define HVCALL_START_VIRTUAL_PROCESSOR 0x0099
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
> @@ -219,6 +221,7 @@ enum HV_GENERIC_SET_FORMAT {
> #define HV_STATUS_INVALID_PORT_ID 17
> #define HV_STATUS_INVALID_CONNECTION_ID 18
> #define HV_STATUS_INSUFFICIENT_BUFFERS 19
> +#define HV_STATUS_TIME_OUT 0x78
>
> /*
> * The Hyper-V TimeRefCount register and the TSC
> @@ -778,6 +781,22 @@ struct hv_input_unmap_device_interrupt {
> struct hv_interrupt_entry interrupt_entry;
> } __packed;
>
> +struct hv_enable_vp_vtl_input {
> + u64 partitionid;
> + u32 vpindex;
> + u8 targetvtl;
> + u8 padding[3];
> + u8 context[0xe0];
> +} __packed;
> +
> +struct hv_start_virtual_processor_input {
> + u64 partitionid;
> + u32 vpindex;
> + u8 targetvtl;
> + u8 padding[3];
> + u8 context[0xe0];
> +} __packed;
> +
> #define HV_SOURCE_SHADOW_NONE 0x0
> #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1
>
> --
> 2.25.1
On 2/1/2023 2:34 AM, Michael Kelley (LINUX) wrote:
>> + pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
>> + goto done;
>> + }
>> +
>> +done:
>> + local_irq_restore(flags);
>> + return ret;
>> +}
>> +
> Like a comment in an earlier patch, I'm wondering if the bulk of
> this code could move to ivm.c, to avoid overloading mshyperv.c.
Sure. Will update in the next version.
>
On 1/23/2023 11:30 PM, Tom Lendacky wrote:
> On 1/21/23 20:46, Tianyu Lan wrote:
>> From: Tianyu Lan <tiala@microsoft.com>
>>
>> The wakeup_secondary_cpu callback was populated with wakeup_
>> cpu_via_vmgexit() which doesn't work for Hyper-V. Override it
>
> An explanation as to why it doesn't work would be nice here.
Hi Thomas:
Thanks for your review. Good idea. Will update.
>> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
>> index cb1ee53ad3b1..f8b321a11ee4 100644
>> --- a/arch/x86/include/asm/svm.h
>> +++ b/arch/x86/include/asm/svm.h
>> @@ -336,6 +336,53 @@ struct vmcb_save_area {
>
> Please don't update the vmcb_save_area, you should be using/updating the
> sev_es_save_area structure for SNP.
OK. Will update in the next version.
>> u64 sev_feature_restrict_injection : 1;
>> + u64 sev_feature_alternate_injection : 1;
>> + u64 sev_feature_full_debug : 1;
>> + u64 sev_feature_reserved1 : 1;
>> + u64 sev_feature_snpbtb_isolation : 1;
>> + u64 sev_feature_resrved2 : 56;
>
> For the bits definition, use:
>
> u64 sev_feature_snp : 1,
> sev_feature_vtom : 1,
> sev_feature_reflectvc : 1,
> ...
>
Good suggestion. Thanks.
On Fri, Feb 03, 2023 at 03:00:44PM +0800, Tianyu Lan wrote:
> > For the bits definition, use:
> >
> > u64 sev_feature_snp : 1,
> > sev_feature_vtom : 1,
> > sev_feature_reflectvc : 1,
> > ...
> >
>
> Good suggestion. Thanks.
Actually, I'd prefer if you used a named union and dropped all these
"sev_feature_" prefixes everywhere:
union {
struct {
u64 snp : 1;
u64 vtom : 1;
u64 reflectvc : 1;
u64 restrict_injection : 1;
u64 alternate_injection : 1;
u64 full_debug : 1;
u64 reserved1 : 1;
u64 snpbtb_isolation : 1;
u64 resrved2 : 56;
};
u64 val;
} sev_features;
so that you can do in code:
struct sev_es_save_area *sev;
...
sev->sev_features.snp = ...
and so on.
Thx.
On 2/7/2023 4:11 AM, Borislav Petkov wrote:
> On Fri, Feb 03, 2023 at 03:00:44PM +0800, Tianyu Lan wrote:
>>> For the bits definition, use:
>>>
>>> u64 sev_feature_snp : 1,
>>> sev_feature_vtom : 1,
>>> sev_feature_reflectvc : 1,
>>> ...
>>>
>>
>> Good suggestion. Thanks.
>
> Actually, I'd prefer if you used a named union and dropped all these
> "sev_feature_" prefixes everywhere:
>
> union {
> struct {
> u64 snp : 1;
> u64 vtom : 1;
> u64 reflectvc : 1;
> u64 restrict_injection : 1;
> u64 alternate_injection : 1;
> u64 full_debug : 1;
> u64 reserved1 : 1;
> u64 snpbtb_isolation : 1;
> u64 resrved2 : 56;
> };
> u64 val;
> } sev_features;
>
>
>
> so that you can do in code:
>
> struct sev_es_save_area *sev;
>
> ...
>
> sev->sev_features.snp = ...
>
> and so on.
Hi Boris:
Thanks a lot for your suggestion. Will update.
@@ -203,6 +203,8 @@ struct irq_domain *hv_create_pci_msi_domain(void);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
struct hv_interrupt_entry *entry);
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
+int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
+int hv_snp_boot_ap(int cpu, unsigned long start_ip);
#ifdef CONFIG_AMD_MEM_ENCRYPT
void hv_ghcb_msr_write(u64 msr, u64 value);
@@ -86,6 +86,19 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
+union sev_rmp_adjust {
+ u64 as_uint64;
+ struct {
+ unsigned long target_vmpl : 8;
+ unsigned long enable_read : 1;
+ unsigned long enable_write : 1;
+ unsigned long enable_user_execute : 1;
+ unsigned long enable_kernel_execute : 1;
+ unsigned long reserved1 : 4;
+ unsigned long vmsa : 1;
+ };
+};
+
/* SNP Guest message request */
struct snp_req_data {
unsigned long req_gpa;
@@ -336,6 +336,53 @@ struct vmcb_save_area {
u64 last_excp_to;
u8 reserved_0x298[72];
u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
+ u8 reserved_7b[4];
+ u32 pkru;
+ u8 reserved_7a[20];
+ u64 reserved_8; /* rax already available at 0x01f8 */
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 reserved_9; /* rsp already available at 0x01d8 */
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_10[16];
+ u64 sw_exit_code;
+ u64 sw_exit_info_1;
+ u64 sw_exit_info_2;
+ u64 sw_scratch;
+ union {
+ u64 sev_features;
+ struct {
+ u64 sev_feature_snp : 1;
+ u64 sev_feature_vtom : 1;
+ u64 sev_feature_reflectvc : 1;
+ u64 sev_feature_restrict_injection : 1;
+ u64 sev_feature_alternate_injection : 1;
+ u64 sev_feature_full_debug : 1;
+ u64 sev_feature_reserved1 : 1;
+ u64 sev_feature_snpbtb_isolation : 1;
+ u64 sev_feature_resrved2 : 56;
+ };
+ };
+ u64 vintr_ctrl;
+ u64 guest_error_code;
+ u64 virtual_tom;
+ u64 tlb_id;
+ u64 pcpu_id;
+ u64 event_inject;
+ u64 xcr0;
+ u8 valid_bitmap[16];
+ u64 x87_state_gpa;
} __packed;
/* Save area definition for SEV-ES and SEV-SNP guests */
@@ -39,6 +39,13 @@
#include <asm/realmode.h>
#include <asm/e820/api.h>
+/*
+ * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
+ * to start AP in enlightened SEV guest.
+ */
+#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
+#define HV_AP_SEGMENT_LIMIT 0xffffffff
+
/* Is Linux running as the root partition? */
bool hv_root_partition;
struct ms_hyperv_info ms_hyperv;
@@ -230,6 +237,94 @@ static void __init hv_smp_prepare_boot_cpu(void)
#endif
}
+static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+#define hv_populate_vmcb_seg(seg, gdtr_base) \
+do { \
+ if (seg.selector) { \
+ seg.base = 0; \
+ seg.limit = HV_AP_SEGMENT_LIMIT; \
+ seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \
+ seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
+ } \
+} while (0) \
+
+int hv_snp_boot_ap(int cpu, unsigned long start_ip)
+{
+ struct vmcb_save_area *vmsa = (struct vmcb_save_area *)
+ __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ struct desc_ptr gdtr;
+ u64 ret, retry = 5;
+ struct hv_start_virtual_processor_input *start_vp_input;
+ union sev_rmp_adjust rmp_adjust;
+ unsigned long flags;
+
+ native_store_gdt(&gdtr);
+
+ vmsa->gdtr.base = gdtr.address;
+ vmsa->gdtr.limit = gdtr.size;
+
+ asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
+ hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
+
+ asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
+ hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
+
+ asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
+ hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
+
+ asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
+ hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
+
+ vmsa->efer = native_read_msr(MSR_EFER);
+
+ asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4));
+ asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3));
+ asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0));
+
+ vmsa->xcr0 = 1;
+ vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
+ vmsa->rip = (u64)secondary_startup_64_no_verify;
+ vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];
+
+ vmsa->sev_feature_snp = 1;
+ vmsa->sev_feature_restrict_injection = 1;
+
+ rmp_adjust.as_uint64 = 0;
+ rmp_adjust.target_vmpl = 1;
+ rmp_adjust.vmsa = 1;
+ ret = rmpadjust((unsigned long)vmsa, RMP_PG_SIZE_4K,
+ rmp_adjust.as_uint64);
+ if (ret != 0) {
+ pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
+ return ret;
+ }
+
+ local_irq_save(flags);
+ start_vp_input =
+ (struct hv_start_virtual_processor_input *)ap_start_input_arg;
+ memset(start_vp_input, 0, sizeof(*start_vp_input));
+ start_vp_input->partitionid = -1;
+ start_vp_input->vpindex = cpu;
+ start_vp_input->targetvtl = ms_hyperv.vtl;
+ *(u64 *)&start_vp_input->context[0] = __pa(vmsa) | 1;
+
+ do {
+ ret = hv_do_hypercall(HVCALL_START_VIRTUAL_PROCESSOR,
+ start_vp_input, NULL);
+ } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);
+
+ if (!hv_result_success(ret)) {
+ pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
+ goto done;
+ }
+
+done:
+ local_irq_restore(flags);
+ return ret;
+}
+
static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
{
#ifdef CONFIG_X86_64
@@ -239,6 +334,16 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
native_smp_prepare_cpus(max_cpus);
+ /*
+ * Override wakeup_secondary_cpu callback for SEV-SNP
+ * enlightened guest.
+ */
+ if (hv_isolation_type_en_snp())
+ apic->wakeup_secondary_cpu = hv_snp_boot_ap;
+
+ if (!hv_root_partition)
+ return;
+
#ifdef CONFIG_X86_64
for_each_present_cpu(i) {
if (i == 0)
@@ -475,8 +580,7 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
- if (hv_root_partition)
- smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
+ smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif
/*
@@ -501,7 +605,7 @@ static void __init ms_hyperv_init_platform(void)
if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
mark_tsc_unstable("running on Hyper-V");
- if (isolation_type_en_snp()) {
+ if (hv_isolation_type_en_snp()) {
/*
* Hyper-V enlightened snp guest boots kernel
* directly without bootloader and so roms,
@@ -511,7 +615,7 @@ static void __init ms_hyperv_init_platform(void)
x86_platform.legacy.rtc = 0;
x86_platform.set_wallclock = set_rtc_noop;
x86_platform.get_wallclock = get_rtc_noop;
- x86_platform.legacy.reserve_bios_regions = x86_init_noop;
+ x86_platform.legacy.reserve_bios_regions = 0;
x86_init.resources.probe_roms = x86_init_noop;
x86_init.resources.reserve_resources = x86_init_noop;
x86_init.mpparse.find_smp_config = x86_init_noop;
@@ -148,6 +148,7 @@ union hv_reference_tsc_msr {
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_ENABLE_VP_VTL 0x000f
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
#define HVCALL_SEND_IPI_EX 0x0015
@@ -165,6 +166,7 @@ union hv_reference_tsc_msr {
#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_START_VIRTUAL_PROCESSOR 0x0099
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
@@ -219,6 +221,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_PORT_ID 17
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+#define HV_STATUS_TIME_OUT 0x78
/*
* The Hyper-V TimeRefCount register and the TSC
@@ -778,6 +781,22 @@ struct hv_input_unmap_device_interrupt {
struct hv_interrupt_entry interrupt_entry;
} __packed;
+struct hv_enable_vp_vtl_input {
+ u64 partitionid;
+ u32 vpindex;
+ u8 targetvtl;
+ u8 padding[3];
+ u8 context[0xe0];
+} __packed;
+
+struct hv_start_virtual_processor_input {
+ u64 partitionid;
+ u32 vpindex;
+ u8 targetvtl;
+ u8 padding[3];
+ u8 context[0xe0];
+} __packed;
+
#define HV_SOURCE_SHADOW_NONE 0x0
#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1