[RFC,V4,08/17] x86/hyperv: Initialize cpu and memory for sev-snp enlightened guest

Message ID 20230403174406.4180472-9-ltykernel@gmail.com
State New
Headers
Series x86/hyperv/sev: Add AMD sev-snp enlightened guest support on hyperv |

Commit Message

Tianyu Lan April 3, 2023, 5:43 p.m. UTC
  From: Tianyu Lan <tiala@microsoft.com>

Read processor amd memory info from specific address which are
populated by Hyper-V. Initialize smp cpu related ops, pvalidate
system memory and add it into e820 table.

Signed-off-by: Tianyu Lan <tiala@microsoft.com>
---
 arch/x86/hyperv/ivm.c           | 78 +++++++++++++++++++++++++++++++++
 arch/x86/include/asm/mshyperv.h | 16 +++++++
 arch/x86/kernel/cpu/mshyperv.c  |  3 ++
 3 files changed, 97 insertions(+)
  

Comments

Michael Kelley (LINUX) April 12, 2023, 2:39 p.m. UTC | #1
From: Tianyu Lan <ltykernel@gmail.com> Sent: Monday, April 3, 2023 10:44 AM
> 
> Read processor amd memory info from specific address which are
> populated by Hyper-V. Initialize smp cpu related ops, pvalidate
> system memory and add it into e820 table.
> 
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
>  arch/x86/hyperv/ivm.c           | 78 +++++++++++++++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h | 16 +++++++
>  arch/x86/kernel/cpu/mshyperv.c  |  3 ++
>  3 files changed, 97 insertions(+)
> 
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 368b2731950e..fa4de2761460 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -17,6 +17,11 @@
>  #include <asm/mem_encrypt.h>
>  #include <asm/mshyperv.h>
>  #include <asm/hypervisor.h>
> +#include <asm/coco.h>
> +#include <asm/io_apic.h>
> +#include <asm/sev.h>
> +#include <asm/realmode.h>
> +#include <asm/e820/api.h>
> 
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
> 
> @@ -57,6 +62,22 @@ union hv_ghcb {
> 
>  static u16 hv_ghcb_version __ro_after_init;
> 
> +static u32 processor_count;
> +
> +static __init void hv_snp_get_smp_config(unsigned int early)
> +{
> +	if (!early) {
> +		while (num_processors < processor_count) {
> +			early_per_cpu(x86_cpu_to_apicid, num_processors) = num_processors;
> +			early_per_cpu(x86_bios_cpu_apicid, num_processors) = num_processors;
> +			physid_set(num_processors, phys_cpu_present_map);
> +			set_cpu_possible(num_processors, true);
> +			set_cpu_present(num_processors, true);
> +			num_processors++;
> +		}
> +	}
> +}
> +
>  u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
>  {
>  	union hv_ghcb *hv_ghcb;
> @@ -356,6 +377,63 @@ static bool hv_is_private_mmio(u64 addr)
>  	return false;
>  }
> 
> +__init void hv_sev_init_mem_and_cpu(void)
> +{
> +	struct memory_map_entry *entry;
> +	struct e820_entry *e820_entry;
> +	u64 e820_end;
> +	u64 ram_end;
> +	u64 page;
> +
> +	/*
> +	 * Hyper-V enlightened snp guest boots kernel
> +	 * directly without bootloader and so roms,
> +	 * bios regions and reserve resources are not
> +	 * available. Set these callback to NULL.
> +	 */
> +	x86_platform.legacy.reserve_bios_regions = 0;
> +	x86_init.resources.probe_roms = x86_init_noop;
> +	x86_init.resources.reserve_resources = x86_init_noop;
> +	x86_init.mpparse.find_smp_config = x86_init_noop;
> +	x86_init.mpparse.get_smp_config = hv_snp_get_smp_config;
> +
> +	/*
> +	 * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
> +	 * and legacy APIC page read/write. Switch to hv apic here.
> +	 */
> +	disable_ioapic_support();
> +
> +	/* Read processor number and memory layout. */
> +	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
> +	entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
> +			+ sizeof(struct memory_map_entry));

Why is the first map entry being skipped?

> +
> +	/*
> +	 * E820 table in the memory just describes memory for
> +	 * kernel, ACPI table, cmdline, boot params and ramdisk.
> +	 * Hyper-V popoulates the rest memory layout in the EN_SEV_
> +	 * SNP_PROCESSOR_INFO_ADDR.
> +	 */
> +	for (; entry->numpages != 0; entry++) {
> +		e820_entry = &e820_table->entries[
> +				e820_table->nr_entries - 1];
> +		e820_end = e820_entry->addr + e820_entry->size;
> +		ram_end = (entry->starting_gpn +
> +			   entry->numpages) * PAGE_SIZE;
> +
> +		if (e820_end < entry->starting_gpn * PAGE_SIZE)
> +			e820_end = entry->starting_gpn * PAGE_SIZE;
> +
> +		if (e820_end < ram_end) {
> +			pr_info("Hyper-V: add e820 entry [mem %#018Lx-%#018Lx]\n", e820_end, ram_end - 1);
> +			e820__range_add(e820_end, ram_end - e820_end,
> +					E820_TYPE_RAM);
> +			for (page = e820_end; page < ram_end; page += PAGE_SIZE)
> +				pvalidate((unsigned long)__va(page), RMP_PG_SIZE_4K, true);
> +		}
> +	}
> +}
> +
>  void __init hv_vtom_init(void)
>  {
>  	/*
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 3c15e23162e7..a4a59007b5f2 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -41,6 +41,20 @@ extern bool hv_isolation_type_en_snp(void);
> 
>  extern union hv_ghcb * __percpu *hv_ghcb_pg;
> 
> +/*
> + * Hyper-V puts processor and memory layout info
> + * to this address in SEV-SNP enlightened guest.
> + */
> +#define EN_SEV_SNP_PROCESSOR_INFO_ADDR 0x802000
> +
> +struct memory_map_entry {
> +	u64 starting_gpn;
> +	u64 numpages;
> +	u16 type;
> +	u16 flags;
> +	u32 reserved;
> +};
> +
>  int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
>  int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
>  int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> @@ -246,12 +260,14 @@ void hv_ghcb_msr_read(u64 msr, u64 *value);
>  bool hv_ghcb_negotiate_protocol(void);
>  void hv_ghcb_terminate(unsigned int set, unsigned int reason);
>  void hv_vtom_init(void);
> +void hv_sev_init_mem_and_cpu(void);
>  #else
>  static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
>  static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
>  static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
>  static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
>  static inline void hv_vtom_init(void) {}
> +static inline void hv_sev_init_mem_and_cpu(void) {}
>  #endif
> 
>  extern bool hv_isolation_type_snp(void);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 2f2dcb2370b6..71820bbf9e90 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -529,6 +529,9 @@ static void __init ms_hyperv_init_platform(void)
>  	if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
>  		mark_tsc_unstable("running on Hyper-V");
> 
> +	if (hv_isolation_type_en_snp())
> +		hv_sev_init_mem_and_cpu();

This looks good now.  Moving the code into a separate function in
ivm.c works well.

> +
>  	hardlockup_detector_disable();
>  }
> 
> --
> 2.25.1
  
Dave Hansen April 12, 2023, 3:53 p.m. UTC | #2
On 4/3/23 10:43, Tianyu Lan wrote:
> From: Tianyu Lan <tiala@microsoft.com>
> 
> Read processor amd memory info from specific address which are
> populated by Hyper-V. Initialize smp cpu related ops, pvalidate
> system memory and add it into e820 table.
> 
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
>  arch/x86/hyperv/ivm.c           | 78 +++++++++++++++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h | 16 +++++++
>  arch/x86/kernel/cpu/mshyperv.c  |  3 ++
>  3 files changed, 97 insertions(+)
> 
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 368b2731950e..fa4de2761460 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -17,6 +17,11 @@
>  #include <asm/mem_encrypt.h>
>  #include <asm/mshyperv.h>
>  #include <asm/hypervisor.h>
> +#include <asm/coco.h>
> +#include <asm/io_apic.h>
> +#include <asm/sev.h>
> +#include <asm/realmode.h>
> +#include <asm/e820/api.h>
>  
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  
> @@ -57,6 +62,22 @@ union hv_ghcb {
>  
>  static u16 hv_ghcb_version __ro_after_init;
>  
> +static u32 processor_count;
> +
> +static __init void hv_snp_get_smp_config(unsigned int early)
> +{
> +	if (!early) {
> +		while (num_processors < processor_count) {
> +			early_per_cpu(x86_cpu_to_apicid, num_processors) = num_processors;
> +			early_per_cpu(x86_bios_cpu_apicid, num_processors) = num_processors;
> +			physid_set(num_processors, phys_cpu_present_map);
> +			set_cpu_possible(num_processors, true);
> +			set_cpu_present(num_processors, true);
> +			num_processors++;
> +		}
> +	}
> +}

Folks, please minimize indentation:

	if (early)
		return;

It would also be nice to see *some* explanation in the changelog or
comments about why it's best and correct to just do nothing if early==1.

Also, this _consumes_ data from hv_sev_init_mem_and_cpu().  It would
make more sense to me to have them ordered the other way.
hv_sev_init_mem_and_cpu() first, this second.

>  u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
>  {
>  	union hv_ghcb *hv_ghcb;
> @@ -356,6 +377,63 @@ static bool hv_is_private_mmio(u64 addr)
>  	return false;
>  }
>  
> +__init void hv_sev_init_mem_and_cpu(void)
> +{
> +	struct memory_map_entry *entry;
> +	struct e820_entry *e820_entry;
> +	u64 e820_end;
> +	u64 ram_end;
> +	u64 page;
> +
> +	/*
> +	 * Hyper-V enlightened snp guest boots kernel
> +	 * directly without bootloader and so roms,
> +	 * bios regions and reserve resources are not
> +	 * available. Set these callback to NULL.
> +	 */
> +	x86_platform.legacy.reserve_bios_regions = 0;
> +	x86_init.resources.probe_roms = x86_init_noop;
> +	x86_init.resources.reserve_resources = x86_init_noop;
> +	x86_init.mpparse.find_smp_config = x86_init_noop;
> +	x86_init.mpparse.get_smp_config = hv_snp_get_smp_config;

This is one of those places that vertical alignment adds clarity:

> +	x86_init.resources.probe_roms	     = x86_init_noop;
> +	x86_init.resources.reserve_resources = x86_init_noop;
> +	x86_init.mpparse.find_smp_config     = x86_init_noop;
> +	x86_init.mpparse.get_smp_config      = hv_snp_get_smp_config;

See? 3 noops and only one actual implemented function.  Clear as day now.

> +	/*> +	 * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
> +	 * and legacy APIC page read/write. Switch to hv apic here.
> +	 */
> +	disable_ioapic_support();

Do these systems have X86_FEATURE_APIC set?  Why is this needed in
addition to the architectural enumeration that already exists?

Is there any other place in the kernel that has this one-off disabling
of the APIC?

> +	/* Read processor number and memory layout. */
> +	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
> +	entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
> +			+ sizeof(struct memory_map_entry));

Ick.

There are a lot of ways to do this.  But, this is an awfully ugly way.

struct snp_processor_info {
	u32 processor_count;
	struct memory_map_entry[] entries;
}

struct snp_processor_info *snp_pi =
				__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
processor_count = snp_pi->processor_count;

Then, have your for() loop through snp_pi->entries;

Actually, I'm not _quite_ sure that processor_count and entries are next
to each other.  But, either way, I do think a struct makes sense.

Also, what guarantees that EN_SEV_SNP_PROCESSOR_INFO_ADDR is mapped?
It's up above 8MB which I don't remember off the top of my head as being
a special address.

> +	/*
> +	 * E820 table in the memory just describes memory for
> +	 * kernel, ACPI table, cmdline, boot params and ramdisk.
> +	 * Hyper-V popoulates the rest memory layout in the EN_SEV_
> +	 * SNP_PROCESSOR_INFO_ADDR.
> +	 */

Really?  That is not very cool.  We need a better explanation of why
there was no way to use the decades-old e820 or EFI memory map and why
this needs to be a special snowflake.

> +	for (; entry->numpages != 0; entry++) {
> +		e820_entry = &e820_table->entries[
> +				e820_table->nr_entries - 1];
> +		e820_end = e820_entry->addr + e820_entry->size;
> +		ram_end = (entry->starting_gpn +
> +			   entry->numpages) * PAGE_SIZE;
> +
> +		if (e820_end < entry->starting_gpn * PAGE_SIZE)
> +			e820_end = entry->starting_gpn * PAGE_SIZE;
> +
> +		if (e820_end < ram_end) {
> +			pr_info("Hyper-V: add e820 entry [mem %#018Lx-%#018Lx]\n", e820_end, ram_end - 1);
> +			e820__range_add(e820_end, ram_end - e820_end,
> +					E820_TYPE_RAM);
> +			for (page = e820_end; page < ram_end; page += PAGE_SIZE)
> +				pvalidate((unsigned long)__va(page), RMP_PG_SIZE_4K, true);
> +		}
> +	}
> +}

Oh, is this just about having a pre-accepted area and a non-accepted
area?  Is this basically another one-off implementation of unaccepted
memory ... that doesn't use the EFI standard?

>  void __init hv_vtom_init(void)
>  {
>  	/*
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 3c15e23162e7..a4a59007b5f2 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -41,6 +41,20 @@ extern bool hv_isolation_type_en_snp(void);
>  
>  extern union hv_ghcb * __percpu *hv_ghcb_pg;
>  
> +/*
> + * Hyper-V puts processor and memory layout info
> + * to this address in SEV-SNP enlightened guest.
> + */
> +#define EN_SEV_SNP_PROCESSOR_INFO_ADDR 0x802000
> +
> +struct memory_map_entry {
> +	u64 starting_gpn;
> +	u64 numpages;
> +	u16 type;
> +	u16 flags;
> +	u32 reserved;
> +};
> +
>  int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
>  int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
>  int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> @@ -246,12 +260,14 @@ void hv_ghcb_msr_read(u64 msr, u64 *value);
>  bool hv_ghcb_negotiate_protocol(void);
>  void hv_ghcb_terminate(unsigned int set, unsigned int reason);
>  void hv_vtom_init(void);
> +void hv_sev_init_mem_and_cpu(void);
>  #else
>  static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
>  static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
>  static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
>  static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
>  static inline void hv_vtom_init(void) {}
> +static inline void hv_sev_init_mem_and_cpu(void) {}
>  #endif
>  
>  extern bool hv_isolation_type_snp(void);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 2f2dcb2370b6..71820bbf9e90 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -529,6 +529,9 @@ static void __init ms_hyperv_init_platform(void)
>  	if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
>  		mark_tsc_unstable("running on Hyper-V");
>  
> +	if (hv_isolation_type_en_snp())
> +		hv_sev_init_mem_and_cpu();
> +
>  	hardlockup_detector_disable();
>  }
>
  
Tianyu Lan April 16, 2023, 7:21 a.m. UTC | #3
On 4/12/2023 10:39 PM, Michael Kelley (LINUX) wrote:
>> +	/* Read processor number and memory layout. */
>> +	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
>> +	entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
>> +			+ sizeof(struct memory_map_entry));
> Why is the first map entry being skipped?

The first entry is populated with processor count by Hyper-V.
  
Tianyu Lan April 16, 2023, 7:23 a.m. UTC | #4
On 4/12/2023 10:39 PM, Michael Kelley (LINUX) wrote:
>> +    /* Read processor number and memory layout. */
>> +    processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
>> +    entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
>> +                    + sizeof(struct memory_map_entry));
> Why is the first map entry being skipped?

The first entry is populated with processor count by Hyper-V.
  
Michael Kelley (LINUX) April 17, 2023, 12:49 p.m. UTC | #5
From: Tianyu Lan <ltykernel@gmail.com> Sent: Sunday, April 16, 2023 12:21 AM
> 
> On 4/12/2023 10:39 PM, Michael Kelley (LINUX) wrote:
> >> +	/* Read processor number and memory layout. */
> >> +	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
> >> +	entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
> >> +			+ sizeof(struct memory_map_entry));
> > Why is the first map entry being skipped?
> 
> The first entry is populated with processor count by Hyper-V.

Perhaps add a comment to acknowledge that the behavior
is a bit unexpected:

The 0th entry in the memory layout array contains just a 32-bit
processor count.  Read that value and then skip over the reminder
of the 0th entry.  Start processing memory_map_entry's with array
element 1.

Michael
  
Tianyu Lan April 18, 2023, 1:24 p.m. UTC | #6
On 4/12/2023 11:53 PM, Dave Hansen wrote:
>>   
>> +static u32 processor_count;
>> +
>> +static __init void hv_snp_get_smp_config(unsigned int early)
>> +{
>> +	if (!early) {
>> +		while (num_processors < processor_count) {
>> +			early_per_cpu(x86_cpu_to_apicid, num_processors) = num_processors;
>> +			early_per_cpu(x86_bios_cpu_apicid, num_processors) = num_processors;
>> +			physid_set(num_processors, phys_cpu_present_map);
>> +			set_cpu_possible(num_processors, true);
>> +			set_cpu_present(num_processors, true);
>> +			num_processors++;
>> +		}
>> +	}
>> +}
> Folks, please minimize indentation:
> 
> 	if (early)
> 		return;
> 
> It would also be nice to see*some*  explanation in the changelog or
> comments about why it's best and correct to just do nothing if early==1.
> 
> Also, this_consumes_  data from hv_sev_init_mem_and_cpu().  It would
> make more sense to me to have them ordered the other way.
> hv_sev_init_mem_and_cpu() first, this second.

Hi Dave
	Thanks for your review. Good suggestion! Will update in the next
verison.

> 
>>   u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
>>   {
>>   	union hv_ghcb *hv_ghcb;
>> @@ -356,6 +377,63 @@ static bool hv_is_private_mmio(u64 addr)
>>   	return false;
>>   }
>>   
>> +__init void hv_sev_init_mem_and_cpu(void)
>> +{
>> +	struct memory_map_entry *entry;
>> +	struct e820_entry *e820_entry;
>> +	u64 e820_end;
>> +	u64 ram_end;
>> +	u64 page;
>> +
>> +	/*
>> +	 * Hyper-V enlightened snp guest boots kernel
>> +	 * directly without bootloader and so roms,
>> +	 * bios regions and reserve resources are not
>> +	 * available. Set these callback to NULL.
>> +	 */
>> +	x86_platform.legacy.reserve_bios_regions = 0;
>> +	x86_init.resources.probe_roms = x86_init_noop;
>> +	x86_init.resources.reserve_resources = x86_init_noop;
>> +	x86_init.mpparse.find_smp_config = x86_init_noop;
>> +	x86_init.mpparse.get_smp_config = hv_snp_get_smp_config;
> This is one of those places that vertical alignment adds clarity:
> 
>> +	x86_init.resources.probe_roms	     = x86_init_noop;
>> +	x86_init.resources.reserve_resources = x86_init_noop;
>> +	x86_init.mpparse.find_smp_config     = x86_init_noop;
>> +	x86_init.mpparse.get_smp_config      = hv_snp_get_smp_config;
> See? 3 noops and only one actual implemented function.  Clear as day now.
> 

Yes, this looks better. Will update.

>> +	/*> +	 * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
>> +	 * and legacy APIC page read/write. Switch to hv apic here.
>> +	 */
>> +	disable_ioapic_support();
> Do these systems have X86_FEATURE_APIC set?  Why is this needed in
> addition to the architectural enumeration that already exists?
>

X86_FEATURE_APIC is still set. Hyper-V provides parav-virtualized local
apic interface to replace APIC page opeartion. In the SEV-SNP guest.

> Is there any other place in the kernel that has this one-off disabling
> of the APIC?

In current kernel code, ioapic support still may be disabled when there 
is no MP table or ACPI MADT configuration. Please see 
__apic_intr_mode_select() and disable_smp() for detial where ioapic is 
disabled.

> 
>> +	/* Read processor number and memory layout. */
>> +	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
>> +	entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
>> +			+ sizeof(struct memory_map_entry));
> Ick.
> 
> There are a lot of ways to do this.  But, this is an awfully ugly way.
> 
> struct snp_processor_info {
> 	u32 processor_count;
> 	struct memory_map_entry[] entries;
> }
> 
> struct snp_processor_info *snp_pi =
> 				__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
> processor_count = snp_pi->processor_count;
> 
> Then, have your for() loop through snp_pi->entries;
> 
> Actually, I'm not_quite_  sure that processor_count and entries are next
> to each other.  But, either way, I do think a struct makes sense.

Agree. Will update.

> 
> Also, what guarantees that EN_SEV_SNP_PROCESSOR_INFO_ADDR is mapped?
> It's up above 8MB which I don't remember off the top of my head as being
> a special address.

This EN_SEV_SNP_PROCESSOR_INFO_ADDR is specified by hypervisor tool.
Hypervisor populates mem and cpu info to the page in the memory and 
kernel may access it via adding PHYS_OFFSET_OFFSET directly.

> 
>> +	/*
>> +	 * E820 table in the memory just describes memory for
>> +	 * kernel, ACPI table, cmdline, boot params and ramdisk.
>> +	 * Hyper-V popoulates the rest memory layout in the EN_SEV_
>> +	 * SNP_PROCESSOR_INFO_ADDR.
>> +	 */
> Really?  That is not very cool.  We need a better explanation of why
> there was no way to use the decades-old e820 or EFI memory map and why
> this needs to be a special snowflake.

Agree. There should be a comment to describe that there is no virtual 
Bios in the guest and hypervisor boots Linux kernel directly. So kernel 
needs to populdate e820 tables which should be prepared by virtual Bios.

> 
>> +	for (; entry->numpages != 0; entry++) {
>> +		e820_entry = &e820_table->entries[
>> +				e820_table->nr_entries - 1];
>> +		e820_end = e820_entry->addr + e820_entry->size;
>> +		ram_end = (entry->starting_gpn +
>> +			   entry->numpages) * PAGE_SIZE;
>> +
>> +		if (e820_end < entry->starting_gpn * PAGE_SIZE)
>> +			e820_end = entry->starting_gpn * PAGE_SIZE;
>> +
>> +		if (e820_end < ram_end) {
>> +			pr_info("Hyper-V: add e820 entry [mem %#018Lx-%#018Lx]\n", e820_end, ram_end - 1);
>> +			e820__range_add(e820_end, ram_end - e820_end,
>> +					E820_TYPE_RAM);
>> +			for (page = e820_end; page < ram_end; page += PAGE_SIZE)
>> +				pvalidate((unsigned long)__va(page), RMP_PG_SIZE_4K, true);
>> +		}
>> +	}
>> +}
> Oh, is this just about having a pre-accepted area and a non-accepted
> area?  Is this basically another one-off implementation of unaccepted
> memory ... that doesn't use the EFI standard?

No, there is no virtual EFI firmware inside VM and so kernel gets mem 
and vcpu info directly from Hyper-V.
  
Tianyu Lan April 18, 2023, 2:12 p.m. UTC | #7
On 4/17/2023 8:49 PM, Michael Kelley (LINUX) wrote:
>>> Why is the first map entry being skipped?
>> The first entry is populated with processor count by Hyper-V.
> Perhaps add a comment to acknowledge that the behavior
> is a bit unexpected:
> 
> The 0th entry in the memory layout array contains just a 32-bit
> processor count.  Read that value and then skip over the reminder
> of the 0th entry.  Start processing memory_map_entry's with array
> element 1.
> 

Good suggestion! Will update in the next version.
  

Patch

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 368b2731950e..fa4de2761460 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -17,6 +17,11 @@ 
 #include <asm/mem_encrypt.h>
 #include <asm/mshyperv.h>
 #include <asm/hypervisor.h>
+#include <asm/coco.h>
+#include <asm/io_apic.h>
+#include <asm/sev.h>
+#include <asm/realmode.h>
+#include <asm/e820/api.h>
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
@@ -57,6 +62,22 @@  union hv_ghcb {
 
 static u16 hv_ghcb_version __ro_after_init;
 
+static u32 processor_count;
+
+static __init void hv_snp_get_smp_config(unsigned int early)
+{
+	if (!early) {
+		while (num_processors < processor_count) {
+			early_per_cpu(x86_cpu_to_apicid, num_processors) = num_processors;
+			early_per_cpu(x86_bios_cpu_apicid, num_processors) = num_processors;
+			physid_set(num_processors, phys_cpu_present_map);
+			set_cpu_possible(num_processors, true);
+			set_cpu_present(num_processors, true);
+			num_processors++;
+		}
+	}
+}
+
 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
 {
 	union hv_ghcb *hv_ghcb;
@@ -356,6 +377,63 @@  static bool hv_is_private_mmio(u64 addr)
 	return false;
 }
 
+__init void hv_sev_init_mem_and_cpu(void)
+{
+	struct memory_map_entry *entry;
+	struct e820_entry *e820_entry;
+	u64 e820_end;
+	u64 ram_end;
+	u64 page;
+
+	/*
+	 * Hyper-V enlightened snp guest boots kernel
+	 * directly without bootloader and so roms,
+	 * bios regions and reserve resources are not
+	 * available. Set these callback to NULL.
+	 */
+	x86_platform.legacy.reserve_bios_regions = 0;
+	x86_init.resources.probe_roms = x86_init_noop;
+	x86_init.resources.reserve_resources = x86_init_noop;
+	x86_init.mpparse.find_smp_config = x86_init_noop;
+	x86_init.mpparse.get_smp_config = hv_snp_get_smp_config;
+
+	/*
+	 * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
+	 * and legacy APIC page read/write. Switch to hv apic here.
+	 */
+	disable_ioapic_support();
+
+	/* Read processor number and memory layout. */
+	processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);
+	entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
+			+ sizeof(struct memory_map_entry));
+
+	/*
+	 * E820 table in the memory just describes memory for
+	 * kernel, ACPI table, cmdline, boot params and ramdisk.
+	 * Hyper-V popoulates the rest memory layout in the EN_SEV_
+	 * SNP_PROCESSOR_INFO_ADDR.
+	 */
+	for (; entry->numpages != 0; entry++) {
+		e820_entry = &e820_table->entries[
+				e820_table->nr_entries - 1];
+		e820_end = e820_entry->addr + e820_entry->size;
+		ram_end = (entry->starting_gpn +
+			   entry->numpages) * PAGE_SIZE;
+
+		if (e820_end < entry->starting_gpn * PAGE_SIZE)
+			e820_end = entry->starting_gpn * PAGE_SIZE;
+
+		if (e820_end < ram_end) {
+			pr_info("Hyper-V: add e820 entry [mem %#018Lx-%#018Lx]\n", e820_end, ram_end - 1);
+			e820__range_add(e820_end, ram_end - e820_end,
+					E820_TYPE_RAM);
+			for (page = e820_end; page < ram_end; page += PAGE_SIZE)
+				pvalidate((unsigned long)__va(page), RMP_PG_SIZE_4K, true);
+		}
+	}
+}
+
 void __init hv_vtom_init(void)
 {
 	/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3c15e23162e7..a4a59007b5f2 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -41,6 +41,20 @@  extern bool hv_isolation_type_en_snp(void);
 
 extern union hv_ghcb * __percpu *hv_ghcb_pg;
 
+/*
+ * Hyper-V puts processor and memory layout info
+ * to this address in SEV-SNP enlightened guest.
+ */
+#define EN_SEV_SNP_PROCESSOR_INFO_ADDR 0x802000
+
+struct memory_map_entry {
+	u64 starting_gpn;
+	u64 numpages;
+	u16 type;
+	u16 flags;
+	u32 reserved;
+};
+
 int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
@@ -246,12 +260,14 @@  void hv_ghcb_msr_read(u64 msr, u64 *value);
 bool hv_ghcb_negotiate_protocol(void);
 void hv_ghcb_terminate(unsigned int set, unsigned int reason);
 void hv_vtom_init(void);
+void hv_sev_init_mem_and_cpu(void);
 #else
 static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
 static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
 static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
 static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
 static inline void hv_vtom_init(void) {}
+static inline void hv_sev_init_mem_and_cpu(void) {}
 #endif
 
 extern bool hv_isolation_type_snp(void);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 2f2dcb2370b6..71820bbf9e90 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -529,6 +529,9 @@  static void __init ms_hyperv_init_platform(void)
 	if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
 		mark_tsc_unstable("running on Hyper-V");
 
+	if (hv_isolation_type_en_snp())
+		hv_sev_init_mem_and_cpu();
+
 	hardlockup_detector_disable();
 }