[v16,8/8] x86/smpboot: Allow parallel bringup for SEV-ES

Message ID 20230321194008.785922-9-usama.arif@bytedance.com
State New
Series Parallel CPU bringup for x86_64

Commit Message

Usama Arif March 21, 2023, 7:40 p.m. UTC
  From: David Woodhouse <dwmw@amazon.co.uk>

Enable parallel bringup for SEV-ES guests. The APs can't actually
execute the CPUID instruction directly during early startup, but they
can make the GHCB call directly instead, just as the VC trap handler
would do.
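
For illustration, the MSR-protocol exchange the APs perform amounts to
roughly the following (a sketch only, using the sev_es_{rd,wr}_ghcb_msr()
helper names from arch/x86/kernel/sev.c; the real code is in the patch):

static u32 ghcb_msr_cpuid_0b_edx(void)
{
	u64 val = GHCB_MSR_CPUID_REQ |			/* GHCBInfo 0x004 */
		  ((u64)GHCB_CPUID_REQ_EDX << 30) |	/* want EDX back */
		  ((u64)0xb << 32);			/* CPUID leaf 0xB */

	sev_es_wr_ghcb_msr(val);	/* wrmsr(MSR_AMD64_SEV_ES_GHCB, ...) */
	VMGEXIT();			/* rep; vmmcall */
	val = sev_es_rd_ghcb_msr();

	if ((val & GHCB_MSR_INFO_MASK) != GHCB_MSR_CPUID_RESP)
		return 0;		/* protocol failure */

	return (u32)(val >> 32);	/* GHCBData[63:32]: the x2APIC ID */
}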

Thanks to Sabin for talking me through the way this works.

Suggested-by: Sabin Rapan <sabrapan@amazon.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/coco/core.c              |  5 ++++
 arch/x86/include/asm/coco.h       |  1 +
 arch/x86/include/asm/sev-common.h |  3 +++
 arch/x86/include/asm/smp.h        |  5 +++-
 arch/x86/kernel/head_64.S         | 30 ++++++++++++++++++++++++
 arch/x86/kernel/smpboot.c         | 39 ++++++++++++++++++++++++++-----
 6 files changed, 76 insertions(+), 7 deletions(-)
  

Comments

Borislav Petkov March 22, 2023, 10:47 p.m. UTC | #1
On Tue, Mar 21, 2023 at 07:40:08PM +0000, Usama Arif wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> Enable parallel bringup for SEV-ES guests. The APs can't actually
> execute the CPUID instruction directly during early startup, but they
> can make the GHCB call directly instead, just as the VC trap handler
> would do.
> 
> Thanks to Sabin for talking me through the way this works.
> 
> Suggested-by: Sabin Rapan <sabrapan@amazon.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: Usama Arif <usama.arif@bytedance.com>
> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>  arch/x86/coco/core.c              |  5 ++++
>  arch/x86/include/asm/coco.h       |  1 +
>  arch/x86/include/asm/sev-common.h |  3 +++
>  arch/x86/include/asm/smp.h        |  5 +++-
>  arch/x86/kernel/head_64.S         | 30 ++++++++++++++++++++++++
>  arch/x86/kernel/smpboot.c         | 39 ++++++++++++++++++++++++++-----
>  6 files changed, 76 insertions(+), 7 deletions(-)

How's the below (totally untested yet but it builds) instead after
applying those two prerequisites:

https://lore.kernel.org/r/20230318115634.9392-1-bp@alien8.de

---
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b63be696b776..0abf8a39cee1 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -70,6 +70,7 @@
 	/* GHCBData[63:12] */				\
 	(((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
 
+#ifndef __ASSEMBLY__
 /*
  * SNP Page State Change Operation
  *
@@ -161,6 +162,8 @@ struct snp_psc_desc {
 
 #define GHCB_RESP_CODE(v)		((v) & GHCB_MSR_INFO_MASK)
 
+#endif /* __ASSEMBLY__ */
+
 /*
  * Error codes related to GHCB input that can be communicated back to the guest
  * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 1335781e4976..516bcb4f0719 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -200,6 +200,7 @@ void snp_set_wakeup_secondary_cpu(void);
 bool snp_init(struct boot_params *bp);
 void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
+u32 sev_get_cpuid_0b(void);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
@@ -224,6 +225,7 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
 {
 	return -ENOTTY;
 }
+static inline u32 sev_get_cpuid_0b(void) { return 0; }
 #endif
 
 #endif
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index defe76ee9e64..1584f04a7007 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -204,7 +204,10 @@ extern unsigned int smpboot_control;
 /* Control bits for startup_64 */
 #define STARTUP_APICID_CPUID_0B	0x80000000
 #define STARTUP_APICID_CPUID_01	0x40000000
+#define STARTUP_APICID_SEV_ES	0x20000000
 
-#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B)
+#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | \
+			       STARTUP_APICID_CPUID_0B | \
+			       STARTUP_APICID_SEV_ES)
 
 #endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ff3a5f008d8a..574bd56288bf 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -26,6 +26,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/fixmap.h>
 #include <asm/smp.h>
+#include <asm/sev-common.h>
 
 /*
  * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -242,6 +243,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	 *
 	 * Bit 31	STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b)
 	 * Bit 30	STARTUP_APICID_CPUID_01 flag (use CPUID 0x01)
+	 * Bit 29	STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR)
 	 * Bit 0-24	CPU# if STARTUP_APICID_CPUID_xx flags are not set
 	 */
 	movl	smpboot_control(%rip), %ecx
@@ -249,6 +251,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	jnz	.Luse_cpuid_0b
 	testl	$STARTUP_APICID_CPUID_01, %ecx
 	jnz	.Luse_cpuid_01
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	testl	$STARTUP_APICID_SEV_ES, %ecx
+	jnz	.Luse_sev_cpuid_0b
+#endif
 	andl	$0x0FFFFFFF, %ecx
 	jmp	.Lsetup_cpu
 
@@ -259,6 +265,13 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	shr	$24, %edx
 	jmp	.Lsetup_AP
 
+.Luse_sev_cpuid_0b:
+	call sev_get_cpuid_0b
+	test %eax, %eax
+	jz 1f
+	movl %eax, %edx
+	jmp .Lsetup_AP
+
 .Luse_cpuid_0b:
 	mov	$0x0B, %eax
 	xorl	%ecx, %ecx
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index ce371f62167b..96ff63cb5622 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1142,6 +1142,24 @@ void snp_set_wakeup_secondary_cpu(void)
 	apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
 }
 
+u32 sev_get_cpuid_0b(void)
+{
+	u32 eax, edx;
+
+	/* Request CPUID 0xB_EDX through GHCB protocol */
+	native_wrmsr(MSR_AMD64_SEV_ES_GHCB,
+		     (GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ,
+		     0xb);
+	VMGEXIT();
+
+	native_rdmsr(MSR_AMD64_SEV_ES_GHCB, eax, edx);
+
+	if ((eax & GHCB_MSR_INFO_MASK) == GHCB_MSR_CPUID_RESP)
+		return edx;
+
+	return 0;
+}
+
 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
 {
 	u16 startup_cs, startup_ip;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0dba5c247be0..6734c26ad848 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -85,6 +85,7 @@
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 #include <asm/sev.h>
+#include <asm/coco.h>
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -1513,15 +1514,36 @@ void __init smp_prepare_cpus_common(void)
  * We can do 64-bit AP bringup in parallel if the CPU reports its APIC
  * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC
  * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too
- * hard. And not for SEV-ES guests because they can't use CPUID that
- * early.
+ * hard.
  */
 static bool prepare_parallel_bringup(void)
 {
-	if (IS_ENABLED(CONFIG_X86_32) || cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+	bool has_sev_es = false;
+
+	if (IS_ENABLED(CONFIG_X86_32))
 		return false;
 
-	if (x2apic_mode) {
+	/*
+	 * Encrypted guests other than SEV-ES (in the future) will need to
+	 * implement an early way of finding the APIC ID, since they will
+	 * presumably block direct CPUID too. Be kind to our future selves
+	 * by warning here instead of just letting them break. Parallel
+	 * startup doesn't have to be in the first round of enabling patches
+	 * for any such technology.
+	 */
+	if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
+		switch (cc_vendor) {
+		case CC_VENDOR_AMD:
+			has_sev_es = true;
+			break;
+
+		default:
+			pr_info("Disabling parallel bringup due to guest state encryption\n");
+			return false;
+		}
+	}
+
+	if (x2apic_mode || has_sev_es) {
 		if (boot_cpu_data.cpuid_level < 0x0b)
 			return false;
 
@@ -1530,8 +1552,13 @@ static bool prepare_parallel_bringup(void)
 			return false;
 		}
 
-		pr_debug("Using CPUID 0xb for parallel CPU startup\n");
-		smpboot_control = STARTUP_APICID_CPUID_0B;
+		if (has_sev_es) {
+			pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n");
+			smpboot_control = STARTUP_APICID_SEV_ES;
+		} else {
+			pr_debug("Using CPUID 0xb for parallel CPU startup\n");
+			smpboot_control = STARTUP_APICID_CPUID_0B;
+		}
 	} else {
 		/* Without X2APIC, what's in CPUID 0x01 should suffice. */
 		if (boot_cpu_data.cpuid_level < 0x01)
  
David Woodhouse March 23, 2023, 8:32 a.m. UTC | #2
On Wed, 2023-03-22 at 23:47 +0100, Borislav Petkov wrote:
> 
> +.Luse_sev_cpuid_0b:
> +       call sev_get_cpuid_0b
> +       test %eax, %eax
> +       jz 1f
> +       movl %eax, %edx
> +       jmp .Lsetup_AP
> +

Can this code path never be taken for bringing up CPU0 even after a
hotplug? I'd rather have -1 as the error indication.
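
Something like this, I mean (just a sketch): an APIC ID of zero is valid
for CPU0, while all-ones is the x2APIC broadcast ID that no CPU can own,
so:

	if ((eax & GHCB_MSR_INFO_MASK) != GHCB_MSR_CPUID_RESP)
		return -1U;	/* all-ones sentinel instead of 0 */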

>  .Luse_cpuid_0b:
>         mov     $0x0B, %eax
>         xorl    %ecx, %ecx
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index ce371f62167b..96ff63cb5622 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -1142,6 +1142,24 @@ void snp_set_wakeup_secondary_cpu(void)
>         apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
>  }
>  
> +u32 sev_get_cpuid_0b(void)
> +{
> +       u32 eax, edx;
> +
> +       /* Request CPUID 0xB_EDX through GHCB protocol */
> +       native_wrmsr(MSR_AMD64_SEV_ES_GHCB,
> +                    (GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ,
> +                    0xb);
> +       VMGEXIT();
> +
> +       native_rdmsr(MSR_AMD64_SEV_ES_GHCB, eax, edx);
> +
> +       if ((eax & GHCB_MSR_INFO_MASK) == GHCB_MSR_CPUID_RESP)
> +               return edx;
> +
> +       return 0;
> +}
> +
>  int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
>  {

Perhaps put this in head64.c so it gets built without -pg when needed? And
then you'll spot that the other functions in there are marked __head to
put them in .head.text, and maybe find some other stuff to cargo-cult
to make it safe to run C code that early...
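
Roughly this, I mean (a sketch; at this point __head is a file-local
define in head64.c rather than something from a common header):

/* like its head64.c neighbours: placed in .head.text */
#define __head	__section(".head.text")

static u32 __head sev_get_cpuid_0b(void)
{
	u64 val;

	/* same GHCB MSR-protocol exchange as in the patch above */
	sev_es_wr_ghcb_msr(GHCB_MSR_CPUID_REQ |
			   ((u64)GHCB_CPUID_REQ_EDX << 30) |
			   ((u64)0xb << 32));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	return (val & GHCB_MSR_INFO_MASK) == GHCB_MSR_CPUID_RESP ?
	       (u32)(val >> 32) : 0;
}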
  
Borislav Petkov March 23, 2023, 8:51 a.m. UTC | #3
On Thu, Mar 23, 2023 at 08:32:49AM +0000, David Woodhouse wrote:
> and maybe find some other stuff to cargo-cult to make it safe to run
> C code that early...

I'm trying to have less shit asm, not more if possible. We've been
aiming to convert stuff to C for years.

WTF are you calling cargo-cult?
  
David Woodhouse March 23, 2023, 9:04 a.m. UTC | #4
On Thu, 2023-03-23 at 09:51 +0100, Borislav Petkov wrote:
> On Thu, Mar 23, 2023 at 08:32:49AM +0000, David Woodhouse wrote:
> > and maybe find some other stuff to cargo-cult to make it safe to run
> > C code that early...
> 
> I'm trying to have less shit asm, not more if possible. We've been
> aiming to convert stuff to C for years.

Absolutely, I understand. There are a few hoops we have to jump through
to be able to run C code this early, but it's worth it.

> WTF are you calling cargo-cult?

Off the top of my head, I don't actually *remember* all the hoops we
have to jump through to run C code this early.

But there is head64.c with examples. By 'cargo cult' I refer to the
process of copying what they do even if we don't really understand it.

I spotted that it builds without -pg, and that the functions in there are
tagged with __head to put them in the .head.text section. I didn't look
much further; there might be *other* details to copy… to 'cargo cult'.
  
Tom Lendacky March 23, 2023, 1:16 p.m. UTC | #5
On 3/22/23 17:47, Borislav Petkov wrote:
> On Tue, Mar 21, 2023 at 07:40:08PM +0000, Usama Arif wrote:
>> From: David Woodhouse <dwmw@amazon.co.uk>
>>
>> Enable parallel bringup for SEV-ES guests. The APs can't actually
>> execute the CPUID instruction directly during early startup, but they
>> can make the GHCB call directly instead, just as the VC trap handler
>> would do.
>>
>> Thanks to Sabin for talking me through the way this works.
>>
>> Suggested-by: Sabin Rapan <sabrapan@amazon.com>
>> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
>> Signed-off-by: Usama Arif <usama.arif@bytedance.com>
>> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
>> ---
>>   arch/x86/coco/core.c              |  5 ++++
>>   arch/x86/include/asm/coco.h       |  1 +
>>   arch/x86/include/asm/sev-common.h |  3 +++
>>   arch/x86/include/asm/smp.h        |  5 +++-
>>   arch/x86/kernel/head_64.S         | 30 ++++++++++++++++++++++++
>>   arch/x86/kernel/smpboot.c         | 39 ++++++++++++++++++++++++++-----
>>   6 files changed, 76 insertions(+), 7 deletions(-)
> 
> How's the below (totally untested yet but it builds) instead after
> applying those two prerequisites:
> 
> https://lore.kernel.org/r/20230318115634.9392-1-bp@alien8.de
> 
> ---

...

> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index ce371f62167b..96ff63cb5622 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -1142,6 +1142,24 @@ void snp_set_wakeup_secondary_cpu(void)
>   	apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
>   }
>   
> +u32 sev_get_cpuid_0b(void)

Maybe call this sev_early_get_cpuid_0b() or such to imply that the MSR 
protocol is being used and only retrieving / returning edx.

Also, since it is a function now and can be used at any point, the current 
GHCB MSR should be saved on entry and restored on exit.
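
I.e. something along these lines (a sketch, names illustrative):

static u32 sev_early_get_cpuid_0b(void)
{
	u64 saved, val;

	saved = sev_es_rd_ghcb_msr();	/* preserve the live GHCB MSR */

	sev_es_wr_ghcb_msr(GHCB_MSR_CPUID_REQ |
			   ((u64)GHCB_CPUID_REQ_EDX << 30) |
			   ((u64)0xb << 32));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(saved);	/* restore before returning */

	if ((val & GHCB_MSR_INFO_MASK) != GHCB_MSR_CPUID_RESP)
		return 0;

	return (u32)(val >> 32);
}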

Thanks,
Tom

> +{
> +	u32 eax, edx;
> +
> +	/* Request CPUID 0xB_EDX through GHCB protocol */
> +	native_wrmsr(MSR_AMD64_SEV_ES_GHCB,
> +		     (GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ,
> +		     0xb);
> +	VMGEXIT();
> +
> +	native_rdmsr(MSR_AMD64_SEV_ES_GHCB, eax, edx);
> +
> +	if ((eax & GHCB_MSR_INFO_MASK) == GHCB_MSR_CPUID_RESP)
> +		return edx;
> +
> +	return 0;
> +}
> +
>   int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
>   {
>   	u16 startup_cs, startup_ip;
  
Brian Gerst March 23, 2023, 2:23 p.m. UTC | #6
On Thu, Mar 23, 2023 at 5:04 AM David Woodhouse <dwmw2@infradead.org> wrote:
>
> On Thu, 2023-03-23 at 09:51 +0100, Borislav Petkov wrote:
> > On Thu, Mar 23, 2023 at 08:32:49AM +0000, David Woodhouse wrote:
> > > and maybe find some other stuff to cargo-cult to make it safe to run
> > > C code that early...
> >
> > I'm trying to have less shit asm, not more if possible. We've been
> > aiming to convert stuff to C for years.
>
> Absolutely, I understand. There are a few hoops we have to jump through
> to be able to run C code this early, but it's worth it.
>
> > WTF are you calling cargo-cult?
>
> Off the top of my head, I don't actually *remember* all the hoops we
> have to jump through to run C code this early.

Making sure that the stack protector is either disabled or properly
set up, and disabling any instrumentation/profiling/debug crap that
isn't initialized yet.
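
Concretely, something like this (a sketch; noinstr is the existing
kernel annotation that keeps tracing/KASAN/kcov/KCSAN out of a function,
while stack-protector coverage has to be handled per-file on compilers
without the function attribute):

/* callable before any instrumentation is initialized */
static noinstr u32 early_apicid_read(void)
{
	return 0;	/* placeholder body, illustrative only */
}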

--
Brian Gerst
  
Tom Lendacky March 23, 2023, 6:28 p.m. UTC | #7
On 3/23/23 10:34, Woodhouse, David wrote:
> On Thu, 2023-03-23 at 08:16 -0500, Tom Lendacky wrote:
>>
>> Maybe call this sev_early_get_cpuid_0b() or such to imply that the MSR
>> protocol is being used and only retrieving / returning edx.
> 
> Or indeed sev_early_get_apicid() since that's what it's actually doing,
> and the caller doesn't care *how*.

Sounds good.

> 
>> Also, since it is a function now and can be used at any point, the current
>> GHCB MSR should be saved on entry and restored on exit.
> 
> Well, I don't know that it should be callable at any point. This is
> only supposed to be called from head_64.S.

I agree. But once it's there, someone somewhere in the future may look and 
go, oh, I can call this. So I think it either needs a nice comment above 
it about how it is currently used/called and what to do if it needs to be 
called from someplace other than head_64.S or the MSR needs to be 
saved/restored.

Thanks,
Tom

> 
  
Borislav Petkov March 23, 2023, 10:13 p.m. UTC | #8
On March 23, 2023 7:28:38 PM GMT+01:00, Tom Lendacky <thomas.lendacky@amd.com> wrote:
>
>I agree. But once it's there, someone somewhere in the future may look and go, oh, I can call this. So I think it either needs a nice comment above it about how it is currently used/called and what to do if it needs to be called from someplace other than head_64.S or the MSR needs to be saved/restored.

Or it could be __init and get discarded when the system is up.
  
David Woodhouse March 23, 2023, 10:30 p.m. UTC | #9
On Thu, 2023-03-23 at 23:13 +0100, Borislav Petkov wrote:
> On March 23, 2023 7:28:38 PM GMT+01:00, Tom Lendacky
> <thomas.lendacky@amd.com> wrote:
> > 
> > I agree. But once it's there, someone somewhere in the future may
> > look and go, oh, I can call this. So I think it either needs a nice
> > comment above it about how it is currently used/called and what to
> > do if it needs to be called from someplace other than head_64.S or
> > the MSR needs to be saved/restored.
> 
> Or it could be __init and get discarded when the system is up.

Not if we have CPU hotplug. Or system suspend.
  
Borislav Petkov March 27, 2023, 5:47 p.m. UTC | #10
On Thu, Mar 23, 2023 at 10:23:02AM -0400, Brian Gerst wrote:
> Making sure that the stack protector is either disabled or properly
> set up, and disabling any instrumentation/profiling/debug crap that
> isn't initialized yet.

Lemme dump brain of what Tom and I were talking about today so that it
is documented somewhere.

* re: stack protector: I was thinking to mark this function

 __attribute__((no_stack_protector))

but gcc added the function attribute way later:

~/src/gcc/gcc.git> git tag --contains 346b302d09c1e6db56d9fe69048acb32fbb97845
basepoints/gcc-12
basepoints/gcc-13
releases/gcc-11.1.0
releases/gcc-11.2.0
releases/gcc-11.3.0
releases/gcc-12.1.0
releases/gcc-12.2.0

which means, that function would have to live somewhere in a file which
has stack protector disabled. One possible place would be
arch/x86/mm/mem_encrypt_identity.c which is kinda related.
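
If one wanted the attribute anyway, the dance would look something like
this (a sketch; per the tags above, gcc grew it only in 11.1):

#if defined(__GNUC__) && (__GNUC__ >= 11)
# define __early_no_stackp	__attribute__((no_stack_protector))
#else	/* older gcc: build the whole file with -fno-stack-protector */
# define __early_no_stackp
#endif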

* re: stack: in order to be able to call a C function that early, we'd
have to put the VA of the initial stack back into %rsp as we switch
pagetables a bit earlier in there (thx Tom).

So by then, doing all that cargo-cult just in order to not have a bunch
of lines in asm doesn't sound all that great anymore.

* The __head per-function attribute is easily solved by lifting the
__head define into a common header.

So meh, dunno. I guess we can do the asm thing for now, until a cleaner
solution without too many warts presents itself.

As to exporting cc_vendor:

https://lore.kernel.org/r/20230318115634.9392-1-bp@alien8.de

I'll redo those and the SEV-ES patch won't have to add cc_get_vendor().

Thx.
  
David Woodhouse March 27, 2023, 6:14 p.m. UTC | #11
On Mon, 2023-03-27 at 19:47 +0200, Borislav Petkov wrote:
> > Making sure that the stack protector is either disabled or properly
> > set up, and disabling any instrumentation/profiling/debug crap that
> > isn't initialized yet.
> 
> Lemme dump brain of what Tom and I were talking about today so that it
> is documented somewhere.
> 
> * re: stack protector: I was thinking to mark this function
> 
>  __attribute__((no_stack_protector))
> 
> but gcc added the function attribute way later:
> 
> ~/src/gcc/gcc.git> git tag --contains 346b302d09c1e6db56d9fe69048acb32fbb97845
> basepoints/gcc-12
> basepoints/gcc-13
> releases/gcc-11.1.0
> releases/gcc-11.2.0
> releases/gcc-11.3.0
> releases/gcc-12.1.0
> releases/gcc-12.2.0
> 
> which means, that function would have to live somewhere in a file which
> has stack protector disabled. One possible place would be
> arch/x86/mm/mem_encrypt_identity.c which is kinda related.

Shouldn't the rest of head64.c have the stack protector disabled, for
similar reasons?

> * re: stack: in order to be able to call a C function that early, we'd
> have to put the VA of the initial stack back into %rsp as we switch
> pagetables a bit earlier in there (thx Tom).

Hm, don't you have a stack at the point you added that call? I thought
you did? It doesn't have to be *the* stack for the AP in question.
Just "a" stack. And you have the lock on the real-mode one that you're
using.

> So by then, doing all that cargo-cult just in order to not have a bunch
> of lines in asm doesn't sound all that great anymore.
> 
> * The __head per-function attribute is easily solved by lifting the
> __head define into a common header.
> 
> So meh, dunno. I guess we can do the asm thing for now, until a cleaner
> solution without too many warts presents itself.

Hm, doesn't most of that just go away (or at least become "Already
Broken; Someone Else's Problem™") if you just concede to put your new C
function into head64.c along with a whole bunch of other existing
CONFIG_AMD_MEM_ENCRYPT support?

(We still have to fix it if it's Someone Else's Problem, of course.
It's just that you don't have to count that complexity towards your own
part.)
  
Tom Lendacky March 27, 2023, 7:14 p.m. UTC | #12
On 3/27/23 13:14, David Woodhouse wrote:
> On Mon, 2023-03-27 at 19:47 +0200, Borislav Petkov wrote:
>>> Making sure that the stack protector is either disabled or properly
>>> set up, and disabling any instrumentation/profiling/debug crap that
>>> isn't initialized yet.
>>
>> Lemme dump brain of what Tom and I were talking about today so that it
>> is documented somewhere.
>>
>> * re: stack protector: I was thinking to mark this function
>>
>>   __attribute__((no_stack_protector))
>>
>> but gcc added the function attribute way later:
>>
>> ~/src/gcc/gcc.git> git tag --contains 346b302d09c1e6db56d9fe69048acb32fbb97845
>> basepoints/gcc-12
>> basepoints/gcc-13
>> releases/gcc-11.1.0
>> releases/gcc-11.2.0
>> releases/gcc-11.3.0
>> releases/gcc-12.1.0
>> releases/gcc-12.2.0
>>
>> which means, that function would have to live somewhere in a file which
>> has stack protector disabled. One possible place would be
>> arch/x86/mm/mem_encrypt_identity.c which is kinda related.
> 
> Shouldn't the rest of head64.c have the stack protector disabled, for
> similar reasons?
> 
>> * re: stack: in order to be able to call a C function that early, we'd
>> have to put the VA of the initial stack back into %rsp as we switch
>> pagetables a bit earlier in there (thx Tom).
> 
> Hm, don't you have a stack at the point you added that call? I thought
> you did? It doesn't have to be *the* stack for the AP in question.
> Just "a" stack. And you have the lock on the real-mode one that you're
> using.

Unfortunately RSP holds the identity-mapped stack address, and when the
pagetable switch is performed the mapping to that stack is lost. It
would need to be updated to the equivalent of __va(RSP) to get a stack
that can be used without page faulting.
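
For reference, __va() is just the direct-map offset, so the fixup
amounts to adding PAGE_OFFSET to the identity-mapped value once the new
pagetables are active (variable names here are illustrative):

/* arch/x86/include/asm/page.h */
#define __va(x)	((void *)((unsigned long)(x) + PAGE_OFFSET))

	new_rsp = (unsigned long)__va(identity_rsp);	/* then reload %rsp */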

Thanks,
Tom

> 
>> So by then, doing all that cargo-cult just in order to not have a bunch
>> of lines in asm doesn't sound all that great anymore.
>>
>> * The __head per-function attribute is easily solved by lifting the
>> __head define into a common header.
>>
>> So meh, dunno. I guess we can do the asm thing for now, until a cleaner
>> solution without too many warts presents itself.
> 
> Hm, doesn't most of that just go away (or at least become "Already
> Broken; Someone Else's Problem™") if you just concede to put your new C
> function into head64.c along with a whole bunch of other existing
> CONFIG_AMD_MEM_ENCRYPT support?
> 
> (We still have to fix it if it's Someone Else's Problem, of course.
> It's just that you don't have to count that complexity towards your own
> part.)
  
Borislav Petkov March 27, 2023, 7:32 p.m. UTC | #13
On Mon, Mar 27, 2023 at 07:14:27PM +0100, David Woodhouse wrote:
> Shouldn't the rest of head64.c have the stack protector disabled, for
> similar reasons?

Not aware of any reason to that so far...

> Hm, doesn't most of that just go away (or at least become "Already
> Broken; Someone Else's Problem™") if you just concede to put your new C
> function into head64.c along with a whole bunch of other existing
> CONFIG_AMD_MEM_ENCRYPT support?

If it were only that, maybe, but we have to do the stack __va() thing as
Tom explained. So the jumping-through-hoops just to have a simple
function in C is not worth it... IMNSVHO, that is.

Thx.
  

Patch

diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index 49b44f881484..0bab38efb15a 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -129,6 +129,11 @@  u64 cc_mkdec(u64 val)
 }
 EXPORT_SYMBOL_GPL(cc_mkdec);
 
+enum cc_vendor cc_get_vendor(void)
+{
+	return vendor;
+}
+
 __init void cc_set_vendor(enum cc_vendor v)
 {
 	vendor = v;
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 3d98c3a60d34..0428d9712c96 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -12,6 +12,7 @@  enum cc_vendor {
 };
 
 void cc_set_vendor(enum cc_vendor v);
+enum cc_vendor cc_get_vendor(void);
 void cc_set_mask(u64 mask);
 
 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b63be696b776..0abf8a39cee1 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -70,6 +70,7 @@ 
 	/* GHCBData[63:12] */				\
 	(((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
 
+#ifndef __ASSEMBLY__
 /*
  * SNP Page State Change Operation
  *
@@ -161,6 +162,8 @@  struct snp_psc_desc {
 
 #define GHCB_RESP_CODE(v)		((v) & GHCB_MSR_INFO_MASK)
 
+#endif /* __ASSEMBLY__ */
+
 /*
  * Error codes related to GHCB input that can be communicated back to the guest
  * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index defe76ee9e64..1584f04a7007 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -204,7 +204,10 @@  extern unsigned int smpboot_control;
 /* Control bits for startup_64 */
 #define STARTUP_APICID_CPUID_0B	0x80000000
 #define STARTUP_APICID_CPUID_01	0x40000000
+#define STARTUP_APICID_SEV_ES	0x20000000
 
-#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | STARTUP_APICID_CPUID_0B)
+#define STARTUP_PARALLEL_MASK (STARTUP_APICID_CPUID_01 | \
+			       STARTUP_APICID_CPUID_0B | \
+			       STARTUP_APICID_SEV_ES)
 
 #endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ff3a5f008d8a..9c38849fcac8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -26,6 +26,7 @@ 
 #include <asm/nospec-branch.h>
 #include <asm/fixmap.h>
 #include <asm/smp.h>
+#include <asm/sev-common.h>
 
 /*
  * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -242,6 +243,7 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	 *
 	 * Bit 31	STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b)
 	 * Bit 30	STARTUP_APICID_CPUID_01 flag (use CPUID 0x01)
+	 * Bit 29	STARTUP_APICID_SEV_ES flag (CPUID 0x0b via GHCB MSR)
 	 * Bit 0-24	CPU# if STARTUP_APICID_CPUID_xx flags are not set
 	 */
 	movl	smpboot_control(%rip), %ecx
@@ -249,6 +251,10 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	jnz	.Luse_cpuid_0b
 	testl	$STARTUP_APICID_CPUID_01, %ecx
 	jnz	.Luse_cpuid_01
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	testl	$STARTUP_APICID_SEV_ES, %ecx
+	jnz	.Luse_sev_cpuid_0b
+#endif
 	andl	$0x0FFFFFFF, %ecx
 	jmp	.Lsetup_cpu
 
@@ -259,6 +265,30 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	shr	$24, %edx
 	jmp	.Lsetup_AP
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+.Luse_sev_cpuid_0b:
+	/* Set the GHCB MSR to request CPUID 0xB_EDX */
+	movl	$MSR_AMD64_SEV_ES_GHCB, %ecx
+	movl	$(GHCB_CPUID_REQ_EDX << 30) | GHCB_MSR_CPUID_REQ, %eax
+	movl	$0x0B, %edx
+	wrmsr
+
+	/* Perform GHCB MSR protocol */
+	rep; vmmcall		/* vmgexit */
+
+	/*
+	 * Get the result. After the RDMSR:
+	 *   EAX should be 0xc0000005
+	 *   EDX should have the CPUID register value and since EDX
+	 *   is the target register, no need to move the result.
+	 */
+	rdmsr
+	andl	$GHCB_MSR_INFO_MASK, %eax
+	cmpl	$GHCB_MSR_CPUID_RESP, %eax
+	jne	1f
+	jmp	.Lsetup_AP
+#endif
+
 .Luse_cpuid_0b:
 	mov	$0x0B, %eax
 	xorl	%ecx, %ecx
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0dba5c247be0..ef37356ab695 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -85,6 +85,7 @@ 
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 #include <asm/sev.h>
+#include <asm/coco.h>
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -1513,15 +1514,36 @@  void __init smp_prepare_cpus_common(void)
  * We can do 64-bit AP bringup in parallel if the CPU reports its APIC
  * ID in CPUID (either leaf 0x0B if we need the full APIC ID in X2APIC
  * mode, or leaf 0x01 if 8 bits are sufficient). Otherwise it's too
- * hard. And not for SEV-ES guests because they can't use CPUID that
- * early.
+ * hard.
  */
 static bool prepare_parallel_bringup(void)
 {
-	if (IS_ENABLED(CONFIG_X86_32) || cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+	bool has_sev_es = false;
+
+	if (IS_ENABLED(CONFIG_X86_32))
 		return false;
 
-	if (x2apic_mode) {
+	/*
+	 * Encrypted guests other than SEV-ES (in the future) will need to
+	 * implement an early way of finding the APIC ID, since they will
+	 * presumably block direct CPUID too. Be kind to our future selves
+	 * by warning here instead of just letting them break. Parallel
+	 * startup doesn't have to be in the first round of enabling patches
+	 * for any such technology.
+	 */
+	if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
+		switch (cc_get_vendor()) {
+		case CC_VENDOR_AMD:
+			has_sev_es = true;
+			break;
+
+		default:
+			pr_info("Disabling parallel bringup due to guest state encryption\n");
+			return false;
+		}
+	}
+
+	if (x2apic_mode || has_sev_es) {
 		if (boot_cpu_data.cpuid_level < 0x0b)
 			return false;
 
@@ -1530,8 +1552,13 @@  static bool prepare_parallel_bringup(void)
 			return false;
 		}
 
-		pr_debug("Using CPUID 0xb for parallel CPU startup\n");
-		smpboot_control = STARTUP_APICID_CPUID_0B;
+		if (has_sev_es) {
+			pr_debug("Using SEV-ES CPUID 0xb for parallel CPU startup\n");
+			smpboot_control = STARTUP_APICID_SEV_ES;
+		} else {
+			pr_debug("Using CPUID 0xb for parallel CPU startup\n");
+			smpboot_control = STARTUP_APICID_CPUID_0B;
+		}
 	} else {
 		/* Without X2APIC, what's in CPUID 0x01 should suffice. */
 		if (boot_cpu_data.cpuid_level < 0x01)