[v6,09/14] x86: Secure Launch SMP bringup support

Message ID 20230504145023.835096-10-ross.philipson@oracle.com
State New
Headers
Series x86: Trenchboot secure dynamic launch Linux kernel support |

Commit Message

Ross Philipson May 4, 2023, 2:50 p.m. UTC
  On Intel, the APs are left in a well documented state after TXT performs
the late launch. Specifically they cannot have #INIT asserted on them so
a standard startup via INIT/SIPI/SIPI cannot be performed. Instead the
early SL stub code parked the APs in a pause/jmp loop waiting for an NMI.
The modified SMP boot code is called for the Secure Launch case. The
jump address for the RM piggy entry point is fixed up in the jump where
the APs are waiting and an NMI IPI is sent to the AP. The AP vectors to
the Secure Launch entry point in the RM piggy which mimics what the real
mode code would do then jumps to the standard RM piggy protected mode
entry point.

Signed-off-by: Ross Philipson <ross.philipson@oracle.com>
---
 arch/x86/include/asm/realmode.h      |  3 ++
 arch/x86/kernel/smpboot.c            | 86 ++++++++++++++++++++++++++++++++++++
 arch/x86/realmode/rm/header.S        |  3 ++
 arch/x86/realmode/rm/trampoline_64.S | 37 ++++++++++++++++
 4 files changed, 129 insertions(+)
  

Comments

Simon Horman May 5, 2023, 5:54 p.m. UTC | #1
On Thu, May 04, 2023 at 02:50:18PM +0000, Ross Philipson wrote:
> On Intel, the APs are left in a well documented state after TXT performs
> the late launch. Specifically they cannot have #INIT asserted on them so
> a standard startup via INIT/SIPI/SIPI cannot be performed. Instead the
> early SL stub code parked the APs in a pause/jmp loop waiting for an NMI.
> The modified SMP boot code is called for the Secure Launch case. The
> jump address for the RM piggy entry point is fixed up in the jump where
> the APs are waiting and an NMI IPI is sent to the AP. The AP vectors to
> the Secure Launch entry point in the RM piggy which mimics what the real
> mode code would do then jumps to the standard RM piggy protected mode
> entry point.
> 
> Signed-off-by: Ross Philipson <ross.philipson@oracle.com>

Hi Ross,

just one minor nit on this one.

>  /*
>   * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
>   * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
> @@ -1132,6 +1210,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  	cpumask_clear_cpu(cpu, cpu_initialized_mask);
>  	smp_mb();
>  
> +	/* With Intel TXT, the AP startup is totally different */
> +	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==

nit: spaces around '|'

> +	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {
> +		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
> +		goto txt_wake;
> +	}
  
Ross Philipson May 5, 2023, 6:59 p.m. UTC | #2
On 5/5/23 13:54, Simon Horman wrote:
> On Thu, May 04, 2023 at 02:50:18PM +0000, Ross Philipson wrote:
>> On Intel, the APs are left in a well documented state after TXT performs
>> the late launch. Specifically they cannot have #INIT asserted on them so
>> a standard startup via INIT/SIPI/SIPI cannot be performed. Instead the
>> early SL stub code parked the APs in a pause/jmp loop waiting for an NMI.
>> The modified SMP boot code is called for the Secure Launch case. The
>> jump address for the RM piggy entry point is fixed up in the jump where
>> the APs are waiting and an NMI IPI is sent to the AP. The AP vectors to
>> the Secure Launch entry point in the RM piggy which mimics what the real
>> mode code would do then jumps to the standard RM piggy protected mode
>> entry point.
>>
>> Signed-off-by: Ross Philipson <ross.philipson@oracle.com>
> 
> Hi Ross,
> 
> just one minor nit on this one.

Will fix, thanks.
Ross

> 
>>   /*
>>    * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
>>    * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
>> @@ -1132,6 +1210,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>>   	cpumask_clear_cpu(cpu, cpu_initialized_mask);
>>   	smp_mb();
>>   
>> +	/* With Intel TXT, the AP startup is totally different */
>> +	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==
> 
> nit: spaces around '|'
> 
>> +	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {
>> +		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
>> +		goto txt_wake;
>> +	}
  
Jarkko Sakkinen May 10, 2023, 10:55 p.m. UTC | #3
On Thu May 4, 2023 at 5:50 PM EEST, Ross Philipson wrote:
> On Intel, the APs are left in a well documented state after TXT performs
> the late launch. Specifically they cannot have #INIT asserted on them so
> a standard startup via INIT/SIPI/SIPI cannot be performed. Instead the
> early SL stub code parked the APs in a pause/jmp loop waiting for an NMI.
> The modified SMP boot code is called for the Secure Launch case. The
> jump address for the RM piggy entry point is fixed up in the jump where
> the APs are waiting and an NMI IPI is sent to the AP. The AP vectors to
> the Secure Launch entry point in the RM piggy which mimics what the real
> mode code would do then jumps to the standard RM piggy protected mode
> entry point.
>
> Signed-off-by: Ross Philipson <ross.philipson@oracle.com>
> ---
>  arch/x86/include/asm/realmode.h      |  3 ++
>  arch/x86/kernel/smpboot.c            | 86 ++++++++++++++++++++++++++++++++++++
>  arch/x86/realmode/rm/header.S        |  3 ++
>  arch/x86/realmode/rm/trampoline_64.S | 37 ++++++++++++++++
>  4 files changed, 129 insertions(+)
>
> diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
> index f6a1737..576fe62 100644
> --- a/arch/x86/include/asm/realmode.h
> +++ b/arch/x86/include/asm/realmode.h
> @@ -38,6 +38,9 @@ struct real_mode_header {
>  #ifdef CONFIG_X86_64
>  	u32	machine_real_restart_seg;
>  #endif
> +#ifdef CONFIG_SECURE_LAUNCH
> +	u32	sl_trampoline_start32;
> +#endif

Cool I was implementing this relocatable realmode blob back in 2012 :-)

>  };
>  
>  /* This must match data at realmode/rm/trampoline_{32,64}.S */
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 352f0ce..07d740be 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -57,6 +57,7 @@
>  #include <linux/pgtable.h>
>  #include <linux/overflow.h>
>  #include <linux/stackprotector.h>
> +#include <linux/slaunch.h>
>  
>  #include <asm/acpi.h>
>  #include <asm/cacheinfo.h>
> @@ -1068,6 +1069,83 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_SECURE_LAUNCH
> +
> +static atomic_t first_ap_only = {1};

This should be documented.

> +
> +/*
> + * Called to fix the long jump address for the waiting APs to vector to
> + * the correct startup location in the Secure Launch stub in the rmpiggy.
> + */
> +static int
> +slaunch_fixup_jump_vector(void)

Please put the same line.

> +{
> +	struct sl_ap_wake_info *ap_wake_info;
> +	u32 *ap_jmp_ptr = NULL;
> +
> +	if (!atomic_dec_and_test(&first_ap_only))
> +		return 0;
> +
> +	ap_wake_info = slaunch_get_ap_wake_info();
> +
> +	ap_jmp_ptr = (u32 *)__va(ap_wake_info->ap_wake_block +
> +				 ap_wake_info->ap_jmp_offset);
> +
> +	*ap_jmp_ptr = real_mode_header->sl_trampoline_start32;
> +
> +	pr_debug("TXT AP long jump address updated\n");
> +
> +	return 0;
> +}
> +
> +/*
> + * TXT AP startup is quite different than normal. The APs cannot have #INIT
> + * asserted on them or receive SIPIs. The early Secure Launch code has parked
> + * the APs in a pause loop waiting to receive an NMI. This will wake the APs
> + * and have them jump to the protected mode code in the rmpiggy where the rest
> + * of the SMP boot of the AP will proceed normally.
> + */
> +static int
> +slaunch_wakeup_cpu_from_txt(int cpu, int apicid)

Ditto.


> +{
> +	unsigned long send_status = 0, accept_status = 0;

I would put these to separate lines. Maybe a matter of taste but
it is easier to spot initializations.

> +
> +	/* Only done once */
> +	if (slaunch_fixup_jump_vector())
> +		return -1;
> +
> +	/* Send NMI IPI to idling AP and wake it up */
> +	apic_icr_write(APIC_DM_NMI, apicid);
> +
> +	if (init_udelay == 0)
> +		udelay(10);
> +	else
> +		udelay(300);
> +
> +	send_status = safe_apic_wait_icr_idle();
> +
> +	if (init_udelay == 0)
> +		udelay(10);
> +	else
> +		udelay(300);

Magic numbers and no inline comment.

> +
> +	accept_status = (apic_read(APIC_ESR) & 0xEF);
> +
> +	if (send_status)
> +		pr_err("Secure Launch IPI never delivered???\n");
> +	if (accept_status)
> +		pr_err("Secure Launch IPI delivery error (%lx)\n",
> +			accept_status);
> +
> +	return (send_status | accept_status);
> +}
> +
> +#else
> +
> +#define slaunch_wakeup_cpu_from_txt(cpu, apicid)	0
> +
> +#endif  /* !CONFIG_SECURE_LAUNCH */
> +
>  /*
>   * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
>   * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
> @@ -1132,6 +1210,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  	cpumask_clear_cpu(cpu, cpu_initialized_mask);
>  	smp_mb();
>  
> +	/* With Intel TXT, the AP startup is totally different */
> +	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==
> +	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {
> +		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
> +		goto txt_wake;
> +	}
> +
>  	/*
>  	 * Wake up a CPU in difference cases:
>  	 * - Use a method from the APIC driver if one defined, with wakeup
> @@ -1147,6 +1232,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
>  						     cpu0_nmi_registered);
>  
> +txt_wake:
>  	if (!boot_error) {
>  		/*
>  		 * Wait 10s total for first sign of life from AP
> diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
> index 2eb62be..3b5cbcb 100644
> --- a/arch/x86/realmode/rm/header.S
> +++ b/arch/x86/realmode/rm/header.S
> @@ -37,6 +37,9 @@ SYM_DATA_START(real_mode_header)
>  #ifdef CONFIG_X86_64
>  	.long	__KERNEL32_CS
>  #endif
> +#ifdef CONFIG_SECURE_LAUNCH
> +	.long	pa_sl_trampoline_start32
> +#endif
>  SYM_DATA_END(real_mode_header)
>  
>  	/* End signature, used to verify integrity */
> diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
> index e38d61d..8bb4b0d 100644
> --- a/arch/x86/realmode/rm/trampoline_64.S
> +++ b/arch/x86/realmode/rm/trampoline_64.S
> @@ -104,6 +104,43 @@ SYM_CODE_END(sev_es_trampoline_start)
>  
>  	.section ".text32","ax"
>  	.code32
> +#ifdef CONFIG_SECURE_LAUNCH
> +	.balign 4
> +SYM_CODE_START(sl_trampoline_start32)
> +	/*
> +	 * The early secure launch stub AP wakeup code has taken care of all
> +	 * the vagaries of launching out of TXT. This bit just mimics what the
> +	 * 16b entry code does and jumps off to the real startup_32.
> +	 */
> +	cli
> +	wbinvd
> +
> +	/*
> +	 * The %ebx provided is not terribly useful since it is the physical
> +	 * address of tb_trampoline_start and not the base of the image.
> +	 * Use pa_real_mode_base, which is fixed up, to get a run time
> +	 * base register to use for offsets to location that do not have
> +	 * pa_ symbols.
> +	 */
> +	movl    $pa_real_mode_base, %ebx
> +
> +	/*
> +	 * This may seem a little odd but this is what %esp would have had in
> +	 * it on the jmp from real mode because all real mode fixups were done
> +	 * via the code segment. The base is added at the 32b entry.
> +	 */
> +	movl	rm_stack_end, %esp
> +
> +	lgdt    tr_gdt(%ebx)
> +	lidt    tr_idt(%ebx)
> +
> +	movw	$__KERNEL_DS, %dx	# Data segment descriptor
> +
> +	/* Jump to where the 16b code would have jumped */
> +	ljmpl	$__KERNEL32_CS, $pa_startup_32
> +SYM_CODE_END(sl_trampoline_start32)
> +#endif
> +
>  	.balign 4
>  SYM_CODE_START(startup_32)
>  	movl	%edx, %ss
> -- 
> 1.8.3.1


The trampoline_64.S changes look reasonable to me (with a quick look).

BR, Jarkko
  
Ross Philipson May 11, 2023, 4:21 p.m. UTC | #4
On 5/10/23 18:55, Jarkko Sakkinen wrote:
> On Thu May 4, 2023 at 5:50 PM EEST, Ross Philipson wrote:
>> On Intel, the APs are left in a well documented state after TXT performs
>> the late launch. Specifically they cannot have #INIT asserted on them so
>> a standard startup via INIT/SIPI/SIPI cannot be performed. Instead the
>> early SL stub code parked the APs in a pause/jmp loop waiting for an NMI.
>> The modified SMP boot code is called for the Secure Launch case. The
>> jump address for the RM piggy entry point is fixed up in the jump where
>> the APs are waiting and an NMI IPI is sent to the AP. The AP vectors to
>> the Secure Launch entry point in the RM piggy which mimics what the real
>> mode code would do then jumps to the standard RM piggy protected mode
>> entry point.
>>
>> Signed-off-by: Ross Philipson <ross.philipson@oracle.com>
>> ---
>>   arch/x86/include/asm/realmode.h      |  3 ++
>>   arch/x86/kernel/smpboot.c            | 86 ++++++++++++++++++++++++++++++++++++
>>   arch/x86/realmode/rm/header.S        |  3 ++
>>   arch/x86/realmode/rm/trampoline_64.S | 37 ++++++++++++++++
>>   4 files changed, 129 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
>> index f6a1737..576fe62 100644
>> --- a/arch/x86/include/asm/realmode.h
>> +++ b/arch/x86/include/asm/realmode.h
>> @@ -38,6 +38,9 @@ struct real_mode_header {
>>   #ifdef CONFIG_X86_64
>>   	u32	machine_real_restart_seg;
>>   #endif
>> +#ifdef CONFIG_SECURE_LAUNCH
>> +	u32	sl_trampoline_start32;
>> +#endif
> 
> Cool I was implementing this relocatable realmode blob back in 2012 :-)

It is fun stuff :)

> 
>>   };
>>   
>>   /* This must match data at realmode/rm/trampoline_{32,64}.S */
>> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
>> index 352f0ce..07d740be 100644
>> --- a/arch/x86/kernel/smpboot.c
>> +++ b/arch/x86/kernel/smpboot.c
>> @@ -57,6 +57,7 @@
>>   #include <linux/pgtable.h>
>>   #include <linux/overflow.h>
>>   #include <linux/stackprotector.h>
>> +#include <linux/slaunch.h>
>>   
>>   #include <asm/acpi.h>
>>   #include <asm/cacheinfo.h>
>> @@ -1068,6 +1069,83 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
>>   	return 0;
>>   }
>>   
>> +#ifdef CONFIG_SECURE_LAUNCH
>> +
>> +static atomic_t first_ap_only = {1};
> 
> This should be documented.

Will do

> 
>> +
>> +/*
>> + * Called to fix the long jump address for the waiting APs to vector to
>> + * the correct startup location in the Secure Launch stub in the rmpiggy.
>> + */
>> +static int
>> +slaunch_fixup_jump_vector(void)
> 
> Please put the same line.

Ack

> 
>> +{
>> +	struct sl_ap_wake_info *ap_wake_info;
>> +	u32 *ap_jmp_ptr = NULL;
>> +
>> +	if (!atomic_dec_and_test(&first_ap_only))
>> +		return 0;
>> +
>> +	ap_wake_info = slaunch_get_ap_wake_info();
>> +
>> +	ap_jmp_ptr = (u32 *)__va(ap_wake_info->ap_wake_block +
>> +				 ap_wake_info->ap_jmp_offset);
>> +
>> +	*ap_jmp_ptr = real_mode_header->sl_trampoline_start32;
>> +
>> +	pr_debug("TXT AP long jump address updated\n");
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * TXT AP startup is quite different than normal. The APs cannot have #INIT
>> + * asserted on them or receive SIPIs. The early Secure Launch code has parked
>> + * the APs in a pause loop waiting to receive an NMI. This will wake the APs
>> + * and have them jump to the protected mode code in the rmpiggy where the rest
>> + * of the SMP boot of the AP will proceed normally.
>> + */
>> +static int
>> +slaunch_wakeup_cpu_from_txt(int cpu, int apicid)
> 
> Ditto.
>

Ack

> 
>> +{
>> +	unsigned long send_status = 0, accept_status = 0;
> 
> I would put these to separate lines. Maybe a matter of taste but
> it is easier to spot initializations.

Sure

> 
>> +
>> +	/* Only done once */
>> +	if (slaunch_fixup_jump_vector())
>> +		return -1;
>> +
>> +	/* Send NMI IPI to idling AP and wake it up */
>> +	apic_icr_write(APIC_DM_NMI, apicid);
>> +
>> +	if (init_udelay == 0)
>> +		udelay(10);
>> +	else
>> +		udelay(300);
>> +
>> +	send_status = safe_apic_wait_icr_idle();
>> +
>> +	if (init_udelay == 0)
>> +		udelay(10);
>> +	else
>> +		udelay(300);
> 
> Magic numbers and no inline comment.

Much of this was copied as is from another function in this module. They 
did not have comments either. I will have to try to track down what 
motivated the delay logic.

> 
>> +
>> +	accept_status = (apic_read(APIC_ESR) & 0xEF);
>> +
>> +	if (send_status)
>> +		pr_err("Secure Launch IPI never delivered???\n");
>> +	if (accept_status)
>> +		pr_err("Secure Launch IPI delivery error (%lx)\n",
>> +			accept_status);
>> +
>> +	return (send_status | accept_status);
>> +}
>> +
>> +#else
>> +
>> +#define slaunch_wakeup_cpu_from_txt(cpu, apicid)	0
>> +
>> +#endif  /* !CONFIG_SECURE_LAUNCH */
>> +
>>   /*
>>    * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
>>    * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
>> @@ -1132,6 +1210,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>>   	cpumask_clear_cpu(cpu, cpu_initialized_mask);
>>   	smp_mb();
>>   
>> +	/* With Intel TXT, the AP startup is totally different */
>> +	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==
>> +	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {
>> +		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
>> +		goto txt_wake;
>> +	}
>> +
>>   	/*
>>   	 * Wake up a CPU in difference cases:
>>   	 * - Use a method from the APIC driver if one defined, with wakeup
>> @@ -1147,6 +1232,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>>   		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
>>   						     cpu0_nmi_registered);
>>   
>> +txt_wake:
>>   	if (!boot_error) {
>>   		/*
>>   		 * Wait 10s total for first sign of life from AP
>> diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
>> index 2eb62be..3b5cbcb 100644
>> --- a/arch/x86/realmode/rm/header.S
>> +++ b/arch/x86/realmode/rm/header.S
>> @@ -37,6 +37,9 @@ SYM_DATA_START(real_mode_header)
>>   #ifdef CONFIG_X86_64
>>   	.long	__KERNEL32_CS
>>   #endif
>> +#ifdef CONFIG_SECURE_LAUNCH
>> +	.long	pa_sl_trampoline_start32
>> +#endif
>>   SYM_DATA_END(real_mode_header)
>>   
>>   	/* End signature, used to verify integrity */
>> diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
>> index e38d61d..8bb4b0d 100644
>> --- a/arch/x86/realmode/rm/trampoline_64.S
>> +++ b/arch/x86/realmode/rm/trampoline_64.S
>> @@ -104,6 +104,43 @@ SYM_CODE_END(sev_es_trampoline_start)
>>   
>>   	.section ".text32","ax"
>>   	.code32
>> +#ifdef CONFIG_SECURE_LAUNCH
>> +	.balign 4
>> +SYM_CODE_START(sl_trampoline_start32)
>> +	/*
>> +	 * The early secure launch stub AP wakeup code has taken care of all
>> +	 * the vagaries of launching out of TXT. This bit just mimics what the
>> +	 * 16b entry code does and jumps off to the real startup_32.
>> +	 */
>> +	cli
>> +	wbinvd
>> +
>> +	/*
>> +	 * The %ebx provided is not terribly useful since it is the physical
>> +	 * address of tb_trampoline_start and not the base of the image.
>> +	 * Use pa_real_mode_base, which is fixed up, to get a run time
>> +	 * base register to use for offsets to location that do not have
>> +	 * pa_ symbols.
>> +	 */
>> +	movl    $pa_real_mode_base, %ebx
>> +
>> +	/*
>> +	 * This may seem a little odd but this is what %esp would have had in
>> +	 * it on the jmp from real mode because all real mode fixups were done
>> +	 * via the code segment. The base is added at the 32b entry.
>> +	 */
>> +	movl	rm_stack_end, %esp
>> +
>> +	lgdt    tr_gdt(%ebx)
>> +	lidt    tr_idt(%ebx)
>> +
>> +	movw	$__KERNEL_DS, %dx	# Data segment descriptor
>> +
>> +	/* Jump to where the 16b code would have jumped */
>> +	ljmpl	$__KERNEL32_CS, $pa_startup_32
>> +SYM_CODE_END(sl_trampoline_start32)
>> +#endif
>> +
>>   	.balign 4
>>   SYM_CODE_START(startup_32)
>>   	movl	%edx, %ss
>> -- 
>> 1.8.3.1
> 
> 
> The trampoline_64.S changes look reasonable to me (with a quick look).
> 
> BR, Jarkko

Thanks for the review,
Ross
  
Thomas Gleixner May 12, 2023, 6:02 p.m. UTC | #5
On Thu, May 04 2023 at 14:50, Ross Philipson wrote:
>  
> +#ifdef CONFIG_SECURE_LAUNCH
> +
> +static atomic_t first_ap_only = {1};

ATOMIC_INIT(1) if at all.

> +
> +/*
> + * Called to fix the long jump address for the waiting APs to vector to
> + * the correct startup location in the Secure Launch stub in the rmpiggy.
> + */
> +static int
> +slaunch_fixup_jump_vector(void)

One line please.

> +{
> +	struct sl_ap_wake_info *ap_wake_info;
> +	u32 *ap_jmp_ptr = NULL;
> +
> +	if (!atomic_dec_and_test(&first_ap_only))
> +		return 0;

Why does this need an atomic? CPU bringup is fully serialized and even
with the upcoming parallel bootup work, there is no concurrency on this
function.

Aside of that. Why isn't this initialized during boot in a __init function?

> +	ap_wake_info = slaunch_get_ap_wake_info();
> +
> +	ap_jmp_ptr = (u32 *)__va(ap_wake_info->ap_wake_block +
> +				 ap_wake_info->ap_jmp_offset);
> +
> +	*ap_jmp_ptr = real_mode_header->sl_trampoline_start32;
> +
> +	pr_debug("TXT AP long jump address updated\n");
> +
> +	return 0;

Why does this need a return code of all return paths return 0?

> +}
> +
> +/*
> + * TXT AP startup is quite different than normal. The APs cannot have #INIT
> + * asserted on them or receive SIPIs. The early Secure Launch code has parked
> + * the APs in a pause loop waiting to receive an NMI. This will wake the APs
> + * and have them jump to the protected mode code in the rmpiggy where the rest
> + * of the SMP boot of the AP will proceed normally.
> + */
> +static int
> +slaunch_wakeup_cpu_from_txt(int cpu, int apicid)
> +{
> +	unsigned long send_status = 0, accept_status = 0;
> +
> +	/* Only done once */

Yes. But not here.

> +	if (slaunch_fixup_jump_vector())
> +		return -1;
> +
> +	/* Send NMI IPI to idling AP and wake it up */
> +	apic_icr_write(APIC_DM_NMI, apicid);
> +
> +	if (init_udelay == 0)
> +		udelay(10);
> +	else
> +		udelay(300);

The wonders of copy & pasta. This condition is pointless because this
code only runs on systems which force init_udelay to 0.

> +	send_status = safe_apic_wait_icr_idle();

Moar copy & pasta. As this is guaranteed to be X2APIC mode, this
function is a nop and returns 0 unconditionally.

> +	if (init_udelay == 0)
> +		udelay(10);
> +	else
> +		udelay(300);
> +
> +	accept_status = (apic_read(APIC_ESR) & 0xEF);

The point of this is? Bit 0-3 are Pentium and P6 only.

Bit 4 Tried to send low prio IPI but not supported
Bit 5 Illegal Vector sent
Bit 6 Illegal Vector received
Bit 7 X2APIC illegal register access

IOW, there is no accept error here. That would be bit 2 which is never set
on anything modern

But aside of that the read is moot anyway because the CPU has the APIC
error vector enabled so if this would happen the APIC error interrupt
would have swallowed and cleared the error condition.

IOW. Everything except the apic_icr_write() here is completely useless.

> +#else
> +
> +#define slaunch_wakeup_cpu_from_txt(cpu, apicid)	0

inline stub please. 

> +
> +#endif  /* !CONFIG_SECURE_LAUNCH */
> +
>  /*
>   * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
>   * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
> @@ -1132,6 +1210,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  	cpumask_clear_cpu(cpu, cpu_initialized_mask);
>  	smp_mb();
>  
> +	/* With Intel TXT, the AP startup is totally different */
> +	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==
> +	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {

Stick this condition into a helper function please

> +		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
> +		goto txt_wake;
> +	}
> +
>  	/*
>  	 * Wake up a CPU in difference cases:
>  	 * - Use a method from the APIC driver if one defined, with wakeup
> @@ -1147,6 +1232,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
>  						     cpu0_nmi_registered);
>  
> +txt_wake:

Sorry, but what has this to do with TXT ? And why can't the above just
be yet another if clause in the existing if/else if maze?

Now that brings me to another question. How is this supposed to work
with CPU hotplug post boot?

It will simply not work at all because once a CPU is offlined it is
going to sit in an endless loop and wait for INIT/SIPI/SIPI. So it will
get that NMI and go back to wait.

So you need a TXT specific cpu_play_dead() implementation, which should
preferrably use monitor/mwait where each "offline" CPU sits and waits
until a condition becomes true. Then you don't need a NMI for wakeup at
all. Just writing the condition into that per CPU cache line should be
enough.

Thanks,

        tglx
  
Ross Philipson May 15, 2023, 8:19 p.m. UTC | #6
On 5/12/23 14:02, Thomas Gleixner wrote:
> On Thu, May 04 2023 at 14:50, Ross Philipson wrote:
>>   
>> +#ifdef CONFIG_SECURE_LAUNCH
>> +
>> +static atomic_t first_ap_only = {1};
> 
> ATOMIC_INIT(1) if at all.
> 
>> +
>> +/*
>> + * Called to fix the long jump address for the waiting APs to vector to
>> + * the correct startup location in the Secure Launch stub in the rmpiggy.
>> + */
>> +static int
>> +slaunch_fixup_jump_vector(void)
> 
> One line please.
> 
>> +{
>> +	struct sl_ap_wake_info *ap_wake_info;
>> +	u32 *ap_jmp_ptr = NULL;
>> +
>> +	if (!atomic_dec_and_test(&first_ap_only))
>> +		return 0;
> 
> Why does this need an atomic? CPU bringup is fully serialized and even
> with the upcoming parallel bootup work, there is no concurrency on this
> function.
> 
> Aside of that. Why isn't this initialized during boot in a __init function?
> 
>> +	ap_wake_info = slaunch_get_ap_wake_info();
>> +
>> +	ap_jmp_ptr = (u32 *)__va(ap_wake_info->ap_wake_block +
>> +				 ap_wake_info->ap_jmp_offset);
>> +
>> +	*ap_jmp_ptr = real_mode_header->sl_trampoline_start32;
>> +
>> +	pr_debug("TXT AP long jump address updated\n");
>> +
>> +	return 0;
> 
> Why does this need a return code of all return paths return 0?
> 
>> +}
>> +
>> +/*
>> + * TXT AP startup is quite different than normal. The APs cannot have #INIT
>> + * asserted on them or receive SIPIs. The early Secure Launch code has parked
>> + * the APs in a pause loop waiting to receive an NMI. This will wake the APs
>> + * and have them jump to the protected mode code in the rmpiggy where the rest
>> + * of the SMP boot of the AP will proceed normally.
>> + */
>> +static int
>> +slaunch_wakeup_cpu_from_txt(int cpu, int apicid)
>> +{
>> +	unsigned long send_status = 0, accept_status = 0;
>> +
>> +	/* Only done once */
> 
> Yes. But not here.
> 
>> +	if (slaunch_fixup_jump_vector())
>> +		return -1;
>> +
>> +	/* Send NMI IPI to idling AP and wake it up */
>> +	apic_icr_write(APIC_DM_NMI, apicid);
>> +
>> +	if (init_udelay == 0)
>> +		udelay(10);
>> +	else
>> +		udelay(300);
> 
> The wonders of copy & pasta. This condition is pointless because this
> code only runs on systems which force init_udelay to 0.
> 
>> +	send_status = safe_apic_wait_icr_idle();
> 
> Moar copy & pasta. As this is guaranteed to be X2APIC mode, this
> function is a nop and returns 0 unconditionally.
> 
>> +	if (init_udelay == 0)
>> +		udelay(10);
>> +	else
>> +		udelay(300);
>> +
>> +	accept_status = (apic_read(APIC_ESR) & 0xEF);
> 
> The point of this is? Bit 0-3 are Pentium and P6 only.
> 
> Bit 4 Tried to send low prio IPI but not supported
> Bit 5 Illegal Vector sent
> Bit 6 Illegal Vector received
> Bit 7 X2APIC illegal register access
> 
> IOW, there is no accept error here. That would be bit 2 which is never set
> on anything modern
> 
> But aside of that the read is moot anyway because the CPU has the APIC
> error vector enabled so if this would happen the APIC error interrupt
> would have swallowed and cleared the error condition.
> 
> IOW. Everything except the apic_icr_write() here is completely useless.
> 
>> +#else
>> +
>> +#define slaunch_wakeup_cpu_from_txt(cpu, apicid)	0
> 
> inline stub please.
> 
>> +
>> +#endif  /* !CONFIG_SECURE_LAUNCH */
>> +
>>   /*
>>    * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
>>    * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
>> @@ -1132,6 +1210,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>>   	cpumask_clear_cpu(cpu, cpu_initialized_mask);
>>   	smp_mb();
>>   
>> +	/* With Intel TXT, the AP startup is totally different */
>> +	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==
>> +	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {
> 
> Stick this condition into a helper function please
> 
>> +		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
>> +		goto txt_wake;
>> +	}
>> +
>>   	/*
>>   	 * Wake up a CPU in difference cases:
>>   	 * - Use a method from the APIC driver if one defined, with wakeup
>> @@ -1147,6 +1232,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>>   		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
>>   						     cpu0_nmi_registered);
>>   
>> +txt_wake:
> 
> Sorry, but what has this to do with TXT ? And why can't the above just
> be yet another if clause in the existing if/else if maze?
> 
> Now that brings me to another question. How is this supposed to work
> with CPU hotplug post boot?
> 
> It will simply not work at all because once a CPU is offlined it is
> going to sit in an endless loop and wait for INIT/SIPI/SIPI. So it will
> get that NMI and go back to wait.
> 
> So you need a TXT specific cpu_play_dead() implementation, which should
> preferrably use monitor/mwait where each "offline" CPU sits and waits
> until a condition becomes true. Then you don't need a NMI for wakeup at
> all. Just writing the condition into that per CPU cache line should be
> enough.
> 
> Thanks,
> 
>          tglx
> 
There is a lot here to think about. It sounds like you are suggesting we 
design all of this differently and we can definitely do that. We need 
time to go over this and your parallel startup series before we can 
really get down to how best to approach this.

I am going on vacation and will be back the first week of June. I will 
get back to you then once I have had time to go over all of this and 
your other patches.

Thank you for all your responses.

Ross
  

Patch

diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index f6a1737..576fe62 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -38,6 +38,9 @@  struct real_mode_header {
 #ifdef CONFIG_X86_64
 	u32	machine_real_restart_seg;
 #endif
+#ifdef CONFIG_SECURE_LAUNCH
+	u32	sl_trampoline_start32;
+#endif
 };
 
 /* This must match data at realmode/rm/trampoline_{32,64}.S */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 352f0ce..07d740be 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,6 +57,7 @@ 
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
 #include <linux/stackprotector.h>
+#include <linux/slaunch.h>
 
 #include <asm/acpi.h>
 #include <asm/cacheinfo.h>
@@ -1068,6 +1069,83 @@  int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 	return 0;
 }
 
+#ifdef CONFIG_SECURE_LAUNCH
+
+static atomic_t first_ap_only = {1};
+
+/*
+ * Called to fix the long jump address for the waiting APs to vector to
+ * the correct startup location in the Secure Launch stub in the rmpiggy.
+ */
+static int
+slaunch_fixup_jump_vector(void)
+{
+	struct sl_ap_wake_info *ap_wake_info;
+	u32 *ap_jmp_ptr = NULL;
+
+	if (!atomic_dec_and_test(&first_ap_only))
+		return 0;
+
+	ap_wake_info = slaunch_get_ap_wake_info();
+
+	ap_jmp_ptr = (u32 *)__va(ap_wake_info->ap_wake_block +
+				 ap_wake_info->ap_jmp_offset);
+
+	*ap_jmp_ptr = real_mode_header->sl_trampoline_start32;
+
+	pr_debug("TXT AP long jump address updated\n");
+
+	return 0;
+}
+
+/*
+ * TXT AP startup is quite different than normal. The APs cannot have #INIT
+ * asserted on them or receive SIPIs. The early Secure Launch code has parked
+ * the APs in a pause loop waiting to receive an NMI. This will wake the APs
+ * and have them jump to the protected mode code in the rmpiggy where the rest
+ * of the SMP boot of the AP will proceed normally.
+ */
+static int
+slaunch_wakeup_cpu_from_txt(int cpu, int apicid)
+{
+	unsigned long send_status = 0, accept_status = 0;
+
+	/* Only done once */
+	if (slaunch_fixup_jump_vector())
+		return -1;
+
+	/* Send NMI IPI to idling AP and wake it up */
+	apic_icr_write(APIC_DM_NMI, apicid);
+
+	if (init_udelay == 0)
+		udelay(10);
+	else
+		udelay(300);
+
+	send_status = safe_apic_wait_icr_idle();
+
+	if (init_udelay == 0)
+		udelay(10);
+	else
+		udelay(300);
+
+	accept_status = (apic_read(APIC_ESR) & 0xEF);
+
+	if (send_status)
+		pr_err("Secure Launch IPI never delivered???\n");
+	if (accept_status)
+		pr_err("Secure Launch IPI delivery error (%lx)\n",
+			accept_status);
+
+	return (send_status | accept_status);
+}
+
+#else
+
+#define slaunch_wakeup_cpu_from_txt(cpu, apicid)	0
+
+#endif  /* !CONFIG_SECURE_LAUNCH */
+
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -1132,6 +1210,13 @@  static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	smp_mb();
 
+	/* With Intel TXT, the AP startup is totally different */
+	if ((slaunch_get_flags() & (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) ==
+	   (SL_FLAG_ACTIVE|SL_FLAG_ARCH_TXT)) {
+		boot_error = slaunch_wakeup_cpu_from_txt(cpu, apicid);
+		goto txt_wake;
+	}
+
 	/*
 	 * Wake up a CPU in difference cases:
 	 * - Use a method from the APIC driver if one defined, with wakeup
@@ -1147,6 +1232,7 @@  static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
 						     cpu0_nmi_registered);
 
+txt_wake:
 	if (!boot_error) {
 		/*
 		 * Wait 10s total for first sign of life from AP
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index 2eb62be..3b5cbcb 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -37,6 +37,9 @@  SYM_DATA_START(real_mode_header)
 #ifdef CONFIG_X86_64
 	.long	__KERNEL32_CS
 #endif
+#ifdef CONFIG_SECURE_LAUNCH
+	.long	pa_sl_trampoline_start32
+#endif
 SYM_DATA_END(real_mode_header)
 
 	/* End signature, used to verify integrity */
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index e38d61d..8bb4b0d 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -104,6 +104,43 @@  SYM_CODE_END(sev_es_trampoline_start)
 
 	.section ".text32","ax"
 	.code32
+#ifdef CONFIG_SECURE_LAUNCH
+	.balign 4
+SYM_CODE_START(sl_trampoline_start32)
+	/*
+	 * The early secure launch stub AP wakeup code has taken care of all
+	 * the vagaries of launching out of TXT. This bit just mimics what the
+	 * 16b entry code does and jumps off to the real startup_32.
+	 */
+	cli
+	wbinvd
+
+	/*
+	 * The %ebx provided is not terribly useful since it is the physical
+	 * address of tb_trampoline_start and not the base of the image.
+	 * Use pa_real_mode_base, which is fixed up, to get a run time
+	 * base register to use for offsets to location that do not have
+	 * pa_ symbols.
+	 */
+	movl    $pa_real_mode_base, %ebx
+
+	/*
+	 * This may seem a little odd but this is what %esp would have had in
+	 * it on the jmp from real mode because all real mode fixups were done
+	 * via the code segment. The base is added at the 32b entry.
+	 */
+	movl	rm_stack_end, %esp
+
+	lgdt    tr_gdt(%ebx)
+	lidt    tr_idt(%ebx)
+
+	movw	$__KERNEL_DS, %dx	# Data segment descriptor
+
+	/* Jump to where the 16b code would have jumped */
+	ljmpl	$__KERNEL32_CS, $pa_startup_32
+SYM_CODE_END(sl_trampoline_start32)
+#endif
+
 	.balign 4
 SYM_CODE_START(startup_32)
 	movl	%edx, %ss