[v3,4/6] x86/gsseg: move local_irq_save/restore() into asm_load_gs_index()

Message ID 20221013200134.1487-5-xin3.li@intel.com
State New
Series Enable LKGS instruction

Commit Message

Li, Xin3 Oct. 13, 2022, 8:01 p.m. UTC
  From: "H. Peter Anvin (Intel)" <hpa@zytor.com>

The need to disable/enable interrupts around asm_load_gs_index() is a
consequence of the implementation. Prepare for using the LKGS
instruction, which is locally atomic and therefore doesn't need
interrupts disabled.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
 arch/x86/entry/entry_64.S            | 26 +++++++++++++++++++++-----
 arch/x86/include/asm/special_insns.h |  4 ----
 2 files changed, 21 insertions(+), 9 deletions(-)
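
For context, a rough sketch of where this is heading; native_lkgs() is an
assumed helper name (not part of this patch), and the spelled-out "lkgs"
mnemonic needs a new enough assembler (older toolchains would have to fall
back to a raw byte/.insn encoding):

/*
 * Hypothetical sketch: LKGS loads the selector into the inactive (kernel)
 * GS context in a single, locally atomic instruction, so there is no
 * swapgs window and IRQs can stay enabled throughout.
 */
static inline void native_lkgs(unsigned int selector)
{
	u16 sel = selector;

	asm volatile("lkgs %0" : : "m" (sel));
}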
  

Comments

Thomas Gleixner Oct. 15, 2022, 8:51 a.m. UTC | #1
On Thu, Oct 13 2022 at 13:01, Xin Li wrote:
> From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
>
> The need to disable/enable interrupts around asm_load_gs_index() is a
> consequence of the implementation. Prepare for using the LKGS
> instruction, which is locally atomic and therefore doesn't need
> interrupts disabled.

*Shudder*. We want less ASM, not more.

>  static inline void native_load_gs_index(unsigned int selector)
>  {
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
>  	asm_load_gs_index(selector);
> -	local_irq_restore(flags);
>  }

static inline void native_load_gs_index(unsigned int selector)
{
	unsigned long flags;

	if (cpu_feature_enabled(LKGS)) {
		native_lkgs(selector);
	} else {
		local_irq_save(flags);
		asm_load_gs_index(selector);
		local_irq_restore(flags);
	}
}

For paravirt enabled kernels we want during feature detection:

        if (cpu_feature_enabled(LKGS))
        	pv_ops.cpu.load_gs_index = native_lkgs;

No?

Thanks,

        tglx
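
For reference, the two fragments above combined into one self-contained
sketch (X86_FEATURE_LKGS and native_lkgs() are assumed names, not defined
anywhere in this series yet):

static inline void native_load_gs_index(unsigned int selector)
{
	if (cpu_feature_enabled(X86_FEATURE_LKGS)) {
		/* LKGS is locally atomic, no IRQ-off window needed */
		native_lkgs(selector);
	} else {
		unsigned long flags;

		/* MOV-to-GS path: keep the swapgs window IRQ-free */
		local_irq_save(flags);
		asm_load_gs_index(selector);
		local_irq_restore(flags);
	}
}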
  
Li, Xin3 Oct. 18, 2022, 5:25 p.m. UTC | #2
> >  static inline void native_load_gs_index(unsigned int selector)  {
> > -	unsigned long flags;
> > -
> > -	local_irq_save(flags);
> >  	asm_load_gs_index(selector);
> > -	local_irq_restore(flags);
> >  }
> 
> static inline void native_load_gs_index(unsigned int selector) {
> 	unsigned long flags;
> 
>         if (cpu_feature_enabled(LKGS)) {
>               native_lkgs(selector);
>         } else {
> 	      local_irq_save(flags);
>               asm_load_gs_index(selector);
> 	      local_irq_restore(flags);
>        }
> }
> 
> For paravirt enabled kernels we want during feature detection:
> 
>         if (cpu_feature_enabled(LKGS))
>         	pv_ops.cpu.load_gs_index = native_lkgs;

If we use static_cpu_has in native_load_gs_index
       if (static_cpu_has(X86_FEATURE_LKGS)) {
               native_lkgs(selector);
       }

We don't have to change pv_ops.cpu.load_gs_index, right?

Thanks!
Xin

> 
> No?
> 
> Thanks,
> 
>         tglx
  
H. Peter Anvin Oct. 18, 2022, 6:13 p.m. UTC | #3
On October 18, 2022 10:25:31 AM PDT, "Li, Xin3" <xin3.li@intel.com> wrote:
>> >  static inline void native_load_gs_index(unsigned int selector)  {
>> > -	unsigned long flags;
>> > -
>> > -	local_irq_save(flags);
>> >  	asm_load_gs_index(selector);
>> > -	local_irq_restore(flags);
>> >  }
>> 
>> static inline void native_load_gs_index(unsigned int selector) {
>> 	unsigned long flags;
>> 
>>         if (cpu_feature_enabled(LKGS)) {
>>               native_lkgs(selector);
>>         } else {
>> 	      local_irq_save(flags);
>>               asm_load_gs_index(selector);
>> 	      local_irq_restore(flags);
>>        }
>> }
>> 
>> For paravirt enabled kernels we want during feature detection:
>> 
>>         if (cpu_feature_enabled(LKGS))
>>         	pv_ops.cpu.load_gs_index = native_lkgs;
>
>If we use static_cpu_has in native_load_gs_index
>       if (static_cpu_has(X86_FEATURE_LKGS)) {
>               native_lkgs(selector);
>       }
>
>We don't have to change pv_ops.cpu.load_gs_index, right?
>
>Thanks!
>Xin
>
>> 
>> No?
>> 
>> Thanks,
>> 
>>         tglx
>

You don't *have* to, but keeping the check inside the function means a branch to a branch; patching pv_ops directly would be more efficient. It would strictly be an optimization.
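
To spell the "branch to a branch" out, an illustration (not from the thread)
of the two dispatch shapes on a paravirt kernel:

	/* Check kept inside the hook (static_cpu_has in the function): */
	call pv_ops.cpu.load_gs_index	/* first redirection: pv call */
	  -> jcc on X86_FEATURE_LKGS	/* second redirection: feature test */
	    -> lkgs or legacy path

	/* pv_ops pointer rewritten once at feature detection: */
	call native_lkgs		/* single redirection */
	  -> lkgs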
  
Li, Xin3 Oct. 18, 2022, 9:29 p.m. UTC | #4
> On October 18, 2022 10:25:31 AM PDT, "Li, Xin3" <xin3.li@intel.com> wrote:
> >> >  static inline void native_load_gs_index(unsigned int selector)  {
> >> > -	unsigned long flags;
> >> > -
> >> > -	local_irq_save(flags);
> >> >  	asm_load_gs_index(selector);
> >> > -	local_irq_restore(flags);
> >> >  }
> >>
> >> static inline void native_load_gs_index(unsigned int selector) {
> >> 	unsigned long flags;
> >>
> >>         if (cpu_feature_enabled(LKGS)) {
> >>               native_lkgs(selector);
> >>         } else {
> >> 	      local_irq_save(flags);
> >>               asm_load_gs_index(selector);
> >> 	      local_irq_restore(flags);
> >>        }
> >> }
> >>
> >> For paravirt enabled kernels we want during feature detection:
> >>
> >>         if (cpu_feature_enabled(LKGS))
> >>         	pv_ops.cpu.load_gs_index = native_lkgs;
> >
> >If we use static_cpu_has in native_load_gs_index
> >       if (static_cpu_has(X86_FEATURE_LKGS)) {
> >               native_lkgs(selector);
> >       }
> >
> >We don't have to change pv_ops.cpu.load_gs_index, right?
> >
> >Thanks!
> >Xin
> >
> >>
> >> No?
> >>
> >> Thanks,
> >>
> >>         tglx
> >
> 
> You don't *have* to, but keeping the check inside the function means a
> branch to a branch; patching pv_ops directly would be more efficient. It
> would strictly be an optimization.

Right, the generated object file shows that static_cpu_has is less efficient than ALTERNATIVE.
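
As a point of comparison, a hedged sketch of an ALTERNATIVE-based dispatch;
the operand constraints, the clobber list and the spelled-out lkgs mnemonic
are assumptions, not posted code:

static inline void load_gs_index_alt(unsigned int selector)
{
	/*
	 * Patched once at boot: LKGS-capable CPUs never execute the call,
	 * everything else never executes the lkgs. %rax is clobbered
	 * because asm_load_gs_index() uses it as a scratch register.
	 */
	asm volatile(ALTERNATIVE("call asm_load_gs_index",
				 "lkgs %%di",
				 X86_FEATURE_LKGS)
		     : : "D" (selector)
		     : "rax", "memory", "cc");
}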
  

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e0c48998d2fb..cc6ba6672418 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -778,19 +778,35 @@  SYM_CODE_END(common_interrupt_return)
 _ASM_NOKPROBE(common_interrupt_return)
 
 /*
- * Reload gs selector with exception handling
+ * Reload gs selector with exception handling. This is used only on
+ * native, so using swapgs, pushf, popf, cli, sti, ... directly is fine.
+ *
  *  di:  new selector
+ * rax:  scratch register
  *
  * Is in entry.text as it shouldn't be instrumented.
+ *
+ * Note: popf is slow, so use pushf to read IF and then execute cli/sti
+ * if necessary.
  */
 SYM_FUNC_START(asm_load_gs_index)
 	FRAME_BEGIN
+	pushf
+	pop	%rax
+	andl	$X86_EFLAGS_IF, %eax	/* Interrupts enabled? */
+	jz	1f
+	cli
+1:
 	swapgs
 .Lgs_change:
 	ANNOTATE_NOENDBR // error_entry
 	movl	%edi, %gs
 2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	swapgs
+	testl	%eax, %eax
+	jz	3f
+	sti
+3:
 	FRAME_END
 	RET
 
@@ -799,12 +815,12 @@  SYM_FUNC_START(asm_load_gs_index)
 	swapgs					/* switch back to user gs */
 .macro ZAP_GS
 	/* This can't be a string because the preprocessor needs to see it. */
-	movl $__USER_DS, %eax
-	movl %eax, %gs
+	movl $__USER_DS, %edi
+	movl %edi, %gs
 .endm
 	ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
-	xorl	%eax, %eax
-	movl	%eax, %gs
+	xorl	%edi, %edi
+	movl	%edi, %gs
 	jmp	2b
 
 	_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index a71d0e8d4684..6de00dec6564 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -124,11 +124,7 @@  extern asmlinkage void asm_load_gs_index(u16 selector);
 
 static inline void native_load_gs_index(unsigned int selector)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	asm_load_gs_index(selector);
-	local_irq_restore(flags);
 }
 
 static inline unsigned long __read_cr4(void)