[v3,4/6] x86/gsseg: move local_irq_save/restore() into asm_load_gs_index()
Commit Message
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
The need to disable/enable interrupts around asm_load_gs_index() is a
consequence of the implementation. Prepare for using the LKGS
instruction, which is locally atomic and therefore doesn't need
interrupts disabled.
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
arch/x86/entry/entry_64.S | 26 +++++++++++++++++++++-----
arch/x86/include/asm/special_insns.h | 4 ----
2 files changed, 21 insertions(+), 9 deletions(-)
Comments
On Thu, Oct 13 2022 at 13:01, Xin Li wrote:
> From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
>
> The need to disable/enable interrupts around asm_load_gs_index() is a
> consequence of the implementation. Prepare for using the LKGS
> instruction, which is locally atomic and therefore doesn't need
> interrupts disabled.
*Shudder*. We want less ASM not more.
> static inline void native_load_gs_index(unsigned int selector)
> {
> - unsigned long flags;
> -
> - local_irq_save(flags);
> asm_load_gs_index(selector);
> - local_irq_restore(flags);
> }
static inline void native_load_gs_index(unsigned int selector)
{
unsigned long flags;
if (cpu_feature_enabled(LKGS)) {
native_lkgs(selector);
} else {
local_irq_save(flags);
asm_load_gs_index(selector);
local_irq_restore(flags);
}
}
For paravirt enabled kernels we want during feature detection:
if (cpu_feature_enabled(LKGS))
pv_ops.cpu.load_gs_index = native_lkgs;
No?
Thanks,
tglx
> > static inline void native_load_gs_index(unsigned int selector) {
> > - unsigned long flags;
> > -
> > - local_irq_save(flags);
> > asm_load_gs_index(selector);
> > - local_irq_restore(flags);
> > }
>
> static inline void native_load_gs_index(unsigned int selector) {
> unsigned long flags;
>
> if (cpu_feature_enabled(LKGS)) {
> native_lkgs(selector);
> } else {
> local_irq_save(flags);
> asm_load_gs_index(selector);
> local_irq_restore(flags);
> }
> }
>
> For paravirt enabled kernels we want during feature detection:
>
> if (cpu_feature_enabled(LKGS))
> pv_ops.cpu.load_gs_index = native_lkgs;
If we use static_cpu_has in native_load_gs_index
if (static_cpu_has(X86_FEATURE_LKGS)) {
native_lkgs(selector);
}
We don't have to change pv_ops.cpu.load_gs_index, right?
Thanks!
Xin
>
> No?
>
> Thanks,
>
> tglx
On October 18, 2022 10:25:31 AM PDT, "Li, Xin3" <xin3.li@intel.com> wrote:
>> > static inline void native_load_gs_index(unsigned int selector) {
>> > - unsigned long flags;
>> > -
>> > - local_irq_save(flags);
>> > asm_load_gs_index(selector);
>> > - local_irq_restore(flags);
>> > }
>>
>> static inline void native_load_gs_index(unsigned int selector) {
>> unsigned long flags;
>>
>> if (cpu_feature_enabled(LKGS)) {
>> native_lkgs(selector);
>> } else {
>> local_irq_save(flags);
>> asm_load_gs_index(selector);
>> local_irq_restore(flags);
>> }
>> }
>>
>> For paravirt enabled kernels we want during feature detection:
>>
>> if (cpu_feature_enabled(LKGS))
>> pv_ops.cpu.load_gs_index = native_lkgs;
>
>If we use static_cpu_has in native_load_gs_index
> if (static_cpu_has(X86_FEATURE_LKGS)) {
> native_lkgs(selector);
> }
>
>We don't have to change pv_ops.cpu.load_gs_index, right?
>
>Thanks!
>Xin
>
>>
>> No?
>>
>> Thanks,
>>
>> tglx
>
You don't *have* to, but leaving pv_ops.cpu.load_gs_index unchanged would mean a branch to a branch, so updating it would be more efficient. It would strictly be an optimization.
> On October 18, 2022 10:25:31 AM PDT, "Li, Xin3" <xin3.li@intel.com> wrote:
> >> > static inline void native_load_gs_index(unsigned int selector) {
> >> > - unsigned long flags;
> >> > -
> >> > - local_irq_save(flags);
> >> > asm_load_gs_index(selector);
> >> > - local_irq_restore(flags);
> >> > }
> >>
> >> static inline void native_load_gs_index(unsigned int selector) {
> >> unsigned long flags;
> >>
> >> if (cpu_feature_enabled(LKGS)) {
> >> native_lkgs(selector);
> >> } else {
> >> local_irq_save(flags);
> >> asm_load_gs_index(selector);
> >> local_irq_restore(flags);
> >> }
> >> }
> >>
> >> For paravirt enabled kernels we want during feature detection:
> >>
> >> if (cpu_feature_enabled(LKGS))
> >> pv_ops.cpu.load_gs_index = native_lkgs;
> >
> >If we use static_cpu_has in native_load_gs_index
> > if (static_cpu_has(X86_FEATURE_LKGS)) {
> > native_lkgs(selector);
> > }
> >
> >We don't have to change pv_ops.cpu.load_gs_index, right?
> >
> >Thanks!
> >Xin
> >
> >>
> >> No?
> >>
> >> Thanks,
> >>
> >> tglx
> >
>
> You don't *have* to, but leaving pv_ops.cpu.load_gs_index unchanged would mean
> a branch to a branch, so updating it would be more efficient. It would strictly
> be an optimization.
Right, the generated object file shows that static_cpu_has is less efficient than ALTERNATIVE.
@@ -778,19 +778,35 @@ SYM_CODE_END(common_interrupt_return)
_ASM_NOKPROBE(common_interrupt_return)
/*
- * Reload gs selector with exception handling
+ * Reload gs selector with exception handling. This is used only on
+ * native, so using swapgs, pushf, popf, cli, sti, ... directly is fine.
+ *
* di: new selector
+ * rax: scratch register
*
* Is in entry.text as it shouldn't be instrumented.
+ *
+ * Note: popf is slow, so use pushf to read IF and then execute cli/sti
+ * if necessary.
*/
SYM_FUNC_START(asm_load_gs_index)
FRAME_BEGIN
+ pushf
+ pop %rax
+ andl $X86_EFLAGS_IF, %eax /* Interrupts enabled? */
+ jz 1f
+ cli
+1:
swapgs
.Lgs_change:
ANNOTATE_NOENDBR // error_entry
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
swapgs
+ testl %eax, %eax
+ jz 3f
+ sti
+3:
FRAME_END
RET
@@ -799,12 +815,12 @@ SYM_FUNC_START(asm_load_gs_index)
swapgs /* switch back to user gs */
.macro ZAP_GS
/* This can't be a string because the preprocessor needs to see it. */
- movl $__USER_DS, %eax
- movl %eax, %gs
+ movl $__USER_DS, %edi
+ movl %edi, %gs
.endm
ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
- xorl %eax, %eax
- movl %eax, %gs
+ xorl %edi, %edi
+ movl %edi, %gs
jmp 2b
_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
@@ -124,11 +124,7 @@ extern asmlinkage void asm_load_gs_index(u16 selector);
static inline void native_load_gs_index(unsigned int selector)
{
- unsigned long flags;
-
- local_irq_save(flags);
asm_load_gs_index(selector);
- local_irq_restore(flags);
}
static inline unsigned long __read_cr4(void)