[1/4] x86/percpu: Use explicit segment registers in lib/cmpxchg{8,16}b_emu.S

Message ID 20231012161237.114733-2-ubizjak@gmail.com
State New
Headers
Series Introduce %rip-relative addressing to PER_CPU_VAR macro |

Commit Message

Uros Bizjak Oct. 12, 2023, 4:10 p.m. UTC
  PER_CPU_VAR macro is intended to be applied to a symbol, it is not
intended to be used as a selector between %fs and %gs segment
registers for general operands.

The address to these emulation functions is passed in a register, so
use explicit segment registers to access percpu variable instead.

Also add a missing function comment to this_cpu_cmpxchg8b_emu.

No functional changes intended.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
 arch/x86/lib/cmpxchg16b_emu.S | 12 ++++++------
 arch/x86/lib/cmpxchg8b_emu.S  | 30 +++++++++++++++++++++---------
 2 files changed, 27 insertions(+), 15 deletions(-)
  

Comments

Brian Gerst Oct. 12, 2023, 5:44 p.m. UTC | #1
On Thu, Oct 12, 2023 at 12:13 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> PER_CPU_VAR macro is intended to be applied to a symbol, it is not
> intended to be used as a selector between %fs and %gs segment
> registers for general operands.
>
> The address to these emulation functions is passed in a register, so
> use explicit segment registers to access percpu variable instead.
>
> Also add a missing function comment to this_cpu_cmpxchg8b_emu.
>
> No functional changes intended.
>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> ---
>  arch/x86/lib/cmpxchg16b_emu.S | 12 ++++++------
>  arch/x86/lib/cmpxchg8b_emu.S  | 30 +++++++++++++++++++++---------
>  2 files changed, 27 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
> index 6962df315793..2bd8b89bce75 100644
> --- a/arch/x86/lib/cmpxchg16b_emu.S
> +++ b/arch/x86/lib/cmpxchg16b_emu.S
> @@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
>         cli
>
>         /* if (*ptr == old) */
> -       cmpq    PER_CPU_VAR(0(%rsi)), %rax
> +       cmpq    %gs:(%rsi), %rax
>         jne     .Lnot_same
> -       cmpq    PER_CPU_VAR(8(%rsi)), %rdx
> +       cmpq    %gs:8(%rsi), %rdx
>         jne     .Lnot_same
>
>         /* *ptr = new */
> -       movq    %rbx, PER_CPU_VAR(0(%rsi))
> -       movq    %rcx, PER_CPU_VAR(8(%rsi))
> +       movq    %rbx, %gs:(%rsi)
> +       movq    %rcx, %gs:8(%rsi)
>
>         /* set ZF in EFLAGS to indicate success */
>         orl     $X86_EFLAGS_ZF, (%rsp)
> @@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
>         /* *ptr != old */
>
>         /* old = *ptr */
> -       movq    PER_CPU_VAR(0(%rsi)), %rax
> -       movq    PER_CPU_VAR(8(%rsi)), %rdx
> +       movq    %gs:(%rsi), %rax
> +       movq    %gs:8(%rsi), %rdx
>
>         /* clear ZF in EFLAGS to indicate failure */
>         andl    $(~X86_EFLAGS_ZF), (%rsp)
> diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
> index 49805257b125..b7d68d5e2d31 100644
> --- a/arch/x86/lib/cmpxchg8b_emu.S
> +++ b/arch/x86/lib/cmpxchg8b_emu.S
> @@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu)
>         pushfl
>         cli
>
> -       cmpl    0(%esi), %eax
> +       cmpl    (%esi), %eax
>         jne     .Lnot_same
>         cmpl    4(%esi), %edx
>         jne     .Lnot_same
>
> -       movl    %ebx, 0(%esi)
> +       movl    %ebx, (%esi)
>         movl    %ecx, 4(%esi)
>
>         orl     $X86_EFLAGS_ZF, (%esp)
> @@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
>         RET
>
>  .Lnot_same:
> -       movl    0(%esi), %eax
> +       movl    (%esi), %eax
>         movl    4(%esi), %edx
>
>         andl    $(~X86_EFLAGS_ZF), (%esp)
> @@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu)
>
>  #ifndef CONFIG_UML
>
> +/*
> + * Emulate 'cmpxchg8b %fs:(%esi)'
> + *
> + * Inputs:
> + * %esi : memory location to compare
> + * %eax : low 32 bits of old value
> + * %edx : high 32 bits of old value
> + * %ebx : low 32 bits of new value
> + * %ecx : high 32 bits of new value
> + *
> + * Notably this is not LOCK prefixed and is not safe against NMIs
> + */
>  SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
>
>         pushfl
>         cli
>
> -       cmpl    PER_CPU_VAR(0(%esi)), %eax
> +       cmpl    %fs:(%esi), %eax
>         jne     .Lnot_same2
> -       cmpl    PER_CPU_VAR(4(%esi)), %edx
> +       cmpl    %fs:4(%esi), %edx
>         jne     .Lnot_same2
>
> -       movl    %ebx, PER_CPU_VAR(0(%esi))
> -       movl    %ecx, PER_CPU_VAR(4(%esi))
> +       movl    %ebx, %fs:(%esi)
> +       movl    %ecx, %fs:4(%esi)
>
>         orl     $X86_EFLAGS_ZF, (%esp)
>
> @@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
>         RET
>
>  .Lnot_same2:
> -       movl    PER_CPU_VAR(0(%esi)), %eax
> -       movl    PER_CPU_VAR(4(%esi)), %edx
> +       movl    %fs:(%esi), %eax
> +       movl    %fs:4(%esi), %edx
>
>         andl    $(~X86_EFLAGS_ZF), (%esp)
>
> --
> 2.41.0
>

This will break on !SMP builds, where per-cpu variables are just
regular data and not accessed with a segment prefix.

Brian Gerst
  
Uros Bizjak Oct. 12, 2023, 5:54 p.m. UTC | #2
On Thu, Oct 12, 2023 at 7:45 PM Brian Gerst <brgerst@gmail.com> wrote:
>
> On Thu, Oct 12, 2023 at 12:13 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > PER_CPU_VAR macro is intended to be applied to a symbol, it is not
> > intended to be used as a selector between %fs and %gs segment
> > registers for general operands.
> >
> > The address to these emulation functions is passed in a register, so
> > use explicit segment registers to access percpu variable instead.
> >
> > Also add a missing function comment to this_cpu_cmpxchg8b_emu.
> >
> > No functional changes intended.
> >
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: Borislav Petkov <bp@alien8.de>
> > Cc: Dave Hansen <dave.hansen@linux.intel.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > ---
> >  arch/x86/lib/cmpxchg16b_emu.S | 12 ++++++------
> >  arch/x86/lib/cmpxchg8b_emu.S  | 30 +++++++++++++++++++++---------
> >  2 files changed, 27 insertions(+), 15 deletions(-)
> >
> > diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
> > index 6962df315793..2bd8b89bce75 100644
> > --- a/arch/x86/lib/cmpxchg16b_emu.S
> > +++ b/arch/x86/lib/cmpxchg16b_emu.S
> > @@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
> >         cli
> >
> >         /* if (*ptr == old) */
> > -       cmpq    PER_CPU_VAR(0(%rsi)), %rax
> > +       cmpq    %gs:(%rsi), %rax
> >         jne     .Lnot_same
> > -       cmpq    PER_CPU_VAR(8(%rsi)), %rdx
> > +       cmpq    %gs:8(%rsi), %rdx
> >         jne     .Lnot_same
> >
> >         /* *ptr = new */
> > -       movq    %rbx, PER_CPU_VAR(0(%rsi))
> > -       movq    %rcx, PER_CPU_VAR(8(%rsi))
> > +       movq    %rbx, %gs:(%rsi)
> > +       movq    %rcx, %gs:8(%rsi)
> >
> >         /* set ZF in EFLAGS to indicate success */
> >         orl     $X86_EFLAGS_ZF, (%rsp)
> > @@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
> >         /* *ptr != old */
> >
> >         /* old = *ptr */
> > -       movq    PER_CPU_VAR(0(%rsi)), %rax
> > -       movq    PER_CPU_VAR(8(%rsi)), %rdx
> > +       movq    %gs:(%rsi), %rax
> > +       movq    %gs:8(%rsi), %rdx
> >
> >         /* clear ZF in EFLAGS to indicate failure */
> >         andl    $(~X86_EFLAGS_ZF), (%rsp)
> > diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
> > index 49805257b125..b7d68d5e2d31 100644
> > --- a/arch/x86/lib/cmpxchg8b_emu.S
> > +++ b/arch/x86/lib/cmpxchg8b_emu.S
> > @@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu)
> >         pushfl
> >         cli
> >
> > -       cmpl    0(%esi), %eax
> > +       cmpl    (%esi), %eax
> >         jne     .Lnot_same
> >         cmpl    4(%esi), %edx
> >         jne     .Lnot_same
> >
> > -       movl    %ebx, 0(%esi)
> > +       movl    %ebx, (%esi)
> >         movl    %ecx, 4(%esi)
> >
> >         orl     $X86_EFLAGS_ZF, (%esp)
> > @@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
> >         RET
> >
> >  .Lnot_same:
> > -       movl    0(%esi), %eax
> > +       movl    (%esi), %eax
> >         movl    4(%esi), %edx
> >
> >         andl    $(~X86_EFLAGS_ZF), (%esp)
> > @@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu)
> >
> >  #ifndef CONFIG_UML
> >
> > +/*
> > + * Emulate 'cmpxchg8b %fs:(%esi)'
> > + *
> > + * Inputs:
> > + * %esi : memory location to compare
> > + * %eax : low 32 bits of old value
> > + * %edx : high 32 bits of old value
> > + * %ebx : low 32 bits of new value
> > + * %ecx : high 32 bits of new value
> > + *
> > + * Notably this is not LOCK prefixed and is not safe against NMIs
> > + */
> >  SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
> >
> >         pushfl
> >         cli
> >
> > -       cmpl    PER_CPU_VAR(0(%esi)), %eax
> > +       cmpl    %fs:(%esi), %eax
> >         jne     .Lnot_same2
> > -       cmpl    PER_CPU_VAR(4(%esi)), %edx
> > +       cmpl    %fs:4(%esi), %edx
> >         jne     .Lnot_same2
> >
> > -       movl    %ebx, PER_CPU_VAR(0(%esi))
> > -       movl    %ecx, PER_CPU_VAR(4(%esi))
> > +       movl    %ebx, %fs:(%esi)
> > +       movl    %ecx, %fs:4(%esi)
> >
> >         orl     $X86_EFLAGS_ZF, (%esp)
> >
> > @@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
> >         RET
> >
> >  .Lnot_same2:
> > -       movl    PER_CPU_VAR(0(%esi)), %eax
> > -       movl    PER_CPU_VAR(4(%esi)), %edx
> > +       movl    %fs:(%esi), %eax
> > +       movl    %fs:4(%esi), %edx
> >
> >         andl    $(~X86_EFLAGS_ZF), (%esp)
> >
> > --
> > 2.41.0
> >
>
> This will break on !SMP builds, where per-cpu variables are just
> regular data and not accessed with a segment prefix.

Ugh, indeed. Let me rethink this a bit.

Thanks,
Uros.
  
Uros Bizjak Oct. 12, 2023, 6:39 p.m. UTC | #3
On Thu, Oct 12, 2023 at 7:54 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > This will break on !SMP builds, where per-cpu variables are just
> > regular data and not accessed with a segment prefix.
>
> Ugh, indeed. Let me rethink this a bit.

Something like this:

#ifdef CONFIG_SMP
#define PER_CPU_ARG(arg)    %__percpu_seg:arg
#define PER_CPU_VAR(var)    %__percpu_seg:(var)##__percpu_rel
#else /* ! SMP */
#define PER_CPU_ARG(arg)    arg
#define PER_CPU_VAR(var)    (var)##__percpu_rel
#endif    /* SMP */

and using the above PER_CPU_ARG in /lib/cmpxchg{8,16}b_emu.S will
solve the issue.

I will prepare a v2.

Thanks,
Uros.
  
H. Peter Anvin Oct. 12, 2023, 9:02 p.m. UTC | #4
On October 12, 2023 9:10:36 AM PDT, Uros Bizjak <ubizjak@gmail.com> wrote:
>PER_CPU_VAR macro is intended to be applied to a symbol, it is not
>intended to be used as a selector between %fs and %gs segment
>registers for general operands.
>
>The address to these emulation functions is passed in a register, so
>use explicit segment registers to access percpu variable instead.
>
>Also add a missing function comment to this_cpu_cmpxchg8b_emu.
>
>No functional changes intended.
>
>Cc: Thomas Gleixner <tglx@linutronix.de>
>Cc: Ingo Molnar <mingo@redhat.com>
>Cc: Borislav Petkov <bp@alien8.de>
>Cc: Dave Hansen <dave.hansen@linux.intel.com>
>Cc: "H. Peter Anvin" <hpa@zytor.com>
>Cc: Peter Zijlstra <peterz@infradead.org>
>Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
>---
> arch/x86/lib/cmpxchg16b_emu.S | 12 ++++++------
> arch/x86/lib/cmpxchg8b_emu.S  | 30 +++++++++++++++++++++---------
> 2 files changed, 27 insertions(+), 15 deletions(-)
>
>diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
>index 6962df315793..2bd8b89bce75 100644
>--- a/arch/x86/lib/cmpxchg16b_emu.S
>+++ b/arch/x86/lib/cmpxchg16b_emu.S
>@@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
> 	cli
> 
> 	/* if (*ptr == old) */
>-	cmpq	PER_CPU_VAR(0(%rsi)), %rax
>+	cmpq	%gs:(%rsi), %rax
> 	jne	.Lnot_same
>-	cmpq	PER_CPU_VAR(8(%rsi)), %rdx
>+	cmpq	%gs:8(%rsi), %rdx
> 	jne	.Lnot_same
> 
> 	/* *ptr = new */
>-	movq	%rbx, PER_CPU_VAR(0(%rsi))
>-	movq	%rcx, PER_CPU_VAR(8(%rsi))
>+	movq	%rbx, %gs:(%rsi)
>+	movq	%rcx, %gs:8(%rsi)
> 
> 	/* set ZF in EFLAGS to indicate success */
> 	orl	$X86_EFLAGS_ZF, (%rsp)
>@@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
> 	/* *ptr != old */
> 
> 	/* old = *ptr */
>-	movq	PER_CPU_VAR(0(%rsi)), %rax
>-	movq	PER_CPU_VAR(8(%rsi)), %rdx
>+	movq	%gs:(%rsi), %rax
>+	movq	%gs:8(%rsi), %rdx
> 
> 	/* clear ZF in EFLAGS to indicate failure */
> 	andl	$(~X86_EFLAGS_ZF), (%rsp)
>diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
>index 49805257b125..b7d68d5e2d31 100644
>--- a/arch/x86/lib/cmpxchg8b_emu.S
>+++ b/arch/x86/lib/cmpxchg8b_emu.S
>@@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu)
> 	pushfl
> 	cli
> 
>-	cmpl	0(%esi), %eax
>+	cmpl	(%esi), %eax
> 	jne	.Lnot_same
> 	cmpl	4(%esi), %edx
> 	jne	.Lnot_same
> 
>-	movl	%ebx, 0(%esi)
>+	movl	%ebx, (%esi)
> 	movl	%ecx, 4(%esi)
> 
> 	orl	$X86_EFLAGS_ZF, (%esp)
>@@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
> 	RET
> 
> .Lnot_same:
>-	movl	0(%esi), %eax
>+	movl	(%esi), %eax
> 	movl	4(%esi), %edx
> 
> 	andl	$(~X86_EFLAGS_ZF), (%esp)
>@@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu)
> 
> #ifndef CONFIG_UML
> 
>+/*
>+ * Emulate 'cmpxchg8b %fs:(%esi)'
>+ *
>+ * Inputs:
>+ * %esi : memory location to compare
>+ * %eax : low 32 bits of old value
>+ * %edx : high 32 bits of old value
>+ * %ebx : low 32 bits of new value
>+ * %ecx : high 32 bits of new value
>+ *
>+ * Notably this is not LOCK prefixed and is not safe against NMIs
>+ */
> SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
> 
> 	pushfl
> 	cli
> 
>-	cmpl	PER_CPU_VAR(0(%esi)), %eax
>+	cmpl	%fs:(%esi), %eax
> 	jne	.Lnot_same2
>-	cmpl	PER_CPU_VAR(4(%esi)), %edx
>+	cmpl	%fs:4(%esi), %edx
> 	jne	.Lnot_same2
> 
>-	movl	%ebx, PER_CPU_VAR(0(%esi))
>-	movl	%ecx, PER_CPU_VAR(4(%esi))
>+	movl	%ebx, %fs:(%esi)
>+	movl	%ecx, %fs:4(%esi)
> 
> 	orl	$X86_EFLAGS_ZF, (%esp)
> 
>@@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
> 	RET
> 
> .Lnot_same2:
>-	movl	PER_CPU_VAR(0(%esi)), %eax
>-	movl	PER_CPU_VAR(4(%esi)), %edx
>+	movl	%fs:(%esi), %eax
>+	movl	%fs:4(%esi), %edx
> 
> 	andl	$(~X86_EFLAGS_ZF), (%esp)
> 

%fs??
  
Uros Bizjak Oct. 12, 2023, 9:05 p.m. UTC | #5
On Thu, Oct 12, 2023 at 11:02 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> On October 12, 2023 9:10:36 AM PDT, Uros Bizjak <ubizjak@gmail.com> wrote:
> >PER_CPU_VAR macro is intended to be applied to a symbol, it is not
> >intended to be used as a selector between %fs and %gs segment
> >registers for general operands.
> >
> >The address to these emulation functions is passed in a register, so
> >use explicit segment registers to access percpu variable instead.
> >
> >Also add a missing function comment to this_cpu_cmpxchg8b_emu.
> >
> >No functional changes intended.
> >
> >Cc: Thomas Gleixner <tglx@linutronix.de>
> >Cc: Ingo Molnar <mingo@redhat.com>
> >Cc: Borislav Petkov <bp@alien8.de>
> >Cc: Dave Hansen <dave.hansen@linux.intel.com>
> >Cc: "H. Peter Anvin" <hpa@zytor.com>
> >Cc: Peter Zijlstra <peterz@infradead.org>
> >Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> >---
> > arch/x86/lib/cmpxchg16b_emu.S | 12 ++++++------
> > arch/x86/lib/cmpxchg8b_emu.S  | 30 +++++++++++++++++++++---------
> > 2 files changed, 27 insertions(+), 15 deletions(-)
> >
> >diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
> >index 6962df315793..2bd8b89bce75 100644
> >--- a/arch/x86/lib/cmpxchg16b_emu.S
> >+++ b/arch/x86/lib/cmpxchg16b_emu.S
> >@@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
> >       cli
> >
> >       /* if (*ptr == old) */
> >-      cmpq    PER_CPU_VAR(0(%rsi)), %rax
> >+      cmpq    %gs:(%rsi), %rax
> >       jne     .Lnot_same
> >-      cmpq    PER_CPU_VAR(8(%rsi)), %rdx
> >+      cmpq    %gs:8(%rsi), %rdx
> >       jne     .Lnot_same
> >
> >       /* *ptr = new */
> >-      movq    %rbx, PER_CPU_VAR(0(%rsi))
> >-      movq    %rcx, PER_CPU_VAR(8(%rsi))
> >+      movq    %rbx, %gs:(%rsi)
> >+      movq    %rcx, %gs:8(%rsi)
> >
> >       /* set ZF in EFLAGS to indicate success */
> >       orl     $X86_EFLAGS_ZF, (%rsp)
> >@@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
> >       /* *ptr != old */
> >
> >       /* old = *ptr */
> >-      movq    PER_CPU_VAR(0(%rsi)), %rax
> >-      movq    PER_CPU_VAR(8(%rsi)), %rdx
> >+      movq    %gs:(%rsi), %rax
> >+      movq    %gs:8(%rsi), %rdx
> >
> >       /* clear ZF in EFLAGS to indicate failure */
> >       andl    $(~X86_EFLAGS_ZF), (%rsp)
> >diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
> >index 49805257b125..b7d68d5e2d31 100644
> >--- a/arch/x86/lib/cmpxchg8b_emu.S
> >+++ b/arch/x86/lib/cmpxchg8b_emu.S
> >@@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu)
> >       pushfl
> >       cli
> >
> >-      cmpl    0(%esi), %eax
> >+      cmpl    (%esi), %eax
> >       jne     .Lnot_same
> >       cmpl    4(%esi), %edx
> >       jne     .Lnot_same
> >
> >-      movl    %ebx, 0(%esi)
> >+      movl    %ebx, (%esi)
> >       movl    %ecx, 4(%esi)
> >
> >       orl     $X86_EFLAGS_ZF, (%esp)
> >@@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
> >       RET
> >
> > .Lnot_same:
> >-      movl    0(%esi), %eax
> >+      movl    (%esi), %eax
> >       movl    4(%esi), %edx
> >
> >       andl    $(~X86_EFLAGS_ZF), (%esp)
> >@@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu)
> >
> > #ifndef CONFIG_UML
> >
> >+/*
> >+ * Emulate 'cmpxchg8b %fs:(%esi)'
> >+ *
> >+ * Inputs:
> >+ * %esi : memory location to compare
> >+ * %eax : low 32 bits of old value
> >+ * %edx : high 32 bits of old value
> >+ * %ebx : low 32 bits of new value
> >+ * %ecx : high 32 bits of new value
> >+ *
> >+ * Notably this is not LOCK prefixed and is not safe against NMIs
> >+ */
> > SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
> >
> >       pushfl
> >       cli
> >
> >-      cmpl    PER_CPU_VAR(0(%esi)), %eax
> >+      cmpl    %fs:(%esi), %eax
> >       jne     .Lnot_same2
> >-      cmpl    PER_CPU_VAR(4(%esi)), %edx
> >+      cmpl    %fs:4(%esi), %edx
> >       jne     .Lnot_same2
> >
> >-      movl    %ebx, PER_CPU_VAR(0(%esi))
> >-      movl    %ecx, PER_CPU_VAR(4(%esi))
> >+      movl    %ebx, %fs:(%esi)
> >+      movl    %ecx, %fs:4(%esi)
> >
> >       orl     $X86_EFLAGS_ZF, (%esp)
> >
> >@@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
> >       RET
> >
> > .Lnot_same2:
> >-      movl    PER_CPU_VAR(0(%esi)), %eax
> >-      movl    PER_CPU_VAR(4(%esi)), %edx
> >+      movl    %fs:(%esi), %eax
> >+      movl    %fs:4(%esi), %edx
> >
> >       andl    $(~X86_EFLAGS_ZF), (%esp)
> >
>
> %fs??

Yes, 32-bit targets default to %fs. Please also note a new version
(v2) of the patch that reimplements this part.

Uros.
  
H. Peter Anvin Oct. 12, 2023, 9:05 p.m. UTC | #6
On 10/12/23 14:02, H. Peter Anvin wrote:>
> %fs??
 >

Nevermind, I forgot that we changed from %gs to %fs on i386 at some 
point in the now-distant past.

	-hpa
  
kernel test robot Oct. 26, 2023, 7:01 a.m. UTC | #7
Hello,

kernel test robot noticed "general_protection_fault:#[##]" on:

commit: 33c7952d925e905f7af1fb7628e48e03f59885da ("[PATCH 1/4] x86/percpu: Use explicit segment registers in lib/cmpxchg{8,16}b_emu.S")
url: https://github.com/intel-lab-lkp/linux/commits/Uros-Bizjak/x86-percpu-Use-explicit-segment-registers-in-lib-cmpxchg-8-16-b_emu-S/20231017-111304
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 92fe9bb77b0c9fade150350fdb0629a662f0923f
patch link: https://lore.kernel.org/all/20231012161237.114733-2-ubizjak@gmail.com/
patch subject: [PATCH 1/4] x86/percpu: Use explicit segment registers in lib/cmpxchg{8,16}b_emu.S

in testcase: boot

compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+------------------------------------------+------------+------------+
|                                          | 92fe9bb77b | 33c7952d92 |
+------------------------------------------+------------+------------+
| boot_successes                           | 7          | 0          |
| boot_failures                            | 0          | 7          |
| general_protection_fault:#[##]           | 0          | 7          |
| EIP:this_cpu_cmpxchg8b_emu               | 0          | 7          |
| Kernel_panic-not_syncing:Fatal_exception | 0          | 7          |
+------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202310261417.b269d37e-oliver.sang@intel.com


[    0.186570][    T0] stackdepot hash table entries: 65536 (order: 6, 262144 bytes, linear)
[    0.187499][    T0] Initializing HighMem for node 0 (0002ebfe:000bffe0)
[    1.727965][    T0] Initializing Movable for node 0 (00000000:00000000)
[    1.943274][    T0] Checking if this processor honours the WP bit even in supervisor mode...Ok.
[    1.944313][    T0] Memory: 2896220K/3145208K available (16182K kernel code, 5537K rwdata, 11756K rodata, 816K init, 9720K bss, 248988K reserved, 0K cma-reserved, 2379656K highmem)
[    1.947172][    T0] general protection fault: 0000 [#1] PREEMPT
[    1.947900][    T0] CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc1-00024-g33c7952d925e #1 8d4b014f9a0a85cc9a3f6a52ed8e88f1e431f74e
[    1.949317][    T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 1.950480][ T0] EIP: this_cpu_cmpxchg8b_emu (kbuild/src/consumer/arch/x86/lib/cmpxchg8b_emu.S:73) 
[ 1.951093][ T0] Code: ff ff ff 8d b4 26 00 00 00 00 66 90 83 c6 01 3c 3d 0f 95 c0 0f b6 c0 83 c0 01 e9 56 ff ff ff bf ff ff ff ff eb a6 cc cc 9c fa <64> 3b 06 75 13 64 3b 56 04 75 0d 64 89 1e 64 89 4e 04 83 0c 24 40
All code
========
   0:	ff                   	(bad)
   1:	ff                   	(bad)
   2:	ff 8d b4 26 00 00    	decl   0x26b4(%rbp)
   8:	00 00                	add    %al,(%rax)
   a:	66 90                	xchg   %ax,%ax
   c:	83 c6 01             	add    $0x1,%esi
   f:	3c 3d                	cmp    $0x3d,%al
  11:	0f 95 c0             	setne  %al
  14:	0f b6 c0             	movzbl %al,%eax
  17:	83 c0 01             	add    $0x1,%eax
  1a:	e9 56 ff ff ff       	jmp    0xffffffffffffff75
  1f:	bf ff ff ff ff       	mov    $0xffffffff,%edi
  24:	eb a6                	jmp    0xffffffffffffffcc
  26:	cc                   	int3
  27:	cc                   	int3
  28:	9c                   	pushf
  29:	fa                   	cli
  2a:*	64 3b 06             	cmp    %fs:(%rsi),%eax		<-- trapping instruction
  2d:	75 13                	jne    0x42
  2f:	64 3b 56 04          	cmp    %fs:0x4(%rsi),%edx
  33:	75 0d                	jne    0x42
  35:	64 89 1e             	mov    %ebx,%fs:(%rsi)
  38:	64 89 4e 04          	mov    %ecx,%fs:0x4(%rsi)
  3c:	83 0c 24 40          	orl    $0x40,(%rsp)

Code starting with the faulting instruction
===========================================
   0:	64 3b 06             	cmp    %fs:(%rsi),%eax
   3:	75 13                	jne    0x18
   5:	64 3b 56 04          	cmp    %fs:0x4(%rsi),%edx
   9:	75 0d                	jne    0x18
   b:	64 89 1e             	mov    %ebx,%fs:(%rsi)
   e:	64 89 4e 04          	mov    %ecx,%fs:0x4(%rsi)
  12:	83 0c 24 40          	orl    $0x40,(%rsp)
[    1.953397][    T0] EAX: c3c01100 EBX: c3c01180 ECX: 00000004 EDX: 00000003
[    1.954231][    T0] ESI: e52cd090 EDI: e52cd090 EBP: c2b4bf00 ESP: c2b4bec4
[    1.955060][    T0] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00210082
[    1.955949][    T0] CR0: 80050033 CR2: ffdeb000 CR3: 031b5000 CR4: 00000090
[    1.956783][    T0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[    1.957641][    T0] DR6: fffe0ff0 DR7: 00000400
[    1.958190][    T0] Call Trace:
[ 1.958554][ T0] ? show_regs (kbuild/src/consumer/arch/x86/kernel/dumpstack.c:479) 
[ 1.959026][ T0] ? die_addr (kbuild/src/consumer/arch/x86/kernel/dumpstack.c:421 kbuild/src/consumer/arch/x86/kernel/dumpstack.c:460) 
[ 1.959480][ T0] ? exc_general_protection (kbuild/src/consumer/arch/x86/kernel/traps.c:697 kbuild/src/consumer/arch/x86/kernel/traps.c:642) 
[ 1.960101][ T0] ? exc_bounds (kbuild/src/consumer/arch/x86/kernel/traps.c:642) 
[ 1.960579][ T0] ? handle_exception (kbuild/src/consumer/arch/x86/entry/entry_32.S:1049) 


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231026/202310261417.b269d37e-oliver.sang@intel.com
  
Uros Bizjak Oct. 26, 2023, 7:15 a.m. UTC | #8
On Thu, Oct 26, 2023 at 9:01 AM kernel test robot <oliver.sang@intel.com> wrote:
>
>
>
> Hello,
>
> kernel test robot noticed "general_protection_fault:#[##]" on:
>
> commit: 33c7952d925e905f7af1fb7628e48e03f59885da ("[PATCH 1/4] x86/percpu: Use explicit segment registers in lib/cmpxchg{8,16}b_emu.S")
> url: https://github.com/intel-lab-lkp/linux/commits/Uros-Bizjak/x86-percpu-Use-explicit-segment-registers-in-lib-cmpxchg-8-16-b_emu-S/20231017-111304
> base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 92fe9bb77b0c9fade150350fdb0629a662f0923f
> patch link: https://lore.kernel.org/all/20231012161237.114733-2-ubizjak@gmail.com/
> patch subject: [PATCH 1/4] x86/percpu: Use explicit segment registers in lib/cmpxchg{8,16}b_emu.S
>
> in testcase: boot
>
> compiler: gcc-12
> test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

This problem was fixed in a -v3 version of the patch [1], that was
committed to the tip/percpu branch and later merged into tip/master.

[1] https://lore.kernel.org/lkml/20231017162811.200569-3-ubizjak@gmail.com/

Thanks,
Uros.

>
> (please refer to attached dmesg/kmsg for entire log/backtrace)
>
>
> +------------------------------------------+------------+------------+
> |                                          | 92fe9bb77b | 33c7952d92 |
> +------------------------------------------+------------+------------+
> | boot_successes                           | 7          | 0          |
> | boot_failures                            | 0          | 7          |
> | general_protection_fault:#[##]           | 0          | 7          |
> | EIP:this_cpu_cmpxchg8b_emu               | 0          | 7          |
> | Kernel_panic-not_syncing:Fatal_exception | 0          | 7          |
> +------------------------------------------+------------+------------+
>
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <oliver.sang@intel.com>
> | Closes: https://lore.kernel.org/oe-lkp/202310261417.b269d37e-oliver.sang@intel.com
>
>
> [    0.186570][    T0] stackdepot hash table entries: 65536 (order: 6, 262144 bytes, linear)
> [    0.187499][    T0] Initializing HighMem for node 0 (0002ebfe:000bffe0)
> [    1.727965][    T0] Initializing Movable for node 0 (00000000:00000000)
> [    1.943274][    T0] Checking if this processor honours the WP bit even in supervisor mode...Ok.
> [    1.944313][    T0] Memory: 2896220K/3145208K available (16182K kernel code, 5537K rwdata, 11756K rodata, 816K init, 9720K bss, 248988K reserved, 0K cma-reserved, 2379656K highmem)
> [    1.947172][    T0] general protection fault: 0000 [#1] PREEMPT
> [    1.947900][    T0] CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc1-00024-g33c7952d925e #1 8d4b014f9a0a85cc9a3f6a52ed8e88f1e431f74e
> [    1.949317][    T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [ 1.950480][ T0] EIP: this_cpu_cmpxchg8b_emu (kbuild/src/consumer/arch/x86/lib/cmpxchg8b_emu.S:73)
> [ 1.951093][ T0] Code: ff ff ff 8d b4 26 00 00 00 00 66 90 83 c6 01 3c 3d 0f 95 c0 0f b6 c0 83 c0 01 e9 56 ff ff ff bf ff ff ff ff eb a6 cc cc 9c fa <64> 3b 06 75 13 64 3b 56 04 75 0d 64 89 1e 64 89 4e 04 83 0c 24 40
> All code
> ========
>    0:   ff                      (bad)
>    1:   ff                      (bad)
>    2:   ff 8d b4 26 00 00       decl   0x26b4(%rbp)
>    8:   00 00                   add    %al,(%rax)
>    a:   66 90                   xchg   %ax,%ax
>    c:   83 c6 01                add    $0x1,%esi
>    f:   3c 3d                   cmp    $0x3d,%al
>   11:   0f 95 c0                setne  %al
>   14:   0f b6 c0                movzbl %al,%eax
>   17:   83 c0 01                add    $0x1,%eax
>   1a:   e9 56 ff ff ff          jmp    0xffffffffffffff75
>   1f:   bf ff ff ff ff          mov    $0xffffffff,%edi
>   24:   eb a6                   jmp    0xffffffffffffffcc
>   26:   cc                      int3
>   27:   cc                      int3
>   28:   9c                      pushf
>   29:   fa                      cli
>   2a:*  64 3b 06                cmp    %fs:(%rsi),%eax          <-- trapping instruction
>   2d:   75 13                   jne    0x42
>   2f:   64 3b 56 04             cmp    %fs:0x4(%rsi),%edx
>   33:   75 0d                   jne    0x42
>   35:   64 89 1e                mov    %ebx,%fs:(%rsi)
>   38:   64 89 4e 04             mov    %ecx,%fs:0x4(%rsi)
>   3c:   83 0c 24 40             orl    $0x40,(%rsp)
>
> Code starting with the faulting instruction
> ===========================================
>    0:   64 3b 06                cmp    %fs:(%rsi),%eax
>    3:   75 13                   jne    0x18
>    5:   64 3b 56 04             cmp    %fs:0x4(%rsi),%edx
>    9:   75 0d                   jne    0x18
>    b:   64 89 1e                mov    %ebx,%fs:(%rsi)
>    e:   64 89 4e 04             mov    %ecx,%fs:0x4(%rsi)
>   12:   83 0c 24 40             orl    $0x40,(%rsp)
> [    1.953397][    T0] EAX: c3c01100 EBX: c3c01180 ECX: 00000004 EDX: 00000003
> [    1.954231][    T0] ESI: e52cd090 EDI: e52cd090 EBP: c2b4bf00 ESP: c2b4bec4
> [    1.955060][    T0] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00210082
> [    1.955949][    T0] CR0: 80050033 CR2: ffdeb000 CR3: 031b5000 CR4: 00000090
> [    1.956783][    T0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [    1.957641][    T0] DR6: fffe0ff0 DR7: 00000400
> [    1.958190][    T0] Call Trace:
> [ 1.958554][ T0] ? show_regs (kbuild/src/consumer/arch/x86/kernel/dumpstack.c:479)
> [ 1.959026][ T0] ? die_addr (kbuild/src/consumer/arch/x86/kernel/dumpstack.c:421 kbuild/src/consumer/arch/x86/kernel/dumpstack.c:460)
> [ 1.959480][ T0] ? exc_general_protection (kbuild/src/consumer/arch/x86/kernel/traps.c:697 kbuild/src/consumer/arch/x86/kernel/traps.c:642)
> [ 1.960101][ T0] ? exc_bounds (kbuild/src/consumer/arch/x86/kernel/traps.c:642)
> [ 1.960579][ T0] ? handle_exception (kbuild/src/consumer/arch/x86/entry/entry_32.S:1049)
>
>
> The kernel config and materials to reproduce are available at:
> https://download.01.org/0day-ci/archive/20231026/202310261417.b269d37e-oliver.sang@intel.com
>
>
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
>
  

Patch

diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 6962df315793..2bd8b89bce75 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -23,14 +23,14 @@  SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 	cli
 
 	/* if (*ptr == old) */
-	cmpq	PER_CPU_VAR(0(%rsi)), %rax
+	cmpq	%gs:(%rsi), %rax
 	jne	.Lnot_same
-	cmpq	PER_CPU_VAR(8(%rsi)), %rdx
+	cmpq	%gs:8(%rsi), %rdx
 	jne	.Lnot_same
 
 	/* *ptr = new */
-	movq	%rbx, PER_CPU_VAR(0(%rsi))
-	movq	%rcx, PER_CPU_VAR(8(%rsi))
+	movq	%rbx, %gs:(%rsi)
+	movq	%rcx, %gs:8(%rsi)
 
 	/* set ZF in EFLAGS to indicate success */
 	orl	$X86_EFLAGS_ZF, (%rsp)
@@ -42,8 +42,8 @@  SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 	/* *ptr != old */
 
 	/* old = *ptr */
-	movq	PER_CPU_VAR(0(%rsi)), %rax
-	movq	PER_CPU_VAR(8(%rsi)), %rdx
+	movq	%gs:(%rsi), %rax
+	movq	%gs:8(%rsi), %rdx
 
 	/* clear ZF in EFLAGS to indicate failure */
 	andl	$(~X86_EFLAGS_ZF), (%rsp)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 49805257b125..b7d68d5e2d31 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -24,12 +24,12 @@  SYM_FUNC_START(cmpxchg8b_emu)
 	pushfl
 	cli
 
-	cmpl	0(%esi), %eax
+	cmpl	(%esi), %eax
 	jne	.Lnot_same
 	cmpl	4(%esi), %edx
 	jne	.Lnot_same
 
-	movl	%ebx, 0(%esi)
+	movl	%ebx, (%esi)
 	movl	%ecx, 4(%esi)
 
 	orl	$X86_EFLAGS_ZF, (%esp)
@@ -38,7 +38,7 @@  SYM_FUNC_START(cmpxchg8b_emu)
 	RET
 
 .Lnot_same:
-	movl	0(%esi), %eax
+	movl	(%esi), %eax
 	movl	4(%esi), %edx
 
 	andl	$(~X86_EFLAGS_ZF), (%esp)
@@ -53,18 +53,30 @@  EXPORT_SYMBOL(cmpxchg8b_emu)
 
 #ifndef CONFIG_UML
 
+/*
+ * Emulate 'cmpxchg8b %fs:(%esi)'
+ *
+ * Inputs:
+ * %esi : memory location to compare
+ * %eax : low 32 bits of old value
+ * %edx : high 32 bits of old value
+ * %ebx : low 32 bits of new value
+ * %ecx : high 32 bits of new value
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
+ */
 SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
 
 	pushfl
 	cli
 
-	cmpl	PER_CPU_VAR(0(%esi)), %eax
+	cmpl	%fs:(%esi), %eax
 	jne	.Lnot_same2
-	cmpl	PER_CPU_VAR(4(%esi)), %edx
+	cmpl	%fs:4(%esi), %edx
 	jne	.Lnot_same2
 
-	movl	%ebx, PER_CPU_VAR(0(%esi))
-	movl	%ecx, PER_CPU_VAR(4(%esi))
+	movl	%ebx, %fs:(%esi)
+	movl	%ecx, %fs:4(%esi)
 
 	orl	$X86_EFLAGS_ZF, (%esp)
 
@@ -72,8 +84,8 @@  SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
 	RET
 
 .Lnot_same2:
-	movl	PER_CPU_VAR(0(%esi)), %eax
-	movl	PER_CPU_VAR(4(%esi)), %edx
+	movl	%fs:(%esi), %eax
+	movl	%fs:4(%esi), %edx
 
 	andl	$(~X86_EFLAGS_ZF), (%esp)