[11/13] x86_64: Remove pointless set_64bit() usage

Message ID 20221022114425.168036718@infradead.org
State New
Headers
Series Clean up pmd_get_atomic() and i386-PAE |

Commit Message

Peter Zijlstra Oct. 22, 2022, 11:14 a.m. UTC
  The use of set_64bit() in X86_64 only code is pretty pointless, seeing
how it's a direct assignment. Remove all this nonsense.

Additionally, since x86_64 unconditionally has HAVE_CMPXCHG_DOUBLE,
there is no point in even having that fallback.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/um/include/asm/pgtable-3level.h |    8 --------
 arch/x86/include/asm/cmpxchg_64.h    |    5 -----
 drivers/iommu/intel/irq_remapping.c  |   10 ++--------
 3 files changed, 2 insertions(+), 21 deletions(-)
  

Comments

Linus Torvalds Oct. 22, 2022, 5:55 p.m. UTC | #1
On Sat, Oct 22, 2022 at 4:48 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> The use of set_64bit() in X86_64 only code is pretty pointless, seeing
> how it's a direct assignment. Remove all this nonsense.

Thanks. That was really confusing code, using set_64bit() in exactly
the situation where it was _not_ needed.

                  Linus
  
Nathan Chancellor Nov. 3, 2022, 7:09 p.m. UTC | #2
Hi Peter,

On Sat, Oct 22, 2022 at 01:14:14PM +0200, Peter Zijlstra wrote:
> The use of set_64bit() in X86_64 only code is pretty pointless, seeing
> how it's a direct assignment. Remove all this nonsense.
> 
> Additionally, since x86_64 unconditionally has HAVE_CMPXCHG_DOUBLE,
> there is no point in even having that fallback.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/um/include/asm/pgtable-3level.h |    8 --------
>  arch/x86/include/asm/cmpxchg_64.h    |    5 -----
>  drivers/iommu/intel/irq_remapping.c  |   10 ++--------
>  3 files changed, 2 insertions(+), 21 deletions(-)
> 
> --- a/arch/um/include/asm/pgtable-3level.h
> +++ b/arch/um/include/asm/pgtable-3level.h
> @@ -58,11 +58,7 @@
>  #define pud_populate(mm, pud, pmd) \
>  	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
>  
> -#ifdef CONFIG_64BIT
> -#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
> -#else
>  #define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
> -#endif
>  
>  static inline int pgd_newpage(pgd_t pgd)
>  {
> @@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
>  
>  static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
>  
> -#ifdef CONFIG_64BIT
> -#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
> -#else
>  #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
> -#endif
>  
>  static inline void pud_clear (pud_t *pud)
>  {
> --- a/arch/x86/include/asm/cmpxchg_64.h
> +++ b/arch/x86/include/asm/cmpxchg_64.h
> @@ -2,11 +2,6 @@
>  #ifndef _ASM_X86_CMPXCHG_64_H
>  #define _ASM_X86_CMPXCHG_64_H
>  
> -static inline void set_64bit(volatile u64 *ptr, u64 val)
> -{
> -	*ptr = val;
> -}
> -
>  #define arch_cmpxchg64(ptr, o, n)					\
>  ({									\
>  	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> --- a/drivers/iommu/intel/irq_remapping.c
> +++ b/drivers/iommu/intel/irq_remapping.c
> @@ -173,7 +173,6 @@ static int modify_irte(struct irq_2_iomm
>  	index = irq_iommu->irte_index + irq_iommu->sub_handle;
>  	irte = &iommu->ir_table->base[index];
>  
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
>  	if ((irte->pst == 1) || (irte_modified->pst == 1)) {
>  		bool ret;
>  
> @@ -187,11 +186,6 @@ static int modify_irte(struct irq_2_iomm
>  		 * same as the old value.
>  		 */
>  		WARN_ON(!ret);
> -	} else
> -#endif
> -	{
> -		set_64bit(&irte->low, irte_modified->low);
> -		set_64bit(&irte->high, irte_modified->high);
>  	}
>  	__iommu_flush_cache(iommu, irte, sizeof(*irte));
>  
> @@ -249,8 +243,8 @@ static int clear_entries(struct irq_2_io
>  	end = start + (1 << irq_iommu->irte_mask);
>  
>  	for (entry = start; entry < end; entry++) {
> -		set_64bit(&entry->low, 0);
> -		set_64bit(&entry->high, 0);
> +		WRITE_ONCE(entry->low, 0);
> +		WRITE_ONCE(entry->high, 0);
>  	}
>  	bitmap_release_region(iommu->ir_table->bitmap, index,
>  			      irq_iommu->irte_mask);
> 
> 

This commit is now in -next as commit 0475a2d10fc7 ("x86_64: Remove
pointless set_64bit() usage") and I just bisect a boot failure on my
Intel test desktop to it.

# bad: [81214a573d19ae2fa5b528286ba23cd1cb17feec] Add linux-next specific files for 20221103
# good: [8e5423e991e8cd0988d0c4a3f4ac4ca1af7d148a] Merge tag 'parisc-for-6.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
git bisect start '81214a573d19ae2fa5b528286ba23cd1cb17feec' '8e5423e991e8cd0988d0c4a3f4ac4ca1af7d148a'
# good: [8c13089d26d070fef87a64b48191cb7ae6dfbdb2] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect good 8c13089d26d070fef87a64b48191cb7ae6dfbdb2
# bad: [1bba8e9d15551d2f1c304d8f9d5c647a5b54bfc0] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git bisect bad 1bba8e9d15551d2f1c304d8f9d5c647a5b54bfc0
# good: [748c419c7ade509684ce5bcf74f50e13e0447afd] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
git bisect good 748c419c7ade509684ce5bcf74f50e13e0447afd
# good: [0acc81a3bf9f875c5ef03037ff5431d37f536f05] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
git bisect good 0acc81a3bf9f875c5ef03037ff5431d37f536f05
# bad: [c0fb84e0698d2ce57f9391c7f4112f6e17676f99] Merge branch into tip/master: 'x86/cleanups'
git bisect bad c0fb84e0698d2ce57f9391c7f4112f6e17676f99
# good: [7212c34aac1ec6abadf8b439824c8307ef0dd338] Merge branch 'x86/core' into x86/paravirt, to resolve conflicts
git bisect good 7212c34aac1ec6abadf8b439824c8307ef0dd338
# good: [e1f2ac1d285d963a783a027a1b109420b07f30c1] Merge branch into tip/master: 'x86/cpu'
git bisect good e1f2ac1d285d963a783a027a1b109420b07f30c1
# good: [306b75edbf25b86fe8189a4f96c217e49483f8ae] Merge branch into tip/master: 'x86/cleanups'
git bisect good 306b75edbf25b86fe8189a4f96c217e49483f8ae
# good: [8f28b415703e1935457a4bf0be7f03dc5471d09f] mm: Rename GUP_GET_PTE_LOW_HIGH
git bisect good 8f28b415703e1935457a4bf0be7f03dc5471d09f
# bad: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
git bisect bad 0475a2d10fc7ced3268cd0f0551390b5858f90cd
# good: [a677802d5b0258f93f54620e1cd181b56547c36c] x86/mm/pae: Don't (ab)use atomic64
git bisect good a677802d5b0258f93f54620e1cd181b56547c36c
# good: [533627610ae7709572a4fac1393fb61153e2a5b3] x86/mm/pae: Be consistent with pXXp_get_and_clear()
git bisect good 533627610ae7709572a4fac1393fb61153e2a5b3
# first bad commit: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
# good: [533627610ae7709572a4fac1393fb61153e2a5b3] x86/mm/pae: Be consistent with pXXp_get_and_clear()
git bisect good 533627610ae7709572a4fac1393fb61153e2a5b3
# first bad commit: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage

Unfortunately, I see no output on the screen it is attached to so I
assume it is happening pretty early during the boot sequence, which will
probably make getting logs somewhat hard. I can provide information
about the system if that would help reveal anything. If there is
anything I can test, I am more than happy to do so.

Cheers,
Nathan
  
Uros Bizjak Nov. 3, 2022, 7:23 p.m. UTC | #3
On Thu, Nov 3, 2022 at 8:09 PM Nathan Chancellor <nathan@kernel.org> wrote:
>
> Hi Peter,
>
> On Sat, Oct 22, 2022 at 01:14:14PM +0200, Peter Zijlstra wrote:
> > The use of set_64bit() in X86_64 only code is pretty pointless, seeing
> > how it's a direct assignment. Remove all this nonsense.
> >
> > Additionally, since x86_64 unconditionally has HAVE_CMPXCHG_DOUBLE,
> > there is no point in even having that fallback.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  arch/um/include/asm/pgtable-3level.h |    8 --------
> >  arch/x86/include/asm/cmpxchg_64.h    |    5 -----
> >  drivers/iommu/intel/irq_remapping.c  |   10 ++--------
> >  3 files changed, 2 insertions(+), 21 deletions(-)
> >
> > --- a/arch/um/include/asm/pgtable-3level.h
> > +++ b/arch/um/include/asm/pgtable-3level.h
> > @@ -58,11 +58,7 @@
> >  #define pud_populate(mm, pud, pmd) \
> >       set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
> >
> > -#ifdef CONFIG_64BIT
> > -#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
> > -#else
> >  #define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
> > -#endif
> >
> >  static inline int pgd_newpage(pgd_t pgd)
> >  {
> > @@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
> >
> >  static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
> >
> > -#ifdef CONFIG_64BIT
> > -#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
> > -#else
> >  #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
> > -#endif
> >
> >  static inline void pud_clear (pud_t *pud)
> >  {
> > --- a/arch/x86/include/asm/cmpxchg_64.h
> > +++ b/arch/x86/include/asm/cmpxchg_64.h
> > @@ -2,11 +2,6 @@
> >  #ifndef _ASM_X86_CMPXCHG_64_H
> >  #define _ASM_X86_CMPXCHG_64_H
> >
> > -static inline void set_64bit(volatile u64 *ptr, u64 val)
> > -{
> > -     *ptr = val;
> > -}
> > -
> >  #define arch_cmpxchg64(ptr, o, n)                                    \
> >  ({                                                                   \
> >       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
> > --- a/drivers/iommu/intel/irq_remapping.c
> > +++ b/drivers/iommu/intel/irq_remapping.c
> > @@ -173,7 +173,6 @@ static int modify_irte(struct irq_2_iomm
> >       index = irq_iommu->irte_index + irq_iommu->sub_handle;
> >       irte = &iommu->ir_table->base[index];
> >
> > -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
> >       if ((irte->pst == 1) || (irte_modified->pst == 1)) {
> >               bool ret;
> >
> > @@ -187,11 +186,6 @@ static int modify_irte(struct irq_2_iomm
> >                * same as the old value.
> >                */
> >               WARN_ON(!ret);
> > -     } else
> > -#endif
> > -     {
> > -             set_64bit(&irte->low, irte_modified->low);
> > -             set_64bit(&irte->high, irte_modified->high);
> >       }
> >       __iommu_flush_cache(iommu, irte, sizeof(*irte));

It looks to me that the above part should not be removed, but
set_64bit should be substituted with WRITE_ONCE. Only #if/#endif lines
should be removed.

Uros.

> >
> > @@ -249,8 +243,8 @@ static int clear_entries(struct irq_2_io
> >       end = start + (1 << irq_iommu->irte_mask);
> >
> >       for (entry = start; entry < end; entry++) {
> > -             set_64bit(&entry->low, 0);
> > -             set_64bit(&entry->high, 0);
> > +             WRITE_ONCE(entry->low, 0);
> > +             WRITE_ONCE(entry->high, 0);
> >       }
> >       bitmap_release_region(iommu->ir_table->bitmap, index,
> >                             irq_iommu->irte_mask);
> >
> >
>
> This commit is now in -next as commit 0475a2d10fc7 ("x86_64: Remove
> pointless set_64bit() usage") and I just bisect a boot failure on my
> Intel test desktop to it.
>
> # bad: [81214a573d19ae2fa5b528286ba23cd1cb17feec] Add linux-next specific files for 20221103
> # good: [8e5423e991e8cd0988d0c4a3f4ac4ca1af7d148a] Merge tag 'parisc-for-6.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
> git bisect start '81214a573d19ae2fa5b528286ba23cd1cb17feec' '8e5423e991e8cd0988d0c4a3f4ac4ca1af7d148a'
> # good: [8c13089d26d070fef87a64b48191cb7ae6dfbdb2] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
> git bisect good 8c13089d26d070fef87a64b48191cb7ae6dfbdb2
> # bad: [1bba8e9d15551d2f1c304d8f9d5c647a5b54bfc0] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
> git bisect bad 1bba8e9d15551d2f1c304d8f9d5c647a5b54bfc0
> # good: [748c419c7ade509684ce5bcf74f50e13e0447afd] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
> git bisect good 748c419c7ade509684ce5bcf74f50e13e0447afd
> # good: [0acc81a3bf9f875c5ef03037ff5431d37f536f05] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
> git bisect good 0acc81a3bf9f875c5ef03037ff5431d37f536f05
> # bad: [c0fb84e0698d2ce57f9391c7f4112f6e17676f99] Merge branch into tip/master: 'x86/cleanups'
> git bisect bad c0fb84e0698d2ce57f9391c7f4112f6e17676f99
> # good: [7212c34aac1ec6abadf8b439824c8307ef0dd338] Merge branch 'x86/core' into x86/paravirt, to resolve conflicts
> git bisect good 7212c34aac1ec6abadf8b439824c8307ef0dd338
> # good: [e1f2ac1d285d963a783a027a1b109420b07f30c1] Merge branch into tip/master: 'x86/cpu'
> git bisect good e1f2ac1d285d963a783a027a1b109420b07f30c1
> # good: [306b75edbf25b86fe8189a4f96c217e49483f8ae] Merge branch into tip/master: 'x86/cleanups'
> git bisect good 306b75edbf25b86fe8189a4f96c217e49483f8ae
> # good: [8f28b415703e1935457a4bf0be7f03dc5471d09f] mm: Rename GUP_GET_PTE_LOW_HIGH
> git bisect good 8f28b415703e1935457a4bf0be7f03dc5471d09f
> # bad: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
> git bisect bad 0475a2d10fc7ced3268cd0f0551390b5858f90cd
> # good: [a677802d5b0258f93f54620e1cd181b56547c36c] x86/mm/pae: Don't (ab)use atomic64
> git bisect good a677802d5b0258f93f54620e1cd181b56547c36c
> # good: [533627610ae7709572a4fac1393fb61153e2a5b3] x86/mm/pae: Be consistent with pXXp_get_and_clear()
> git bisect good 533627610ae7709572a4fac1393fb61153e2a5b3
> # first bad commit: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
> # good: [533627610ae7709572a4fac1393fb61153e2a5b3] x86/mm/pae: Be consistent with pXXp_get_and_clear()
> git bisect good 533627610ae7709572a4fac1393fb61153e2a5b3
> # first bad commit: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
>
> Unfortunately, I see no output on the screen it is attached to so I
> assume it is happening pretty early during the boot sequence, which will
> probably make getting logs somewhat hard. I can provide information
> about the system if that would help reveal anything. If there is
> anything I can test, I am more than happy to do so.
>
> Cheers,
> Nathan
  
Nathan Chancellor Nov. 3, 2022, 7:35 p.m. UTC | #4
On Thu, Nov 03, 2022 at 08:23:41PM +0100, Uros Bizjak wrote:
> On Thu, Nov 3, 2022 at 8:09 PM Nathan Chancellor <nathan@kernel.org> wrote:
> >
> > Hi Peter,
> >
> > On Sat, Oct 22, 2022 at 01:14:14PM +0200, Peter Zijlstra wrote:
> > > The use of set_64bit() in X86_64 only code is pretty pointless, seeing
> > > how it's a direct assignment. Remove all this nonsense.
> > >
> > > Additionally, since x86_64 unconditionally has HAVE_CMPXCHG_DOUBLE,
> > > there is no point in even having that fallback.
> > >
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > ---
> > >  arch/um/include/asm/pgtable-3level.h |    8 --------
> > >  arch/x86/include/asm/cmpxchg_64.h    |    5 -----
> > >  drivers/iommu/intel/irq_remapping.c  |   10 ++--------
> > >  3 files changed, 2 insertions(+), 21 deletions(-)
> > >
> > > --- a/arch/um/include/asm/pgtable-3level.h
> > > +++ b/arch/um/include/asm/pgtable-3level.h
> > > @@ -58,11 +58,7 @@
> > >  #define pud_populate(mm, pud, pmd) \
> > >       set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
> > >
> > > -#ifdef CONFIG_64BIT
> > > -#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
> > > -#else
> > >  #define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
> > > -#endif
> > >
> > >  static inline int pgd_newpage(pgd_t pgd)
> > >  {
> > > @@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
> > >
> > >  static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
> > >
> > > -#ifdef CONFIG_64BIT
> > > -#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
> > > -#else
> > >  #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
> > > -#endif
> > >
> > >  static inline void pud_clear (pud_t *pud)
> > >  {
> > > --- a/arch/x86/include/asm/cmpxchg_64.h
> > > +++ b/arch/x86/include/asm/cmpxchg_64.h
> > > @@ -2,11 +2,6 @@
> > >  #ifndef _ASM_X86_CMPXCHG_64_H
> > >  #define _ASM_X86_CMPXCHG_64_H
> > >
> > > -static inline void set_64bit(volatile u64 *ptr, u64 val)
> > > -{
> > > -     *ptr = val;
> > > -}
> > > -
> > >  #define arch_cmpxchg64(ptr, o, n)                                    \
> > >  ({                                                                   \
> > >       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
> > > --- a/drivers/iommu/intel/irq_remapping.c
> > > +++ b/drivers/iommu/intel/irq_remapping.c
> > > @@ -173,7 +173,6 @@ static int modify_irte(struct irq_2_iomm
> > >       index = irq_iommu->irte_index + irq_iommu->sub_handle;
> > >       irte = &iommu->ir_table->base[index];
> > >
> > > -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
> > >       if ((irte->pst == 1) || (irte_modified->pst == 1)) {
> > >               bool ret;
> > >
> > > @@ -187,11 +186,6 @@ static int modify_irte(struct irq_2_iomm
> > >                * same as the old value.
> > >                */
> > >               WARN_ON(!ret);
> > > -     } else
> > > -#endif
> > > -     {
> > > -             set_64bit(&irte->low, irte_modified->low);
> > > -             set_64bit(&irte->high, irte_modified->high);
> > >       }
> > >       __iommu_flush_cache(iommu, irte, sizeof(*irte));
> 
> It looks to me that the above part should not be removed, but
> set_64bit should be substituted with WRITE_ONCE. Only #if/#endif lines
> should be removed.

Thanks, I also realized that only a couple minutes after I sent my
initial message. I just got done testing the following diff, which
resolves my issue. Peter, would you like a formal patch or did you want
to squash it in to the original change?

diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 4216cafd67e7..5d176168bb76 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -186,6 +186,9 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
 		 * same as the old value.
 		 */
 		WARN_ON(!ret);
+	} else {
+		WRITE_ONCE(irte->low, irte_modified->low);
+		WRITE_ONCE(irte->high, irte_modified->high);
 	}
 	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 

> > >
> > > @@ -249,8 +243,8 @@ static int clear_entries(struct irq_2_io
> > >       end = start + (1 << irq_iommu->irte_mask);
> > >
> > >       for (entry = start; entry < end; entry++) {
> > > -             set_64bit(&entry->low, 0);
> > > -             set_64bit(&entry->high, 0);
> > > +             WRITE_ONCE(entry->low, 0);
> > > +             WRITE_ONCE(entry->high, 0);
> > >       }
> > >       bitmap_release_region(iommu->ir_table->bitmap, index,
> > >                             irq_iommu->irte_mask);
> > >
> > >
> >
> > This commit is now in -next as commit 0475a2d10fc7 ("x86_64: Remove
> > pointless set_64bit() usage") and I just bisect a boot failure on my
> > Intel test desktop to it.
> >
> > # bad: [81214a573d19ae2fa5b528286ba23cd1cb17feec] Add linux-next specific files for 20221103
> > # good: [8e5423e991e8cd0988d0c4a3f4ac4ca1af7d148a] Merge tag 'parisc-for-6.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
> > git bisect start '81214a573d19ae2fa5b528286ba23cd1cb17feec' '8e5423e991e8cd0988d0c4a3f4ac4ca1af7d148a'
> > # good: [8c13089d26d070fef87a64b48191cb7ae6dfbdb2] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
> > git bisect good 8c13089d26d070fef87a64b48191cb7ae6dfbdb2
> > # bad: [1bba8e9d15551d2f1c304d8f9d5c647a5b54bfc0] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
> > git bisect bad 1bba8e9d15551d2f1c304d8f9d5c647a5b54bfc0
> > # good: [748c419c7ade509684ce5bcf74f50e13e0447afd] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
> > git bisect good 748c419c7ade509684ce5bcf74f50e13e0447afd
> > # good: [0acc81a3bf9f875c5ef03037ff5431d37f536f05] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git
> > git bisect good 0acc81a3bf9f875c5ef03037ff5431d37f536f05
> > # bad: [c0fb84e0698d2ce57f9391c7f4112f6e17676f99] Merge branch into tip/master: 'x86/cleanups'
> > git bisect bad c0fb84e0698d2ce57f9391c7f4112f6e17676f99
> > # good: [7212c34aac1ec6abadf8b439824c8307ef0dd338] Merge branch 'x86/core' into x86/paravirt, to resolve conflicts
> > git bisect good 7212c34aac1ec6abadf8b439824c8307ef0dd338
> > # good: [e1f2ac1d285d963a783a027a1b109420b07f30c1] Merge branch into tip/master: 'x86/cpu'
> > git bisect good e1f2ac1d285d963a783a027a1b109420b07f30c1
> > # good: [306b75edbf25b86fe8189a4f96c217e49483f8ae] Merge branch into tip/master: 'x86/cleanups'
> > git bisect good 306b75edbf25b86fe8189a4f96c217e49483f8ae
> > # good: [8f28b415703e1935457a4bf0be7f03dc5471d09f] mm: Rename GUP_GET_PTE_LOW_HIGH
> > git bisect good 8f28b415703e1935457a4bf0be7f03dc5471d09f
> > # bad: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
> > git bisect bad 0475a2d10fc7ced3268cd0f0551390b5858f90cd
> > # good: [a677802d5b0258f93f54620e1cd181b56547c36c] x86/mm/pae: Don't (ab)use atomic64
> > git bisect good a677802d5b0258f93f54620e1cd181b56547c36c
> > # good: [533627610ae7709572a4fac1393fb61153e2a5b3] x86/mm/pae: Be consistent with pXXp_get_and_clear()
> > git bisect good 533627610ae7709572a4fac1393fb61153e2a5b3
> > # first bad commit: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
> > # good: [533627610ae7709572a4fac1393fb61153e2a5b3] x86/mm/pae: Be consistent with pXXp_get_and_clear()
> > git bisect good 533627610ae7709572a4fac1393fb61153e2a5b3
> > # first bad commit: [0475a2d10fc7ced3268cd0f0551390b5858f90cd] x86_64: Remove pointless set_64bit() usage
> >
> > Unfortunately, I see no output on the screen it is attached to so I
> > assume it is happening pretty early during the boot sequence, which will
> > probably make getting logs somewhat hard. I can provide information
> > about the system if that would help reveal anything. If there is
> > anything I can test, I am more than happy to do so.
> >
> > Cheers,
> > Nathan
  
Linus Torvalds Nov. 3, 2022, 8:39 p.m. UTC | #5
On Thu, Nov 3, 2022 at 12:36 PM Nathan Chancellor <nathan@kernel.org> wrote:
>
> Thanks, I also realized that only a couple minutes after I sent my
> initial message. I just got done testing the following diff, which
> resolves my issue.

That looks obviously correct.

Except in this case "obviously correct patch" is to some very
non-obvious code, and I think the whole code around it is very very
questionable.

I had to actually go check that this code can only be enabled on
x86-64 (because "IRQ_REMAP" has a "depends on X86_64" on it), because
it also uses cmpxchg_double and that now exists on x86-32 too (but
only does 64 bits, not 128 bits, of course).

Now, to make things even more confusing, I think that

    #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)

has *never* made sense, since it's always enabled for x86.

HOWEVER - there were actually early AMD x86-64 machines that didn't
have CMPXCHG16B. So the conditional kind of makes sense, but doing it
based on CONFIG_HAVE_CMPXCHG_DOUBLE does not.

It turns out that we do do this all correctly, except we do it at boot
time, with a test for boot_cpu_has(X86_FEATURE_CX16):

        /*
         * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
         * XT, GAM also requires GA mode. Therefore, we need to
         * check cmpxchg16b support before enabling them.
         */
        if (!boot_cpu_has(X86_FEATURE_CX16) ||
              ...

but that #ifdef has apparenrly never been valid (I didn't go back and
see if we at some point had a config entry for those old CPUs).

And even after I checked *that*, I then checked the 'struct irte' to
check that it's actually properly defined, and it isn't. Considering
that this all requires 16-byte alignment to work, I think that type
should also be marked as being 16-byte aligned.

In fact, I wonder if we should aim to actually force compile-time
checking, because right now we have

        VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));
        VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2));

in our x86-64 __cmpxchg_double() macro, and honestly, that first one
might be better as a compile time check of __alignof__, and the second
one shouldn't exisrt at all because our interface shouldn't be using
two different pointers when the only possible use is for one single
aligned value.

If somebody actually wants the old m68k type of "DCAS" that did a
cmpxchg on two actually *different* pointers, we should call it
somethign else (and that's not what any current architecture does).

So honestly, just looking at this trivially correct patch, I got into
a rats nest of horribly wrong code. Nasty.

               Linus
  
Peter Zijlstra Nov. 3, 2022, 9:06 p.m. UTC | #6
On Thu, Nov 03, 2022 at 01:39:17PM -0700, Linus Torvalds wrote:
> On Thu, Nov 3, 2022 at 12:36 PM Nathan Chancellor <nathan@kernel.org> wrote:
> >
> > Thanks, I also realized that only a couple minutes after I sent my
> > initial message. I just got done testing the following diff, which
> > resolves my issue.
> 
> That looks obviously correct.

I'll force-push tip/x86/mm with the fix from Nathan and I'll try and
look into the rest of the trainwreck tomorrow with the brain awake --
after I figure out that other fail Nathan reported :/

Sorry for breaking all your machines Nate.
  
Peter Zijlstra Nov. 4, 2022, 4:01 p.m. UTC | #7
On Thu, Nov 03, 2022 at 01:39:17PM -0700, Linus Torvalds wrote:

> And even after I checked *that*, I then checked the 'struct irte' to
> check that it's actually properly defined, and it isn't. Considering
> that this all requires 16-byte alignment to work, I think that type
> should also be marked as being 16-byte aligned.
> 
> In fact, I wonder if we should aim to actually force compile-time
> checking, because right now we have
> 
>         VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));
>         VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2));
> 
> in our x86-64 __cmpxchg_double() macro, and honestly, that first one
> might be better as a compile time check of __alignof__, and the second
> one shouldn't exisrt at all because our interface shouldn't be using
> two different pointers when the only possible use is for one single
> aligned value.

So cmpxchg_double() does a cmpxchg on a double long value and is
currently supported by: i386, x86_64, arm64 and s390.

On all those, except i386, two longs are u128.

So how about we introduce u128 and cmpxchg128 -- then it directly
mirrors the u64 and cmpxchg64 usage we already have. It then also
naturally imposses the alignment thing.

Afaict the only cmpxchg_double() users are:

  arch/s390/kernel/perf_cpum_sf.c
  drivers/iommu/amd/iommu.c
  drivers/iommu/intel/irq_remapping.c
  mm/slub.c

Of those slub.c is the only one that cares about 32bit and would need
some 'compat' code to pick between cmpxchg64 / cmpxchg128, but it
already has everything wrapped in helpers so that shouldn't be too big
of a worry.

Then we can convert these few users over and simply delete the whole
cmpxchg_double() thing.
  
Linus Torvalds Nov. 4, 2022, 5:15 p.m. UTC | #8
On Fri, Nov 4, 2022 at 9:01 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> So cmpxchg_double() does a cmpxchg on a double long value and is
> currently supported by: i386, x86_64, arm64 and s390.
>
> On all those, except i386, two longs are u128.
>
> So how about we introduce u128 and cmpxchg128 -- then it directly
> mirrors the u64 and cmpxchg64 usage we already have. It then also
> naturally imposses the alignment thing.

Ack, except that we might have some "u128" users that do *not*
necessarily want any alignment thing.

But maybe we could at least start with an u128 type that is marked as
being fully aligned, and if some other user comes in down the line
that wants relaxed alignment we can call it "u128_unaligned" or
something.

That would have avoided the pain we have on x86-32 where "u64" in a
UAPI structure is differently aligned than it is on actual 64-bit
architectures.

> Of those slub.c is the only one that cares about 32bit and would need
> some 'compat' code to pick between cmpxchg64 / cmpxchg128, but it
> already has everything wrapped in helpers so that shouldn't be too big
> of a worry.

Ack. Special case, and we can call it something very clear, and in
fact it would clean up the slab code too if we actually used some
explicit type for this.

Right now, slab does

        /* Double-word boundary */
        void *freelist;         /* first free object */
        union {
                unsigned long counters;
                struct {

where the alignment constraints are just a comment, and then it passes
pointers to the two words around manually.

But it should be fairly straightforward to make it use an actual
properly typed thing, and using an unnamed union member, do something
that takes an explicitly aligned "two word" thing instead.

IOW, make the 'compat' case *not* use those stupid two separate
pointers (that have to be consecutive and aligned), but actually do
something like

        struct cmpxchg_double_long {
                unsigned long a,b;
        } __aligned(2*sizeof(long));

and then the slab case can use that union of that and the existing
"freelist+word-union" to make this all not just a comment to people,
but something the compiler sees too.

            Linus
  
Jason A. Donenfeld Nov. 5, 2022, 1:29 p.m. UTC | #9
On Fri, Nov 04, 2022 at 10:15:08AM -0700, Linus Torvalds wrote:
> On Fri, Nov 4, 2022 at 9:01 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > So cmpxchg_double() does a cmpxchg on a double long value and is
> > currently supported by: i386, x86_64, arm64 and s390.
> >
> > On all those, except i386, two longs are u128.
> >
> > So how about we introduce u128 and cmpxchg128 -- then it directly
> > mirrors the u64 and cmpxchg64 usage we already have. It then also
> > naturally imposses the alignment thing.
> 
> Ack, except that we might have some "u128" users that do *not*
> necessarily want any alignment thing.
> 
> But maybe we could at least start with an u128 type that is marked as
> being fully aligned, and if some other user comes in down the line
> that wants relaxed alignment we can call it "u128_unaligned" or
> something.

Hm, sounds maybe not so nice for another use case: arithmetic code that
makes use of u128 for efficient computations, but otherwise has
no particular alignment requirements. For example, `typedef __uint128_t
u128;` in:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/crypto/poly1305-donna64.c
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/crypto/curve25519-hacl64.c

I always thought it'd be nice to see that typedef alongside the others
in the shared kernel headers, but figured the requirement for 64-bit and
libgcc for some operations on some architectures made it a bit less
general purpose, so I never proposed it.

Jason
  
Peter Zijlstra Nov. 5, 2022, 3:14 p.m. UTC | #10
On Sat, Nov 05, 2022 at 02:29:47PM +0100, Jason A. Donenfeld wrote:
> On Fri, Nov 04, 2022 at 10:15:08AM -0700, Linus Torvalds wrote:
> > On Fri, Nov 4, 2022 at 9:01 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > So cmpxchg_double() does a cmpxchg on a double long value and is
> > > currently supported by: i386, x86_64, arm64 and s390.
> > >
> > > On all those, except i386, two longs are u128.
> > >
> > > So how about we introduce u128 and cmpxchg128 -- then it directly
> > > mirrors the u64 and cmpxchg64 usage we already have. It then also
> > > naturally imposses the alignment thing.
> > 
> > Ack, except that we might have some "u128" users that do *not*
> > necessarily want any alignment thing.
> > 
> > But maybe we could at least start with an u128 type that is marked as
> > being fully aligned, and if some other user comes in down the line
> > that wants relaxed alignment we can call it "u128_unaligned" or
> > something.
> 
> Hm, sounds maybe not so nice for another use case: arithmetic code that
> makes use of u128 for efficient computations, but otherwise has
> no particular alignment requirements. For example, `typedef __uint128_t
> u128;` in:

Natural alignment is... natural. Making it unaligned is quite mad. That
whole u64 is not naturally aligned on i386 thing Linus referred to is a
sodding pain in the backside.

If the code has no alignment requirements, natural alignment is as good
as any. And if it does have requirements, you can use u128_unaligned.

Also:

$ ./align
16, 16

---

#include <stdio.h>

int main(int argx, char **argv)
{
	__int128 a;

	printf("%d, %d\n", sizeof(a), __alignof(a));
	return 0;
}
  
Jason A. Donenfeld Nov. 5, 2022, 8:54 p.m. UTC | #11
On Sat, Nov 05, 2022 at 04:14:19PM +0100, Peter Zijlstra wrote:
> Also:
> 
> $ ./align
> 16, 16
> 
> ---
> 
> #include <stdio.h>
> 
> int main(int argx, char **argv)
> {
> 	__int128 a;
> 
> 	printf("%d, %d\n", sizeof(a), __alignof(a));
> 	return 0;
> }

zx2c4@thinkpad /tmp $ x86_64-linux-musl-gcc -O2 a.c -static && qemu-x86_64 ./a.out
16, 16
zx2c4@thinkpad /tmp $ aarch64-linux-musl-gcc -O2 a.c -static && qemu-aarch64 ./a.out
16, 16
zx2c4@thinkpad /tmp $ powerpc64le-linux-musl-gcc -O2 a.c -static && qemu-ppc64le ./a.out
16, 16
zx2c4@thinkpad /tmp $ s390x-linux-musl-gcc -O2 a.c -static && qemu-s390x ./a.out
16, 8
zx2c4@thinkpad /tmp $ riscv64-linux-musl-gcc -O2 a.c -static && qemu-riscv64 ./a.out
16, 16

Er, yea, you're right. Looks like of these, it's just s390x, so
whatever.

Jason
  
David Laight Nov. 7, 2022, 9:14 a.m. UTC | #12
From: Peter Zijlstra
> Sent: 05 November 2022 15:14
> 
> On Sat, Nov 05, 2022 at 02:29:47PM +0100, Jason A. Donenfeld wrote:
> > On Fri, Nov 04, 2022 at 10:15:08AM -0700, Linus Torvalds wrote:
> > > On Fri, Nov 4, 2022 at 9:01 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > > >
> > > > So cmpxchg_double() does a cmpxchg on a double long value and is
> > > > currently supported by: i386, x86_64, arm64 and s390.
> > > >
> > > > On all those, except i386, two longs are u128.
> > > >
> > > > So how about we introduce u128 and cmpxchg128 -- then it directly
> > > > mirrors the u64 and cmpxchg64 usage we already have. It then also
> > > > naturally imposses the alignment thing.
> > >
> > > Ack, except that we might have some "u128" users that do *not*
> > > necessarily want any alignment thing.
> > >
> > > But maybe we could at least start with an u128 type that is marked as
> > > being fully aligned, and if some other user comes in down the line
> > > that wants relaxed alignment we can call it "u128_unaligned" or
> > > something.
> >
> > Hm, sounds maybe not so nice for another use case: arithmetic code that
> > makes use of u128 for efficient computations, but otherwise has
> > no particular alignment requirements. For example, `typedef __uint128_t
> > u128;` in:
> 
> Natural alignment is... natural. Making it unaligned is quite mad. That
> whole u64 is not naturally aligned on i386 thing Linus referred to is a
> sodding pain in the backside.
> 
> If the code has no alignment requirements, natural alignment is as good
> as any. And if it does have requirements, you can use u128_unaligned.
> 
> Also:
> 
> $ ./align
> 16, 16
> 
> ---
> 
> #include <stdio.h>
> 
> int main(int argx, char **argv)
> {
> 	__int128 a;
> 
> 	printf("%d, %d\n", sizeof(a), __alignof(a));
> 	return 0;
> }

Well, __alignof() doesn't return the required value.
(cf 'long long' on 32bit x86).
But the alignment of __int128 is 16 :-)

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
Peter Zijlstra Dec. 19, 2022, 3:44 p.m. UTC | #13
On Fri, Nov 04, 2022 at 10:15:08AM -0700, Linus Torvalds wrote:
> On Fri, Nov 4, 2022 at 9:01 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > So cmpxchg_double() does a cmpxchg on a double long value and is
> > currently supported by: i386, x86_64, arm64 and s390.
> >
> > On all those, except i386, two longs are u128.
> >
> > So how about we introduce u128 and cmpxchg128 -- then it directly
> > mirrors the u64 and cmpxchg64 usage we already have. It then also
> > naturally imposses the alignment thing.
> 
> Ack

Out now: https://lkml.kernel.org/r/20221219153525.632521981@infradead.org
  

Patch

--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -58,11 +58,7 @@ 
 #define pud_populate(mm, pud, pmd) \
 	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
 
-#ifdef CONFIG_64BIT
-#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
-#else
 #define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
-#endif
 
 static inline int pgd_newpage(pgd_t pgd)
 {
@@ -71,11 +67,7 @@  static inline int pgd_newpage(pgd_t pgd)
 
 static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
 
-#ifdef CONFIG_64BIT
-#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
-#else
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
-#endif
 
 static inline void pud_clear (pud_t *pud)
 {
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -2,11 +2,6 @@ 
 #ifndef _ASM_X86_CMPXCHG_64_H
 #define _ASM_X86_CMPXCHG_64_H
 
-static inline void set_64bit(volatile u64 *ptr, u64 val)
-{
-	*ptr = val;
-}
-
 #define arch_cmpxchg64(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -173,7 +173,6 @@  static int modify_irte(struct irq_2_iomm
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
 	if ((irte->pst == 1) || (irte_modified->pst == 1)) {
 		bool ret;
 
@@ -187,11 +186,6 @@  static int modify_irte(struct irq_2_iomm
 		 * same as the old value.
 		 */
 		WARN_ON(!ret);
-	} else
-#endif
-	{
-		set_64bit(&irte->low, irte_modified->low);
-		set_64bit(&irte->high, irte_modified->high);
 	}
 	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 
@@ -249,8 +243,8 @@  static int clear_entries(struct irq_2_io
 	end = start + (1 << irq_iommu->irte_mask);
 
 	for (entry = start; entry < end; entry++) {
-		set_64bit(&entry->low, 0);
-		set_64bit(&entry->high, 0);
+		WRITE_ONCE(entry->low, 0);
+		WRITE_ONCE(entry->high, 0);
 	}
 	bitmap_release_region(iommu->ir_table->bitmap, index,
 			      irq_iommu->irte_mask);