[v3,13/34] m68k: Implement the new page table range API

Message ID 20230228213738.272178-14-willy@infradead.org
State New
Headers
Series New page table range API |

Commit Message

Matthew Wilcox Feb. 28, 2023, 9:37 p.m. UTC
  Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
flush_dcache_folio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
---
 arch/m68k/include/asm/cacheflush_mm.h | 26 +++++++++++++++++---------
 arch/m68k/include/asm/pgtable_mm.h    | 21 ++++++++++++++++++---
 arch/m68k/mm/motorola.c               |  2 +-
 3 files changed, 36 insertions(+), 13 deletions(-)
  

Comments

Geert Uytterhoeven March 5, 2023, 10:16 a.m. UTC | #1
Hi Willy,

On Tue, Feb 28, 2023 at 10:37 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
> Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
> flush_dcache_folio().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Thanks for your patch!

> --- a/arch/m68k/include/asm/cacheflush_mm.h
> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -220,24 +220,28 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm
>
>  /* Push the page at kernel virtual address and clear the icache */
>  /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
> -static inline void __flush_page_to_ram(void *vaddr)
> +static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
>  {
>         if (CPU_IS_COLDFIRE) {
>                 unsigned long addr, start, end;
>                 addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1);
>                 start = addr & ICACHE_SET_MASK;
> -               end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK;
> +               end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK;
>                 if (start > end) {
>                         flush_cf_bcache(0, end);
>                         end = ICACHE_MAX_ADDR;
>                 }
>                 flush_cf_bcache(start, end);
>         } else if (CPU_IS_040_OR_060) {
> -               __asm__ __volatile__("nop\n\t"
> -                                    ".chip 68040\n\t"
> -                                    "cpushp %%bc,(%0)\n\t"
> -                                    ".chip 68k"
> -                                    : : "a" (__pa(vaddr)));
> +               unsigned long paddr = __pa(vaddr);
> +
> +               while (nr--) {
> +                       __asm__ __volatile__("nop\n\t"
> +                                            ".chip 68040\n\t"
> +                                            "cpushp %%bc,(%0)\n\t"
> +                                            ".chip 68k"
> +                                            : : "a" (paddr + nr * PAGE_SIZE));

As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
inside the loop (albeit using a shift instead of a multiplication),
please use "paddr" here, followed by "paddr += PAGE_SIZE;".

Gr{oetje,eeting}s,

                        Geert


--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
  
Matthew Wilcox March 5, 2023, 3:28 p.m. UTC | #2
On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
> > +               while (nr--) {
> > +                       __asm__ __volatile__("nop\n\t"
> > +                                            ".chip 68040\n\t"
> > +                                            "cpushp %%bc,(%0)\n\t"
> > +                                            ".chip 68k"
> > +                                            : : "a" (paddr + nr * PAGE_SIZE));
> 
> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
> inside the loop (albeit using a shift instead of a multiplication),
> please use "paddr" here, followed by "paddr += PAGE_SIZE;".

Thanks.  So this?

+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
        } else if (CPU_IS_040_OR_060) {
                unsigned long paddr = __pa(vaddr);
 
-               while (nr--) {
+               do {
                        __asm__ __volatile__("nop\n\t"
                                             ".chip 68040\n\t"
                                             "cpushp %%bc,(%0)\n\t"
                                             ".chip 68k"
-                                            : : "a" (paddr + nr * PAGE_SIZE));
-               }
+                                            : : "a" (paddr));
+                       paddr += PAGE_SIZE;
+               } while (--nr);
        } else {
                unsigned long _tmp;
                __asm__ __volatile__("movec %%cacr,%0\n\t"

Also, I noticed that I broke sun3.  It puts the PFN in bits 0-n instead
of 12-n.  New patch coming soon.
  
Geert Uytterhoeven March 5, 2023, 4:48 p.m. UTC | #3
Hi Willy,

On Sun, Mar 5, 2023 at 4:28 PM Matthew Wilcox <willy@infradead.org> wrote:
> On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
> > > +               while (nr--) {
> > > +                       __asm__ __volatile__("nop\n\t"
> > > +                                            ".chip 68040\n\t"
> > > +                                            "cpushp %%bc,(%0)\n\t"
> > > +                                            ".chip 68k"
> > > +                                            : : "a" (paddr + nr * PAGE_SIZE));
> >
> > As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
> > inside the loop (albeit using a shift instead of a multiplication),
> > please use "paddr" here, followed by "paddr += PAGE_SIZE;".
>
> Thanks.  So this?
>
> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
>         } else if (CPU_IS_040_OR_060) {
>                 unsigned long paddr = __pa(vaddr);
>
> -               while (nr--) {
> +               do {
>                         __asm__ __volatile__("nop\n\t"
>                                              ".chip 68040\n\t"
>                                              "cpushp %%bc,(%0)\n\t"
>                                              ".chip 68k"
> -                                            : : "a" (paddr + nr * PAGE_SIZE));
> -               }
> +                                            : : "a" (paddr));
> +                       paddr += PAGE_SIZE;
> +               } while (--nr);
>         } else {
>                 unsigned long _tmp;
>                 __asm__ __volatile__("movec %%cacr,%0\n\t"

LGTM. Might be safer to keep the "while (nr--) {", just in case someone
ever passes zero.

> Also, I noticed that I broke sun3.  It puts the PFN in bits 0-n instead
> of 12-n.  New patch coming soon.

Thanks, hadn't noticed (there are no sun3-specific code changes in
this series?)

Gr{oetje,eeting}s,

                        Geert
  
Michael Schmitz March 5, 2023, 8:44 p.m. UTC | #4
Hi Matthew, Geert,

sorry, I missed that one when it got posted ...

On 6/03/23 04:28, Matthew Wilcox wrote:
> On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
>>> +               while (nr--) {
>>> +                       __asm__ __volatile__("nop\n\t"
>>> +                                            ".chip 68040\n\t"
>>> +                                            "cpushp %%bc,(%0)\n\t"
>>> +                                            ".chip 68k"
>>> +                                            : : "a" (paddr + nr * PAGE_SIZE));
>> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
>> inside the loop (albeit using a shift instead of a multiplication),
>> please use "paddr" here, followed by "paddr += PAGE_SIZE;".

Are we certain that contiguous vaddr always maps to contiguous paddr?

If not, I'd suggest we increment vaddr inside the loop, and use __pa() 
each time:

> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
>          } else if (CPU_IS_040_OR_060) {
>   
> -               while (nr--) {
> +               do {
>                          __asm__ __volatile__("nop\n\t"
>                                               ".chip 68040\n\t"
>                                               "cpushp %%bc,(%0)\n\t"
>                                               ".chip 68k"
> -                                            : : "a" (paddr + nr * PAGE_SIZE));
> -               }
> +                                            : : "a" __pa(vaddr));
> +                       vaddr += PAGE_SIZE;
> +               } while (--nr);
>          } else {
>                  unsigned long _tmp;
>                  __asm__ __volatile__("movec %%cacr,%0\n\t"
>
(just edited Matthew's patch in the mail editor, untested, may not apply 
cleanly ...)

Cheers,

     Michael
  
Geert Uytterhoeven March 6, 2023, 7:21 a.m. UTC | #5
Hi Michael,

On Sun, Mar 5, 2023 at 9:44 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
> On 6/03/23 04:28, Matthew Wilcox wrote:
> > On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
> >>> +               while (nr--) {
> >>> +                       __asm__ __volatile__("nop\n\t"
> >>> +                                            ".chip 68040\n\t"
> >>> +                                            "cpushp %%bc,(%0)\n\t"
> >>> +                                            ".chip 68k"
> >>> +                                            : : "a" (paddr + nr * PAGE_SIZE));
> >> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
> >> inside the loop (albeit using a shift instead of a multiplication),
> >> please use "paddr" here, followed by "paddr += PAGE_SIZE;".
>
> Are we certain that contiguous vaddr always maps to contiguous paddr?

For a general __flush_pages_to_ram() function, that would not be
guaranteed. But as this is meant for folios, it must be true:
https://elixir.bootlin.com/linux/latest/source/include/linux/mm_types.h#L320

Gr{oetje,eeting}s,

                        Geert
  
Michael Schmitz March 6, 2023, 11:01 p.m. UTC | #6
Hi Geert,

On 6/03/23 20:21, Geert Uytterhoeven wrote:
>> Are we certain that contiguous vaddr always maps to contiguous paddr?
> For a general __flush_pages_to_ram() function, that would not be
> guaranteed. But as this is meant for folios, it must be true:
> https://elixir.bootlin.com/linux/latest/source/include/linux/mm_types.h#L320

Thanks for explaining - that just leaves the problem of cowboys like 
myself abusing __flush_pages_to_ram(addr, nr) with nr > 1 for something 
that isn't a folio. Maybe a comment 'nr > 1 only valid on folios' would 
help ...

Cheers,

     Michael

>
> Gr{oetje,eeting}s,
>
>                          Geert
>
  

Patch

diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 1ac55e7b47f0..d43c8bce149b 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -220,24 +220,28 @@  static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm
 
 /* Push the page at kernel virtual address and clear the icache */
 /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
-static inline void __flush_page_to_ram(void *vaddr)
+static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
 {
 	if (CPU_IS_COLDFIRE) {
 		unsigned long addr, start, end;
 		addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1);
 		start = addr & ICACHE_SET_MASK;
-		end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK;
+		end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK;
 		if (start > end) {
 			flush_cf_bcache(0, end);
 			end = ICACHE_MAX_ADDR;
 		}
 		flush_cf_bcache(start, end);
 	} else if (CPU_IS_040_OR_060) {
-		__asm__ __volatile__("nop\n\t"
-				     ".chip 68040\n\t"
-				     "cpushp %%bc,(%0)\n\t"
-				     ".chip 68k"
-				     : : "a" (__pa(vaddr)));
+		unsigned long paddr = __pa(vaddr);
+
+		while (nr--) {
+			__asm__ __volatile__("nop\n\t"
+					     ".chip 68040\n\t"
+					     "cpushp %%bc,(%0)\n\t"
+					     ".chip 68k"
+					     : : "a" (paddr + nr * PAGE_SIZE));
+		}
 	} else {
 		unsigned long _tmp;
 		__asm__ __volatile__("movec %%cacr,%0\n\t"
@@ -249,10 +253,14 @@  static inline void __flush_page_to_ram(void *vaddr)
 }
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-#define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
+#define flush_dcache_page(page)	__flush_pages_to_ram(page_address(page), 1)
+#define flush_dcache_folio(folio)		\
+	__flush_pages_to_ram(folio_address(folio), folio_nr_pages(folio))
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
-#define flush_icache_page(vma, page)	__flush_page_to_ram(page_address(page))
+#define flush_icache_pages(vma, page, nr)	\
+	__flush_pages_to_ram(page_address(page), nr)
+#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
 
 extern void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
 				    unsigned long addr, int len);
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index b93c41fe2067..400206c17c97 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -31,8 +31,20 @@ 
 	do{							\
 		*(pteptr) = (pteval);				\
 	} while(0)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += PAGE_SIZE;
+	}
+}
+
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
 
 /* PMD_SHIFT determines the size of the area a second-level page table can map */
 #if CONFIG_PGTABLE_LEVELS == 3
@@ -138,11 +150,14 @@  extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
  * tables contain all the necessary information.  The Sun3 does, but
  * they are updated on demand.
  */
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-				    unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
 }
 
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
+
 #endif /* !__ASSEMBLY__ */
 
 /* MMU-specific headers */
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 2a375637e007..7784d0fcdf6e 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -81,7 +81,7 @@  static inline void cache_page(void *vaddr)
 
 void mmu_page_ctor(void *page)
 {
-	__flush_page_to_ram(page);
+	__flush_pages_to_ram(page, 1);
 	flush_tlb_kernel_page(page);
 	nocache_page(page);
 }