[v3,12/34] loongarch: Implement the new page table range API

Message ID 20230228213738.272178-13-willy@infradead.org
State New
Series New page table range API

Commit Message

Matthew Wilcox Feb. 28, 2023, 9:37 p.m. UTC
  Add set_ptes() and update_mmu_cache_range().  It would probably be
more efficient to implement __update_tlb() by flushing the entire
folio instead of calling it N times, but I'll leave
that for someone who understands the architecture better.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: loongarch@lists.linux.dev
---
 arch/loongarch/include/asm/cacheflush.h |  2 ++
 arch/loongarch/include/asm/pgtable.h    | 30 +++++++++++++++++++------
 2 files changed, 25 insertions(+), 7 deletions(-)
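
For orientation, a minimal sketch of how a caller of the new range API
might map all the consecutive pages of a folio: one set_ptes() /
update_mmu_cache_range() call per range instead of one set_pte_at() /
update_mmu_cache() call per page. The helper and its arguments are
illustrative assumptions, not part of this patch:

/*
 * Hedged sketch only: a hypothetical caller mapping every page of a
 * folio.  set_ptes() writes nr PTEs with an increasing PFN, and
 * update_mmu_cache_range() then covers the whole range in one call.
 */
static void sketch_map_folio(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, struct folio *folio, pgprot_t prot)
{
	unsigned int nr = folio_nr_pages(folio);
	pte_t pte = mk_pte(folio_page(folio, 0), prot);

	set_ptes(vma->vm_mm, addr, ptep, pte, nr);
	update_mmu_cache_range(vma, addr, ptep, nr);
}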
  

Comments

WANG Xuerui March 1, 2023, 2:04 a.m. UTC | #1
Hi,

On 3/1/23 05:37, Matthew Wilcox (Oracle) wrote:
> Add set_ptes() and update_mmu_cache_range().  It would probably be
> more efficient to implement __update_tlb() by flushing the entire
> folio instead of calling it N times, but I'll leave
> that for someone who understands the architecture better.
Thanks for the patch! Unfortunately the architecture currently provides
no way to batch-flush a range of pages on LoongArch, but AFAIK the
vendor *could* be listening, so a future model might support it... who
knows!
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Cc: Huacai Chen <chenhuacai@kernel.org>
> Cc: WANG Xuerui <kernel@xen0n.name>
> Cc: loongarch@lists.linux.dev
> ---
>   arch/loongarch/include/asm/cacheflush.h |  2 ++
>   arch/loongarch/include/asm/pgtable.h    | 30 +++++++++++++++++++------
>   2 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
> index 0681788eb474..7907eb42bfbd 100644
> --- a/arch/loongarch/include/asm/cacheflush.h
> +++ b/arch/loongarch/include/asm/cacheflush.h
> @@ -47,8 +47,10 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
>   #define flush_cache_vmap(start, end)			do { } while (0)
>   #define flush_cache_vunmap(start, end)			do { } while (0)
>   #define flush_icache_page(vma, page)			do { } while (0)
> +#define flush_icache_pages(vma, page)			do { } while (0)
>   #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
>   #define flush_dcache_page(page)				do { } while (0)
> +#define flush_dcache_folio(folio)			do { } while (0)
This will break the build because the surrounding code is unnecessarily 
redefining the stubs that the asm-generic include at the end of the file 
will properly take care of. With the build fixed (patch attached below), 
I can successfully boot-test and stress mm for a while on a Loongson 
3A5000. I haven't done the benchmarks though.
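
For the record, the collision comes from the generic code: with
ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE defined to 0,
include/linux/cacheflush.h already emits a static inline
flush_dcache_folio(), which the new function-like macro above then
clobbers. Roughly (paraphrased from the generic header, not verbatim):

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void flush_dcache_folio(struct folio *folio);
#else
static inline void flush_dcache_folio(struct folio *folio)
{
}
#define flush_dcache_folio flush_dcache_folio
#endif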
>   #define flush_dcache_mmap_lock(mapping)			do { } while (0)
>   #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
>   
> <snip>

Cleanup patch:

-- >8 --

From 0de3f49e6ba23c1b45e7b5da355f87398c4a7feb Mon Sep 17 00:00:00 2001
From: WANG Xuerui <git@xen0n.name>
Date: Wed, 1 Mar 2023 09:32:18 +0800
Subject: [PATCH] LoongArch: Remove stub definitions in cacheflush.h

Per current best practice in mm, it's unnecessary to define no-op
stubs explicitly in arch code, because the asm-generic inclusion at the
end of the file will properly take care of these. And defining
ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE to 0 is going to confuse the
asm-generic code, so remove the stubs for clarity and to avoid misuse.

Signed-off-by: WANG Xuerui <git@xen0n.name>
---
  arch/loongarch/include/asm/cacheflush.h | 15 ---------------
  1 file changed, 15 deletions(-)

diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 326ac6f1b27c..cb8c046b4f17 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -37,21 +37,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
  #define flush_icache_range     local_flush_icache_range
  #define flush_icache_user_range        local_flush_icache_range
  
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-
-#define flush_cache_all()                              do { } while (0)
-#define flush_cache_mm(mm)                             do { } while (0)
-#define flush_cache_dup_mm(mm)                         do { } while (0)
-#define flush_cache_range(vma, start, end)             do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn)             do { } while (0)
-#define flush_cache_vmap(start, end)                   do { } while (0)
-#define flush_cache_vunmap(start, end)                 do { } while (0)
-#define flush_icache_user_page(vma, page, addr, len)   do { } while (0)
-#define flush_dcache_page(page)                                do { } while (0)
-#define flush_dcache_folio(folio)                      do { } while (0)
-#define flush_dcache_mmap_lock(mapping)                        do { } while (0)
-#define flush_dcache_mmap_unlock(mapping)              do { } while (0)
-
  #define cache_op(op, addr)                                             \
         __asm__ __volatile__(                                           \
         "       cacop   %0, %1                                  \n"     \
  

Patch

diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 0681788eb474..7907eb42bfbd 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -47,8 +47,10 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
 #define flush_cache_vmap(start, end)			do { } while (0)
 #define flush_cache_vunmap(start, end)			do { } while (0)
 #define flush_icache_page(vma, page)			do { } while (0)
+#define flush_icache_pages(vma, page)			do { } while (0)
 #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
 #define flush_dcache_page(page)				do { } while (0)
+#define flush_dcache_folio(folio)			do { } while (0)
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
 
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index d28fb9dbec59..9154d317ffb4 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -334,12 +334,20 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 	}
 }
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
-{
-	set_pte(ptep, pteval);
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte_val(pte) += 1 << _PFN_SHIFT;
+	}
 }
 
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
+
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	/* Preserve global status for the pair */
@@ -445,11 +453,19 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 extern void __update_tlb(struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep);
 
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
-	__update_tlb(vma, address, ptep);
+	for (;;) {
+		__update_tlb(vma, address, ptep);
+		if (--nr == 0)
+			break;
+		address += PAGE_SIZE;
+		ptep++;
+	}
 }
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(vma, addr, ptep, 1)
 
 #define __HAVE_ARCH_UPDATE_MMU_TLB
 #define update_mmu_tlb	update_mmu_cache
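
A closing note on the PFN arithmetic in set_ptes() above: the PFN field
of a LoongArch PTE starts at bit _PFN_SHIFT, so adding 1 << _PFN_SHIFT
advances the mapping by exactly one page per iteration. A hedged
illustration with made-up values:

/* Illustration only; the PFN values are invented. */
pte_t pte = pfn_pte(0x1000, PAGE_KERNEL);	/* maps PFN 0x1000 */
pte_val(pte) += 1UL << _PFN_SHIFT;		/* now maps PFN 0x1001 */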