[RFC,V1,05/11] riscv: Decouple pmd operations and pte operations

Message ID 20231123065708.91345-6-luxu.kernel@bytedance.com
State New
Headers
Series riscv: Introduce 64K base page |

Commit Message

Xu Lu Nov. 23, 2023, 6:57 a.m. UTC
  Existing pmd operations are usually implemented via pte operations. For
example, the pmd_mkdirty function, which is used to mark a pmd_t struct
as dirty, first converts the pmd_t struct to a pte_t struct via pmd_pte,
then marks the resulting pte_t as dirty, and finally converts it back to
a pmd_t struct via the pte_pmd function. Such an implementation
introduces unnecessary struct conversion overhead. Also, now that the
pte_t struct is a number of page table entries, which can be larger than
the pmd_t struct, functions like set_pmd_at that are implemented via
set_pte_at will cause write amplification.

This commit decouples pmd operations and pte operations. Pmd operations
are now implemented independently of pte operations.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 arch/riscv/include/asm/pgtable-64.h |   6 ++
 arch/riscv/include/asm/pgtable.h    | 124 +++++++++++++++++++++-------
 include/asm-generic/pgtable-nopmd.h |   1 +
 include/linux/pgtable.h             |   6 ++
 4 files changed, 108 insertions(+), 29 deletions(-)
  

Patch

diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 1926727698fc..95e785f2160c 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -206,6 +206,12 @@  static inline int pud_leaf(pud_t pud)
 	return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF);
 }
 
+#define pud_exec	pud_exec
+static inline int pud_exec(pud_t pud)
+{
+	return pud_val(pud) & _PAGE_EXEC;
+}
+
 static inline int pud_user(pud_t pud)
 {
 	return pud_val(pud) & _PAGE_USER;
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index d50c4588c1ed..9f81fe046cb8 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -272,6 +272,18 @@  static inline int pmd_leaf(pmd_t pmd)
 	return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF);
 }
 
+#define pmd_exec	pmd_exec
+static inline int pmd_exec(pmd_t pmd)
+{
+	return pmd_val(pmd) & _PAGE_EXEC;
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return pmd_val(pmd_a) == pmd_val(pmd_b);
+}
+
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
@@ -506,7 +518,7 @@  static inline int pte_protnone(pte_t pte)
 
 static inline int pmd_protnone(pmd_t pmd)
 {
-	return pte_protnone(pmd_pte(pmd));
+	return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
 }
 #endif
 
@@ -740,73 +752,95 @@  static inline unsigned long pud_pfn(pud_t pud)
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+	unsigned long newprot_val = pgprot_val(newprot);
+
+	ALT_THEAD_PMA(newprot_val);
+
+	return __pmd((pmd_val(pmd) & _PAGE_CHG_MASK) | newprot_val);
 }
 
 #define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
-	return pte_write(pmd_pte(pmd));
+	return pmd_val(pmd) & _PAGE_WRITE;
 }
 
 static inline int pmd_dirty(pmd_t pmd)
 {
-	return pte_dirty(pmd_pte(pmd));
+	return pmd_val(pmd) & _PAGE_DIRTY;
 }
 
 #define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
-	return pte_young(pmd_pte(pmd));
+	return pmd_val(pmd) & _PAGE_ACCESSED;
 }
 
 static inline int pmd_user(pmd_t pmd)
 {
-	return pte_user(pmd_pte(pmd));
+	return pmd_val(pmd) & _PAGE_USER;
 }
 
 static inline pmd_t pmd_mkold(pmd_t pmd)
 {
-	return pte_pmd(pte_mkold(pmd_pte(pmd)));
+	return __pmd(pmd_val(pmd) & ~(_PAGE_ACCESSED));
 }
 
 static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
-	return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
+	return __pmd(pmd_val(pmd) | _PAGE_ACCESSED);
 }
 
 static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
-	return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)));
+	return __pmd(pmd_val(pmd) | _PAGE_WRITE);
 }
 
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 {
-	return pte_pmd(pte_wrprotect(pmd_pte(pmd)));
+	return __pmd(pmd_val(pmd) & (~_PAGE_WRITE));
 }
 
 static inline pmd_t pmd_mkclean(pmd_t pmd)
 {
-	return pte_pmd(pte_mkclean(pmd_pte(pmd)));
+	return __pmd(pmd_val(pmd) & (~_PAGE_DIRTY));
 }
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
-	return pte_pmd(pte_mkdirty(pmd_pte(pmd)));
+	return __pmd(pmd_val(pmd) | _PAGE_DIRTY);
+}
+
+#define pmd_accessible(mm, pmd)		((void)(pmd), 1)
+
+static inline void __set_pmd_at(pmd_t *pmdp, pmd_t pmd)
+{
+	if (pmd_present(pmd) && pmd_exec(pmd))
+		flush_icache_pte(pmd_pte(pmd));
+
+	set_pmd(pmdp, pmd);
 }
 
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 				pmd_t *pmdp, pmd_t pmd)
 {
 	page_table_check_pmd_set(mm, pmdp, pmd);
-	return __set_pte_at((pte_t *)pmdp, pmd_pte(pmd));
+	return __set_pmd_at(pmdp, pmd);
+}
+
+static inline void __set_pud_at(pud_t *pudp, pud_t pud)
+{
+	if (pud_present(pud) && pud_exec(pud))
+		flush_icache_pte(pud_pte(pud));
+
+	set_pud(pudp, pud);
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
 				pud_t *pudp, pud_t pud)
 {
 	page_table_check_pud_set(mm, pudp, pud);
-	return __set_pte_at((pte_t *)pudp, pud_pte(pud));
+	return __set_pud_at(pudp, pud);
 }
 
 #ifdef CONFIG_PAGE_TABLE_CHECK
@@ -826,25 +860,64 @@  static inline bool pud_user_accessible_page(pud_t pud)
 }
 #endif
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	return pmd_leaf(pmd);
-}
-
 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
 static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmdp,
 					pmd_t entry, int dirty)
 {
-	return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+	if (!pmd_same(*pmdp, entry))
+		set_pmd_at(vma->vm_mm, address, pmdp, entry);
+	/*
+	 * update_mmu_cache will unconditionally execute, handling both
+	 * the case that the PMD changed and the spurious fault case.
+	 */
+	return true;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+					unsigned long address, pmd_t *pmdp)
+{
+	pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
+
+	page_table_check_pmd_clear(mm, pmd);
+
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+					unsigned long address, pmd_t *pmdp)
+{
+	atomic_long_and(~(unsigned long)_PAGE_WRITE, (atomic_long_t *)pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+					unsigned long address, pmd_t *pmdp)
+{
+	struct mm_struct *mm = (vma)->vm_mm;
+	pmd_t pmd = pmdp_get_and_clear(mm, address, pmdp);
+
+	if (pmd_accessible(mm, pmd))
+		flush_tlb_page(vma, address);
+
+	return pmd;
 }
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmdp)
 {
-	return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+	if (!pmd_young(*pmdp))
+		return 0;
+	return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pmd_val(*pmdp));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_leaf(pmd);
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -858,13 +931,6 @@  static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 	return pmd;
 }
 
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm,
-					unsigned long address, pmd_t *pmdp)
-{
-	ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
-}
-
 #define pmdp_establish pmdp_establish
 static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 				unsigned long address, pmd_t *pmdp, pmd_t pmd)
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 8ffd64e7a24c..acef201b29f5 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -32,6 +32,7 @@  static inline int pud_bad(pud_t pud)		{ return 0; }
 static inline int pud_present(pud_t pud)	{ return 1; }
 static inline int pud_user(pud_t pud)		{ return 0; }
 static inline int pud_leaf(pud_t pud)		{ return 0; }
+static inline int pud_exec(pud_t pud)		{ return 0; }
 static inline void pud_clear(pud_t *pud)	{ }
 #define pmd_ERROR(pmd)				(pud_ERROR((pmd).pud))
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index af7639c3b0a3..b8d6e39fefc2 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1630,9 +1630,15 @@  typedef unsigned int pgtbl_mod_mask;
 #ifndef pud_leaf
 #define pud_leaf(x)	0
 #endif
+#ifndef pud_exec
+#define pud_exec(x)	0
+#endif
 #ifndef pmd_leaf
 #define pmd_leaf(x)	0
 #endif
+#ifndef pmd_exec
+#define pmd_exec(x)	0
+#endif
 
 #ifndef pgd_leaf_size
 #define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)