@@ -106,6 +106,17 @@ enum napot_cont_order {
#define napot_cont_mask(order) (~(napot_cont_size(order) - 1UL))
#define napot_pte_num(order) BIT(order)
+/*
+ * Report whether @order matches one of the orders enumerated by
+ * for_each_napot_order().
+ */
+static inline bool is_napot_order(unsigned int order)
+{
+	unsigned int candidate;
+
+	for_each_napot_order(candidate) {
+		if (candidate == order)
+			return true;
+	}
+
+	return false;
+}
+
#ifdef CONFIG_RISCV_ISA_SVNAPOT
#define HUGE_MAX_HSTATE (2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE))
#else
@@ -303,6 +303,8 @@ static inline unsigned long pte_napot(pte_t pte)
return pte_val(pte) & _PAGE_NAPOT;
}
+/* True when pte is present and has the NAPOT (N) bit set. */
+#define pte_valid_napot(pte) (pte_present(pte) && pte_napot(pte))
+
static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
{
int pos = order - 1 + _PAGE_PFN_SHIFT;
@@ -312,6 +314,12 @@ static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
return __pte((pte_val(pte) & napot_mask) | napot_bit | _PAGE_NAPOT);
}
+/*
+ * Return pte with the N bit cleared. Only that bit is touched, so the
+ * caller must pass a pte whose pfn LSBs hold a real pfn and *not* the
+ * encoded mapping size.
+ */
+static inline pte_t pte_clear_napot(pte_t pte)
+{
+	unsigned long val = pte_val(pte);
+
+	val &= ~_PAGE_NAPOT;
+
+	return __pte(val);
+}
+
#else
static __always_inline bool has_svnapot(void) { return false; }
@@ -321,17 +329,14 @@ static inline unsigned long pte_napot(pte_t pte)
return 0;
}
+/* Without CONFIG_RISCV_ISA_SVNAPOT no pte is ever reported as NAPOT. */
+#define pte_valid_napot(pte) false
+
#endif /* CONFIG_RISCV_ISA_SVNAPOT */
/* Yields the page frame number (PFN) of a page table entry */
static inline unsigned long pte_pfn(pte_t pte)
{
- unsigned long res = __page_val_to_pfn(pte_val(pte));
-
- if (has_svnapot() && pte_napot(pte))
- res = res & (res - 1UL);
-
- return res;
+ return __page_val_to_pfn(pte_val(pte));
}
#define pte_page(x) pfn_to_page(pte_pfn(x))
@@ -523,9 +528,91 @@ static inline void __set_pte_at(pte_t *ptep, pte_t pteval)
set_pte(ptep, pteval);
}
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+/*
+ * Return how many ptes the entry at @ptep stands for: the NAPOT group size
+ * when *ptep is a present NAPOT pte, 1 otherwise.
+ *
+ * When @pgsize is not NULL it receives the first of PGDIR_SIZE, P4D_SIZE,
+ * PUD_SIZE and PMD_SIZE that @size reaches, or PAGE_SIZE when @size is
+ * below all of them.
+ */
+static inline int arch_contpte_get_num_contig(pte_t *ptep, unsigned long size,
+					      size_t *pgsize)
+{
+	/* Read the raw entry: it is the one carrying the mapping size. */
+	pte_t raw_pte = READ_ONCE(*ptep);
+
+	if (pgsize) {
+		size_t level_size;
+
+		if (size >= PGDIR_SIZE)
+			level_size = PGDIR_SIZE;
+		else if (size >= P4D_SIZE)
+			level_size = P4D_SIZE;
+		else if (size >= PUD_SIZE)
+			level_size = PUD_SIZE;
+		else if (size >= PMD_SIZE)
+			level_size = PMD_SIZE;
+		else
+			level_size = PAGE_SIZE;
+
+		*pgsize = level_size;
+	}
+
+	/* Entries that are not present (e.g. swap entries) count as one pte. */
+	if (!pte_valid_napot(raw_pte))
+		return 1;
+
+	return napot_pte_num(napot_cont_order(raw_pte));
+}
+#endif
+
+/*
+ * Read the pte at @ptep and return it in the form core mm expects, i.e.
+ * with a real pfn in the pfn field.
+ */
+static inline pte_t ptep_get(pte_t *ptep)
+{
+	pte_t pte = READ_ONCE(*ptep);
+
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+	/*
+	 * A present NAPOT pte is stored with the N bit set and the mapping
+	 * size encoded in its pfn LSBs. Keep the N bit, but swap the size
+	 * encoding for the index of this entry within its naturally aligned
+	 * group of ptes so that the pfn field holds the *real* pfn. set_ptes()
+	 * puts the size encoding back right before writing the page table.
+	 */
+	if (unlikely(pte_valid_napot(pte))) {
+		unsigned int order = napot_cont_order(pte);
+		int top_bit = order - 1 + _PAGE_PFN_SHIFT;
+		unsigned long size_bits = GENMASK(top_bit, _PAGE_PFN_SHIFT);
+		/* First pte of the group that ptep belongs to. */
+		pte_t *first_ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * napot_pte_num(order));
+		unsigned long val = pte_val(pte) & ~size_bits;
+
+		val += (ptep - first_ptep) << _PAGE_PFN_SHIFT;
+		pte = __pte(val);
+	}
+#endif
+
+	return pte;
+}
+#define ptep_get ptep_get
+
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval, unsigned int nr)
{
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+	if (unlikely(pte_valid_napot(pteval))) {
+		/*
+		 * ilog2() rounds down, so nr itself must also be checked
+		 * against the resulting order below.
+		 */
+		unsigned int order = ilog2(nr);
+
+		if (napot_pte_num(order) != nr || !is_napot_order(order)) {
+			/*
+			 * Something's weird, we are given a NAPOT pte but the
+			 * number of ptes does not match a known NAPOT mapping
+			 * size (either nr is not a power of two or its order
+			 * is not a NAPOT order), so clear the NAPOT bit and
+			 * map this without NAPOT support: core mm only
+			 * manipulates pte with the real pfn so we know the pte
+			 * is valid without the N bit.
+			 */
+			pr_err("Incorrect NAPOT mapping, resetting.\n");
+			pteval = pte_clear_napot(pteval);
+		} else {
+			/*
+			 * NAPOT ptes that arrive here only have the N bit set
+			 * and their pfn does not contain the mapping size, so
+			 * set that here.
+			 */
+			pteval = pte_mknapot(pteval, order);
+		}
+	}
+#endif
+
page_table_check_ptes_set(mm, ptep, pteval, nr);
for (;;) {
@@ -533,6 +620,12 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
if (--nr == 0)
break;
ptep++;
+
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+ if (unlikely(pte_valid_napot(pteval)))
+ continue;
+#endif
+
pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
}
}
@@ -256,8 +256,7 @@ void set_huge_pte_at(struct mm_struct *mm,
clear_flush(mm, addr, ptep, pgsize, pte_num);
- for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
- set_pte_at(mm, addr, ptep, pte);
+ set_ptes(mm, addr, ptep, pte, pte_num);
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
@@ -267,16 +266,16 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
int dirty)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long order;
+ size_t pgsize;
pte_t orig_pte;
- int i, pte_num;
+ int pte_num;
if (!pte_napot(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- order = napot_cont_order(pte);
- pte_num = napot_pte_num(order);
- ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, &pgsize);
+ ptep = huge_pte_offset(mm, addr, pte_num * pgsize);
+
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
if (pte_dirty(orig_pte))
@@ -285,8 +284,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
if (pte_young(orig_pte))
pte = pte_mkyoung(pte);
- for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
- set_pte_at(mm, addr, ptep, pte);
+ set_ptes(mm, addr, ptep, pte, pte_num);
return true;
}
@@ -301,7 +299,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
if (!pte_napot(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
- pte_num = napot_pte_num(napot_cont_order(orig_pte));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
return get_clear_contig(mm, addr, ptep, pte_num);
}
@@ -311,24 +309,23 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
pte_t *ptep)
{
pte_t pte = ptep_get(ptep);
- unsigned long order;
+ size_t pgsize;
pte_t orig_pte;
- int i, pte_num;
+ int pte_num;
if (!pte_napot(pte)) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
- order = napot_cont_order(pte);
- pte_num = napot_pte_num(order);
- ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, &pgsize);
+ ptep = huge_pte_offset(mm, addr, pte_num * pgsize);
+
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
orig_pte = pte_wrprotect(orig_pte);
- for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
- set_pte_at(mm, addr, ptep, orig_pte);
+ set_ptes(mm, addr, ptep, orig_pte, pte_num);
}
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
@@ -341,7 +338,7 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
if (!pte_napot(pte))
return ptep_clear_flush(vma, addr, ptep);
- pte_num = napot_pte_num(napot_cont_order(pte));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}
@@ -351,6 +348,7 @@ void huge_pte_clear(struct mm_struct *mm,
pte_t *ptep,
unsigned long sz)
{
+ size_t pgsize;
pte_t pte = ptep_get(ptep);
int i, pte_num;
@@ -359,8 +357,8 @@ void huge_pte_clear(struct mm_struct *mm,
return;
}
- pte_num = napot_pte_num(napot_cont_order(pte));
- for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+ pte_num = arch_contpte_get_num_contig(ptep, 0, &pgsize);
+ for (i = 0; i < pte_num; i++, addr += pgsize, ptep++)
pte_clear(mm, addr, ptep);
}