[26/31] mm/huge_memory: split huge pmd under one pte_offset_map()

Message ID 3f442a9c-af6d-573d-1ad1-f6f413b1abc9@google.com
State New
Series mm: allow pte_offset_map[_lock]() to fail

Commit Message

Hugh Dickins May 22, 2023, 5:23 a.m. UTC
Make __split_huge_zero_page_pmd() use a single pte_offset_map() to sweep
the extent: it's already under pmd_lock(), so this is no worse for latency;
and since it's supposed to have full control of the just-withdrawn page
table, here choose to VM_BUG_ON() if it were to fail.  And please don't
increment haddr by PAGE_SIZE; that should remain huge-aligned: declare
a separate addr instead (not a bugfix, but incrementing haddr was deceptive).

Do the same in __split_huge_pmd_locked() (which had already declared a
separate addr); and change its BUG_ON(!pte_none) to VM_BUG_ON, for
consistency with the zero-page path (those deposited page tables are
sometimes victims of random corruption).

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/huge_memory.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)
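
For readers skimming the diff below, this is the shape of the change in
isolation, as a hedged sketch in kernel-style C (illustrative, not the
verbatim mm/huge_memory.c code; the zero-page case is shown, with the
pte_mkspecial()/uffd-wp details elided):

	/* Before: map and unmap the page table once per pte slot. */
	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
		pte_t *pte = pte_offset_map(&_pmd, addr);
		pte_t entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot);

		VM_BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, entry);
		pte_unmap(pte);
	}

	/* After: map once, walk the slots by pointer, unmap once. */
	pte = pte_offset_map(&_pmd, haddr);
	VM_BUG_ON(!pte);	/* just withdrawn, under pmd_lock(): must succeed */
	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
		pte_t entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot);

		VM_BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, entry);
		pte++;
	}
	pte_unmap(pte - 1);	/* pte has advanced one past the last slot */

The VM_BUG_ON(!pte) exists because, elsewhere in this series,
pte_offset_map() becomes allowed to fail (returning NULL); here the caller
holds pmd_lock() and has just withdrawn the page table, so a NULL return
could only mean corruption.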
  

Comments

Yang Shi May 22, 2023, 11:35 p.m. UTC | #1
On Sun, May 21, 2023 at 10:23 PM Hugh Dickins <hughd@google.com> wrote:
>
> Make __split_huge_zero_page_pmd() use a single pte_offset_map() to sweep
> the extent: it's already under pmd_lock(), so this is no worse for latency;
> and since it's supposed to have full control of the just-withdrawn page
> table, here choose to VM_BUG_ON() if it were to fail.  And please don't
> increment haddr by PAGE_SIZE; that should remain huge-aligned: declare
> a separate addr instead (not a bugfix, but incrementing haddr was deceptive).
>
> Do the same in __split_huge_pmd_locked() (which had already declared a
> separate addr); and change its BUG_ON(!pte_none) to VM_BUG_ON, for
> consistency with the zero-page path (those deposited page tables are
> sometimes victims of random corruption).
>
> Signed-off-by: Hugh Dickins <hughd@google.com>

Reviewed-by: Yang Shi <shy828301@gmail.com>


Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d4bd5fa7c823..839c13fa0bbe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2037,6 +2037,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	pgtable_t pgtable;
 	pmd_t _pmd, old_pmd;
+	unsigned long addr;
+	pte_t *pte;
 	int i;
 
 	/*
@@ -2052,17 +2054,20 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
 
-	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
-		pte_t *pte, entry;
-		entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+	pte = pte_offset_map(&_pmd, haddr);
+	VM_BUG_ON(!pte);
+	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+		pte_t entry;
+
+		entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot);
 		entry = pte_mkspecial(entry);
 		if (pmd_uffd_wp(old_pmd))
 			entry = pte_mkuffd_wp(entry);
-		pte = pte_offset_map(&_pmd, haddr);
 		VM_BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, haddr, pte, entry);
-		pte_unmap(pte);
+		set_pte_at(mm, addr, pte, entry);
+		pte++;
 	}
+	pte_unmap(pte - 1);
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
 }
@@ -2077,6 +2082,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false;
 	bool anon_exclusive = false, dirty = false;
 	unsigned long addr;
+	pte_t *pte;
 	int i;
 
 	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2205,8 +2211,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
 
+	pte = pte_offset_map(&_pmd, haddr);
+	VM_BUG_ON(!pte);
 	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
-		pte_t entry, *pte;
+		pte_t entry;
 		/*
 		 * Note that NUMA hinting access restrictions are not
 		 * transferred to avoid any possibility of altering
@@ -2249,11 +2257,11 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_mkuffd_wp(entry);
 			page_add_anon_rmap(page + i, vma, addr, false);
 		}
-		pte = pte_offset_map(&_pmd, addr);
-		BUG_ON(!pte_none(*pte));
+		VM_BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, entry);
-		pte_unmap(pte);
+		pte++;
 	}
+	pte_unmap(pte - 1);
 
 	if (!pmd_migration)
 		page_remove_rmap(page, vma, true);
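
A brief editorial note on pte_unmap(pte - 1), which can read as an
off-by-one at first glance: after the loop, pte points one entry past the
end of the page-table page, and with CONFIG_HIGHPTE pte_unmap() must be
handed an address within the kmapped page, so the code steps back to the
last slot it actually wrote.  (That HIGHPTE rationale is the usual
reasoning, inferred here rather than stated in the commit message.)

	pte = pte_offset_map(&_pmd, haddr);	/* kmaps the page-table page */
	/* ... loop advances pte HPAGE_PMD_NR times ... */
	pte_unmap(pte - 1);			/* still within the mapped page */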