@@ -305,6 +305,64 @@ void munlock_folio(struct folio *folio)
local_unlock(&mlock_fbatch.lock);
}
+/**
+ * mlock_folio_range - mlock a folio and the other folios mapped in its PTE range
+ * @folio: folio found at @pte by the caller; it is always mlocked
+ * @vma: VMA handed to vm_normal_folio() when resolving each PTE
+ * @pte: first page table entry of the range to scan
+ * @addr: user virtual address that corresponds to @pte
+ * @nr: number of consecutive PTEs to scan, starting at @pte
+ *
+ * @folio is mlocked exactly once. When @nr is greater than one, the @nr
+ * PTEs are then scanned, and every present PTE that maps a different folio
+ * (called cow_folio here; presumably a copy-on-write replacement, to be
+ * confirmed against the callers) gets that folio mlocked as well.
+ */
+void mlock_folio_range(struct folio *folio, struct vm_area_struct *vma,
+		pte_t *pte, unsigned long addr, unsigned int nr)
+{
+	unsigned int step;
+
+	mlock_folio(folio);
+	if (nr == 1)
+		return;
+
+	for (; nr > 0; pte += step, addr += step * PAGE_SIZE, nr -= step) {
+		struct folio *cow_folio;
+		unsigned long left;
+		pte_t ptent = ptep_get(pte);
+
+		/* Default: advance by a single PTE. */
+		step = 1;
+		if (!pte_present(ptent))
+			continue;
+
+		cow_folio = vm_normal_folio(vma, addr, ptent);
+		if (!cow_folio || cow_folio == folio)
+			continue;
+		/* Same filter mlock_pte_range() applies to the folios it finds. */
+		if (folio_is_zone_device(cow_folio))
+			continue;
+
+		mlock_folio(cow_folio);
+
+		/*
+		 * Skip only the pages of cow_folio from this PTE's page to
+		 * the end of the folio: the PTE may map a page in the middle
+		 * of cow_folio, so stepping by the whole folio size could
+		 * jump over PTEs that belong to other folios.
+		 */
+		left = folio_nr_pages(cow_folio) -
+			(pte_pfn(ptent) - folio_pfn(cow_folio));
+		/* Never step past the end of the range being scanned. */
+		step = min_t(unsigned long, nr, left);
+	}
+}
+
+/**
+ * munlock_folio_range - munlock a folio and the other folios mapped in its PTE range
+ * @folio: folio found at @pte by the caller; it is always munlocked
+ * @vma: VMA handed to vm_normal_folio() when resolving each PTE
+ * @pte: first page table entry of the range to scan
+ * @addr: user virtual address that corresponds to @pte
+ * @nr: number of consecutive PTEs to scan, starting at @pte
+ *
+ * @folio is munlocked exactly once. When @nr is greater than one, the @nr
+ * PTEs are then scanned, and every present PTE that maps a different folio
+ * (called cow_folio here; presumably a copy-on-write replacement, to be
+ * confirmed against the callers) gets that folio munlocked as well.
+ */
+void munlock_folio_range(struct folio *folio, struct vm_area_struct *vma,
+		pte_t *pte, unsigned long addr, unsigned int nr)
+{
+	unsigned int step;
+
+	munlock_folio(folio);
+	if (nr == 1)
+		return;
+
+	for (; nr > 0; pte += step, addr += step * PAGE_SIZE, nr -= step) {
+		struct folio *cow_folio;
+		unsigned long left;
+		pte_t ptent = ptep_get(pte);
+
+		/* Default: advance by a single PTE. */
+		step = 1;
+		if (!pte_present(ptent))
+			continue;
+
+		cow_folio = vm_normal_folio(vma, addr, ptent);
+		if (!cow_folio || cow_folio == folio)
+			continue;
+		/* Same filter mlock_pte_range() applies to the folios it finds. */
+		if (folio_is_zone_device(cow_folio))
+			continue;
+
+		munlock_folio(cow_folio);
+
+		/*
+		 * Skip only the pages of cow_folio from this PTE's page to
+		 * the end of the folio: the PTE may map a page in the middle
+		 * of cow_folio, so stepping by the whole folio size could
+		 * jump over PTEs that belong to other folios.
+		 */
+		left = folio_nr_pages(cow_folio) -
+			(pte_pfn(ptent) - folio_pfn(cow_folio));
+		/* Never step past the end of the range being scanned. */
+		step = min_t(unsigned long, nr, left);
+	}
+}
+
static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
@@ -314,6 +372,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
pte_t *start_pte, *pte;
pte_t ptent;
struct folio *folio;
+ unsigned int step = 1, nr;
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
@@ -329,24 +388,52 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
goto out;
}
- start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ pte = start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (!start_pte) {
walk->action = ACTION_AGAIN;
return 0;
}
- for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
+
+ for (; addr != end; pte += step, addr += (step << PAGE_SHIFT)) {
+ step = 1;
ptent = ptep_get(pte);
if (!pte_present(ptent))
continue;
folio = vm_normal_folio(vma, addr, ptent);
if (!folio || folio_is_zone_device(folio))
continue;
- if (folio_test_large(folio))
- continue;
- if (vma->vm_flags & VM_LOCKED)
- mlock_folio(folio);
- else
- munlock_folio(folio);
+
+ folio_get(folio);
+ nr = folio_nr_pages(folio) + folio_pfn(folio) - pte_pfn(ptent);
+ nr = min_t(unsigned int, nr, (end - addr) >> PAGE_SHIFT);
+
+ if (vma->vm_flags & VM_LOCKED) {
+ /*
+ * Only mlock a small (non-large) folio, or a large
+ * folio that is within the VMA range.
+ */
+ if (folio_test_large(folio) &&
+ !folio_in_range(folio, vma,
+ vma->vm_start, vma->vm_end)) {
+ folio_put(folio);
+ continue;
+ }
+ mlock_folio_range(folio, vma, pte, addr, nr);
+ } else {
+ /*
+ * Allow munlock of a large folio that is only partially
+ * mapped to the VMA, as it is possible that the large folio
+ * was mlocked and the VMA was split later.
+ *
+ * During memory pressure, such a large folio can be
+ * split, and the pages that are not in a VM_LOCKED VMA
+ * can be reclaimed.
+ */
+ munlock_folio_range(folio, vma, pte, addr, nr);
+ }
+
+ step = nr;
+ folio_put(folio);
}
pte_unmap(start_pte);
out: