@@ -266,25 +266,30 @@ extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
#ifdef CONFIG_SMP
/*
- * This both sets 'delayed_rmap', and returns true. It would be an inline
- * function, except we define it before the 'struct mmu_gather'.
+ * For configurations that support batching the rmap removal, the removal is
+ * triggered by calling tlb_flush_rmaps(), which must be called after the pte(s)
+ * are cleared and the page has been added to the mmu_gather, and before the ptl
+ * lock that was held for clearing the pte is released.
*/
-#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
+#define tlb_batch_rmap(tlb) (true)
extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
+extern void tlb_discard_rmaps(struct mmu_gather *tlb);
#endif
#endif
/*
- * We have a no-op version of the rmap removal that doesn't
- * delay anything. That is used on S390, which flushes remote
- * TLBs synchronously, and on UP, which doesn't have any
- * remote TLBs to flush and is not preemptible due to this
- * all happening under the page table lock.
+ * We have a no-op version of rmap batching in which the removal helpers do
+ * nothing. That is used on S390, which flushes remote TLBs synchronously, and
+ * on UP, which doesn't have any remote TLBs to flush and is not preemptible due
+ * to this all happening under the page table lock. Here, the caller must manage
+ * each rmap removal separately.
*/
-#ifndef tlb_delay_rmap
-#define tlb_delay_rmap(tlb) (false)
-static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+#ifndef tlb_batch_rmap
+#define tlb_batch_rmap(tlb) (false)
+static inline void tlb_flush_rmaps(struct mmu_gather *tlb,
+ struct vm_area_struct *vma) { }
+static inline void tlb_discard_rmaps(struct mmu_gather *tlb) { }
#endif
/*
@@ -317,11 +322,6 @@ struct mmu_gather {
*/
unsigned int freed_tables : 1;
- /*
- * Do we have pending delayed rmap removals?
- */
- unsigned int delayed_rmap : 1;
-
/*
* at which levels have we cleared entries?
*/
@@ -343,6 +343,8 @@ struct mmu_gather {
struct mmu_gather_batch *active;
struct mmu_gather_batch local;
struct page *__pages[MMU_GATHER_BUNDLE];
+ struct mmu_gather_batch *rmap_pend;
+ unsigned int rmap_pend_first;
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
unsigned int page_size;
@@ -1405,6 +1405,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
swp_entry_t entry;
tlb_change_page_size(tlb, PAGE_SIZE);
+ tlb_discard_rmaps(tlb);
init_rss_vec(rss);
start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!pte)
@@ -1423,7 +1424,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
break;
if (pte_present(ptent)) {
- unsigned int delay_rmap;
+ unsigned int batch_rmap;
page = vm_normal_page(vma, addr, ptent);
if (unlikely(!should_zap_page(details, page)))
@@ -1438,12 +1439,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
}
- delay_rmap = 0;
+ batch_rmap = tlb_batch_rmap(tlb);
if (!PageAnon(page)) {
if (pte_dirty(ptent)) {
set_page_dirty(page);
- if (tlb_delay_rmap(tlb)) {
- delay_rmap = 1;
+ if (batch_rmap) {
+					/*
+					 * Ensure the TLB flush happens
+					 * before the rmap removal.
+					 */
force_flush = 1;
}
}
@@ -1451,12 +1455,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
mark_page_accessed(page);
}
rss[mm_counter(page)]--;
- if (!delay_rmap) {
+ if (!batch_rmap) {
page_remove_rmap(page, vma, false);
if (unlikely(page_mapcount(page) < 0))
print_bad_pte(vma, addr, ptent, page);
}
- if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+ if (unlikely(__tlb_remove_page(tlb, page, 0))) {
force_flush = 1;
addr += PAGE_SIZE;
break;
@@ -1517,10 +1521,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
arch_leave_lazy_mmu_mode();
/* Do the actual TLB flush before dropping ptl */
- if (force_flush) {
+ if (force_flush)
tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_rmaps(tlb, vma);
- }
+
+ /* Rmap removal must always happen before dropping ptl */
+ tlb_flush_rmaps(tlb, vma);
+
pte_unmap_unlock(start_pte, ptl);
/*
@@ -19,10 +19,6 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
- /* Limit batching if we have delayed rmaps pending */
- if (tlb->delayed_rmap && tlb->active != &tlb->local)
- return false;
-
batch = tlb->active;
if (batch->next) {
tlb->active = batch->next;
@@ -48,36 +44,49 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
}
#ifdef CONFIG_SMP
-static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
+static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch,
+ unsigned int first,
+ struct vm_area_struct *vma)
{
- for (int i = 0; i < batch->nr; i++) {
+ for (int i = first; i < batch->nr; i++) {
struct encoded_page *enc = batch->encoded_pages[i];
+ struct page *page = encoded_page_ptr(enc);
- if (encoded_page_flags(enc)) {
- struct page *page = encoded_page_ptr(enc);
- page_remove_rmap(page, vma, false);
- }
+ page_remove_rmap(page, vma, false);
}
}
/**
- * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
+ * tlb_flush_rmaps - do pending rmap removals
* @tlb: the current mmu_gather
+ * @vma: vm area from which all pages are removed
*
- * Note that because of how tlb_next_batch() above works, we will
- * never start multiple new batches with pending delayed rmaps, so
- * we only need to walk through the current active batch and the
- * original local one.
+ * Removes rmap from all pages added via (e.g.) __tlb_remove_page_size() since
+ * the last call to tlb_discard_rmaps() or tlb_flush_rmaps(). All of those pages
+ * must have been mapped by vma. Must be called after the pte(s) are cleared,
+ * and before the ptl lock that was held for clearing the pte is released. Pages
+ * are accounted using the order-0 folio (or base page) scheme.
*/
void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
- if (!tlb->delayed_rmap)
- return;
+ struct mmu_gather_batch *batch = tlb->rmap_pend;
- tlb_flush_rmap_batch(&tlb->local, vma);
- if (tlb->active != &tlb->local)
- tlb_flush_rmap_batch(tlb->active, vma);
- tlb->delayed_rmap = 0;
+ tlb_flush_rmap_batch(batch, tlb->rmap_pend_first, vma);
+
+ for (batch = batch->next; batch && batch->nr; batch = batch->next)
+ tlb_flush_rmap_batch(batch, 0, vma);
+
+ tlb_discard_rmaps(tlb);
+}
+
+/**
+ * tlb_discard_rmaps - discard any pending rmap removals
+ * @tlb: the current mmu_gather
+ */
+void tlb_discard_rmaps(struct mmu_gather *tlb)
+{
+ tlb->rmap_pend = tlb->active;
+ tlb->rmap_pend_first = tlb->active->nr;
}
#endif
@@ -102,6 +111,7 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb)
} while (batch->nr);
}
tlb->active = &tlb->local;
+ tlb_discard_rmaps(tlb);
}
static void tlb_batch_list_free(struct mmu_gather *tlb)
@@ -312,8 +322,9 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
tlb->local.max = ARRAY_SIZE(tlb->__pages);
tlb->active = &tlb->local;
tlb->batch_count = 0;
+ tlb->rmap_pend = &tlb->local;
+ tlb->rmap_pend_first = 0;
#endif
- tlb->delayed_rmap = 0;
tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE