diff mbox series

[v1] mm: remove total_mapcount()

Message ID	20240222160943.622386-1-david@redhat.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel+bounces-76859-ouuuleilei=gmail.com@vger.kernel.org designates 147.75.80.249 as permitted sender) client-ip=147.75.80.249; From: David Hildenbrand <david@redhat.com> To: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org, David Hildenbrand <david@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, Matthew Wilcox <willy@infradead.org> Subject: [PATCH v1] mm: remove total_mapcount() Date: Thu, 22 Feb 2024 17:09:43 +0100 Message-ID: <20240222160943.622386-1-david@redhat.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	[v1] mm: remove total_mapcount() \| [v1] mm: remove total_mapcount()

Commit Message

David Hildenbrand Feb. 22, 2024, 4:09 p.m. UTC

  mm/memfd.c is the last remaining user of total_mapcount().  Let's
convert memfd_tag_pins() and memfd_wait_for_pins() to use folios
instead of pages, so we can remove total_mapcount() for good.

We always get a head page, so we can just naturally interpret it as a folio
(similar to other code).

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---

Did a quick test with write-sealing a memfd backed by THP. Seems to work
as it used to.

---
 include/linux/mm.h |  9 +--------
 mm/memfd.c         | 34 ++++++++++++++++++----------------
 2 files changed, 19 insertions(+), 24 deletions(-)

Comments

Matthew Wilcox Feb. 22, 2024, 5:13 p.m. UTC | #1

On Thu, Feb 22, 2024 at 05:09:43PM +0100, David Hildenbrand wrote:
> We always get a head page, so we can just naturally interpret is as a folio
> (similar to other code).

memfd seems rather confused about how to iterate over the page cache.
Perhaps we could sort that out and then delete total_mapcount as a
second patch?

I haven't tested this at all, but ...

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

diff --git a/mm/memfd.c b/mm/memfd.c
index d3a1ba4208c9..45e55b0e3cbe 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -29,28 +29,29 @@
 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
 #define LAST_SCAN               4       /* about 150ms max */
 
+static bool memfd_extra_refs(struct folio *folio)
+{
+	return folio_ref_count(folio) - folio_mapcount(folio) !=
+		folio_nr_pages(folio);
+}
+
 static void memfd_tag_pins(struct xa_state *xas)
 {
-	struct page *page;
+	struct folio *folio;
 	int latency = 0;
-	int cache_count;
 
 	lru_add_drain();
 
 	xas_lock_irq(xas);
-	xas_for_each(xas, page, ULONG_MAX) {
-		cache_count = 1;
-		if (!xa_is_value(page) &&
-		    PageTransHuge(page) && !PageHuge(page))
-			cache_count = HPAGE_PMD_NR;
-
-		if (!xa_is_value(page) &&
-		    page_count(page) - total_mapcount(page) != cache_count)
+	xas_for_each(xas, folio, ULONG_MAX) {
+		/* Can we have shadow/swap entries in memfd? */
+		if (xa_is_value(folio))
+			continue;
+
+		if (memfd_extra_refs(folio))
 			xas_set_mark(xas, MEMFD_TAG_PINNED);
-		if (cache_count != 1)
-			xas_set(xas, page->index + cache_count);
 
-		latency += cache_count;
+		latency++;
 		if (latency < XA_CHECK_SCHED)
 			continue;
 		latency = 0;
@@ -75,7 +76,6 @@ static void memfd_tag_pins(struct xa_state *xas)
 static int memfd_wait_for_pins(struct address_space *mapping)
 {
 	XA_STATE(xas, &mapping->i_pages, 0);
-	struct page *page;
 	int error, scan;
 
 	memfd_tag_pins(&xas);
@@ -83,7 +83,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
 		int latency = 0;
-		int cache_count;
+		struct folio *folio;
 
 		if (!xas_marked(&xas, MEMFD_TAG_PINNED))
 			break;
@@ -95,16 +95,10 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 
 		xas_set(&xas, 0);
 		xas_lock_irq(&xas);
-		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+		xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
 			bool clear = true;
 
-			cache_count = 1;
-			if (!xa_is_value(page) &&
-			    PageTransHuge(page) && !PageHuge(page))
-				cache_count = HPAGE_PMD_NR;
-
-			if (!xa_is_value(page) && cache_count !=
-			    page_count(page) - total_mapcount(page)) {
+			if (memfd_extra_refs(folio)) {
 				/*
 				 * On the last scan, we clean up all those tags
 				 * we inserted; but make a note that we still
@@ -118,8 +112,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 			if (clear)
 				xas_clear_mark(&xas, MEMFD_TAG_PINNED);
 
-			latency += cache_count;
-			if (latency < XA_CHECK_SCHED)
+			if (++latency < XA_CHECK_SCHED)
 				continue;
 			latency = 0;

David Hildenbrand Feb. 22, 2024, 5:16 p.m. UTC | #2

On 22.02.24 18:13, Matthew Wilcox wrote:
> On Thu, Feb 22, 2024 at 05:09:43PM +0100, David Hildenbrand wrote:
>> We always get a head page, so we can just naturally interpret is as a folio
>> (similar to other code).
> 
> memfd seems rather confused about how to iterate over the page cache.
> Perhaps we could sort that out and then delete total_mapcount as a
> second patch?
> 
> I haven't tested this at all, but ...
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> 
> diff --git a/mm/memfd.c b/mm/memfd.c
> index d3a1ba4208c9..45e55b0e3cbe 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -29,28 +29,29 @@
>   #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
>   #define LAST_SCAN               4       /* about 150ms max */
>   
> +static bool memfd_extra_refs(struct folio *folio)
> +{
> +	return folio_ref_count(folio) - folio_mapcount(folio) !=
> +		folio_nr_pages(folio);
> +}

That is an obvious improvement I should have realized myself.

Let me play with that.

Thanks!

diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6f4825d82965..49e22a2f6ccc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1183,7 +1183,7 @@  static inline int is_vmalloc_or_module_addr(const void *x)
  * How many times the entire folio is mapped as a single unit (eg by a
  * PMD or PUD entry).  This is probably not what you want, except for
  * debugging purposes - it does not include PTE-mapped sub-pages; look
- * at folio_mapcount() or page_mapcount() or total_mapcount() instead.
+ * at folio_mapcount() or page_mapcount() instead.
  */
 static inline int folio_entire_mapcount(struct folio *folio)
 {
@@ -1243,13 +1243,6 @@  static inline int folio_mapcount(struct folio *folio)
 	return folio_total_mapcount(folio);
 }
 
-static inline int total_mapcount(struct page *page)
-{
-	if (likely(!PageCompound(page)))
-		return atomic_read(&page->_mapcount) + 1;
-	return folio_total_mapcount(page_folio(page));
-}
-
 static inline bool folio_large_is_mapped(struct folio *folio)
 {
 	/*
diff --git a/mm/memfd.c b/mm/memfd.c
index d3a1ba4208c9..0a6c1a6ee03b 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -31,24 +31,25 @@ 
 
 static void memfd_tag_pins(struct xa_state *xas)
 {
-	struct page *page;
+	struct folio *folio;
 	int latency = 0;
 	int cache_count;
 
 	lru_add_drain();
 
 	xas_lock_irq(xas);
-	xas_for_each(xas, page, ULONG_MAX) {
+	xas_for_each(xas, folio, ULONG_MAX) {
 		cache_count = 1;
-		if (!xa_is_value(page) &&
-		    PageTransHuge(page) && !PageHuge(page))
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+		    !xa_is_value(folio) && folio_test_large(folio) &&
+		    !folio_test_hugetlb(folio))
 			cache_count = HPAGE_PMD_NR;
 
-		if (!xa_is_value(page) &&
-		    page_count(page) - total_mapcount(page) != cache_count)
+		if (!xa_is_value(folio) && cache_count !=
+		    folio_ref_count(folio) - folio_mapcount(folio))
 			xas_set_mark(xas, MEMFD_TAG_PINNED);
 		if (cache_count != 1)
-			xas_set(xas, page->index + cache_count);
+			xas_set(xas, folio->index + cache_count);
 
 		latency += cache_count;
 		if (latency < XA_CHECK_SCHED)
@@ -66,16 +67,16 @@  static void memfd_tag_pins(struct xa_state *xas)
 /*
  * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
  * via get_user_pages(), drivers might have some pending I/O without any active
- * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
+ * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all folios
  * and see whether it has an elevated ref-count. If so, we tag them and wait for
  * them to be dropped.
  * The caller must guarantee that no new user will acquire writable references
- * to those pages to avoid races.
+ * to those folios to avoid races.
  */
 static int memfd_wait_for_pins(struct address_space *mapping)
 {
 	XA_STATE(xas, &mapping->i_pages, 0);
-	struct page *page;
+	struct folio *folio;
 	int error, scan;
 
 	memfd_tag_pins(&xas);
@@ -95,20 +96,21 @@  static int memfd_wait_for_pins(struct address_space *mapping)
 
 		xas_set(&xas, 0);
 		xas_lock_irq(&xas);
-		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+		xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
 			bool clear = true;
 
 			cache_count = 1;
-			if (!xa_is_value(page) &&
-			    PageTransHuge(page) && !PageHuge(page))
+			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+			    !xa_is_value(folio) && folio_test_large(folio) &&
+			    !folio_test_hugetlb(folio))
 				cache_count = HPAGE_PMD_NR;
 
-			if (!xa_is_value(page) && cache_count !=
-			    page_count(page) - total_mapcount(page)) {
+			if (!xa_is_value(folio) && cache_count !=
+			    folio_ref_count(folio) - folio_mapcount(folio)) {
 				/*
 				 * On the last scan, we clean up all those tags
 				 * we inserted; but make a note that we still
-				 * found pages pinned.
+				 * found folios pinned.
 				 */
 				if (scan == LAST_SCAN)
 					error = -EBUSY;