[v8,3/3] arm64: mte: add compression support to mteswap.c

Message ID 20231103160335.2464561-4-glider@google.com
State New
Headers
Series Implement MTE tag compression for swapped pages |

Commit Message

Alexander Potapenko Nov. 3, 2023, 4:03 p.m. UTC
  Update mteswap.c to perform inline compression of memory tags when
possible.

If CONFIG_ARM64_MTE_COMP is enabled, mteswap.c will attemt to compress
saved tags for a struct page and store them directly in Xarray entry
instead of wasting heap space.

Soon after booting Android, tag compression saves ~2x memory previously
spent by mteswap.c on tag allocations. On a moderately loaded device with
~20% tagged pages, this leads to saving several megabytes of kernel heap:

  # cat /sys/kernel/debug/mteswap/stats
  8 bytes: 102496 allocations, 67302 deallocations
  128 bytes: 212234 allocations, 178278 deallocations
  uncompressed tag storage size: 8851200
  compressed tag storage size: 4346368

Signed-off-by: Alexander Potapenko <glider@google.com>

---
 v8:
  - adapt to the new compression API, abandon mteswap_{no,}comp.c
  - move stats collection to mteswap.c

 v5:
  - drop a dead variable from _mte_free_saved_tags() in mteswap_comp.c
  - ensure MTE compression works with arbitrary page sizes
  - update patch description

 v4:
  - minor code simplifications suggested by Andy Shevchenko, added
    missing header dependencies
  - changed compression API names to reflect modifications made to
    memcomp.h (as suggested by Yury Norov)

 v3:
  - Addressed comments by Andy Shevchenko in another patch:
   - fixed includes order
   - replaced u64 with unsigned long
   - added MODULE_IMPORT_NS(MTECOMP)
---
 arch/arm64/mm/mteswap.c | 88 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 84 insertions(+), 4 deletions(-)
  

Comments

Yury Norov Nov. 3, 2023, 10:05 p.m. UTC | #1
On Fri, Nov 03, 2023 at 05:03:35PM +0100, Alexander Potapenko wrote:
> Update mteswap.c to perform inline compression of memory tags when
> possible.
> 
> If CONFIG_ARM64_MTE_COMP is enabled, mteswap.c will attemt to compress

attempt?

> saved tags for a struct page and store them directly in Xarray entry
> instead of wasting heap space.
> 
> Soon after booting Android, tag compression saves ~2x memory previously
> spent by mteswap.c on tag allocations. On a moderately loaded device with
> ~20% tagged pages, this leads to saving several megabytes of kernel heap:
> 
>   # cat /sys/kernel/debug/mteswap/stats
>   8 bytes: 102496 allocations, 67302 deallocations
>   128 bytes: 212234 allocations, 178278 deallocations
>   uncompressed tag storage size: 8851200
>   compressed tag storage size: 4346368

Can you align them like this:

   # cat /sys/kernel/debug/mteswap/stats
   8 bytes:      102496 allocations,    67302 deallocations
   128 bytes:    212234 allocations,   178278 deallocations
   uncompressed tag storage size:    8851200
   compressed tag storage size:      4346368

And also, can you mention a new file in the documentation?

IIRC, it was my suggestion to measure some stats... If so, can you add:

Suggested-by: Yury Norov <yury.norov@gmail.com> # for stats

> Signed-off-by: Alexander Potapenko <glider@google.com>
> 
> ---
>  v8:
>   - adapt to the new compression API, abandon mteswap_{no,}comp.c
>   - move stats collection to mteswap.c
> 
>  v5:
>   - drop a dead variable from _mte_free_saved_tags() in mteswap_comp.c
>   - ensure MTE compression works with arbitrary page sizes
>   - update patch description
> 
>  v4:
>   - minor code simplifications suggested by Andy Shevchenko, added
>     missing header dependencies
>   - changed compression API names to reflect modifications made to
>     memcomp.h (as suggested by Yury Norov)
> 
>  v3:
>   - Addressed comments by Andy Shevchenko in another patch:
>    - fixed includes order
>    - replaced u64 with unsigned long
>    - added MODULE_IMPORT_NS(MTECOMP)
> ---
>  arch/arm64/mm/mteswap.c | 88 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 84 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index a31833e3ddc54..0f558942d88b8 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -1,28 +1,48 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  
> +#include <linux/debugfs.h>
>  #include <linux/pagemap.h>
>  #include <linux/xarray.h>
>  #include <linux/slab.h>
>  #include <linux/swap.h>
>  #include <linux/swapops.h>
>  #include <asm/mte.h>
> +#include <asm/mtecomp.h>
> +#include "mtecomp.h"
> +
> +enum mteswap_counters {
> +	MTESWAP_CTR_INLINE = 0,
> +	MTESWAP_CTR_NOINLINE,
> +	MTESWAP_CTR_SIZE
> +};
> +static atomic_long_t alloc_counters[MTESWAP_CTR_SIZE];
> +static atomic_long_t dealloc_counters[MTESWAP_CTR_SIZE];

I think it's worth to add them and all the book keeping code in
a separate patch? Also can you consider making them configurable,
maybe depending on CONFIG_ARM64_MTE_SWAP_STAT?..

>  static DEFINE_XARRAY(mte_pages);
>  
>  void *mte_allocate_tag_storage(void)
>  {
> -	/* tags granule is 16 bytes, 2 tags stored per byte */
> -	return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> +	void *ret;
> +
> +	ret = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> +	if (ret)
> +		atomic_long_inc(&alloc_counters[MTESWAP_CTR_NOINLINE]);
> +	return ret;
>  }
>  
>  void mte_free_tag_storage(char *storage)

If you use a term 'free' here, the counter name should probably
reflect that.

>  {
> -	kfree(storage);
> +	if (!mte_is_compressed(storage)) {
> +		kfree(storage);
> +		atomic_long_dec(&alloc_counters[MTESWAP_CTR_NOINLINE]);

s/NOINLINE/OUTLINE ?

> +	} else {
> +		atomic_long_dec(&alloc_counters[MTESWAP_CTR_INLINE]);
> +	}
>  }
>  
>  int mte_save_tags(struct page *page)
>  {
> -	void *tag_storage, *ret;
> +	void *tag_storage, *ret, *compressed;
>  
>  	if (!page_mte_tagged(page))
>  		return 0;
> @@ -32,6 +52,12 @@ int mte_save_tags(struct page *page)
>  		return -ENOMEM;
>  
>  	mte_save_page_tags(page_address(page), tag_storage);
> +	compressed = mte_compress(tag_storage);
> +	if (compressed) {
> +		mte_free_tag_storage(tag_storage);
> +		tag_storage = (void *)compressed;

But 'compressed' is already 'void *', what for typecasting?
Also, it's a bad naming - adjective implies bool type. I'd name it
'compressed_tag', or similar.

> +		atomic_long_inc(&alloc_counters[MTESWAP_CTR_INLINE]);
> +	}

Is it possible to move all this conditional inside the function call?
I feel like it should be a single-line:

	tag_storage = mte_compress(tag_storage);

So that client code will be free of housekeeping details.

>  
>  	/* lookup the swap entry.val from the page */
>  	ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> @@ -50,13 +76,20 @@ int mte_save_tags(struct page *page)
>  void mte_restore_tags(swp_entry_t entry, struct page *page)
>  {
>  	void *tags = xa_load(&mte_pages, entry.val);
> +	void *tag_storage = NULL;
>  
>  	if (!tags)
>  		return;
>  
>  	if (try_page_mte_tagging(page)) {
> +		if (mte_is_compressed(tags)) {
> +			tag_storage = mte_allocate_tag_storage();
> +			mte_decompress(tags, tag_storage);
> +			tags = tag_storage;
> +		}

Same here, if it's possible, I'd prefer a single line call instead of
the above chunk:
       
        tags = mte_decompress(tags);
  	if (!tags)
  		return;

And make sure that the decompress code will take care of details.

This would have a clear meaning: we add a compression/decompression
option which is as simple as calling corresponding functions in
mte_save/restore_tags(); and the rest of mte code is untouched

Thanks,
Yury

>  		mte_restore_page_tags(page_address(page), tags);
>  		set_page_mte_tagged(page);
> +		mte_free_tag_storage(tag_storage);
>  	}
>  }
>  
> @@ -83,3 +116,50 @@ void mte_invalidate_tags_area(int type)
>  	}
>  	xa_unlock(&mte_pages);
>  }
> +
> +/* DebugFS interface. */
> +static int stats_show(struct seq_file *seq, void *v)
> +{
> +	unsigned long total_mem_alloc = 0, total_mem_dealloc = 0;
> +	unsigned long total_num_alloc = 0, total_num_dealloc = 0;
> +	unsigned long sizes[2] = { 8, MTE_PAGE_TAG_STORAGE };
> +	long alloc, dealloc;
> +	unsigned long size;
> +	int i;
> +
> +	for (i = 0; i < MTESWAP_CTR_SIZE; i++) {
> +		alloc = atomic_long_read(&alloc_counters[i]);
> +		dealloc = atomic_long_read(&dealloc_counters[i]);
> +		total_num_alloc += alloc;
> +		total_num_dealloc += dealloc;
> +		size = sizes[i];
> +		/*
> +		 * Do not count 8-byte buffers towards compressed tag storage
> +		 * size.
> +		 */
> +		if (i) {
> +			total_mem_alloc += (size * alloc);
> +			total_mem_dealloc += (size * dealloc);
> +		}
> +		seq_printf(seq,
> +			   "%lu bytes: %lu allocations, %lu deallocations\n",
> +			   size, alloc, dealloc);
> +	}
> +	seq_printf(seq, "uncompressed tag storage size: %lu\n",
> +		   (total_num_alloc - total_num_dealloc) *
> +			   MTE_PAGE_TAG_STORAGE);
> +	seq_printf(seq, "compressed tag storage size: %lu\n",
> +		   total_mem_alloc - total_mem_dealloc);
> +	return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(stats);
> +
> +static int mteswap_init(void)
> +{
> +	struct dentry *mteswap_dir;
> +
> +	mteswap_dir = debugfs_create_dir("mteswap", NULL);
> +	debugfs_create_file("stats", 0444, mteswap_dir, NULL, &stats_fops);
> +	return 0;
> +}
> +module_init(mteswap_init);
> -- 
> 2.42.0.869.gea05f2083d-goog
  
Alexander Potapenko Nov. 8, 2023, 1:53 p.m. UTC | #2
> >
> > If CONFIG_ARM64_MTE_COMP is enabled, mteswap.c will attemt to compress
>
> attempt?

Good catch!

> >
> >   # cat /sys/kernel/debug/mteswap/stats
> >   8 bytes: 102496 allocations, 67302 deallocations
> >   128 bytes: 212234 allocations, 178278 deallocations
> >   uncompressed tag storage size: 8851200
> >   compressed tag storage size: 4346368
>
> Can you align them like this:
>
>    # cat /sys/kernel/debug/mteswap/stats
>    8 bytes:      102496 allocations,    67302 deallocations
>    128 bytes:    212234 allocations,   178278 deallocations
>    uncompressed tag storage size:    8851200
>    compressed tag storage size:      4346368

Ok, will do in v9.

> And also, can you mention a new file in the documentation?

Sure.

> IIRC, it was my suggestion to measure some stats... If so, can you add:
>
> Suggested-by: Yury Norov <yury.norov@gmail.com> # for stats

Will do.


> > +static atomic_long_t alloc_counters[MTESWAP_CTR_SIZE];
> > +static atomic_long_t dealloc_counters[MTESWAP_CTR_SIZE];
>
> I think it's worth to add them and all the book keeping code in
> a separate patch? Also can you consider making them configurable,
> maybe depending on CONFIG_ARM64_MTE_SWAP_STAT?..

Sounds fine, I'll split this code into a separate patch.

> >  static DEFINE_XARRAY(mte_pages);
> >
> >  void *mte_allocate_tag_storage(void)
> >  {
> > -     /* tags granule is 16 bytes, 2 tags stored per byte */
> > -     return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> > +     void *ret;
> > +
> > +     ret = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> > +     if (ret)
> > +             atomic_long_inc(&alloc_counters[MTESWAP_CTR_NOINLINE]);
> > +     return ret;
> >  }
> >
> >  void mte_free_tag_storage(char *storage)
>
> If you use a term 'free' here, the counter name should probably
> reflect that.

I still want to keep the terms "allocation/deallocation" in the
user-facing code (in the /sys/ file) though.
So renaming the counter here will cause a mismatch between its name
and the stats output.

>
> >  {
> > -     kfree(storage);
> > +     if (!mte_is_compressed(storage)) {
> > +             kfree(storage);
> > +             atomic_long_dec(&alloc_counters[MTESWAP_CTR_NOINLINE]);
>
> s/NOINLINE/OUTLINE ?

Done.

> >       mte_save_page_tags(page_address(page), tag_storage);
> > +     compressed = mte_compress(tag_storage);
> > +     if (compressed) {
> > +             mte_free_tag_storage(tag_storage);
> > +             tag_storage = (void *)compressed;
>
> But 'compressed' is already 'void *', what for typecasting?

Yeah, it used to be unsigned long or something like that. Removed the cast.

> Also, it's a bad naming - adjective implies bool type. I'd name it
> 'compressed_tag', or similar.
>
> > +             atomic_long_inc(&alloc_counters[MTESWAP_CTR_INLINE]);
> > +     }
>
> Is it possible to move all this conditional inside the function call?
> I feel like it should be a single-line:
>
>         tag_storage = mte_compress(tag_storage);
>
> So that client code will be free of housekeeping details.

The problem is that this assignment destroys the current value of
tag_storage, which means mte_compress() will have to deal with memory
deallocation.
That, in turn, will introduce an unnecessary dependency between
mtecomp.c and mteswap.c (so that the former can call
mte_free_tag_storage()).
Given that we still have to deal with allocations in mteswap.c, I
don't think it's worth the hassle.

>
> >
> >       /* lookup the swap entry.val from the page */
> >       ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> > @@ -50,13 +76,20 @@ int mte_save_tags(struct page *page)
> >  void mte_restore_tags(swp_entry_t entry, struct page *page)
> >  {
> >       void *tags = xa_load(&mte_pages, entry.val);
> > +     void *tag_storage = NULL;
> >
> >       if (!tags)
> >               return;
> >
> >       if (try_page_mte_tagging(page)) {
> > +             if (mte_is_compressed(tags)) {
> > +                     tag_storage = mte_allocate_tag_storage();
> > +                     mte_decompress(tags, tag_storage);
> > +                     tags = tag_storage;
> > +             }
>
> Same here, if it's possible, I'd prefer a single line call instead of
> the above chunk:
>
>         tags = mte_decompress(tags);
>         if (!tags)
>                 return;

Same here, we'd have to make mte_decompress() allocate/deallocate
memory in a way consistent with mteswap.c allocations.
  
Alexander Potapenko Nov. 13, 2023, 10:45 a.m. UTC | #3
> > >
> > >  void mte_free_tag_storage(char *storage)
> >
> > If you use a term 'free' here, the counter name should probably
> > reflect that.
>
> I still want to keep the terms "allocation/deallocation" in the
> user-facing code (in the /sys/ file) though.
> So renaming the counter here will cause a mismatch between its name
> and the stats output.

To move forward, I am going to send out v9 that addresses your
comments except for this one and for moving the conditionals inside
mte_compress() and mte_decompress().
Happy to discuss those three points further.
  

Patch

diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index a31833e3ddc54..0f558942d88b8 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -1,28 +1,48 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/xarray.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <asm/mte.h>
+#include <asm/mtecomp.h>
+#include "mtecomp.h"
+
+enum mteswap_counters {
+	MTESWAP_CTR_INLINE = 0,
+	MTESWAP_CTR_NOINLINE,
+	MTESWAP_CTR_SIZE
+};
+static atomic_long_t alloc_counters[MTESWAP_CTR_SIZE];
+static atomic_long_t dealloc_counters[MTESWAP_CTR_SIZE];
 
 static DEFINE_XARRAY(mte_pages);
 
 void *mte_allocate_tag_storage(void)
 {
-	/* tags granule is 16 bytes, 2 tags stored per byte */
-	return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
+	void *ret;
+
+	ret = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
+	if (ret)
+		atomic_long_inc(&alloc_counters[MTESWAP_CTR_NOINLINE]);
+	return ret;
 }
 
 void mte_free_tag_storage(char *storage)
 {
-	kfree(storage);
+	if (!mte_is_compressed(storage)) {
+		kfree(storage);
+		atomic_long_dec(&alloc_counters[MTESWAP_CTR_NOINLINE]);
+	} else {
+		atomic_long_dec(&alloc_counters[MTESWAP_CTR_INLINE]);
+	}
 }
 
 int mte_save_tags(struct page *page)
 {
-	void *tag_storage, *ret;
+	void *tag_storage, *ret, *compressed;
 
 	if (!page_mte_tagged(page))
 		return 0;
@@ -32,6 +52,12 @@  int mte_save_tags(struct page *page)
 		return -ENOMEM;
 
 	mte_save_page_tags(page_address(page), tag_storage);
+	compressed = mte_compress(tag_storage);
+	if (compressed) {
+		mte_free_tag_storage(tag_storage);
+		tag_storage = (void *)compressed;
+		atomic_long_inc(&alloc_counters[MTESWAP_CTR_INLINE]);
+	}
 
 	/* lookup the swap entry.val from the page */
 	ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
@@ -50,13 +76,20 @@  int mte_save_tags(struct page *page)
 void mte_restore_tags(swp_entry_t entry, struct page *page)
 {
 	void *tags = xa_load(&mte_pages, entry.val);
+	void *tag_storage = NULL;
 
 	if (!tags)
 		return;
 
 	if (try_page_mte_tagging(page)) {
+		if (mte_is_compressed(tags)) {
+			tag_storage = mte_allocate_tag_storage();
+			mte_decompress(tags, tag_storage);
+			tags = tag_storage;
+		}
 		mte_restore_page_tags(page_address(page), tags);
 		set_page_mte_tagged(page);
+		mte_free_tag_storage(tag_storage);
 	}
 }
 
@@ -83,3 +116,50 @@  void mte_invalidate_tags_area(int type)
 	}
 	xa_unlock(&mte_pages);
 }
+
+/* DebugFS interface. */
+static int stats_show(struct seq_file *seq, void *v)
+{
+	unsigned long total_mem_alloc = 0, total_mem_dealloc = 0;
+	unsigned long total_num_alloc = 0, total_num_dealloc = 0;
+	unsigned long sizes[2] = { 8, MTE_PAGE_TAG_STORAGE };
+	long alloc, dealloc;
+	unsigned long size;
+	int i;
+
+	for (i = 0; i < MTESWAP_CTR_SIZE; i++) {
+		alloc = atomic_long_read(&alloc_counters[i]);
+		dealloc = atomic_long_read(&dealloc_counters[i]);
+		total_num_alloc += alloc;
+		total_num_dealloc += dealloc;
+		size = sizes[i];
+		/*
+		 * Do not count 8-byte buffers towards compressed tag storage
+		 * size.
+		 */
+		if (i) {
+			total_mem_alloc += (size * alloc);
+			total_mem_dealloc += (size * dealloc);
+		}
+		seq_printf(seq,
+			   "%lu bytes: %lu allocations, %lu deallocations\n",
+			   size, alloc, dealloc);
+	}
+	seq_printf(seq, "uncompressed tag storage size: %lu\n",
+		   (total_num_alloc - total_num_dealloc) *
+			   MTE_PAGE_TAG_STORAGE);
+	seq_printf(seq, "compressed tag storage size: %lu\n",
+		   total_mem_alloc - total_mem_dealloc);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+static int mteswap_init(void)
+{
+	struct dentry *mteswap_dir;
+
+	mteswap_dir = debugfs_create_dir("mteswap", NULL);
+	debugfs_create_file("stats", 0444, mteswap_dir, NULL, &stats_fops);
+	return 0;
+}
+module_init(mteswap_init);