[v8,3/3] arm64: mte: add compression support to mteswap.c
Commit Message
Update mteswap.c to perform inline compression of memory tags when
possible.
If CONFIG_ARM64_MTE_COMP is enabled, mteswap.c will attemt to compress
saved tags for a struct page and store them directly in Xarray entry
instead of wasting heap space.
Soon after booting Android, tag compression saves ~2x memory previously
spent by mteswap.c on tag allocations. On a moderately loaded device with
~20% tagged pages, this leads to saving several megabytes of kernel heap:
# cat /sys/kernel/debug/mteswap/stats
8 bytes: 102496 allocations, 67302 deallocations
128 bytes: 212234 allocations, 178278 deallocations
uncompressed tag storage size: 8851200
compressed tag storage size: 4346368
Signed-off-by: Alexander Potapenko <glider@google.com>
---
v8:
- adapt to the new compression API, abandon mteswap_{no,}comp.c
- move stats collection to mteswap.c
v5:
- drop a dead variable from _mte_free_saved_tags() in mteswap_comp.c
- ensure MTE compression works with arbitrary page sizes
- update patch description
v4:
- minor code simplifications suggested by Andy Shevchenko, added
missing header dependencies
- changed compression API names to reflect modifications made to
memcomp.h (as suggested by Yury Norov)
v3:
- Addressed comments by Andy Shevchenko in another patch:
- fixed includes order
- replaced u64 with unsigned long
- added MODULE_IMPORT_NS(MTECOMP)
---
arch/arm64/mm/mteswap.c | 88 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 84 insertions(+), 4 deletions(-)
Comments
On Fri, Nov 03, 2023 at 05:03:35PM +0100, Alexander Potapenko wrote:
> Update mteswap.c to perform inline compression of memory tags when
> possible.
>
> If CONFIG_ARM64_MTE_COMP is enabled, mteswap.c will attemt to compress
attempt?
> saved tags for a struct page and store them directly in Xarray entry
> instead of wasting heap space.
>
> Soon after booting Android, tag compression saves ~2x memory previously
> spent by mteswap.c on tag allocations. On a moderately loaded device with
> ~20% tagged pages, this leads to saving several megabytes of kernel heap:
>
> # cat /sys/kernel/debug/mteswap/stats
> 8 bytes: 102496 allocations, 67302 deallocations
> 128 bytes: 212234 allocations, 178278 deallocations
> uncompressed tag storage size: 8851200
> compressed tag storage size: 4346368
Can you align them like this:
# cat /sys/kernel/debug/mteswap/stats
8 bytes: 102496 allocations, 67302 deallocations
128 bytes: 212234 allocations, 178278 deallocations
uncompressed tag storage size: 8851200
compressed tag storage size: 4346368
And also, can you mention a new file in the documentation?
IIRC, it was my suggestion to measure some stats... If so, can you add:
Suggested-by: Yury Norov <yury.norov@gmail.com> # for stats
> Signed-off-by: Alexander Potapenko <glider@google.com>
>
> ---
> v8:
> - adapt to the new compression API, abandon mteswap_{no,}comp.c
> - move stats collection to mteswap.c
>
> v5:
> - drop a dead variable from _mte_free_saved_tags() in mteswap_comp.c
> - ensure MTE compression works with arbitrary page sizes
> - update patch description
>
> v4:
> - minor code simplifications suggested by Andy Shevchenko, added
> missing header dependencies
> - changed compression API names to reflect modifications made to
> memcomp.h (as suggested by Yury Norov)
>
> v3:
> - Addressed comments by Andy Shevchenko in another patch:
> - fixed includes order
> - replaced u64 with unsigned long
> - added MODULE_IMPORT_NS(MTECOMP)
> ---
> arch/arm64/mm/mteswap.c | 88 +++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 84 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index a31833e3ddc54..0f558942d88b8 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -1,28 +1,48 @@
> // SPDX-License-Identifier: GPL-2.0-only
>
> +#include <linux/debugfs.h>
> #include <linux/pagemap.h>
> #include <linux/xarray.h>
> #include <linux/slab.h>
> #include <linux/swap.h>
> #include <linux/swapops.h>
> #include <asm/mte.h>
> +#include <asm/mtecomp.h>
> +#include "mtecomp.h"
> +
> +enum mteswap_counters {
> + MTESWAP_CTR_INLINE = 0,
> + MTESWAP_CTR_NOINLINE,
> + MTESWAP_CTR_SIZE
> +};
> +static atomic_long_t alloc_counters[MTESWAP_CTR_SIZE];
> +static atomic_long_t dealloc_counters[MTESWAP_CTR_SIZE];
I think it's worth to add them and all the book keeping code in
a separate patch? Also can you consider making them configurable,
maybe depending on CONFIG_ARM64_MTE_SWAP_STAT?..
> static DEFINE_XARRAY(mte_pages);
>
> void *mte_allocate_tag_storage(void)
> {
> - /* tags granule is 16 bytes, 2 tags stored per byte */
> - return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> + void *ret;
> +
> + ret = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> + if (ret)
> + atomic_long_inc(&alloc_counters[MTESWAP_CTR_NOINLINE]);
> + return ret;
> }
>
> void mte_free_tag_storage(char *storage)
If you use a term 'free' here, the counter name should probably
reflect that.
> {
> - kfree(storage);
> + if (!mte_is_compressed(storage)) {
> + kfree(storage);
> + atomic_long_dec(&alloc_counters[MTESWAP_CTR_NOINLINE]);
s/NOINLINE/OUTLINE ?
> + } else {
> + atomic_long_dec(&alloc_counters[MTESWAP_CTR_INLINE]);
> + }
> }
>
> int mte_save_tags(struct page *page)
> {
> - void *tag_storage, *ret;
> + void *tag_storage, *ret, *compressed;
>
> if (!page_mte_tagged(page))
> return 0;
> @@ -32,6 +52,12 @@ int mte_save_tags(struct page *page)
> return -ENOMEM;
>
> mte_save_page_tags(page_address(page), tag_storage);
> + compressed = mte_compress(tag_storage);
> + if (compressed) {
> + mte_free_tag_storage(tag_storage);
> + tag_storage = (void *)compressed;
But 'compressed' is already 'void *', what for typecasting?
Also, it's a bad naming - adjective implies bool type. I'd name it
'compressed_tag', or similar.
> + atomic_long_inc(&alloc_counters[MTESWAP_CTR_INLINE]);
> + }
Is it possible to move all this conditional inside the function call?
I feel like it should be a single-line:
tag_storage = mte_compress(tag_storage);
So that client code will be free of housekeeping details.
>
> /* lookup the swap entry.val from the page */
> ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> @@ -50,13 +76,20 @@ int mte_save_tags(struct page *page)
> void mte_restore_tags(swp_entry_t entry, struct page *page)
> {
> void *tags = xa_load(&mte_pages, entry.val);
> + void *tag_storage = NULL;
>
> if (!tags)
> return;
>
> if (try_page_mte_tagging(page)) {
> + if (mte_is_compressed(tags)) {
> + tag_storage = mte_allocate_tag_storage();
> + mte_decompress(tags, tag_storage);
> + tags = tag_storage;
> + }
Same here, if it's possible, I'd prefer a single line call instead of
the above chunk:
tags = mte_decompress(tags);
if (!tags)
return;
And make sure that the decompress code will take care of details.
This would have a clear meaning: we add a compression/decompression
option which is as simple as calling corresponding functions in
mte_save/restore_tags(); and the rest of mte code is untouched
Thanks,
Yury
> mte_restore_page_tags(page_address(page), tags);
> set_page_mte_tagged(page);
> + mte_free_tag_storage(tag_storage);
> }
> }
>
> @@ -83,3 +116,50 @@ void mte_invalidate_tags_area(int type)
> }
> xa_unlock(&mte_pages);
> }
> +
> +/* DebugFS interface. */
> +static int stats_show(struct seq_file *seq, void *v)
> +{
> + unsigned long total_mem_alloc = 0, total_mem_dealloc = 0;
> + unsigned long total_num_alloc = 0, total_num_dealloc = 0;
> + unsigned long sizes[2] = { 8, MTE_PAGE_TAG_STORAGE };
> + long alloc, dealloc;
> + unsigned long size;
> + int i;
> +
> + for (i = 0; i < MTESWAP_CTR_SIZE; i++) {
> + alloc = atomic_long_read(&alloc_counters[i]);
> + dealloc = atomic_long_read(&dealloc_counters[i]);
> + total_num_alloc += alloc;
> + total_num_dealloc += dealloc;
> + size = sizes[i];
> + /*
> + * Do not count 8-byte buffers towards compressed tag storage
> + * size.
> + */
> + if (i) {
> + total_mem_alloc += (size * alloc);
> + total_mem_dealloc += (size * dealloc);
> + }
> + seq_printf(seq,
> + "%lu bytes: %lu allocations, %lu deallocations\n",
> + size, alloc, dealloc);
> + }
> + seq_printf(seq, "uncompressed tag storage size: %lu\n",
> + (total_num_alloc - total_num_dealloc) *
> + MTE_PAGE_TAG_STORAGE);
> + seq_printf(seq, "compressed tag storage size: %lu\n",
> + total_mem_alloc - total_mem_dealloc);
> + return 0;
> +}
> +DEFINE_SHOW_ATTRIBUTE(stats);
> +
> +static int mteswap_init(void)
> +{
> + struct dentry *mteswap_dir;
> +
> + mteswap_dir = debugfs_create_dir("mteswap", NULL);
> + debugfs_create_file("stats", 0444, mteswap_dir, NULL, &stats_fops);
> + return 0;
> +}
> +module_init(mteswap_init);
> --
> 2.42.0.869.gea05f2083d-goog
> >
> > If CONFIG_ARM64_MTE_COMP is enabled, mteswap.c will attemt to compress
>
> attempt?
Good catch!
> >
> > # cat /sys/kernel/debug/mteswap/stats
> > 8 bytes: 102496 allocations, 67302 deallocations
> > 128 bytes: 212234 allocations, 178278 deallocations
> > uncompressed tag storage size: 8851200
> > compressed tag storage size: 4346368
>
> Can you align them like this:
>
> # cat /sys/kernel/debug/mteswap/stats
> 8 bytes: 102496 allocations, 67302 deallocations
> 128 bytes: 212234 allocations, 178278 deallocations
> uncompressed tag storage size: 8851200
> compressed tag storage size: 4346368
Ok, will do in v9.
> And also, can you mention a new file in the documentation?
Sure.
> IIRC, it was my suggestion to measure some stats... If so, can you add:
>
> Suggested-by: Yury Norov <yury.norov@gmail.com> # for stats
Will do.
> > +static atomic_long_t alloc_counters[MTESWAP_CTR_SIZE];
> > +static atomic_long_t dealloc_counters[MTESWAP_CTR_SIZE];
>
> I think it's worth to add them and all the book keeping code in
> a separate patch? Also can you consider making them configurable,
> maybe depending on CONFIG_ARM64_MTE_SWAP_STAT?..
Sounds fine, I'll split this code into a separate patch.
> > static DEFINE_XARRAY(mte_pages);
> >
> > void *mte_allocate_tag_storage(void)
> > {
> > - /* tags granule is 16 bytes, 2 tags stored per byte */
> > - return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> > + void *ret;
> > +
> > + ret = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
> > + if (ret)
> > + atomic_long_inc(&alloc_counters[MTESWAP_CTR_NOINLINE]);
> > + return ret;
> > }
> >
> > void mte_free_tag_storage(char *storage)
>
> If you use a term 'free' here, the counter name should probably
> reflect that.
I still want to keep the terms "allocation/deallocation" in the
user-facing code (in the /sys/ file) though.
So renaming the counter here will cause a mismatch between its name
and the stats output.
>
> > {
> > - kfree(storage);
> > + if (!mte_is_compressed(storage)) {
> > + kfree(storage);
> > + atomic_long_dec(&alloc_counters[MTESWAP_CTR_NOINLINE]);
>
> s/NOINLINE/OUTLINE ?
Done.
> > mte_save_page_tags(page_address(page), tag_storage);
> > + compressed = mte_compress(tag_storage);
> > + if (compressed) {
> > + mte_free_tag_storage(tag_storage);
> > + tag_storage = (void *)compressed;
>
> But 'compressed' is already 'void *', what for typecasting?
Yeah, it used to be unsigned long or something like that. Removed the cast.
> Also, it's a bad naming - adjective implies bool type. I'd name it
> 'compressed_tag', or similar.
>
> > + atomic_long_inc(&alloc_counters[MTESWAP_CTR_INLINE]);
> > + }
>
> Is it possible to move all this conditional inside the function call?
> I feel like it should be a single-line:
>
> tag_storage = mte_compress(tag_storage);
>
> So that client code will be free of housekeeping details.
The problem is that this assignment destroys the current value of
tag_storage, which means mte_compress() will have to deal with memory
deallocation.
That, in turn, will introduce an unnecessary dependency between
mtecomp.c and mteswap.c (so that the former can call
mte_free_tag_storage()).
Given that we still have to deal with allocations in mteswap.c, I
don't think it's worth the hassle.
>
> >
> > /* lookup the swap entry.val from the page */
> > ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> > @@ -50,13 +76,20 @@ int mte_save_tags(struct page *page)
> > void mte_restore_tags(swp_entry_t entry, struct page *page)
> > {
> > void *tags = xa_load(&mte_pages, entry.val);
> > + void *tag_storage = NULL;
> >
> > if (!tags)
> > return;
> >
> > if (try_page_mte_tagging(page)) {
> > + if (mte_is_compressed(tags)) {
> > + tag_storage = mte_allocate_tag_storage();
> > + mte_decompress(tags, tag_storage);
> > + tags = tag_storage;
> > + }
>
> Same here, if it's possible, I'd prefer a single line call instead of
> the above chunk:
>
> tags = mte_decompress(tags);
> if (!tags)
> return;
Same here, we'd have to make mte_decompress() allocate/deallocate
memory in a way consistent with mteswap.c allocations.
> > >
> > > void mte_free_tag_storage(char *storage)
> >
> > If you use a term 'free' here, the counter name should probably
> > reflect that.
>
> I still want to keep the terms "allocation/deallocation" in the
> user-facing code (in the /sys/ file) though.
> So renaming the counter here will cause a mismatch between its name
> and the stats output.
To move forward, I am going to send out v9 that addresses your
comments except for this one and for moving the conditionals inside
mte_compress() and mte_decompress().
Happy to discuss those three points further.
@@ -1,28 +1,48 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/debugfs.h>
#include <linux/pagemap.h>
#include <linux/xarray.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <asm/mte.h>
+#include <asm/mtecomp.h>
+#include "mtecomp.h"
+
+enum mteswap_counters {
+ MTESWAP_CTR_INLINE = 0,
+ MTESWAP_CTR_NOINLINE,
+ MTESWAP_CTR_SIZE
+};
+static atomic_long_t alloc_counters[MTESWAP_CTR_SIZE];
+static atomic_long_t dealloc_counters[MTESWAP_CTR_SIZE];
static DEFINE_XARRAY(mte_pages);
void *mte_allocate_tag_storage(void)
{
- /* tags granule is 16 bytes, 2 tags stored per byte */
- return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
+ void *ret;
+
+ ret = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
+ if (ret)
+ atomic_long_inc(&alloc_counters[MTESWAP_CTR_NOINLINE]);
+ return ret;
}
void mte_free_tag_storage(char *storage)
{
- kfree(storage);
+ if (!mte_is_compressed(storage)) {
+ kfree(storage);
+ atomic_long_dec(&alloc_counters[MTESWAP_CTR_NOINLINE]);
+ } else {
+ atomic_long_dec(&alloc_counters[MTESWAP_CTR_INLINE]);
+ }
}
int mte_save_tags(struct page *page)
{
- void *tag_storage, *ret;
+ void *tag_storage, *ret, *compressed;
if (!page_mte_tagged(page))
return 0;
@@ -32,6 +52,12 @@ int mte_save_tags(struct page *page)
return -ENOMEM;
mte_save_page_tags(page_address(page), tag_storage);
+ compressed = mte_compress(tag_storage);
+ if (compressed) {
+ mte_free_tag_storage(tag_storage);
+ tag_storage = (void *)compressed;
+ atomic_long_inc(&alloc_counters[MTESWAP_CTR_INLINE]);
+ }
/* lookup the swap entry.val from the page */
ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
@@ -50,13 +76,20 @@ int mte_save_tags(struct page *page)
void mte_restore_tags(swp_entry_t entry, struct page *page)
{
void *tags = xa_load(&mte_pages, entry.val);
+ void *tag_storage = NULL;
if (!tags)
return;
if (try_page_mte_tagging(page)) {
+ if (mte_is_compressed(tags)) {
+ tag_storage = mte_allocate_tag_storage();
+ mte_decompress(tags, tag_storage);
+ tags = tag_storage;
+ }
mte_restore_page_tags(page_address(page), tags);
set_page_mte_tagged(page);
+ mte_free_tag_storage(tag_storage);
}
}
@@ -83,3 +116,50 @@ void mte_invalidate_tags_area(int type)
}
xa_unlock(&mte_pages);
}
+
+/* DebugFS interface. */
+static int stats_show(struct seq_file *seq, void *v)
+{
+ unsigned long total_mem_alloc = 0, total_mem_dealloc = 0;
+ unsigned long total_num_alloc = 0, total_num_dealloc = 0;
+ unsigned long sizes[2] = { 8, MTE_PAGE_TAG_STORAGE };
+ long alloc, dealloc;
+ unsigned long size;
+ int i;
+
+ for (i = 0; i < MTESWAP_CTR_SIZE; i++) {
+ alloc = atomic_long_read(&alloc_counters[i]);
+ dealloc = atomic_long_read(&dealloc_counters[i]);
+ total_num_alloc += alloc;
+ total_num_dealloc += dealloc;
+ size = sizes[i];
+ /*
+ * Do not count 8-byte buffers towards compressed tag storage
+ * size.
+ */
+ if (i) {
+ total_mem_alloc += (size * alloc);
+ total_mem_dealloc += (size * dealloc);
+ }
+ seq_printf(seq,
+ "%lu bytes: %lu allocations, %lu deallocations\n",
+ size, alloc, dealloc);
+ }
+ seq_printf(seq, "uncompressed tag storage size: %lu\n",
+ (total_num_alloc - total_num_dealloc) *
+ MTE_PAGE_TAG_STORAGE);
+ seq_printf(seq, "compressed tag storage size: %lu\n",
+ total_mem_alloc - total_mem_dealloc);
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+static int mteswap_init(void)
+{
+ struct dentry *mteswap_dir;
+
+ mteswap_dir = debugfs_create_dir("mteswap", NULL);
+ debugfs_create_file("stats", 0444, mteswap_dir, NULL, &stats_fops);
+ return 0;
+}
+module_init(mteswap_init);