squashfs: cache partial compressed blocks

Message ID 20230510-squashfs-cache-v1-1-3b6bb0e7d952@axis.com
State New
Headers
Series squashfs: cache partial compressed blocks |

Commit Message

Vincent Whitchurch May 12, 2023, 6:32 a.m. UTC
  Before commit 93e72b3c612adcaca1 ("squashfs: migrate from ll_rw_block
usage to BIO"), compressed blocks read by squashfs were cached in the
page cache, but that is not the case after that commit.  That has lead
to squashfs having to re-read a lot of sectors from disk/flash.

For example, the first sectors of every metadata block need to be read
twice from the disk.  Once partially to read the length, and a
second time to read the block itself.  Also, in linear reads of large
files, the last sectors of one data block are re-read from disk when
reading the next data block, since the compressed blocks are of variable
sizes and not aligned to device blocks.  This extra I/O results in a
degrade in read performance of, for example, ~16% in one scenario on my
ARM platform using squashfs with dm-verity and NAND.

Since the decompressed data is cached in the page cache or squashfs'
internal metadata and fragment caches, caching _all_ compressed pages
would lead to a lot of double caching and is undesirable.  But make the
code cache any disk blocks which were only partially requested, since
these are the ones likely to include data which is needed by other file
system blocks.  This restores read performance in my test scenario.

The compressed block caching is only applied when the disk block size is
equal to the page size, to avoid having to deal with caching sub-page
reads.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 fs/squashfs/block.c          | 118 +++++++++++++++++++++++++++++++++++++++++--
 fs/squashfs/squashfs_fs_sb.h |   1 +
 fs/squashfs/super.c          |  12 +++++
 3 files changed, 127 insertions(+), 4 deletions(-)


---
base-commit: 457391b0380335d5e9a5babdec90ac53928b23b4
change-id: 20230510-squashfs-cache-7a3b9e7355c1

Best regards,
  

Comments

kernel test robot May 12, 2023, 8:54 a.m. UTC | #1
Hi Vincent,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 457391b0380335d5e9a5babdec90ac53928b23b4]

url:    https://github.com/intel-lab-lkp/linux/commits/Vincent-Whitchurch/squashfs-cache-partial-compressed-blocks/20230512-143553
base:   457391b0380335d5e9a5babdec90ac53928b23b4
patch link:    https://lore.kernel.org/r/20230510-squashfs-cache-v1-1-3b6bb0e7d952%40axis.com
patch subject: [PATCH] squashfs: cache partial compressed blocks
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230512/202305121615.T6PJo2hI-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/3ca22c93f1faf376ecf133f84d0148497284366a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vincent-Whitchurch/squashfs-cache-partial-compressed-blocks/20230512-143553
        git checkout 3ca22c93f1faf376ecf133f84d0148497284366a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash fs/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202305121615.T6PJo2hI-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> fs/squashfs/block.c:168:5: warning: no previous prototype for 'squashfs_bio_read' [-Wmissing-prototypes]
     168 | int squashfs_bio_read(struct super_block *sb, u64 index, int length,
         |     ^~~~~~~~~~~~~~~~~


vim +/squashfs_bio_read +168 fs/squashfs/block.c

   167	
 > 168	int squashfs_bio_read(struct super_block *sb, u64 index, int length,
   169				     struct bio **biop, int *block_offset)
   170	{
   171		struct squashfs_sb_info *msblk = sb->s_fs_info;
   172		struct inode *cache_inode = msblk->cache_inode;
   173		struct address_space *cache_mapping = cache_inode ? cache_inode->i_mapping : NULL;
   174		const u64 read_start = round_down(index, msblk->devblksize);
   175		const sector_t block = read_start >> msblk->devblksize_log2;
   176		const u64 read_end = round_up(index + length, msblk->devblksize);
   177		const sector_t block_end = read_end >> msblk->devblksize_log2;
   178		int offset = read_start - round_down(index, PAGE_SIZE);
   179		int total_len = (block_end - block) << msblk->devblksize_log2;
   180		const int page_count = DIV_ROUND_UP(total_len + offset, PAGE_SIZE);
   181		int error, i;
   182		struct bio *bio;
   183	
   184		bio = bio_kmalloc(page_count, GFP_NOIO);
   185		if (!bio)
   186			return -ENOMEM;
   187		bio_init(bio, sb->s_bdev, bio->bi_inline_vecs, page_count, REQ_OP_READ);
   188		bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
   189	
   190		for (i = 0; i < page_count; ++i) {
   191			unsigned int len =
   192				min_t(unsigned int, PAGE_SIZE - offset, total_len);
   193			struct page *page = NULL;
   194	
   195			if (cache_mapping)
   196				page = find_get_page(cache_mapping,
   197						     read_start + i * PAGE_SIZE);
   198			if (!page)
   199				page = alloc_page(GFP_NOIO);
   200	
   201			if (!page) {
   202				error = -ENOMEM;
   203				goto out_free_bio;
   204			}
   205	
   206			if (cache_mapping) {
   207				/*
   208				 * Use the __ version to avoid merging since we need
   209				 * each page to be separate when we check for and avoid
   210				 * cached pages.
   211				 */
   212				__bio_add_page(bio, page, len, offset);
   213			} else if (!bio_add_page(bio, page, len, offset)) {
   214				error = -EIO;
   215				goto out_free_bio;
   216			}
   217			offset = 0;
   218			total_len -= len;
   219		}
   220	
   221		if (cache_mapping)
   222			error = squashfs_bio_read_cached(bio, cache_mapping, index,
   223							 length, read_start, read_end,
   224							 page_count);
   225		else
   226			error = submit_bio_wait(bio);
   227		if (error)
   228			goto out_free_bio;
   229	
   230		*biop = bio;
   231		*block_offset = index & ((1 << msblk->devblksize_log2) - 1);
   232		return 0;
   233	
   234	out_free_bio:
   235		bio_free_pages(bio);
   236		bio_uninit(bio);
   237		kfree(bio);
   238		return error;
   239	}
   240
  
kernel test robot May 12, 2023, 8:54 a.m. UTC | #2
Hi Vincent,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 457391b0380335d5e9a5babdec90ac53928b23b4]

url:    https://github.com/intel-lab-lkp/linux/commits/Vincent-Whitchurch/squashfs-cache-partial-compressed-blocks/20230512-143553
base:   457391b0380335d5e9a5babdec90ac53928b23b4
patch link:    https://lore.kernel.org/r/20230510-squashfs-cache-v1-1-3b6bb0e7d952%40axis.com
patch subject: [PATCH] squashfs: cache partial compressed blocks
config: x86_64-randconfig-a014 (https://download.01.org/0day-ci/archive/20230512/202305121650.kWn3uM2f-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/3ca22c93f1faf376ecf133f84d0148497284366a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vincent-Whitchurch/squashfs-cache-partial-compressed-blocks/20230512-143553
        git checkout 3ca22c93f1faf376ecf133f84d0148497284366a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash fs/squashfs/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202305121650.kWn3uM2f-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> fs/squashfs/block.c:168:5: warning: no previous prototype for function 'squashfs_bio_read' [-Wmissing-prototypes]
   int squashfs_bio_read(struct super_block *sb, u64 index, int length,
       ^
   fs/squashfs/block.c:168:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int squashfs_bio_read(struct super_block *sb, u64 index, int length,
   ^
   static 
   1 warning generated.


vim +/squashfs_bio_read +168 fs/squashfs/block.c

   167	
 > 168	int squashfs_bio_read(struct super_block *sb, u64 index, int length,
   169				     struct bio **biop, int *block_offset)
   170	{
   171		struct squashfs_sb_info *msblk = sb->s_fs_info;
   172		struct inode *cache_inode = msblk->cache_inode;
   173		struct address_space *cache_mapping = cache_inode ? cache_inode->i_mapping : NULL;
   174		const u64 read_start = round_down(index, msblk->devblksize);
   175		const sector_t block = read_start >> msblk->devblksize_log2;
   176		const u64 read_end = round_up(index + length, msblk->devblksize);
   177		const sector_t block_end = read_end >> msblk->devblksize_log2;
   178		int offset = read_start - round_down(index, PAGE_SIZE);
   179		int total_len = (block_end - block) << msblk->devblksize_log2;
   180		const int page_count = DIV_ROUND_UP(total_len + offset, PAGE_SIZE);
   181		int error, i;
   182		struct bio *bio;
   183	
   184		bio = bio_kmalloc(page_count, GFP_NOIO);
   185		if (!bio)
   186			return -ENOMEM;
   187		bio_init(bio, sb->s_bdev, bio->bi_inline_vecs, page_count, REQ_OP_READ);
   188		bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
   189	
   190		for (i = 0; i < page_count; ++i) {
   191			unsigned int len =
   192				min_t(unsigned int, PAGE_SIZE - offset, total_len);
   193			struct page *page = NULL;
   194	
   195			if (cache_mapping)
   196				page = find_get_page(cache_mapping,
   197						     read_start + i * PAGE_SIZE);
   198			if (!page)
   199				page = alloc_page(GFP_NOIO);
   200	
   201			if (!page) {
   202				error = -ENOMEM;
   203				goto out_free_bio;
   204			}
   205	
   206			if (cache_mapping) {
   207				/*
   208				 * Use the __ version to avoid merging since we need
   209				 * each page to be separate when we check for and avoid
   210				 * cached pages.
   211				 */
   212				__bio_add_page(bio, page, len, offset);
   213			} else if (!bio_add_page(bio, page, len, offset)) {
   214				error = -EIO;
   215				goto out_free_bio;
   216			}
   217			offset = 0;
   218			total_len -= len;
   219		}
   220	
   221		if (cache_mapping)
   222			error = squashfs_bio_read_cached(bio, cache_mapping, index,
   223							 length, read_start, read_end,
   224							 page_count);
   225		else
   226			error = submit_bio_wait(bio);
   227		if (error)
   228			goto out_free_bio;
   229	
   230		*biop = bio;
   231		*block_offset = index & ((1 << msblk->devblksize_log2) - 1);
   232		return 0;
   233	
   234	out_free_bio:
   235		bio_free_pages(bio);
   236		bio_uninit(bio);
   237		kfree(bio);
   238		return error;
   239	}
   240
  

Patch

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index bed3bb8b27fa..e565ff574e24 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -76,10 +76,101 @@  static int copy_bio_to_actor(struct bio *bio,
 	return copied_bytes;
 }
 
-static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
+static int squashfs_bio_read_cached(struct bio *fullbio, struct address_space *cache_mapping,
+				    u64 index, int length, u64 read_start, u64 read_end,
+				    int page_count)
+{
+	struct page *head_to_cache = NULL, *tail_to_cache = NULL;
+	struct block_device *bdev = fullbio->bi_bdev;
+	struct bvec_iter_all iter_all;
+	struct bio *bio = NULL;
+	int prev_io_idx = -1;
+	struct bio_vec *bv;
+	int idx = 0;
+	int err = 0;
+
+	bio_for_each_segment_all(bv, fullbio, iter_all) {
+		struct page *page = bv->bv_page;
+		int retlen;
+
+		if (page->mapping == cache_mapping && PageUptodate(page)) {
+			idx++;
+			continue;
+		}
+
+		/*
+		 * We only use this when the device block size is the same as
+		 * the page size, so read_start and read_end cover full pages.
+		 *
+		 * Compare these to the original required index and length to
+		 * only cache pages which were requested partially, since these
+		 * are the ones which are likely to be needed when reading
+		 * adjacent blocks.
+		 */
+		if (idx == 0 && index != read_start)
+			head_to_cache = page;
+		else if (idx == page_count - 1 && index + length != read_end)
+			tail_to_cache = page;
+
+		if (!bio || idx != prev_io_idx + 1) {
+			unsigned int remaining_pages;
+			unsigned int this_nr_pages;
+
+submit_and_retry:
+			remaining_pages = page_count - idx;
+			this_nr_pages = min(remaining_pages, BIO_MAX_VECS);
+			bio = blk_next_bio(bio, bdev, this_nr_pages, REQ_OP_READ,
+					   GFP_NOIO);
+			bio->bi_iter.bi_sector = fullbio->bi_iter.bi_sector +
+						 idx * (PAGE_SIZE / SECTOR_SIZE);
+		}
+
+		retlen = bio_add_page(bio, bv->bv_page, bv->bv_len, bv->bv_offset);
+		if (retlen != bv->bv_len)
+			goto submit_and_retry;
+
+		prev_io_idx = idx;
+		idx++;
+	}
+
+	if (bio) {
+		err = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+
+	if (err)
+		return err;
+
+	if (head_to_cache) {
+		int ret = add_to_page_cache_lru(head_to_cache, cache_mapping,
+						read_start, GFP_NOIO);
+
+		if (!ret) {
+			SetPageUptodate(head_to_cache);
+			unlock_page(head_to_cache);
+		}
+
+	}
+
+	if (tail_to_cache) {
+		int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping,
+						read_end - PAGE_SIZE, GFP_NOIO);
+
+		if (!ret) {
+			SetPageUptodate(tail_to_cache);
+			unlock_page(tail_to_cache);
+		}
+	}
+
+	return 0;
+}
+
+int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 			     struct bio **biop, int *block_offset)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	struct inode *cache_inode = msblk->cache_inode;
+	struct address_space *cache_mapping = cache_inode ? cache_inode->i_mapping : NULL;
 	const u64 read_start = round_down(index, msblk->devblksize);
 	const sector_t block = read_start >> msblk->devblksize_log2;
 	const u64 read_end = round_up(index + length, msblk->devblksize);
@@ -99,13 +190,27 @@  static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 	for (i = 0; i < page_count; ++i) {
 		unsigned int len =
 			min_t(unsigned int, PAGE_SIZE - offset, total_len);
-		struct page *page = alloc_page(GFP_NOIO);
+		struct page *page = NULL;
+
+		if (cache_mapping)
+			page = find_get_page(cache_mapping,
+					     read_start + i * PAGE_SIZE);
+		if (!page)
+			page = alloc_page(GFP_NOIO);
 
 		if (!page) {
 			error = -ENOMEM;
 			goto out_free_bio;
 		}
-		if (!bio_add_page(bio, page, len, offset)) {
+
+		if (cache_mapping) {
+			/*
+			 * Use the __ version to avoid merging since we need
+			 * each page to be separate when we check for and avoid
+			 * cached pages.
+			 */
+			__bio_add_page(bio, page, len, offset);
+		} else if (!bio_add_page(bio, page, len, offset)) {
 			error = -EIO;
 			goto out_free_bio;
 		}
@@ -113,7 +218,12 @@  static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 		total_len -= len;
 	}
 
-	error = submit_bio_wait(bio);
+	if (cache_mapping)
+		error = squashfs_bio_read_cached(bio, cache_mapping, index,
+						 length, read_start, read_end,
+						 page_count);
+	else
+		error = submit_bio_wait(bio);
 	if (error)
 		goto out_free_bio;
 
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 72f6f4b37863..dfee65845d48 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -47,6 +47,7 @@  struct squashfs_sb_info {
 	struct squashfs_cache			*block_cache;
 	struct squashfs_cache			*fragment_cache;
 	struct squashfs_cache			*read_page;
+	struct inode				*cache_inode;
 	int					next_meta_index;
 	__le64					*id_table;
 	__le64					*fragment_index;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index e090fae48e68..64d6bc95950b 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -329,6 +329,16 @@  static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto failed_mount;
 	}
 
+	if (msblk->devblksize == PAGE_SIZE) {
+		msblk->cache_inode = new_inode(sb);
+		if (msblk->cache_inode == NULL)
+			goto failed_mount;
+
+		set_nlink(msblk->cache_inode, 1);
+		msblk->cache_inode->i_size = OFFSET_MAX;
+		mapping_set_gfp_mask(msblk->cache_inode->i_mapping, GFP_NOFS);
+	}
+
 	msblk->stream = squashfs_decompressor_setup(sb, flags);
 	if (IS_ERR(msblk->stream)) {
 		err = PTR_ERR(msblk->stream);
@@ -454,6 +464,7 @@  static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	squashfs_cache_delete(msblk->block_cache);
 	squashfs_cache_delete(msblk->fragment_cache);
 	squashfs_cache_delete(msblk->read_page);
+	iput(msblk->cache_inode);
 	msblk->thread_ops->destroy(msblk);
 	kfree(msblk->inode_lookup_table);
 	kfree(msblk->fragment_index);
@@ -572,6 +583,7 @@  static void squashfs_put_super(struct super_block *sb)
 		squashfs_cache_delete(sbi->block_cache);
 		squashfs_cache_delete(sbi->fragment_cache);
 		squashfs_cache_delete(sbi->read_page);
+		iput(sbi->cache_inode);
 		sbi->thread_ops->destroy(sbi);
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);