[v2] erofs: use kmap_local_page() only for erofs_bread()

Message ID 20221018105313.4940-1-hsiangkao@linux.alibaba.com
State New
Headers
Series [v2] erofs: use kmap_local_page() only for erofs_bread() |

Commit Message

Gao Xiang Oct. 18, 2022, 10:53 a.m. UTC
  Convert all mapped erofs_bread() users to use kmap_local_page()
instead of kmap() or kmap_atomic().

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/data.c     | 8 ++------
 fs/erofs/internal.h | 3 +--
 fs/erofs/xattr.c    | 8 ++++----
 fs/erofs/zmap.c     | 4 ++--
 4 files changed, 9 insertions(+), 14 deletions(-)
  

Comments

Fabio M. De Francesco Oct. 18, 2022, 4:28 p.m. UTC | #1
On Tuesday, October 18, 2022 12:53:13 PM CEST Gao Xiang wrote:
> Convert all mapped erofs_bread() users to use kmap_local_page()
> instead of kmap() or kmap_atomic().
> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
>  fs/erofs/data.c     | 8 ++------
>  fs/erofs/internal.h | 3 +--
>  fs/erofs/xattr.c    | 8 ++++----
>  fs/erofs/zmap.c     | 4 ++--
>  4 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index fe8ac0e163f7..fe1ae80284bf 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -13,9 +13,7 @@
>  void erofs_unmap_metabuf(struct erofs_buf *buf)
>  {
>  	if (buf->kmap_type == EROFS_KMAP)
> -		kunmap(buf->page);
> -	else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
> -		kunmap_atomic(buf->base);
> +		kunmap_local(buf->base);
>  	buf->base = NULL;
>  	buf->kmap_type = EROFS_NO_KMAP;
>  }
> @@ -54,9 +52,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode 
*inode,
>  	}
>  	if (buf->kmap_type == EROFS_NO_KMAP) {
>  		if (type == EROFS_KMAP)
> -			buf->base = kmap(page);
> -		else if (type == EROFS_KMAP_ATOMIC)
> -			buf->base = kmap_atomic(page);
> +			buf->base = kmap_local_page(page);
>  		buf->kmap_type = type;
>  	} else if (buf->kmap_type != type) {
>  		DBG_BUGON(1);
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 1701df48c446..67dc8e177211 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -253,8 +253,7 @@ static inline int erofs_wait_on_workgroup_freezed(struct 
erofs_workgroup *grp)
>  
>  enum erofs_kmap_type {
>  	EROFS_NO_KMAP,		/* don't map the buffer */
> -	EROFS_KMAP,		/* use kmap() to map the buffer */
> -	EROFS_KMAP_ATOMIC,	/* use kmap_atomic() to map the buffer */
> +	EROFS_KMAP,		/* use kmap_local_page() to map the 
buffer */
>  };
>  
>  struct erofs_buf {
> diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
> index 8106bcb5a38d..a62fb8a3318a 100644
> --- a/fs/erofs/xattr.c
> +++ b/fs/erofs/xattr.c
> @@ -148,7 +148,7 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
>  
>  	it->blkaddr += erofs_blknr(it->ofs);
>  	it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr,
> -				       EROFS_KMAP_ATOMIC);
> +				       EROFS_KMAP);
>  	if (IS_ERR(it->kaddr))
>  		return PTR_ERR(it->kaddr);
>  	it->ofs = erofs_blkoff(it->ofs);
> @@ -174,7 +174,7 @@ static int inline_xattr_iter_begin(struct xattr_iter 
*it,
>  	it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
>  
>  	it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
> -				       EROFS_KMAP_ATOMIC);
> +				       EROFS_KMAP);
>  	if (IS_ERR(it->kaddr))
>  		return PTR_ERR(it->kaddr);
>  	return vi->xattr_isize - xattr_header_sz;
> @@ -368,7 +368,7 @@ static int shared_getxattr(struct inode *inode, struct 
getxattr_iter *it)
>  
>  		it->it.ofs = xattrblock_offset(sbi, vi-
>xattr_shared_xattrs[i]);
>  		it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, 
blkaddr,
> -						  
EROFS_KMAP_ATOMIC);
> +						  
EROFS_KMAP);
>  		if (IS_ERR(it->it.kaddr))
>  			return PTR_ERR(it->it.kaddr);
>  		it->it.blkaddr = blkaddr;
> @@ -580,7 +580,7 @@ static int shared_listxattr(struct listxattr_iter *it)
>  
>  		it->it.ofs = xattrblock_offset(sbi, vi-
>xattr_shared_xattrs[i]);
>  		it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, 
blkaddr,
> -						  
EROFS_KMAP_ATOMIC);
> +						  
EROFS_KMAP);
>  		if (IS_ERR(it->it.kaddr))
>  			return PTR_ERR(it->it.kaddr);
>  		it->it.blkaddr = blkaddr;
> diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
> index 0bb66927e3d0..749a5ac943f4 100644
> --- a/fs/erofs/zmap.c
> +++ b/fs/erofs/zmap.c
> @@ -178,7 +178,7 @@ static int legacy_load_cluster_from_disk(struct 
z_erofs_maprecorder *m,
>  	unsigned int advise, type;
>  
>  	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
> -				      erofs_blknr(pos), 
EROFS_KMAP_ATOMIC);
> +				      erofs_blknr(pos), 
EROFS_KMAP);
>  	if (IS_ERR(m->kaddr))
>  		return PTR_ERR(m->kaddr);
>  
> @@ -416,7 +416,7 @@ static int compacted_load_cluster_from_disk(struct 
z_erofs_maprecorder *m,
>  out:
>  	pos += lcn * (1 << amortizedshift);
>  	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
> -				      erofs_blknr(pos), 
EROFS_KMAP_ATOMIC);
> +				      erofs_blknr(pos), 
EROFS_KMAP);
>  	if (IS_ERR(m->kaddr))
>  		return PTR_ERR(m->kaddr);
>  	return unpack_compacted_index(m, amortizedshift, pos, lookahead);
> -- 
> 2.24.4
> 
> 
Can you please say whether or not you ran some tests on a 32 bits machine, 
possibly QEMU/KVM x86_32 with enough RAM (I'd suggest 6GB), booting a kernel 
with HIGHMEM4G or HIGHMEM64G enabled?

Did you check whether or not the pointers are handed to other contexts?

You didn't say anything about the safety of these conversions. And you also 
didn't say why you did them (I don't care but many might ignore why).

Therefore, can you please say at least how you checked that these conversions 
are safe so that they won't break the filesystem in 32bit machines?

Thanks,

Fabio M. De Francesco
  
Fabio M. De Francesco Oct. 18, 2022, 7:18 p.m. UTC | #2
On Tuesday, October 18, 2022 12:53:13 PM CEST Gao Xiang wrote:
> Convert all mapped erofs_bread() users to use kmap_local_page()
> instead of kmap() or kmap_atomic().
> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
>  fs/erofs/data.c     | 8 ++------
>  fs/erofs/internal.h | 3 +--
>  fs/erofs/xattr.c    | 8 ++++----
>  fs/erofs/zmap.c     | 4 ++--
>  4 files changed, 9 insertions(+), 14 deletions(-)
> 

I just realized that you know the code of fs/erofs very well. I saw a Gao 
Xiang in MAINTAINERS, although having a different email address.

Therefore, I'm sure that everybody can trust that you checked everything is 
needed to assure the safety of the conversions.

However, an extended commit message would have prevented me to send you the 
previous email with all those questions / objections.

Thanks,

Fabio
  
Gao Xiang Oct. 18, 2022, 9:29 p.m. UTC | #3
Hi Fabio,

On Tue, Oct 18, 2022 at 09:18:49PM +0200, Fabio M. De Francesco wrote:
> On Tuesday, October 18, 2022 12:53:13 PM CEST Gao Xiang wrote:
> > Convert all mapped erofs_bread() users to use kmap_local_page()
> > instead of kmap() or kmap_atomic().
> > 
> > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > ---
> >  fs/erofs/data.c     | 8 ++------
> >  fs/erofs/internal.h | 3 +--
> >  fs/erofs/xattr.c    | 8 ++++----
> >  fs/erofs/zmap.c     | 4 ++--
> >  4 files changed, 9 insertions(+), 14 deletions(-)
> > 
> 
> I just realized that you know the code of fs/erofs very well. I saw a Gao 
> Xiang in MAINTAINERS, although having a different email address.
> 
> Therefore, I'm sure that everybody can trust that you checked everything is 
> needed to assure the safety of the conversions.
> 
> However, an extended commit message would have prevented me to send you the 
> previous email with all those questions / objections.

Thanks for your suggestion.  Yeah, this conversion looks trivial [since most
paths for erofs_bread() don't have more restriction in principle so we can
just disable migration.  One of what I need to care is nested kmap() usage,
some unmap/remap order cannot be simply converted to kmap_local() but I think
it's not the case for erofs_bread().  Actually EROFS has one of such nested
kmap() usage, but I don't really care its performance on HIGHMEM platforms,
so I think kmap() is still somewhat useful compared to kmap_local() from this
point of view], but in order to make it all work properly, I will try to do
stress test with 32-bit platform later.  Since it targets for the next cycle
6.2, I will do a full stress test in the next following weeks.

Thanks,
Gao Xiang

> 
> Thanks,
> 
> Fabio
>
  
Fabio M. De Francesco Oct. 18, 2022, 11:21 p.m. UTC | #4
On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:
> Hi Fabio,
> 
> On Tue, Oct 18, 2022 at 09:18:49PM +0200, Fabio M. De Francesco wrote:
> > On Tuesday, October 18, 2022 12:53:13 PM CEST Gao Xiang wrote:
> > > Convert all mapped erofs_bread() users to use kmap_local_page()
> > > instead of kmap() or kmap_atomic().
> > > 
> > > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > > ---
> > >  fs/erofs/data.c     | 8 ++------
> > >  fs/erofs/internal.h | 3 +--
> > >  fs/erofs/xattr.c    | 8 ++++----
> > >  fs/erofs/zmap.c     | 4 ++--
> > >  4 files changed, 9 insertions(+), 14 deletions(-)
> > > 
> > 
> > I just realized that you know the code of fs/erofs very well. I saw a Gao 
> > Xiang in MAINTAINERS, although having a different email address.
> > 
> > Therefore, I'm sure that everybody can trust that you checked everything 
is 
> > needed to assure the safety of the conversions.
> > 
> > However, an extended commit message would have prevented me to send you 
the 
> > previous email with all those questions / objections.
> 
> Thanks for your suggestion. 
> Yeah, this conversion looks trivial [since most
> paths for erofs_bread() don't have more restriction in principle so we can
> just disable migration.

Not sure about what you mean by "restrictions". Few months ago I updated the 
kmap_local_page() documentantation (highmem.rst). Please take a look at it, so 
that you may check if what you call restrictions are intended the way you 
mean.

The two most important rules are (1) that users cannot hand the virtual kernel 
addresses returned by kmap_local_page() to other contexts (that is why they 
are thread local) and (2) how to nest mappings /unmappings.

> One of what I need to care is nested kmap() usage,
> some unmap/remap order cannot be simply converted to kmap_local()

Correct about nesting. If we map A and then B, we must unmap B and then A.

However, as you seem to convey, not always unmappings in right order (stack 
based) is possible, sometimes because very long functions have many loop's 
breaks and many goto exit labels.

> but I think
> it's not the case for erofs_bread().  Actually EROFS has one of such nested
> kmap() usage, but I don't really care its performance on HIGHMEM platforms,
> so I think kmap() is still somewhat useful compared to kmap_local() from 
this
> point of view],

In Btrfs I solved (thanks to David S.' advice) by mapping only one of two 
pages, only the one coming from the page cache. 

The other page didn't need the use of kmap_local_page() because it was 
allocated in the filesystem with "alloc_page(GFP_NOFS)". GFP_NOFS won't ever 
allocate from ZONE_HIGHMEM, therefore a direct page_address() could avoid the 
mapping and the nesting issues.

Did you check if you may solve the same way? 

A little group of people are working to remove all kmap() and kmap_atomic() we 
meet across the whole kernel. I have not yet encountered conversions which 
cannot be made. Sometimes we may refactor, if what I said above cannot apply.

> but in order to make it all work properly, I will try to do
> stress test with 32-bit platform later. 

I use QEMU/KVM x86_32 VM, 6GB RAM, and a kernel with HIGHMEM64 enabled and an 
openSUSE Tumbleweed 32 distro. I've heard that Debian too provides an x86_32 
distribution. 

> Since it targets for the next cycle
> 6.2, I will do a full stress test in the next following weeks.
> 
> Thanks,
> Gao Xiang
> 

Thanks,

Fabio
  
Gao Xiang Oct. 18, 2022, 11:36 p.m. UTC | #5
On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:

...

> 
> > One of what I need to care is nested kmap() usage,
> > some unmap/remap order cannot be simply converted to kmap_local()
> 
> Correct about nesting. If we map A and then B, we must unmap B and then A.
> 
> However, as you seem to convey, not always unmappings in right order (stack 
> based) is possible, sometimes because very long functions have many loop's 
> breaks and many goto exit labels.
> 
> > but I think
> > it's not the case for erofs_bread().  Actually EROFS has one of such nested
> > kmap() usage, but I don't really care its performance on HIGHMEM platforms,
> > so I think kmap() is still somewhat useful compared to kmap_local() from 
> this
> > point of view],
> 
> In Btrfs I solved (thanks to David S.' advice) by mapping only one of two 
> pages, only the one coming from the page cache. 
> 
> The other page didn't need the use of kmap_local_page() because it was 
> allocated in the filesystem with "alloc_page(GFP_NOFS)". GFP_NOFS won't ever 
> allocate from ZONE_HIGHMEM, therefore a direct page_address() could avoid the 
> mapping and the nesting issues.
> 
> Did you check if you may solve the same way? 

That is not simple.  Currently we have compressed pages and decompressed
pages (page cache or others), and they can be unmapped when either data
is all consumed, so compressed pages can be unmapped first, or
decompressed pages can be unmapped first.  That quite depends on which
pages goes first.

I think such usage is a quite common pattern for decoder or encoder,
you could take a look at z_erofs_lzma_decompress() in
fs/erofs/decompressor_lzma.c.  So kmap() is still useful for such cases
since I don't really care the HIGHMEM performance but correctness, but
other alternative could churn/complex the map/unmap/remap pattern.

Thanks,
Gao Xiang

> 
> A little group of people are working to remove all kmap() and kmap_atomic() we 
> meet across the whole kernel. I have not yet encountered conversions which 
> cannot be made. Sometimes we may refactor, if what I said above cannot apply.
> 
> > but in order to make it all work properly, I will try to do
> > stress test with 32-bit platform later. 
> 
> I use QEMU/KVM x86_32 VM, 6GB RAM, and a kernel with HIGHMEM64 enabled and an 
> openSUSE Tumbleweed 32 distro. I've heard that Debian too provides an x86_32 
> distribution. 
> 
> > Since it targets for the next cycle
> > 6.2, I will do a full stress test in the next following weeks.
> > 
> > Thanks,
> > Gao Xiang
> > 
> 
> Thanks,
> 
> Fabio
>
  
Ira Weiny Oct. 19, 2022, 2:51 a.m. UTC | #6
On Wed, Oct 19, 2022 at 07:36:55AM +0800, Gao Xiang wrote:
> On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> > On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:
> 
> ...

[snip]

> > 
> > In Btrfs I solved (thanks to David S.' advice) by mapping only one of two 
> > pages, only the one coming from the page cache. 
> > 
> > The other page didn't need the use of kmap_local_page() because it was 
> > allocated in the filesystem with "alloc_page(GFP_NOFS)". GFP_NOFS won't ever 
> > allocate from ZONE_HIGHMEM, therefore a direct page_address() could avoid the 
> > mapping and the nesting issues.
> > 
> > Did you check if you may solve the same way? 
> 
> That is not simple.  Currently we have compressed pages and decompressed
> pages (page cache or others), and they can be unmapped when either data
> is all consumed, so compressed pages can be unmapped first, or
> decompressed pages can be unmapped first.  That quite depends on which
> pages goes first.
> 
> I think such usage is a quite common pattern for decoder or encoder,
> you could take a look at z_erofs_lzma_decompress() in
> fs/erofs/decompressor_lzma.c.  So kmap() is still useful for such cases
> since I don't really care the HIGHMEM performance but correctness, but
> other alternative could churn/complex the map/unmap/remap pattern.
> 

When you say kmap() is still useful is this because of the map/unmap ordering
restrictions or because the address is required in different threads?

Ira
  
Gao Xiang Oct. 19, 2022, 3:06 a.m. UTC | #7
Hi Ira,

On Tue, Oct 18, 2022 at 07:51:55PM -0700, Ira Weiny wrote:
> On Wed, Oct 19, 2022 at 07:36:55AM +0800, Gao Xiang wrote:
> > On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> > > On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:
> > 
> > ...
> 
> [snip]
> 
> > > 
> > > In Btrfs I solved (thanks to David S.' advice) by mapping only one of two 
> > > pages, only the one coming from the page cache. 
> > > 
> > > The other page didn't need the use of kmap_local_page() because it was 
> > > allocated in the filesystem with "alloc_page(GFP_NOFS)". GFP_NOFS won't ever 
> > > allocate from ZONE_HIGHMEM, therefore a direct page_address() could avoid the 
> > > mapping and the nesting issues.
> > > 
> > > Did you check if you may solve the same way? 
> > 
> > That is not simple.  Currently we have compressed pages and decompressed
> > pages (page cache or others), and they can be unmapped when either data
> > is all consumed, so compressed pages can be unmapped first, or
> > decompressed pages can be unmapped first.  That quite depends on which
> > pages goes first.
> > 
> > I think such usage is a quite common pattern for decoder or encoder,
> > you could take a look at z_erofs_lzma_decompress() in
> > fs/erofs/decompressor_lzma.c.  So kmap() is still useful for such cases
> > since I don't really care the HIGHMEM performance but correctness, but
> > other alternative could churn/complex the map/unmap/remap pattern.
> > 
> 
> When you say kmap() is still useful is this because of the map/unmap ordering
> restrictions or because the address is required in different threads?

... mainly due to map/unmap ordering restriction. I think
the decompressor here could still be a simple dependency.  I'm not
sure if there are more complicated cases (like multiple
decoding/encoding sources into target pages) though..

Thanks,
Gao Xiang

> 
> Ira
  
Fabio M. De Francesco Oct. 19, 2022, 6:17 p.m. UTC | #8
On Wednesday, October 19, 2022 1:36:55 AM CEST Gao Xiang wrote:
> On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> > On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:
> 
> ...
> 
> > 
> > > One of what I need to care is nested kmap() usage,
> > > some unmap/remap order cannot be simply converted to kmap_local()
> > 
> > Correct about nesting. If we map A and then B, we must unmap B and then A.
> > 
> > However, as you seem to convey, not always unmappings in right order 
(stack 
> > based) is possible, sometimes because very long functions have many loop's 
> > breaks and many goto exit labels.
> > 
> > > but I think
> > > it's not the case for erofs_bread().  Actually EROFS has one of such 
nested
> > > kmap() usage, but I don't really care its performance on HIGHMEM 
platforms,
> > > so I think kmap() is still somewhat useful compared to kmap_local() from 
> > this
> > > point of view],
> > 

fs/erofs conversions are in our (Ira's and my) list. So I'm am happy to see 
that we can delete some entries because of your changes. :-)

> > In Btrfs I solved (thanks to David S.' advice) by mapping only one of two 
> > pages, only the one coming from the page cache. 
> > 
> > The other page didn't need the use of kmap_local_page() because it was 
> > allocated in the filesystem with "alloc_page(GFP_NOFS)". GFP_NOFS won't 
ever 
> > allocate from ZONE_HIGHMEM, therefore a direct page_address() could avoid 
the 
> > mapping and the nesting issues.
> > 
> > Did you check if you may solve the same way? 
> 
> That is not simple.  Currently we have compressed pages and decompressed
> pages (page cache or others), and they can be unmapped when either data
> is all consumed, so compressed pages can be unmapped first, or
> decompressed pages can be unmapped first.  That quite depends on which
> pages goes first.
> 
> I think such usage is a quite common pattern for decoder or encoder,
> you could take a look at z_erofs_lzma_decompress() in
> fs/erofs/decompressor_lzma.c.  

I haven't yet read that code, however I may attempt to propose a pattern for 
solving this kinds of issue, I mean where you don't know which page got mapped 
last...

It's not elegant but it may work. You have compressed and decompressed pages 
and you can't know in advance what page should be unmapped first because you 
can't know in which order they where mapped, right?

I'd use a variable to save two different values, each representing the last 
page mapped. When the code gets to the unmapping block (perhaps in an "out" 
label), just check what that variable contains. Depending on that value, say 
'c' or 'd', you will be able to know what must be unmapped for first. An "if / 
else" can do the work.

What do you think of this?

> So kmap() is still useful for such cases
> since I don't really care the HIGHMEM performance but correctness, but
> other alternative could churn/complex the map/unmap/remap pattern.

Sooner or later someone will have to address this issue and remove those 
kmap() call sites. We are working on this and hope to always figure out a way 
to work it out. 

I hope that what I wrote above may help, although I'm writing on a purely 
theoretically bases, since, as said, I haven't yet seen that code.

If due to my poor English I've not been able to convey my thoughts please let 
me know, so that I'll try to reword.

Thanks,

Fabio

> Thanks,
> Gao Xiang
> 
> > 
> > A little group of people are working to remove all kmap() and 
kmap_atomic() we 
> > meet across the whole kernel. I have not yet encountered conversions which 
> > cannot be made. Sometimes we may refactor, if what I said above cannot 
apply.
> > 
> > > but in order to make it all work properly, I will try to do
> > > stress test with 32-bit platform later. 
> > 
> > I use QEMU/KVM x86_32 VM, 6GB RAM, and a kernel with HIGHMEM64G enabled 
and an 
> > openSUSE Tumbleweed 32 distro. I've heard that Debian too provides an 
x86_32 
> > distribution. 
> > 
> > > Since it targets for the next cycle
> > > 6.2, I will do a full stress test in the next following weeks.
> > > 
> > > Thanks,
> > > Gao Xiang
> > > 
> > 
> > Thanks,
> > 
> > Fabio
> > 
>
  
Gao Xiang Oct. 20, 2022, 2:18 a.m. UTC | #9
On Wed, Oct 19, 2022 at 08:17:05PM +0200, Fabio M. De Francesco wrote:
> On Wednesday, October 19, 2022 1:36:55 AM CEST Gao Xiang wrote:
> > On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> > > On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:
> > 
> > ...
> > 
> > > 
> > > > One of what I need to care is nested kmap() usage,
> > > > some unmap/remap order cannot be simply converted to kmap_local()
> > > 
> > > Correct about nesting. If we map A and then B, we must unmap B and then A.
> > > 
> > > However, as you seem to convey, not always unmappings in right order 
> (stack 
> > > based) is possible, sometimes because very long functions have many loop's 
> > > breaks and many goto exit labels.
> > > 
> > > > but I think
> > > > it's not the case for erofs_bread().  Actually EROFS has one of such 
> nested
> > > > kmap() usage, but I don't really care its performance on HIGHMEM 
> platforms,
> > > > so I think kmap() is still somewhat useful compared to kmap_local() from 
> > > this
> > > > point of view],
> > > 
> 
> fs/erofs conversions are in our (Ira's and my) list. So I'm am happy to see 
> that we can delete some entries because of your changes. :-)
> 
> > > In Btrfs I solved (thanks to David S.' advice) by mapping only one of two 
> > > pages, only the one coming from the page cache. 
> > > 
> > > The other page didn't need the use of kmap_local_page() because it was 
> > > allocated in the filesystem with "alloc_page(GFP_NOFS)". GFP_NOFS won't 
> ever 
> > > allocate from ZONE_HIGHMEM, therefore a direct page_address() could avoid 
> the 
> > > mapping and the nesting issues.
> > > 
> > > Did you check if you may solve the same way? 
> > 
> > That is not simple.  Currently we have compressed pages and decompressed
> > pages (page cache or others), and they can be unmapped when either data
> > is all consumed, so compressed pages can be unmapped first, or
> > decompressed pages can be unmapped first.  That quite depends on which
> > pages goes first.
> > 
> > I think such usage is a quite common pattern for decoder or encoder,
> > you could take a look at z_erofs_lzma_decompress() in
> > fs/erofs/decompressor_lzma.c.  
> 
> I haven't yet read that code, however I may attempt to propose a pattern for 
> solving this kinds of issue, I mean where you don't know which page got mapped 
> last...
> 
> It's not elegant but it may work. You have compressed and decompressed pages 
> and you can't know in advance what page should be unmapped first because you 
> can't know in which order they where mapped, right?
> 

Not really.

> I'd use a variable to save two different values, each representing the last 
> page mapped. When the code gets to the unmapping block (perhaps in an "out" 
> label), just check what that variable contains. Depending on that value, say 
> 'c' or 'd', you will be able to know what must be unmapped for first. An "if / 
> else" can do the work.

That is not the simple nested unmapped case as you said above, I could take
a very brief example:

1. map a decompresed page
2. map a compressed page
3. working
4. decompressed page is all consumed, unmap the current decompressed page
5. map the next decompressed page
6. working
7. decompressed page is all consumed, unmap the current decompressed page
8. map the next decompressed page
9. working
10. compressed page is all consumed, unmap the current compressed page
11. map the next compressed page
12. working
13. ... (anyway, unmap and remap a compressed page or a decompressed page
         in any order.)

until all process is finished.  by using kmap(), it's much simple to
implement this, but kmap_local(), it only complexes the code.

Thanks,
Gao Xiang

> 
> What do you think of this?
>
  
Ira Weiny Oct. 20, 2022, 3:17 a.m. UTC | #10
On Thu, Oct 20, 2022 at 10:18:36AM +0800, Gao Xiang wrote:
> On Wed, Oct 19, 2022 at 08:17:05PM +0200, Fabio M. De Francesco wrote:
> > On Wednesday, October 19, 2022 1:36:55 AM CEST Gao Xiang wrote:
> > > On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> > > > On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:

[snip]

> 
> That is not the simple nested unmapped case as you said above, I could take
> a very brief example:

Building on this.  The uncompressed pages always outnumber the compressed
pages, right?

> 
> 1. map a decompresed page
> 2. map a compressed page

First reverse these because you are going to need to map a new decompressed
page before another compressed page.  So:

1. map compressed
2. map decompressed

Then 4/5 and 7/8 become unmap/map new without issue.

> 3. working
> 4. decompressed page is all consumed, unmap the current decompressed page
> 5. map the next decompressed page
> 6. working
> 7. decompressed page is all consumed, unmap the current decompressed page
> 8. map the next decompressed page
> 9. working

This is more complicated but not overly so.

Simply

9.1 unmap decompressed

> 10. compressed page is all consumed, unmap the current compressed page
> 11. map the next compressed page

11.1 remap decompressed

> 12. working
> 13. ... (anyway, unmap and remap a compressed page or a decompressed page
>          in any order.)
> 
> until all process is finished.  by using kmap(), it's much simple to
> implement this, but kmap_local(), it only complexes the code.

Agreed kmap() is easier but I think this could work.

Basically you keep the compressed mapped first.

I also assume there is also a reverse of this so reverse the pages in that
case.

Thoughts?
Ira
  
Gao Xiang Oct. 20, 2022, 3:50 a.m. UTC | #11
On Wed, Oct 19, 2022 at 08:17:31PM -0700, Ira Weiny wrote:
> On Thu, Oct 20, 2022 at 10:18:36AM +0800, Gao Xiang wrote:
> > On Wed, Oct 19, 2022 at 08:17:05PM +0200, Fabio M. De Francesco wrote:
> > > On Wednesday, October 19, 2022 1:36:55 AM CEST Gao Xiang wrote:
> > > > On Wed, Oct 19, 2022 at 01:21:27AM +0200, Fabio M. De Francesco wrote:
> > > > > On Tuesday, October 18, 2022 11:29:21 PM CEST Gao Xiang wrote:
> 
> [snip]
> 
> > 
> > That is not the simple nested unmapped case as you said above, I could take
> > a very brief example:
> 
> Building on this.  The uncompressed pages always outnumber the compressed
> pages, right?

Yes, it's always true for EROFS case.

I think if the locking order is reversed I could unmap and remap the
pages in the correct order.  But as you said below, it just could work
but complex the code (I think if you have extra time you could check
the code first.)

So as I said before, I don't really care HIGHMEM performance so here
kmap_local() cannot benefit such case.  Anyway, it'd be much better
if kmap() is kept on my side anyway.

Thanks,
Gao Xiang

> 
> > 
> > 1. map a decompresed page
> > 2. map a compressed page
> 
> First reverse these because you are going to need to map a new decompressed
> page before another compressed page.  So:
> 
> 1. map compressed
> 2. map decompressed
> 
> Then 4/5 and 7/8 become unmap/map new without issue.
> 
> > 3. working
> > 4. decompressed page is all consumed, unmap the current decompressed page
> > 5. map the next decompressed page
> > 6. working
> > 7. decompressed page is all consumed, unmap the current decompressed page
> > 8. map the next decompressed page
> > 9. working
> 
> This is more complicated but not overly so.
> 
> Simply
> 
> 9.1 unmap decompressed
> 
> > 10. compressed page is all consumed, unmap the current compressed page
> > 11. map the next compressed page
> 
> 11.1 remap decompressed
> 
> > 12. working
> > 13. ... (anyway, unmap and remap a compressed page or a decompressed page
> >          in any order.)
> > 
> > until all process is finished.  by using kmap(), it's much simple to
> > implement this, but kmap_local(), it only complexes the code.
> 
> Agreed kmap() is easier but I think this could work.
> 
> Basically you keep the compressed mapped first.
> 
> I also assume there is also a reverse of this so reverse the pages in that
> case.
> 
> Thoughts?
> Ira
  
Jingbo Xu Dec. 1, 2022, 10:09 a.m. UTC | #12
On 10/18/22 6:53 PM, Gao Xiang wrote:
> Convert all mapped erofs_bread() users to use kmap_local_page()
> instead of kmap() or kmap_atomic().

Reviewed-and-tested-by: Jingbo Xu <jefflexu@linux.alibaba.com>

> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
>  fs/erofs/data.c     | 8 ++------
>  fs/erofs/internal.h | 3 +--
>  fs/erofs/xattr.c    | 8 ++++----
>  fs/erofs/zmap.c     | 4 ++--
>  4 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index fe8ac0e163f7..fe1ae80284bf 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -13,9 +13,7 @@
>  void erofs_unmap_metabuf(struct erofs_buf *buf)
>  {
>  	if (buf->kmap_type == EROFS_KMAP)
> -		kunmap(buf->page);
> -	else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
> -		kunmap_atomic(buf->base);
> +		kunmap_local(buf->base);
>  	buf->base = NULL;
>  	buf->kmap_type = EROFS_NO_KMAP;
>  }
> @@ -54,9 +52,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
>  	}
>  	if (buf->kmap_type == EROFS_NO_KMAP) {
>  		if (type == EROFS_KMAP)
> -			buf->base = kmap(page);
> -		else if (type == EROFS_KMAP_ATOMIC)
> -			buf->base = kmap_atomic(page);
> +			buf->base = kmap_local_page(page);
>  		buf->kmap_type = type;
>  	} else if (buf->kmap_type != type) {
>  		DBG_BUGON(1);
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 1701df48c446..67dc8e177211 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -253,8 +253,7 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
>  
>  enum erofs_kmap_type {
>  	EROFS_NO_KMAP,		/* don't map the buffer */
> -	EROFS_KMAP,		/* use kmap() to map the buffer */
> -	EROFS_KMAP_ATOMIC,	/* use kmap_atomic() to map the buffer */
> +	EROFS_KMAP,		/* use kmap_local_page() to map the buffer */
>  };
>  
>  struct erofs_buf {
> diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
> index 8106bcb5a38d..a62fb8a3318a 100644
> --- a/fs/erofs/xattr.c
> +++ b/fs/erofs/xattr.c
> @@ -148,7 +148,7 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
>  
>  	it->blkaddr += erofs_blknr(it->ofs);
>  	it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr,
> -				       EROFS_KMAP_ATOMIC);
> +				       EROFS_KMAP);
>  	if (IS_ERR(it->kaddr))
>  		return PTR_ERR(it->kaddr);
>  	it->ofs = erofs_blkoff(it->ofs);
> @@ -174,7 +174,7 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
>  	it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
>  
>  	it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
> -				       EROFS_KMAP_ATOMIC);
> +				       EROFS_KMAP);
>  	if (IS_ERR(it->kaddr))
>  		return PTR_ERR(it->kaddr);
>  	return vi->xattr_isize - xattr_header_sz;
> @@ -368,7 +368,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
>  
>  		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
>  		it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
> -						  EROFS_KMAP_ATOMIC);
> +						  EROFS_KMAP);
>  		if (IS_ERR(it->it.kaddr))
>  			return PTR_ERR(it->it.kaddr);
>  		it->it.blkaddr = blkaddr;
> @@ -580,7 +580,7 @@ static int shared_listxattr(struct listxattr_iter *it)
>  
>  		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
>  		it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
> -						  EROFS_KMAP_ATOMIC);
> +						  EROFS_KMAP);
>  		if (IS_ERR(it->it.kaddr))
>  			return PTR_ERR(it->it.kaddr);
>  		it->it.blkaddr = blkaddr;
> diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
> index 0bb66927e3d0..749a5ac943f4 100644
> --- a/fs/erofs/zmap.c
> +++ b/fs/erofs/zmap.c
> @@ -178,7 +178,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
>  	unsigned int advise, type;
>  
>  	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
> -				      erofs_blknr(pos), EROFS_KMAP_ATOMIC);
> +				      erofs_blknr(pos), EROFS_KMAP);
>  	if (IS_ERR(m->kaddr))
>  		return PTR_ERR(m->kaddr);
>  
> @@ -416,7 +416,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
>  out:
>  	pos += lcn * (1 << amortizedshift);
>  	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
> -				      erofs_blknr(pos), EROFS_KMAP_ATOMIC);
> +				      erofs_blknr(pos), EROFS_KMAP);
>  	if (IS_ERR(m->kaddr))
>  		return PTR_ERR(m->kaddr);
>  	return unpack_compacted_index(m, amortizedshift, pos, lookahead);
  
Chao Yu Dec. 7, 2022, 1:16 a.m. UTC | #13
On 2022/10/18 18:53, Gao Xiang wrote:
> Convert all mapped erofs_bread() users to use kmap_local_page()
> instead of kmap() or kmap_atomic().
> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>

Reviewed-by: Chao Yu <chao@kernel.org>

Thanks,
  

Patch

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index fe8ac0e163f7..fe1ae80284bf 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -13,9 +13,7 @@ 
 void erofs_unmap_metabuf(struct erofs_buf *buf)
 {
 	if (buf->kmap_type == EROFS_KMAP)
-		kunmap(buf->page);
-	else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
-		kunmap_atomic(buf->base);
+		kunmap_local(buf->base);
 	buf->base = NULL;
 	buf->kmap_type = EROFS_NO_KMAP;
 }
@@ -54,9 +52,7 @@  void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
 	}
 	if (buf->kmap_type == EROFS_NO_KMAP) {
 		if (type == EROFS_KMAP)
-			buf->base = kmap(page);
-		else if (type == EROFS_KMAP_ATOMIC)
-			buf->base = kmap_atomic(page);
+			buf->base = kmap_local_page(page);
 		buf->kmap_type = type;
 	} else if (buf->kmap_type != type) {
 		DBG_BUGON(1);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 1701df48c446..67dc8e177211 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -253,8 +253,7 @@  static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
 
 enum erofs_kmap_type {
 	EROFS_NO_KMAP,		/* don't map the buffer */
-	EROFS_KMAP,		/* use kmap() to map the buffer */
-	EROFS_KMAP_ATOMIC,	/* use kmap_atomic() to map the buffer */
+	EROFS_KMAP,		/* use kmap_local_page() to map the buffer */
 };
 
 struct erofs_buf {
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 8106bcb5a38d..a62fb8a3318a 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -148,7 +148,7 @@  static inline int xattr_iter_fixup(struct xattr_iter *it)
 
 	it->blkaddr += erofs_blknr(it->ofs);
 	it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr,
-				       EROFS_KMAP_ATOMIC);
+				       EROFS_KMAP);
 	if (IS_ERR(it->kaddr))
 		return PTR_ERR(it->kaddr);
 	it->ofs = erofs_blkoff(it->ofs);
@@ -174,7 +174,7 @@  static int inline_xattr_iter_begin(struct xattr_iter *it,
 	it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
 
 	it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
-				       EROFS_KMAP_ATOMIC);
+				       EROFS_KMAP);
 	if (IS_ERR(it->kaddr))
 		return PTR_ERR(it->kaddr);
 	return vi->xattr_isize - xattr_header_sz;
@@ -368,7 +368,7 @@  static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
 
 		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
 		it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
-						  EROFS_KMAP_ATOMIC);
+						  EROFS_KMAP);
 		if (IS_ERR(it->it.kaddr))
 			return PTR_ERR(it->it.kaddr);
 		it->it.blkaddr = blkaddr;
@@ -580,7 +580,7 @@  static int shared_listxattr(struct listxattr_iter *it)
 
 		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
 		it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
-						  EROFS_KMAP_ATOMIC);
+						  EROFS_KMAP);
 		if (IS_ERR(it->it.kaddr))
 			return PTR_ERR(it->it.kaddr);
 		it->it.blkaddr = blkaddr;
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 0bb66927e3d0..749a5ac943f4 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -178,7 +178,7 @@  static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 	unsigned int advise, type;
 
 	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
-				      erofs_blknr(pos), EROFS_KMAP_ATOMIC);
+				      erofs_blknr(pos), EROFS_KMAP);
 	if (IS_ERR(m->kaddr))
 		return PTR_ERR(m->kaddr);
 
@@ -416,7 +416,7 @@  static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 out:
 	pos += lcn * (1 << amortizedshift);
 	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
-				      erofs_blknr(pos), EROFS_KMAP_ATOMIC);
+				      erofs_blknr(pos), EROFS_KMAP);
 	if (IS_ERR(m->kaddr))
 		return PTR_ERR(m->kaddr);
 	return unpack_compacted_index(m, amortizedshift, pos, lookahead);