[17/21] filemap: add FGP_CREAT_ONLY

Message ID 20240227232100.478238-18-pbonzini@redhat.com
State New
Headers
Series TDX/SNP part 1 of n, for 6.9 |

Commit Message

Paolo Bonzini Feb. 27, 2024, 11:20 p.m. UTC
  Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/pagemap.h | 2 ++
 mm/filemap.c            | 4 ++++
 2 files changed, 6 insertions(+)
  

Comments

Sean Christopherson Feb. 28, 2024, 2:14 a.m. UTC | #1
On Tue, Feb 27, 2024, Paolo Bonzini wrote:

This needs a changelog, and also needs to be Cc'd to someone(s) that can give it
a thumbs up.

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/linux/pagemap.h | 2 ++
>  mm/filemap.c            | 4 ++++
>  2 files changed, 6 insertions(+)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 2df35e65557d..e8ac0b32f84d 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
>   * * %FGP_CREAT - If no folio is present then a new folio is allocated,
>   *   added to the page cache and the VM's LRU list.  The folio is
>   *   returned locked.
> + * * %FGP_CREAT_ONLY - Fail if a folio is not present
>   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
>   *   folio is already in cache.  If the folio was allocated, unlock it
>   *   before returning so the caller can do the same dance.
> @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t;
>  #define FGP_NOWAIT		((__force fgf_t)0x00000020)
>  #define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
>  #define FGP_STABLE		((__force fgf_t)0x00000080)
> +#define FGP_CREAT_ONLY		((__force fgf_t)0x00000100)
>  #define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */
>  
>  #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 750e779c23db..d5107bd0cd09 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  		folio = NULL;
>  	if (!folio)
>  		goto no_page;
> +	if (fgp_flags & FGP_CREAT_ONLY) {
> +		folio_put(folio);
> +		return ERR_PTR(-EEXIST);
> +	}
>  
>  	if (fgp_flags & FGP_LOCK) {
>  		if (fgp_flags & FGP_NOWAIT) {
> -- 
> 2.39.0
> 
>
  
Yosry Ahmed Feb. 28, 2024, 2:17 a.m. UTC | #2
On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Feb 27, 2024, Paolo Bonzini wrote:
>
> This needs a changelog, and also needs to be Cc'd to someone(s) that can give it
> a thumbs up.

+Matthew Wilcox

>
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  include/linux/pagemap.h | 2 ++
> >  mm/filemap.c            | 4 ++++
> >  2 files changed, 6 insertions(+)
> >
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index 2df35e65557d..e8ac0b32f84d 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
> >   * * %FGP_CREAT - If no folio is present then a new folio is allocated,
> >   *   added to the page cache and the VM's LRU list.  The folio is
> >   *   returned locked.
> > + * * %FGP_CREAT_ONLY - Fail if a folio is not present
> >   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
> >   *   folio is already in cache.  If the folio was allocated, unlock it
> >   *   before returning so the caller can do the same dance.
> > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t;
> >  #define FGP_NOWAIT           ((__force fgf_t)0x00000020)
> >  #define FGP_FOR_MMAP         ((__force fgf_t)0x00000040)
> >  #define FGP_STABLE           ((__force fgf_t)0x00000080)
> > +#define FGP_CREAT_ONLY               ((__force fgf_t)0x00000100)
> >  #define FGF_GET_ORDER(fgf)   (((__force unsigned)fgf) >> 26) /* top 6 bits */
> >
> >  #define FGP_WRITEBEGIN               (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 750e779c23db..d5107bd0cd09 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> >               folio = NULL;
> >       if (!folio)
> >               goto no_page;
> > +     if (fgp_flags & FGP_CREAT_ONLY) {
> > +             folio_put(folio);
> > +             return ERR_PTR(-EEXIST);
> > +     }
> >
> >       if (fgp_flags & FGP_LOCK) {
> >               if (fgp_flags & FGP_NOWAIT) {
> > --
> > 2.39.0
> >
> >
>
  
Matthew Wilcox Feb. 28, 2024, 1:15 p.m. UTC | #3
On Tue, Feb 27, 2024 at 06:17:34PM -0800, Yosry Ahmed wrote:
> On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Tue, Feb 27, 2024, Paolo Bonzini wrote:
> >
> > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it
> > a thumbs up.
> 
> +Matthew Wilcox

If only there were an entry in MAINTAINERS for filemap.c ...

This looks bogus to me, and if it's not bogus, it's incomplete.
But it's hard to judge without a commit message that describes what it's
supposed to mean.

> >
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > ---
> > >  include/linux/pagemap.h | 2 ++
> > >  mm/filemap.c            | 4 ++++
> > >  2 files changed, 6 insertions(+)
> > >
> > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > > index 2df35e65557d..e8ac0b32f84d 100644
> > > --- a/include/linux/pagemap.h
> > > +++ b/include/linux/pagemap.h
> > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
> > >   * * %FGP_CREAT - If no folio is present then a new folio is allocated,
> > >   *   added to the page cache and the VM's LRU list.  The folio is
> > >   *   returned locked.
> > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present
> > >   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
> > >   *   folio is already in cache.  If the folio was allocated, unlock it
> > >   *   before returning so the caller can do the same dance.
> > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t;
> > >  #define FGP_NOWAIT           ((__force fgf_t)0x00000020)
> > >  #define FGP_FOR_MMAP         ((__force fgf_t)0x00000040)
> > >  #define FGP_STABLE           ((__force fgf_t)0x00000080)
> > > +#define FGP_CREAT_ONLY               ((__force fgf_t)0x00000100)
> > >  #define FGF_GET_ORDER(fgf)   (((__force unsigned)fgf) >> 26) /* top 6 bits */
> > >
> > >  #define FGP_WRITEBEGIN               (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
> > > diff --git a/mm/filemap.c b/mm/filemap.c
> > > index 750e779c23db..d5107bd0cd09 100644
> > > --- a/mm/filemap.c
> > > +++ b/mm/filemap.c
> > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> > >               folio = NULL;
> > >       if (!folio)
> > >               goto no_page;
> > > +     if (fgp_flags & FGP_CREAT_ONLY) {
> > > +             folio_put(folio);
> > > +             return ERR_PTR(-EEXIST);
> > > +     }
> > >
> > >       if (fgp_flags & FGP_LOCK) {
> > >               if (fgp_flags & FGP_NOWAIT) {
> > > --
> > > 2.39.0
> > >
> > >
> >
  
Paolo Bonzini Feb. 28, 2024, 1:28 p.m. UTC | #4
On Wed, Feb 28, 2024 at 2:15 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Tue, Feb 27, 2024 at 06:17:34PM -0800, Yosry Ahmed wrote:
> > On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > On Tue, Feb 27, 2024, Paolo Bonzini wrote:
> > >
> > > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it
> > > a thumbs up.
> >
> > +Matthew Wilcox
>
> If only there were an entry in MAINTAINERS for filemap.c ...

Not CCing you (or mm in general) was intentional because I first
wanted a review of the KVM APIs; of course I wouldn't have committed
it without an Acked-by. But yeah, not writing the changelog yet was
pure laziness.

Since you're here: KVM would like to add a ioctl to encrypt and
install a page into guest_memfd, in preparation for launching an
encrypted guest. For this API we want to rule out the possibility of
overwriting a page that is already in the guest_memfd's filemap,
therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT
into__filemap_get_folio. Do you think this is bogus...

> This looks bogus to me, and if it's not bogus, it's incomplete.

.. or if not, what incompleteness can you spot?

Thanks,

Paolo

> But it's hard to judge without a commit message that describes what it's
> supposed to mean.
>
> > >
> > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > > ---
> > > >  include/linux/pagemap.h | 2 ++
> > > >  mm/filemap.c            | 4 ++++
> > > >  2 files changed, 6 insertions(+)
> > > >
> > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > > > index 2df35e65557d..e8ac0b32f84d 100644
> > > > --- a/include/linux/pagemap.h
> > > > +++ b/include/linux/pagemap.h
> > > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
> > > >   * * %FGP_CREAT - If no folio is present then a new folio is allocated,
> > > >   *   added to the page cache and the VM's LRU list.  The folio is
> > > >   *   returned locked.
> > > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present
> > > >   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
> > > >   *   folio is already in cache.  If the folio was allocated, unlock it
> > > >   *   before returning so the caller can do the same dance.
> > > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t;
> > > >  #define FGP_NOWAIT           ((__force fgf_t)0x00000020)
> > > >  #define FGP_FOR_MMAP         ((__force fgf_t)0x00000040)
> > > >  #define FGP_STABLE           ((__force fgf_t)0x00000080)
> > > > +#define FGP_CREAT_ONLY               ((__force fgf_t)0x00000100)
> > > >  #define FGF_GET_ORDER(fgf)   (((__force unsigned)fgf) >> 26) /* top 6 bits */
> > > >
> > > >  #define FGP_WRITEBEGIN               (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
> > > > diff --git a/mm/filemap.c b/mm/filemap.c
> > > > index 750e779c23db..d5107bd0cd09 100644
> > > > --- a/mm/filemap.c
> > > > +++ b/mm/filemap.c
> > > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> > > >               folio = NULL;
> > > >       if (!folio)
> > > >               goto no_page;
> > > > +     if (fgp_flags & FGP_CREAT_ONLY) {
> > > > +             folio_put(folio);
> > > > +             return ERR_PTR(-EEXIST);
> > > > +     }
> > > >
> > > >       if (fgp_flags & FGP_LOCK) {
> > > >               if (fgp_flags & FGP_NOWAIT) {
> > > > --
> > > > 2.39.0
> > > >
> > > >
> > >
>
  
Matthew Wilcox Feb. 28, 2024, 7:24 p.m. UTC | #5
On Wed, Feb 28, 2024 at 02:28:45PM +0100, Paolo Bonzini wrote:
> Since you're here: KVM would like to add a ioctl to encrypt and
> install a page into guest_memfd, in preparation for launching an
> encrypted guest. For this API we want to rule out the possibility of
> overwriting a page that is already in the guest_memfd's filemap,
> therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT
> into__filemap_get_folio. Do you think this is bogus...

Would it work to start out by either asserting the memfd is empty of
pages, or by evicting any existing pages?  Both those seem nicer than
starting, realising you've got some unencrypted memory and aborting.

> > This looks bogus to me, and if it's not bogus, it's incomplete.
> 
> ... or if not, what incompleteness can you spot?

The part where we race another caller passing FGP_CREAT_ONLY and one gets
an EEXIST back from filemap_add_folio().  Maybe that's not something
that can happen in your use case, but it's at least semantics that
need documenting.
  
Paolo Bonzini Feb. 28, 2024, 8:17 p.m. UTC | #6
On Wed, Feb 28, 2024 at 8:24 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, Feb 28, 2024 at 02:28:45PM +0100, Paolo Bonzini wrote:
> > Since you're here: KVM would like to add a ioctl to encrypt and
> > install a page into guest_memfd, in preparation for launching an
> > encrypted guest. For this API we want to rule out the possibility of
> > overwriting a page that is already in the guest_memfd's filemap,
> > therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT
> > into__filemap_get_folio. Do you think this is bogus...
>
> Would it work to start out by either asserting the memfd is empty of
> pages, or by evicting any existing pages?  Both those seem nicer than
> starting, realising you've got some unencrypted memory and aborting.

Unfortunately it would be quite ugly to force userspace to do all the
initialization in one go. For example, there are different kinds of
pages that probably would be initialized at different points (e.g.
before vs. after vCPUs are created, because the initial vCPU state is
also encrypted).

The thing that I want to protect against is userspace trying to
initialize the same encrypted page twice.

> > > This looks bogus to me, and if it's not bogus, it's incomplete.
> >
> > ... or if not, what incompleteness can you spot?
>
> The part where we race another caller passing FGP_CREAT_ONLY and one gets
> an EEXIST back from filemap_add_folio().  Maybe that's not something
> that can happen in your use case, but it's at least semantics that
> need documenting.

From the point of view of filemap_add_folio(), one of the racers wins
and one fails. It doesn't matter to filemap.c if the missing
synchronization is in the kernel or in userspace. In the case of KVM,
the ioctl will return the number of pages before it found an existing
page, or -EEXIST if that number is zero (similar to what nonblocking
read does with EAGAIN).

I'll improve the documentation and changelog and make sure to Cc you
on the next version.

Thanks again!

Paolo
  
Xu Yilun March 4, 2024, 2:55 a.m. UTC | #7
On Wed, Feb 28, 2024 at 02:28:45PM +0100, Paolo Bonzini wrote:
> On Wed, Feb 28, 2024 at 2:15 PM Matthew Wilcox <willy@infradead.org> wrote:
> >
> > On Tue, Feb 27, 2024 at 06:17:34PM -0800, Yosry Ahmed wrote:
> > > On Tue, Feb 27, 2024 at 6:15 PM Sean Christopherson <seanjc@google.com> wrote:
> > > >
> > > > On Tue, Feb 27, 2024, Paolo Bonzini wrote:
> > > >
> > > > This needs a changelog, and also needs to be Cc'd to someone(s) that can give it
> > > > a thumbs up.
> > >
> > > +Matthew Wilcox
> >
> > If only there were an entry in MAINTAINERS for filemap.c ...
> 
> Not CCing you (or mm in general) was intentional because I first
> wanted a review of the KVM APIs; of course I wouldn't have committed
> it without an Acked-by. But yeah, not writing the changelog yet was
> pure laziness.
> 
> Since you're here: KVM would like to add a ioctl to encrypt and
> install a page into guest_memfd, in preparation for launching an
> encrypted guest. For this API we want to rule out the possibility of
> overwriting a page that is already in the guest_memfd's filemap,
> therefore this API would pass FGP_CREAT_ONLY|FGP_CREAT
> into__filemap_get_folio. Do you think this is bogus...
> 
> > This looks bogus to me, and if it's not bogus, it's incomplete.
> 
> ... or if not, what incompleteness can you spot?
> 
> Thanks,
> 
> Paolo
> 
> > But it's hard to judge without a commit message that describes what it's
> > supposed to mean.
> >
> > > >
> > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > > > ---
> > > > >  include/linux/pagemap.h | 2 ++
> > > > >  mm/filemap.c            | 4 ++++
> > > > >  2 files changed, 6 insertions(+)
> > > > >
> > > > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > > > > index 2df35e65557d..e8ac0b32f84d 100644
> > > > > --- a/include/linux/pagemap.h
> > > > > +++ b/include/linux/pagemap.h
> > > > > @@ -586,6 +586,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
> > > > >   * * %FGP_CREAT - If no folio is present then a new folio is allocated,
> > > > >   *   added to the page cache and the VM's LRU list.  The folio is
> > > > >   *   returned locked.
> > > > > + * * %FGP_CREAT_ONLY - Fail if a folio is not present
                                                     ^
So should be: Fail if a folio is present.

Thanks,
Yilun

> > > > >   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
> > > > >   *   folio is already in cache.  If the folio was allocated, unlock it
> > > > >   *   before returning so the caller can do the same dance.
> > > > > @@ -606,6 +607,7 @@ typedef unsigned int __bitwise fgf_t;
> > > > >  #define FGP_NOWAIT           ((__force fgf_t)0x00000020)
> > > > >  #define FGP_FOR_MMAP         ((__force fgf_t)0x00000040)
> > > > >  #define FGP_STABLE           ((__force fgf_t)0x00000080)
> > > > > +#define FGP_CREAT_ONLY               ((__force fgf_t)0x00000100)
> > > > >  #define FGF_GET_ORDER(fgf)   (((__force unsigned)fgf) >> 26) /* top 6 bits */
> > > > >
> > > > >  #define FGP_WRITEBEGIN               (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
> > > > > diff --git a/mm/filemap.c b/mm/filemap.c
> > > > > index 750e779c23db..d5107bd0cd09 100644
> > > > > --- a/mm/filemap.c
> > > > > +++ b/mm/filemap.c
> > > > > @@ -1854,6 +1854,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> > > > >               folio = NULL;
> > > > >       if (!folio)
> > > > >               goto no_page;
> > > > > +     if (fgp_flags & FGP_CREAT_ONLY) {
> > > > > +             folio_put(folio);
> > > > > +             return ERR_PTR(-EEXIST);
> > > > > +     }
> > > > >
> > > > >       if (fgp_flags & FGP_LOCK) {
> > > > >               if (fgp_flags & FGP_NOWAIT) {
> > > > > --
> > > > > 2.39.0
> > > > >
> > > > >
> > > >
> >
> 
>
  

Patch

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..e8ac0b32f84d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -586,6 +586,7 @@  pgoff_t page_cache_prev_miss(struct address_space *mapping,
  * * %FGP_CREAT - If no folio is present then a new folio is allocated,
  *   added to the page cache and the VM's LRU list.  The folio is
  *   returned locked.
+ * * %FGP_CREAT_ONLY - Fail if a folio is not present
  * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
  *   folio is already in cache.  If the folio was allocated, unlock it
  *   before returning so the caller can do the same dance.
@@ -606,6 +607,7 @@  typedef unsigned int __bitwise fgf_t;
 #define FGP_NOWAIT		((__force fgf_t)0x00000020)
 #define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
 #define FGP_STABLE		((__force fgf_t)0x00000080)
+#define FGP_CREAT_ONLY		((__force fgf_t)0x00000100)
 #define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */
 
 #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db..d5107bd0cd09 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1854,6 +1854,10 @@  struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		folio = NULL;
 	if (!folio)
 		goto no_page;
+	if (fgp_flags & FGP_CREAT_ONLY) {
+		folio_put(folio);
+		return ERR_PTR(-EEXIST);
+	}
 
 	if (fgp_flags & FGP_LOCK) {
 		if (fgp_flags & FGP_NOWAIT) {