[v4,2/5] fs: Add fchmodat2()

Message ID f2a846ef495943c5d101011eebcf01179d0c7b61.1689092120.git.legion@kernel.org
State New
Headers
Series Add a new fchmodat2() syscall |

Commit Message

Alexey Gladkov July 11, 2023, 4:16 p.m. UTC
  On the userspace side fchmodat(3) is implemented as a wrapper
function which implements the POSIX-specified interface. This
interface differs from the underlying kernel system call, which does not
have a flags argument. Most implementations require procfs [1][2].

There doesn't appear to be a good userspace workaround for this issue
but the implementation in the kernel is pretty straight-forward.

The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
unlike existing fchmodat.

[1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
[2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28

Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 fs/open.c                | 18 ++++++++++++++----
 include/linux/syscalls.h |  2 ++
 2 files changed, 16 insertions(+), 4 deletions(-)
  

Comments

Christian Brauner July 11, 2023, 5:05 p.m. UTC | #1
On Tue, Jul 11, 2023 at 06:16:04PM +0200, Alexey Gladkov wrote:
> On the userspace side fchmodat(3) is implemented as a wrapper
> function which implements the POSIX-specified interface. This
> interface differs from the underlying kernel system call, which does not
> have a flags argument. Most implementations require procfs [1][2].
> 
> There doesn't appear to be a good userspace workaround for this issue
> but the implementation in the kernel is pretty straight-forward.
> 
> The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
> unlike existing fchmodat.
> 
> [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
> [2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28
> 
> Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
> Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
> Signed-off-by: Alexey Gladkov <legion@kernel.org>
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  fs/open.c                | 18 ++++++++++++++----
>  include/linux/syscalls.h |  2 ++
>  2 files changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/open.c b/fs/open.c
> index 0c55c8e7f837..39a7939f0d00 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -671,11 +671,11 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
>  	return err;
>  }
>  
> -static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, int lookup_flags)

Should all be unsigned instead of int here for flags. We also had a
documentation update to that effect but smh never sent it.
user_path_at() itself takes an unsigned as well.

I'll fix that up though.
  
Aleksa Sarai July 25, 2023, 4:36 p.m. UTC | #2
On 2023-07-11, Alexey Gladkov <legion@kernel.org> wrote:
> On the userspace side fchmodat(3) is implemented as a wrapper
> function which implements the POSIX-specified interface. This
> interface differs from the underlying kernel system call, which does not
> have a flags argument. Most implementations require procfs [1][2].
> 
> There doesn't appear to be a good userspace workaround for this issue
> but the implementation in the kernel is pretty straight-forward.
> 
> The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
> unlike existing fchmodat.
> 
> [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
> [2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28
> 
> Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
> Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
> Signed-off-by: Alexey Gladkov <legion@kernel.org>
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  fs/open.c                | 18 ++++++++++++++----
>  include/linux/syscalls.h |  2 ++
>  2 files changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/open.c b/fs/open.c
> index 0c55c8e7f837..39a7939f0d00 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -671,11 +671,11 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
>  	return err;
>  }
>  
> -static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, int lookup_flags)

I think it'd be much neater to do the conversion of AT_ flags here and
pass 0 as a flags argument for all of the wrappers (this is how most of
the other xyz(), fxyz(), fxyzat() syscall wrappers are done IIRC).

>  {
>  	struct path path;
>  	int error;
> -	unsigned int lookup_flags = LOOKUP_FOLLOW;
> +
>  retry:
>  	error = user_path_at(dfd, filename, lookup_flags, &path);
>  	if (!error) {
> @@ -689,15 +689,25 @@ static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
>  	return error;
>  }
>  
> +SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
> +		umode_t, mode, int, flags)
> +{
> +	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
> +		return -EINVAL;

We almost certainly want to support AT_EMPTY_PATH at the same time.
Otherwise userspace will still need to go through /proc when trying to
chmod a file handle they have.

> +
> +	return do_fchmodat(dfd, filename, mode,
> +			flags & AT_SYMLINK_NOFOLLOW ? 0 : LOOKUP_FOLLOW);
> +}
> +
>  SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
>  		umode_t, mode)
>  {
> -	return do_fchmodat(dfd, filename, mode);
> +	return do_fchmodat(dfd, filename, mode, LOOKUP_FOLLOW);
>  }
>  
>  SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
>  {
> -	return do_fchmodat(AT_FDCWD, filename, mode);
> +	return do_fchmodat(AT_FDCWD, filename, mode, LOOKUP_FOLLOW);
>  }
>  
>  /*
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 584f404bf868..6e852279fbc3 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -440,6 +440,8 @@ asmlinkage long sys_chroot(const char __user *filename);
>  asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
>  asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
>  			     umode_t mode);
> +asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
> +			     umode_t mode, int flags);
>  asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
>  			     gid_t group, int flag);
>  asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
> -- 
> 2.33.8
>
  
Alexey Gladkov July 26, 2023, 1:45 p.m. UTC | #3
On Wed, Jul 26, 2023 at 02:36:25AM +1000, Aleksa Sarai wrote:
> On 2023-07-11, Alexey Gladkov <legion@kernel.org> wrote:
> > On the userspace side fchmodat(3) is implemented as a wrapper
> > function which implements the POSIX-specified interface. This
> > interface differs from the underlying kernel system call, which does not
> > have a flags argument. Most implementations require procfs [1][2].
> > 
> > There doesn't appear to be a good userspace workaround for this issue
> > but the implementation in the kernel is pretty straight-forward.
> > 
> > The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
> > unlike existing fchmodat.
> > 
> > [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
> > [2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28
> > 
> > Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
> > Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
> > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > Acked-by: Arnd Bergmann <arnd@arndb.de>
> > ---
> >  fs/open.c                | 18 ++++++++++++++----
> >  include/linux/syscalls.h |  2 ++
> >  2 files changed, 16 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/open.c b/fs/open.c
> > index 0c55c8e7f837..39a7939f0d00 100644
> > --- a/fs/open.c
> > +++ b/fs/open.c
> > @@ -671,11 +671,11 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
> >  	return err;
> >  }
> >  
> > -static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> > +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, int lookup_flags)
> 
> I think it'd be much neater to do the conversion of AT_ flags here and
> pass 0 as a flags argument for all of the wrappers (this is how most of
> the other xyz(), fxyz(), fxyzat() syscall wrappers are done IIRC).

I just addressed the Al Viro's suggestion.

https://lore.kernel.org/lkml/20190717014802.GS17978@ZenIV.linux.org.uk/

> >  {
> >  	struct path path;
> >  	int error;
> > -	unsigned int lookup_flags = LOOKUP_FOLLOW;
> > +
> >  retry:
> >  	error = user_path_at(dfd, filename, lookup_flags, &path);
> >  	if (!error) {
> > @@ -689,15 +689,25 @@ static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> >  	return error;
> >  }
> >  
> > +SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
> > +		umode_t, mode, int, flags)
> > +{
> > +	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
> > +		return -EINVAL;
> 
> We almost certainly want to support AT_EMPTY_PATH at the same time.
> Otherwise userspace will still need to go through /proc when trying to
> chmod a file handle they have.

I'm not sure I understand. Can you explain what you mean?
  
David Laight July 27, 2023, 9:01 a.m. UTC | #4
From: Aleksa Sarai
> Sent: 25 July 2023 17:36
...
> We almost certainly want to support AT_EMPTY_PATH at the same time.
> Otherwise userspace will still need to go through /proc when trying to
> chmod a file handle they have.

That can't be allowed.

Just because a process has a file open and write access to
the directory that contains it doesn't mean they are allowed
to change the file permissions.

They also need directory search access from a directory
they have open through to the containing directory.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
Christian Brauner July 27, 2023, 10:26 a.m. UTC | #5
> > I think it'd be much neater to do the conversion of AT_ flags here and
> > pass 0 as a flags argument for all of the wrappers (this is how most of
> > the other xyz(), fxyz(), fxyzat() syscall wrappers are done IIRC).

I've fixed that up in-tree.
  
Andreas Schwab July 27, 2023, 4:28 p.m. UTC | #6
On Jul 27 2023, David Laight wrote:

> From: Aleksa Sarai
>> Sent: 25 July 2023 17:36
> ...
>> We almost certainly want to support AT_EMPTY_PATH at the same time.
>> Otherwise userspace will still need to go through /proc when trying to
>> chmod a file handle they have.
>
> That can't be allowed.

IIUC, fchmodat2(fd, "", m, AT_EMPTY_PATH) is equivalent to fchmod(fd,
m).  With that, new architectures only need to implement the fchmodat2
syscall to cover all chmod variants.
  
dalias@libc.org July 27, 2023, 4:31 p.m. UTC | #7
On Thu, Jul 27, 2023 at 09:01:06AM +0000, David Laight wrote:
> From: Aleksa Sarai
> > Sent: 25 July 2023 17:36
> ....
> > We almost certainly want to support AT_EMPTY_PATH at the same time.
> > Otherwise userspace will still need to go through /proc when trying to
> > chmod a file handle they have.
> 
> That can't be allowed.
> 
> Just because a process has a file open and write access to
> the directory that contains it doesn't mean they are allowed
> to change the file permissions.
> 
> They also need directory search access from a directory
> they have open through to the containing directory.

Am I missing something? How is this different from fchmod?

Rich
  
Christian Brauner July 27, 2023, 5:02 p.m. UTC | #8
On Thu, Jul 27, 2023 at 06:28:53PM +0200, Andreas Schwab wrote:
> On Jul 27 2023, David Laight wrote:
> 
> > From: Aleksa Sarai
> >> Sent: 25 July 2023 17:36
> > ...
> >> We almost certainly want to support AT_EMPTY_PATH at the same time.
> >> Otherwise userspace will still need to go through /proc when trying to
> >> chmod a file handle they have.
> >
> > That can't be allowed.
> 
> IIUC, fchmodat2(fd, "", m, AT_EMPTY_PATH) is equivalent to fchmod(fd,
> m).  With that, new architectures only need to implement the fchmodat2
> syscall to cover all chmod variants.

There's a difference though as fchmod() doesn't work with O_PATH file
descriptors while AT_EMPTY_PATH does. Similar to how fchown() doesn't
work with O_PATH file descriptors.

However, we do allow AT_EMPTY_PATH with fchownat() so there's no reason
to not allow it for fchmodat2().

But it's a bit of a shame that O_PATH looks less and less like O_PATH.
It came from can-do-barely-anything to can-do-quite-a-lot-of-things over
the years.

In any case, AT_EMPTY_PATH for fchmodat2() can be an additional patch on
top.
  
Aleksa Sarai July 27, 2023, 5:12 p.m. UTC | #9
On 2023-07-26, Alexey Gladkov <legion@kernel.org> wrote:
> On Wed, Jul 26, 2023 at 02:36:25AM +1000, Aleksa Sarai wrote:
> > On 2023-07-11, Alexey Gladkov <legion@kernel.org> wrote:
> > > On the userspace side fchmodat(3) is implemented as a wrapper
> > > function which implements the POSIX-specified interface. This
> > > interface differs from the underlying kernel system call, which does not
> > > have a flags argument. Most implementations require procfs [1][2].
> > > 
> > > There doesn't appear to be a good userspace workaround for this issue
> > > but the implementation in the kernel is pretty straight-forward.
> > > 
> > > The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
> > > unlike existing fchmodat.
> > > 
> > > [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
> > > [2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28
> > > 
> > > Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
> > > Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
> > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > Acked-by: Arnd Bergmann <arnd@arndb.de>
> > > ---
> > >  fs/open.c                | 18 ++++++++++++++----
> > >  include/linux/syscalls.h |  2 ++
> > >  2 files changed, 16 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/open.c b/fs/open.c
> > > index 0c55c8e7f837..39a7939f0d00 100644
> > > --- a/fs/open.c
> > > +++ b/fs/open.c
> > > @@ -671,11 +671,11 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
> > >  	return err;
> > >  }
> > >  
> > > -static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> > > +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, int lookup_flags)
> > 
> > I think it'd be much neater to do the conversion of AT_ flags here and
> > pass 0 as a flags argument for all of the wrappers (this is how most of
> > the other xyz(), fxyz(), fxyzat() syscall wrappers are done IIRC).
> 
> I just addressed the Al Viro's suggestion.
> 
> https://lore.kernel.org/lkml/20190717014802.GS17978@ZenIV.linux.org.uk/

I think Al misspoke, because he also said "pass it 0 as an extra
argument", but you actually have to pass LOOKUP_FOLLOW from the
wrappers. If you look at how faccessat2 and faccessat are implemented,
it follows the behaviour I described.

> > >  {
> > >  	struct path path;
> > >  	int error;
> > > -	unsigned int lookup_flags = LOOKUP_FOLLOW;
> > > +
> > >  retry:
> > >  	error = user_path_at(dfd, filename, lookup_flags, &path);
> > >  	if (!error) {
> > > @@ -689,15 +689,25 @@ static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> > >  	return error;
> > >  }
> > >  
> > > +SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
> > > +		umode_t, mode, int, flags)
> > > +{
> > > +	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
> > > +		return -EINVAL;
> > 
> > We almost certainly want to support AT_EMPTY_PATH at the same time.
> > Otherwise userspace will still need to go through /proc when trying to
> > chmod a file handle they have.
> 
> I'm not sure I understand. Can you explain what you mean?

You should add support for AT_EMPTY_PATH (LOOKUP_EMPTY) as well as
AT_SYMLINK_NOFOLLOW. It would only require something like:

	unsigned int lookup_flags = LOOKUP_FOLLOW;

	if (flags & ~(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
		return -EINVAL;

	if (flags & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;
	if (flags & AT_SYMLINK_NOFOLLOW)
		lookup_flags &= ~LOOKUP_FOLLOW;

	/* ... */

This would be effectively equivalent to fchmod(fd, mode). (I was wrong
when I said this wasn't already possible -- I forgot about fchmod(2).)
  
dalias@libc.org July 27, 2023, 5:13 p.m. UTC | #10
On Thu, Jul 27, 2023 at 07:02:53PM +0200, Christian Brauner wrote:
> On Thu, Jul 27, 2023 at 06:28:53PM +0200, Andreas Schwab wrote:
> > On Jul 27 2023, David Laight wrote:
> > 
> > > From: Aleksa Sarai
> > >> Sent: 25 July 2023 17:36
> > > ...
> > >> We almost certainly want to support AT_EMPTY_PATH at the same time.
> > >> Otherwise userspace will still need to go through /proc when trying to
> > >> chmod a file handle they have.
> > >
> > > That can't be allowed.
> > 
> > IIUC, fchmodat2(fd, "", m, AT_EMPTY_PATH) is equivalent to fchmod(fd,
> > m).  With that, new architectures only need to implement the fchmodat2
> > syscall to cover all chmod variants.
> 
> There's a difference though as fchmod() doesn't work with O_PATH file
> descriptors while AT_EMPTY_PATH does. Similar to how fchown() doesn't
> work with O_PATH file descriptors.
> 
> However, we do allow AT_EMPTY_PATH with fchownat() so there's no reason
> to not allow it for fchmodat2().
> 
> But it's a bit of a shame that O_PATH looks less and less like O_PATH.
> It came from can-do-barely-anything to can-do-quite-a-lot-of-things over
> the years.
> 
> In any case, AT_EMPTY_PATH for fchmodat2() can be an additional patch on
> top.

From a standpoint of implementing O_SEARCH/O_EXEC using it, I don't
see any reason fchown/fchmod should not work on O_PATH file
descriptors. And indeed when you have procfs available to emulate them
via procfs, it already does. So I don't see this as unwanted
functionality or an access control regression. I see it as things
behaving as expected.

Semantically, O_PATH is a reference to the inode, not to the dirent.
So there is no reason you should not be able to do things that need
permission to the inode (changing permissions on it) rather than to
the dirent (renaming/moving).

Rich
  
Christian Brauner July 27, 2023, 5:36 p.m. UTC | #11
On Thu, Jul 27, 2023 at 01:13:37PM -0400, dalias@libc.org wrote:
> On Thu, Jul 27, 2023 at 07:02:53PM +0200, Christian Brauner wrote:
> > On Thu, Jul 27, 2023 at 06:28:53PM +0200, Andreas Schwab wrote:
> > > On Jul 27 2023, David Laight wrote:
> > > 
> > > > From: Aleksa Sarai
> > > >> Sent: 25 July 2023 17:36
> > > > ...
> > > >> We almost certainly want to support AT_EMPTY_PATH at the same time.
> > > >> Otherwise userspace will still need to go through /proc when trying to
> > > >> chmod a file handle they have.
> > > >
> > > > That can't be allowed.
> > > 
> > > IIUC, fchmodat2(fd, "", m, AT_EMPTY_PATH) is equivalent to fchmod(fd,
> > > m).  With that, new architectures only need to implement the fchmodat2
> > > syscall to cover all chmod variants.
> > 
> > There's a difference though as fchmod() doesn't work with O_PATH file
> > descriptors while AT_EMPTY_PATH does. Similar to how fchown() doesn't
> > work with O_PATH file descriptors.
> > 
> > However, we do allow AT_EMPTY_PATH with fchownat() so there's no reason
> > to not allow it for fchmodat2().
> > 
> > But it's a bit of a shame that O_PATH looks less and less like O_PATH.
> > It came from can-do-barely-anything to can-do-quite-a-lot-of-things over
> > the years.
> > 
> > In any case, AT_EMPTY_PATH for fchmodat2() can be an additional patch on
> > top.
> 
> From a standpoint of implementing O_SEARCH/O_EXEC using it, I don't
> see any reason fchown/fchmod should not work on O_PATH file
> descriptors. And indeed when you have procfs available to emulate them
> via procfs, it already does. So I don't see this as unwanted

I'm really not talking about the fact that proc is a giant loophole for
basically everyhing related to O_PATH and reopening fds.

I'm saying that both fchmod() and fchown() don't work on O_PATH fds.
They explicitly reject them.

AT_EMPTY_PATH and therefore O_PATH for fchmodat2() is fine given that we
do it for fchownat() already.
  
Aleksa Sarai July 27, 2023, 5:39 p.m. UTC | #12
On 2023-07-28, Aleksa Sarai <cyphar@cyphar.com> wrote:
> On 2023-07-26, Alexey Gladkov <legion@kernel.org> wrote:
> > On Wed, Jul 26, 2023 at 02:36:25AM +1000, Aleksa Sarai wrote:
> > > On 2023-07-11, Alexey Gladkov <legion@kernel.org> wrote:
> > > > On the userspace side fchmodat(3) is implemented as a wrapper
> > > > function which implements the POSIX-specified interface. This
> > > > interface differs from the underlying kernel system call, which does not
> > > > have a flags argument. Most implementations require procfs [1][2].
> > > > 
> > > > There doesn't appear to be a good userspace workaround for this issue
> > > > but the implementation in the kernel is pretty straight-forward.
> > > > 
> > > > The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
> > > > unlike existing fchmodat.
> > > > 
> > > > [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
> > > > [2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28
> > > > 
> > > > Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
> > > > Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
> > > > Signed-off-by: Alexey Gladkov <legion@kernel.org>
> > > > Acked-by: Arnd Bergmann <arnd@arndb.de>
> > > > ---
> > > >  fs/open.c                | 18 ++++++++++++++----
> > > >  include/linux/syscalls.h |  2 ++
> > > >  2 files changed, 16 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/fs/open.c b/fs/open.c
> > > > index 0c55c8e7f837..39a7939f0d00 100644
> > > > --- a/fs/open.c
> > > > +++ b/fs/open.c
> > > > @@ -671,11 +671,11 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
> > > >  	return err;
> > > >  }
> > > >  
> > > > -static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> > > > +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, int lookup_flags)
> > > 
> > > I think it'd be much neater to do the conversion of AT_ flags here and
> > > pass 0 as a flags argument for all of the wrappers (this is how most of
> > > the other xyz(), fxyz(), fxyzat() syscall wrappers are done IIRC).
> > 
> > I just addressed the Al Viro's suggestion.
> > 
> > https://lore.kernel.org/lkml/20190717014802.GS17978@ZenIV.linux.org.uk/
> 
> I think Al misspoke, because he also said "pass it 0 as an extra
> argument", but you actually have to pass LOOKUP_FOLLOW from the
> wrappers. If you look at how faccessat2 and faccessat are implemented,
> it follows the behaviour I described.
> 
> > > >  {
> > > >  	struct path path;
> > > >  	int error;
> > > > -	unsigned int lookup_flags = LOOKUP_FOLLOW;
> > > > +
> > > >  retry:
> > > >  	error = user_path_at(dfd, filename, lookup_flags, &path);
> > > >  	if (!error) {
> > > > @@ -689,15 +689,25 @@ static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
> > > >  	return error;
> > > >  }
> > > >  
> > > > +SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
> > > > +		umode_t, mode, int, flags)
> > > > +{
> > > > +	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
> > > > +		return -EINVAL;
> > > 
> > > We almost certainly want to support AT_EMPTY_PATH at the same time.
> > > Otherwise userspace will still need to go through /proc when trying to
> > > chmod a file handle they have.
> > 
> > I'm not sure I understand. Can you explain what you mean?
> 
> You should add support for AT_EMPTY_PATH (LOOKUP_EMPTY) as well as
> AT_SYMLINK_NOFOLLOW. It would only require something like:
> 
> 	unsigned int lookup_flags = LOOKUP_FOLLOW;
> 
> 	if (flags & ~(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
> 		return -EINVAL;
> 
> 	if (flags & AT_EMPTY_PATH)
> 		lookup_flags |= LOOKUP_EMPTY;
> 	if (flags & AT_SYMLINK_NOFOLLOW)
> 		lookup_flags &= ~LOOKUP_FOLLOW;
> 
> 	/* ... */
> 
> This would be effectively equivalent to fchmod(fd, mode). (I was wrong
> when I said this wasn't already possible -- I forgot about fchmod(2).)

... with the exception (as Christian mentioned) of O_PATH descriptors.
However, there are two counter-points to this:

 * fchownat(AT_EMPTY_PATH) exists but fchown() doesn't work on O_PATH
   descriptors *by design* (according to open(2)).
 * chmod(/proc/self/fd/$n) works on O_PATH descriptors, meaning this
   behaviour is already allowed and all that AT_EMPTY_PATH would do is
   allow programs to avoid depending on procfs for this.

FWIW, I agree with Christian that these behaviours are not ideal (and
I'm working on a series that might allow for these things to be properly
blocked in the future) but there's also the consistency argument -- I
don't think fchownat() is much safer to allow in this way than
fchmodat() and (again) this behaviour is already possible through
procfs.

Ultimately, we can always add AT_EMPTY_PATH later. It just seemed like
an obvious omission to me that would be easy to resolve.
  
David Laight July 28, 2023, 8:43 a.m. UTC | #13
...
> FWIW, I agree with Christian that these behaviours are not ideal (and
> I'm working on a series that might allow for these things to be properly
> blocked in the future) but there's also the consistency argument -- I
> don't think fchownat() is much safer to allow in this way than
> fchmodat() and (again) this behaviour is already possible through
> procfs.

If the 'through procfs' involves readlink("/proc/self/fd/n") and
accessing through the returned path then the permission checks
are different.
Using the returned path requires search permissions on all the
directories.

This is all fine for xxxat() functions where a real open
directory fd is supplied.
But other cases definitely need a lot of thought to ensure
they don't let programs acquire permissions they aren't
supposed to have.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
dalias@libc.org July 28, 2023, 6:42 p.m. UTC | #14
On Fri, Jul 28, 2023 at 08:43:58AM +0000, David Laight wrote:
> ....
> > FWIW, I agree with Christian that these behaviours are not ideal (and
> > I'm working on a series that might allow for these things to be properly
> > blocked in the future) but there's also the consistency argument -- I
> > don't think fchownat() is much safer to allow in this way than
> > fchmodat() and (again) this behaviour is already possible through
> > procfs.
> 
> If the 'through procfs' involves readlink("/proc/self/fd/n") and
> accessing through the returned path then the permission checks
> are different.
> Using the returned path requires search permissions on all the
> directories.

That's *not* how "through procfs" works. The "magic symlinks" in
/proc/*/fd are not actual symlinks that get dereferenced to the
contents they readlink() to, but special-type objects that dereference
directly to the underlying file associated with the open file
description.

Rich
  

Patch

diff --git a/fs/open.c b/fs/open.c
index 0c55c8e7f837..39a7939f0d00 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -671,11 +671,11 @@  SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
 	return err;
 }
 
-static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
+static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, int lookup_flags)
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW;
+
 retry:
 	error = user_path_at(dfd, filename, lookup_flags, &path);
 	if (!error) {
@@ -689,15 +689,25 @@  static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
 	return error;
 }
 
+SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
+		umode_t, mode, int, flags)
+{
+	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
+		return -EINVAL;
+
+	return do_fchmodat(dfd, filename, mode,
+			flags & AT_SYMLINK_NOFOLLOW ? 0 : LOOKUP_FOLLOW);
+}
+
 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
 		umode_t, mode)
 {
-	return do_fchmodat(dfd, filename, mode);
+	return do_fchmodat(dfd, filename, mode, LOOKUP_FOLLOW);
 }
 
 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 {
-	return do_fchmodat(AT_FDCWD, filename, mode);
+	return do_fchmodat(AT_FDCWD, filename, mode, LOOKUP_FOLLOW);
 }
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 584f404bf868..6e852279fbc3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -440,6 +440,8 @@  asmlinkage long sys_chroot(const char __user *filename);
 asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
 asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 			     umode_t mode);
+asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
+			     umode_t mode, int flags);
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag);
 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);