[v3,0/3+1] fanotify accounting for fs/splice.c

Message ID cover.1687884029.git.nabijaczleweli@nabijaczleweli.xyz
State New
Headers
Series [v3,0/3+1] fanotify accounting for fs/splice.c |

Commit Message

Ahelenia Ziemiańska June 27, 2023, 4:55 p.m. UTC
  In 1/3 I've applied if/else if/else tree like you said,
and expounded a bit in the message.

This is less pretty now, however, since it turns out that
iter_file_splice_write() already marks the out fd as written because it
writes to it via vfs_iter_write(), and that sent a double notification.

$ git grep -F .splice_write | grep -v iter_file_splice_write
drivers/char/mem.c:     .splice_write   = splice_write_null,
drivers/char/virtio_console.c:  .splice_write = port_fops_splice_write,
fs/fuse/dev.c:  .splice_write   = fuse_dev_splice_write,
fs/gfs2/file.c: .splice_write   = gfs2_file_splice_write,
fs/gfs2/file.c: .splice_write   = gfs2_file_splice_write,
fs/overlayfs/file.c:    .splice_write   = ovl_splice_write,
net/socket.c:   .splice_write = generic_splice_sendpage,
scripts/coccinelle/api/stream_open.cocci:    .splice_write = splice_write_f,

Of these, splice_write_null() doesn't mark out as written
(but it's for /dev/null so I think this is expected),
and I haven't been able to visually confirm whether
port_fops_splice_write() and generic_splice_sendpage() do.

All the others delegate to iter_file_splice_write().

In 2/3 I fixed the vmsplice notification placement
(access from pipe, modify to pipe).

I'm following this up with an LTP patch, where only sendfile_file_to_pipe
passes on 6.1.27-1 and all tests pass on v6.4 + this patchset.

Ahelenia Ziemiańska (3):
  splice: always fsnotify_access(in), fsnotify_modify(out) on success
  splice: fsnotify_access(fd)/fsnotify_modify(fd) in vmsplice
  splice: fsnotify_access(in), fsnotify_modify(out) on success in tee

 fs/splice.c | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)


Interdiff against v2:
  

Comments

Amir Goldstein June 27, 2023, 6:03 p.m. UTC | #1
On Tue, Jun 27, 2023 at 7:55 PM Ahelenia Ziemiańska
<nabijaczleweli@nabijaczleweli.xyz> wrote:
>
> In 1/3 I've applied if/else if/else tree like you said,
> and expounded a bit in the message.
>
> This is less pretty now, however, since it turns out that

If my advice turns out to be bad, then please drop it.

> iter_file_splice_write() already marks the out fd as written because it
> writes to it via vfs_iter_write(), and that sent a double notification.
>
> $ git grep -F .splice_write | grep -v iter_file_splice_write
> drivers/char/mem.c:     .splice_write   = splice_write_null,
> drivers/char/virtio_console.c:  .splice_write = port_fops_splice_write,
> fs/fuse/dev.c:  .splice_write   = fuse_dev_splice_write,
> fs/gfs2/file.c: .splice_write   = gfs2_file_splice_write,
> fs/gfs2/file.c: .splice_write   = gfs2_file_splice_write,
> fs/overlayfs/file.c:    .splice_write   = ovl_splice_write,
> net/socket.c:   .splice_write = generic_splice_sendpage,
> scripts/coccinelle/api/stream_open.cocci:    .splice_write = splice_write_f,
>
> Of these, splice_write_null() doesn't mark out as written
> (but it's for /dev/null so I think this is expected),
> and I haven't been able to visually confirm whether
> port_fops_splice_write() and generic_splice_sendpage() do.
>
> All the others delegate to iter_file_splice_write().
>

All this is very troubling to me.
It translates to a mental model that I cannot remember and
cannot maintain for fixes whose value are still questionable.

IIUC, the only thing you need to change in do_splice() for
making your use case work is to add fsnotify_modify()
for the splice_pipe_to_pipe() case. Right?

So either make the change that you need, or all the changes
that are simple to follow without trying to make the world
consistent - these pipe iterators business is really messy.
I don't know if avoiding a double event (which is likely not visible)
is worth the complicated code that is hard to understand.

> In 2/3 I fixed the vmsplice notification placement
> (access from pipe, modify to pipe).
>
> I'm following this up with an LTP patch, where only sendfile_file_to_pipe
> passes on 6.1.27-1 and all tests pass on v6.4 + this patchset.
>

Were these tests able to detect the double event?
Maybe it's not visible because double consequent events get merged.

> Ahelenia Ziemiańska (3):
>   splice: always fsnotify_access(in), fsnotify_modify(out) on success
>   splice: fsnotify_access(fd)/fsnotify_modify(fd) in vmsplice
>   splice: fsnotify_access(in), fsnotify_modify(out) on success in tee
>
>  fs/splice.c | 43 +++++++++++++++++++++++++------------------
>  1 file changed, 25 insertions(+), 18 deletions(-)
>
>
> Interdiff against v2:
> diff --git a/fs/splice.c b/fs/splice.c
> index 3234aaa6e957..0427f0a91c7d 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -1155,10 +1155,7 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
>                         flags |= SPLICE_F_NONBLOCK;
>
>                 ret = splice_pipe_to_pipe(ipipe, opipe, len, flags);
> -               goto notify;
> -       }
> -
> -       if (ipipe) {
> +       } else if (ipipe) {
>                 if (off_in)
>                         return -ESPIPE;
>                 if (off_out) {
> @@ -1188,10 +1185,10 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
>                 else
>                         *off_out = offset;
>
> -               goto notify;
> -       }
> -
> -       if (opipe) {
> +               // ->splice_write already marked out
> +               // as modified via vfs_iter_write()
> +               goto noaccessout;

That's too ugly IMO.
Are you claiming that the code in master is wrong?
Because in master there is fsnotify_modify(out) for (ipipe) case.

> +       } else if (opipe) {
>                 if (off_out)
>                         return -ESPIPE;
>                 if (off_in) {
> @@ -1211,17 +1208,14 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
>                         in->f_pos = offset;
>                 else
>                         *off_in = offset;
> +       } else
> +               return -EINVAL;
>
> -               goto notify;
> -       }
> -
> -       return -EINVAL;
> -
> -notify:
> -       if (ret > 0) {
> -               fsnotify_access(in);
> +       if (ret > 0)
>                 fsnotify_modify(out);
> -       }
> +noaccessout:
> +       if (ret > 0)
> +               fsnotify_access(in);
>

Not to mention that it should be nomodifyout, but I dislike this
"common" code that it not common at all, so either just handle
the pipe_to_pipe case to fix your use case or leave this code
completely common ignoring the possible double events.

Thanks,
Amir.
  
Ahelenia Ziemiańska June 27, 2023, 8:34 p.m. UTC | #2
On Tue, Jun 27, 2023 at 09:03:17PM +0300, Amir Goldstein wrote:
> On Tue, Jun 27, 2023 at 7:55 PM Ahelenia Ziemiańska
> <nabijaczleweli@nabijaczleweli.xyz> wrote:
> >
> > In 1/3 I've applied if/else if/else tree like you said,
> > and expounded a bit in the message.
> >
> > This is less pretty now, however, since it turns out that
> If my advice turns out to be bad, then please drop it.
The if/else if/else with no goto is better than before;
it was made ugly by the special-casing below.

> > iter_file_splice_write() already marks the out fd as written because it
> > writes to it via vfs_iter_write(), and that sent a double notification.
> >
> > $ git grep -F .splice_write | grep -v iter_file_splice_write
> > drivers/char/mem.c:     .splice_write   = splice_write_null,
> > drivers/char/virtio_console.c:  .splice_write = port_fops_splice_write,
> > fs/fuse/dev.c:  .splice_write   = fuse_dev_splice_write,
> > fs/gfs2/file.c: .splice_write   = gfs2_file_splice_write,
> > fs/gfs2/file.c: .splice_write   = gfs2_file_splice_write,
> > fs/overlayfs/file.c:    .splice_write   = ovl_splice_write,
> > net/socket.c:   .splice_write = generic_splice_sendpage,
> > scripts/coccinelle/api/stream_open.cocci:    .splice_write = splice_write_f,
> >
> > Of these, splice_write_null() doesn't mark out as written
> > (but it's for /dev/null so I think this is expected),
> > and I haven't been able to visually confirm whether
> > port_fops_splice_write() and generic_splice_sendpage() do.
> >
> > All the others delegate to iter_file_splice_write().
> All this is very troubling to me.
> It translates to a mental model that I cannot remember and
> cannot maintain for fixes whose value are still questionable.
> 
> IIUC, the only thing you need to change in do_splice() for
> making your use case work is to add fsnotify_modify()
> for the splice_pipe_to_pipe() case. Right?
No, all splice/tee/vmsplice cases need to generate modify events for the
output fd. Really, all I/O syscalls do, but those are for today.

> So either make the change that you need, or all the changes
> that are simple to follow without trying to make the world
> consistent
Thus I also originally had all the aforementioned generate access/modify
for in/out.

> - these pipe iterators business is really messy.
> I don't know if avoiding a double event (which is likely not visible)
> is worth the complicated code that is hard to understand.
> 
> > In 2/3 I fixed the vmsplice notification placement
> > (access from pipe, modify to pipe).
> >
> > I'm following this up with an LTP patch, where only sendfile_file_to_pipe
> > passes on 6.1.27-1 and all tests pass on v6.4 + this patchset.
> Were these tests able to detect the double event?
> Maybe it's not visible because double consequent events get merged.
That's how I discovered it, yes. They aren't merged because we'd generate
  modify out  <- from the VFS callback
  access in   <- from do_splice
  modify out  <- ibid.

I agree this got very ugly very fast for a weird edge case ‒
maybe I did get a little over-zealous on having a consistent
"one syscall↔one event for each affected file" model.

OTOH: I've found that just using
	if (ret > 0) {
		fsnotify_modify(out);
		fsnotify_access(in);
	}
does get the events merged from
  modify out  <- from the VFS callback
  modify out  <- from do_splice
  access in   <- ibid.
into
  modify out
  access in
which solves all issues
(reliable wake-up regardless of backing file, no spurious wake-ups)
at no cost. I would've done this originally, but I hadn't known
inotify events get merged :v
  
Christoph Hellwig June 28, 2023, 4:51 a.m. UTC | #3
Can you please resend this outside this thread?  I really cant't see
what's new or old here if you have a reply-to in the old thread.

On Tue, Jun 27, 2023 at 06:55:22PM +0200, Ahelenia Ziemiańska wrote:
> In 1/3 I've applied if/else if/else tree like you said,
> and expounded a bit in the message.
> 
> This is less pretty now, however, since it turns out that
> iter_file_splice_write() already marks the out fd as written because it
> writes to it via vfs_iter_write(), and that sent a double notification.

It seems like vfs_iter_write is the wrong level to implement
->splice_write given that the the ->splice_write caller has already
checked f_mode, done the equivalent of rw_verify_area and
should do the fsnotify_modify.  I'd suggest to just open code the
relevant parts of vfs_iocb_iter_write in iter_file_splice_write.
  
Jan Kara June 28, 2023, 10:38 a.m. UTC | #4
On Tue 27-06-23 21:51:05, Christoph Hellwig wrote:
> Can you please resend this outside this thread?  I really cant't see
> what's new or old here if you have a reply-to in the old thread.
> 
> On Tue, Jun 27, 2023 at 06:55:22PM +0200, Ahelenia Ziemiańska wrote:
> > In 1/3 I've applied if/else if/else tree like you said,
> > and expounded a bit in the message.
> > 
> > This is less pretty now, however, since it turns out that
> > iter_file_splice_write() already marks the out fd as written because it
> > writes to it via vfs_iter_write(), and that sent a double notification.
> 
> It seems like vfs_iter_write is the wrong level to implement
> ->splice_write given that the the ->splice_write caller has already
> checked f_mode, done the equivalent of rw_verify_area and
> should do the fsnotify_modify.  I'd suggest to just open code the
> relevant parts of vfs_iocb_iter_write in iter_file_splice_write.

Yeah, looking into the code I agree (with a small remark that unlike
vfs_iocb_iter_write() this particular variant also needs to work with files
providing only ->write and not ->write_iter). But we can live with
duplicate events for now and this seems like a rather separate cleanup to
do.

								Honza
  

Patch

diff --git a/fs/splice.c b/fs/splice.c
index 3234aaa6e957..0427f0a91c7d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1155,10 +1155,7 @@  long do_splice(struct file *in, loff_t *off_in, struct file *out,
 			flags |= SPLICE_F_NONBLOCK;
 
 		ret = splice_pipe_to_pipe(ipipe, opipe, len, flags);
-		goto notify;
-	}
-
-	if (ipipe) {
+	} else if (ipipe) {
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
@@ -1188,10 +1185,10 @@  long do_splice(struct file *in, loff_t *off_in, struct file *out,
 		else
 			*off_out = offset;
 
-		goto notify;
-	}
-
-	if (opipe) {
+		// ->splice_write already marked out
+		// as modified via vfs_iter_write()
+		goto noaccessout;
+	} else if (opipe) {
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
@@ -1211,17 +1208,14 @@  long do_splice(struct file *in, loff_t *off_in, struct file *out,
 			in->f_pos = offset;
 		else
 			*off_in = offset;
+	} else
+		return -EINVAL;
 
-		goto notify;
-	}
-
-	return -EINVAL;
-
-notify:
-	if (ret > 0) {
-		fsnotify_access(in);
+	if (ret > 0)
 		fsnotify_modify(out);
-	}
+noaccessout:
+	if (ret > 0)
+		fsnotify_access(in);
 
 	return ret;
 }
@@ -1352,6 +1346,9 @@  static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
 		pipe_unlock(pipe);
 	}
 
+	if (ret > 0)
+		fsnotify_access(file);
+
 	return ret;
 }
 
@@ -1381,8 +1378,10 @@  static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
 	if (!ret)
 		ret = iter_to_pipe(iter, pipe, buf_flag);
 	pipe_unlock(pipe);
-	if (ret > 0)
+	if (ret > 0) {
 		wakeup_pipe_readers(pipe);
+		fsnotify_modify(file);
+	}
 	return ret;
 }
 
@@ -1447,9 +1446,6 @@  SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 	else
 		error = vmsplice_to_user(f.file, &iter, flags);
 
-	if (error > 0)
-		fsnotify_modify(f.file);
-
 	kfree(iov);
 out_fdput:
 	fdput(f);