[1/4] fs/pipe: Convert to lockdep_cmp_fn
Commit Message
*_lock_nested() is fundamentally broken; lockdep needs to check lock
ordering, but we cannot devise a total ordering on an unbounded number
of elements with only a few subclasses.
The replacement is to define lock ordering with a proper comparison
function.
fs/pipe.c was otherwise already doing everything correctly, so nothing
much changes here.
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
1 file changed, 36 insertions(+), 45 deletions(-)
Comments
On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot device a total ordering on an unbounded number
> of elements with only a few subclasses.
>
> the replacement is to define lock ordering with a proper comparison
> function.
>
> fs/pipe.c was already doing everything correctly otherwise, nothing
> much changes here.
>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
I had to digest for a while what this new lockdep lock ordering feature is
about. I have one pending question - what is the motivation of this
conversion of pipe code? AFAIU we don't have any problems with lockdep
annotations on pipe->mutex because there are always only two subclasses?
Honza
> ---
> fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
> 1 file changed, 36 insertions(+), 45 deletions(-)
>
> diff --git a/fs/pipe.c b/fs/pipe.c
> index f1adbfe743d4..50c8a8596b52 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
> * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
> */
>
> -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> +#define cmp_int(l, r) ((l > r) - (l < r))
> +
> +#ifdef CONFIG_PROVE_LOCKING
> +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> + const struct lockdep_map *b)
> {
> - if (pipe->files)
> - mutex_lock_nested(&pipe->mutex, subclass);
> + return cmp_int((unsigned long) a, (unsigned long) b);
> }
> +#endif
>
> void pipe_lock(struct pipe_inode_info *pipe)
> {
> - /*
> - * pipe_lock() nests non-pipe inode locks (for writing to a file)
> - */
> - pipe_lock_nested(pipe, I_MUTEX_PARENT);
> + if (pipe->files)
> + mutex_lock(&pipe->mutex);
> }
> EXPORT_SYMBOL(pipe_lock);
>
> @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> }
> EXPORT_SYMBOL(pipe_unlock);
>
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> - mutex_unlock(&pipe->mutex);
> -}
> -
> void pipe_double_lock(struct pipe_inode_info *pipe1,
> struct pipe_inode_info *pipe2)
> {
> BUG_ON(pipe1 == pipe2);
>
> - if (pipe1 < pipe2) {
> - pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> - pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> - } else {
> - pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> - pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> - }
> + if (pipe1 > pipe2)
> + swap(pipe1, pipe2);
> +
> + pipe_lock(pipe1);
> + pipe_lock(pipe2);
> }
>
> static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> return 0;
>
> ret = 0;
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> /*
> * We only wake up writers if the pipe was full when we started
> @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> ret = -EAGAIN;
> break;
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> /*
> * We only get here if we didn't actually read anything.
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
> return -ERESTARTSYS;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> wake_next_reader = true;
> }
> if (pipe_empty(pipe->head, pipe->tail))
> wake_next_reader = false;
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> if (was_full)
> wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> if (unlikely(total_len == 0))
> return 0;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> if (!pipe->readers) {
> send_sig(SIGPIPE, current, 0);
> @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> * after waiting we need to re-check whether the pipe
> * become empty while we dropped the lock.
> */
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> if (was_empty)
> wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> was_empty = pipe_empty(pipe->head, pipe->tail);
> wake_next_writer = true;
> }
> out:
> if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
> wake_next_writer = false;
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> /*
> * If we do do a wakeup event, we do a 'sync' wakeup, because we
> @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>
> switch (cmd) {
> case FIONREAD:
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> count = 0;
> head = pipe->head;
> tail = pipe->tail;
> @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> count += pipe->bufs[tail & mask].len;
> tail++;
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> return put_user(count, (int __user *)arg);
>
> #ifdef CONFIG_WATCH_QUEUE
> case IOC_WATCH_QUEUE_SET_SIZE: {
> int ret;
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> ret = watch_queue_set_size(pipe, arg);
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return ret;
> }
>
> @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
> {
> struct pipe_inode_info *pipe = file->private_data;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> if (file->f_mode & FMODE_READ)
> pipe->readers--;
> if (file->f_mode & FMODE_WRITE)
> @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> put_pipe_info(inode, pipe);
> return 0;
> @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> struct pipe_inode_info *pipe = filp->private_data;
> int retval = 0;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> if (filp->f_mode & FMODE_READ)
> retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
> if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> /* this can happen only if on == T */
> fasync_helper(-1, filp, 0, &pipe->fasync_readers);
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return retval;
> }
>
> @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
> pipe->nr_accounted = pipe_bufs;
> pipe->user = user;
> mutex_init(&pipe->mutex);
> + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
> return pipe;
> }
>
> @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> filp->private_data = pipe;
> /* OK, we have a pipe and it's pinned down */
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> /* We can only do regular read/write on fifos */
> stream_open(inode, filp);
> @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> }
>
> /* Ok! */
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return 0;
>
> err_rd:
> @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> goto err;
>
> err:
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> put_pipe_info(inode, pipe);
> return ret;
> @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> if (!pipe)
> return -EBADF;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> switch (cmd) {
> case F_SETPIPE_SZ:
> @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> break;
> }
>
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return ret;
> }
>
> --
> 2.43.0
>
On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> >
> > the replacement is to define lock ordering with a proper comparison
> > function.
> >
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> >
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
>
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?
>
> Honza
Hi,
"Numbers talk - Bullshit walks." (Linus Torvalds)
When it comes to pipes, I normally benchmark like this (example):
root# cat /dev/sdc | pipebench > /dev/null
Do you have numbers for your patch-series?
Thanks.
BG,
-Sedat-
[1] https://packages.debian.org/pipebench
>
> > ---
> > fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
> > 1 file changed, 36 insertions(+), 45 deletions(-)
> >
> > diff --git a/fs/pipe.c b/fs/pipe.c
> > index f1adbfe743d4..50c8a8596b52 100644
> > --- a/fs/pipe.c
> > +++ b/fs/pipe.c
> > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
> > * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
> > */
> >
> > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> > +#define cmp_int(l, r) ((l > r) - (l < r))
> > +
> > +#ifdef CONFIG_PROVE_LOCKING
> > +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> > + const struct lockdep_map *b)
> > {
> > - if (pipe->files)
> > - mutex_lock_nested(&pipe->mutex, subclass);
> > + return cmp_int((unsigned long) a, (unsigned long) b);
> > }
> > +#endif
> >
> > void pipe_lock(struct pipe_inode_info *pipe)
> > {
> > - /*
> > - * pipe_lock() nests non-pipe inode locks (for writing to a file)
> > - */
> > - pipe_lock_nested(pipe, I_MUTEX_PARENT);
> > + if (pipe->files)
> > + mutex_lock(&pipe->mutex);
> > }
> > EXPORT_SYMBOL(pipe_lock);
> >
> > @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> > }
> > EXPORT_SYMBOL(pipe_unlock);
> >
> > -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> > -{
> > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> > -}
> > -
> > -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> > -{
> > - mutex_unlock(&pipe->mutex);
> > -}
> > -
> > void pipe_double_lock(struct pipe_inode_info *pipe1,
> > struct pipe_inode_info *pipe2)
> > {
> > BUG_ON(pipe1 == pipe2);
> >
> > - if (pipe1 < pipe2) {
> > - pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> > - pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> > - } else {
> > - pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> > - pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> > - }
> > + if (pipe1 > pipe2)
> > + swap(pipe1, pipe2);
> > +
> > + pipe_lock(pipe1);
> > + pipe_lock(pipe2);
> > }
> >
> > static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> > return 0;
> >
> > ret = 0;
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > /*
> > * We only wake up writers if the pipe was full when we started
> > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> > ret = -EAGAIN;
> > break;
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > /*
> > * We only get here if we didn't actually read anything.
> > @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> > if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
> > return -ERESTARTSYS;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> > wake_next_reader = true;
> > }
> > if (pipe_empty(pipe->head, pipe->tail))
> > wake_next_reader = false;
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > if (was_full)
> > wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> > if (unlikely(total_len == 0))
> > return 0;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > if (!pipe->readers) {
> > send_sig(SIGPIPE, current, 0);
> > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> > * after waiting we need to re-check whether the pipe
> > * become empty while we dropped the lock.
> > */
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > if (was_empty)
> > wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> > wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > was_empty = pipe_empty(pipe->head, pipe->tail);
> > wake_next_writer = true;
> > }
> > out:
> > if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
> > wake_next_writer = false;
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > /*
> > * If we do do a wakeup event, we do a 'sync' wakeup, because we
> > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >
> > switch (cmd) {
> > case FIONREAD:
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > count = 0;
> > head = pipe->head;
> > tail = pipe->tail;
> > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> > count += pipe->bufs[tail & mask].len;
> > tail++;
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > return put_user(count, (int __user *)arg);
> >
> > #ifdef CONFIG_WATCH_QUEUE
> > case IOC_WATCH_QUEUE_SET_SIZE: {
> > int ret;
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > ret = watch_queue_set_size(pipe, arg);
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return ret;
> > }
> >
> > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
> > {
> > struct pipe_inode_info *pipe = file->private_data;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > if (file->f_mode & FMODE_READ)
> > pipe->readers--;
> > if (file->f_mode & FMODE_WRITE)
> > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
> > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> > kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > put_pipe_info(inode, pipe);
> > return 0;
> > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> > struct pipe_inode_info *pipe = filp->private_data;
> > int retval = 0;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > if (filp->f_mode & FMODE_READ)
> > retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
> > if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> > /* this can happen only if on == T */
> > fasync_helper(-1, filp, 0, &pipe->fasync_readers);
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return retval;
> > }
> >
> > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
> > pipe->nr_accounted = pipe_bufs;
> > pipe->user = user;
> > mutex_init(&pipe->mutex);
> > + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
> > return pipe;
> > }
> >
> > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> > filp->private_data = pipe;
> > /* OK, we have a pipe and it's pinned down */
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > /* We can only do regular read/write on fifos */
> > stream_open(inode, filp);
> > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> > }
> >
> > /* Ok! */
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return 0;
> >
> > err_rd:
> > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> > goto err;
> >
> > err:
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > put_pipe_info(inode, pipe);
> > return ret;
> > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> > if (!pipe)
> > return -EBADF;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > switch (cmd) {
> > case F_SETPIPE_SZ:
> > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> > break;
> > }
> >
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return ret;
> > }
> >
> > --
> > 2.43.0
> >
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
>
On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> >
> > the replacement is to define lock ordering with a proper comparison
> > function.
> >
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> >
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
>
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?
It's one of the easier conversions to do, and ideally /all/ users of
subclasses would go away.
Start with the easier ones, figure out those patterns, then the
harder...
On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot device a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > >
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > >
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > >
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> >
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
> >
> > Honza
>
> Hi,
>
> "Numbers talk - Bullshit walks." (Linus Torvalds)
>
> In things of pipes - I normally benchmark like this (example):
>
> root# cat /dev/sdc | pipebench > /dev/null
>
> Do you have numbers for your patch-series?
Sedat, AFAIU this patch is not about performance at all but rather about
lockdep instrumentation... But maybe I'm missing your point?
Honza
On Mon, Feb 5, 2024 at 10:54 AM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> > On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > > ordering, but we cannot device a total ordering on an unbounded number
> > > > of elements with only a few subclasses.
> > > >
> > > > the replacement is to define lock ordering with a proper comparison
> > > > function.
> > > >
> > > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > > much changes here.
> > > >
> > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > > Cc: Christian Brauner <brauner@kernel.org>
> > > > Cc: Jan Kara <jack@suse.cz>
> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > >
> > > I had to digest for a while what this new lockdep lock ordering feature is
> > > about. I have one pending question - what is the motivation of this
> > > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > > annotations on pipe->mutex because there are always only two subclasses?
> > >
> > > Honza
> >
> > Hi,
> >
> > "Numbers talk - Bullshit walks." (Linus Torvalds)
> >
> > In things of pipes - I normally benchmark like this (example):
> >
> > root# cat /dev/sdc | pipebench > /dev/null
> >
> > Do you have numbers for your patch-series?
>
> Sedat AFAIU this patch is not about performance at all but rather about
> lockdep instrumentation... But maybe I'm missing your point?
>
Sorry, I missed the point, Jan.
-Sedat-
On Fri 02-02-24 07:47:50, Kent Overstreet wrote:
> On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot device a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > >
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > >
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > >
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> >
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
>
> It's one of the easier conversions to do, and ideally /all/ users of
> subclasses would go away.
>
> Start with the easier ones, figure out those patterns, then the
> harder...
I see, thanks for the explanation. So in the pipes case I actually like that
the patch makes the locking less obfuscated with lockdep details (which
I'm mostly used to, but still ;)). So feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
for this patch. I'm not 100% convinced it will always be possible to
replace subclasses with the new ordering mechanism, but I guess time will
show.
Honza
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r) ((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
{
- if (pipe->files)
- mutex_lock_nested(&pipe->mutex, subclass);
+ return cmp_int((unsigned long) a, (unsigned long) b);
}
+#endif
void pipe_lock(struct pipe_inode_info *pipe)
{
- /*
- * pipe_lock() nests non-pipe inode locks (for writing to a file)
- */
- pipe_lock_nested(pipe, I_MUTEX_PARENT);
+ if (pipe->files)
+ mutex_lock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_lock);
@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
}
EXPORT_SYMBOL(pipe_unlock);
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
- mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
- mutex_unlock(&pipe->mutex);
-}
-
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
BUG_ON(pipe1 == pipe2);
- if (pipe1 < pipe2) {
- pipe_lock_nested(pipe1, I_MUTEX_PARENT);
- pipe_lock_nested(pipe2, I_MUTEX_CHILD);
- } else {
- pipe_lock_nested(pipe2, I_MUTEX_PARENT);
- pipe_lock_nested(pipe1, I_MUTEX_CHILD);
- }
+ if (pipe1 > pipe2)
+ swap(pipe1, pipe2);
+
+ pipe_lock(pipe1);
+ pipe_lock(pipe2);
}
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;
ret = 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
/*
* We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = -EAGAIN;
break;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
/*
* We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
wake_next_reader = true;
}
if (pipe_empty(pipe->head, pipe->tail))
wake_next_reader = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if (unlikely(total_len == 0))
return 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* after waiting we need to re-check whether the pipe
* become empty while we dropped the lock.
*/
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_empty = pipe_empty(pipe->head, pipe->tail);
wake_next_writer = true;
}
out:
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
wake_next_writer = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
/*
* If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FIONREAD:
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
count = 0;
head = pipe->head;
tail = pipe->tail;
@@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
count += pipe->bufs[tail & mask].len;
tail++;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return put_user(count, (int __user *)arg);
#ifdef CONFIG_WATCH_QUEUE
case IOC_WATCH_QUEUE_SET_SIZE: {
int ret;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
ret = watch_queue_set_size(pipe, arg);
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}
@@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
{
struct pipe_inode_info *pipe = file->private_data;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (file->f_mode & FMODE_READ)
pipe->readers--;
if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
put_pipe_info(inode, pipe);
return 0;
@@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
struct pipe_inode_info *pipe = filp->private_data;
int retval = 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (filp->f_mode & FMODE_READ)
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
/* this can happen only if on == T */
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return retval;
}
@@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
+ lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
return pipe;
}
@@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
filp->private_data = pipe;
/* OK, we have a pipe and it's pinned down */
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
/* We can only do regular read/write on fifos */
stream_open(inode, filp);
@@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
}
/* Ok! */
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return 0;
err_rd:
@@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
goto err;
err:
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
put_pipe_info(inode, pipe);
return ret;
@@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
if (!pipe)
return -EBADF;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
switch (cmd) {
case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
break;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}