[1/4] fs/pipe: Convert to lockdep_cmp_fn

Message ID 20240127020833.487907-2-kent.overstreet@linux.dev
State New
Headers
Series lockdep cmp fn conversions |

Commit Message

Kent Overstreet Jan. 27, 2024, 2:08 a.m. UTC
  *_lock_nested() is fundamentally broken; lockdep needs to check lock
ordering, but we cannot device a total ordering on an unbounded number
of elements with only a few subclasses.

the replacement is to define lock ordering with a proper comparison
function.

fs/pipe.c was already doing everything correctly otherwise, nothing
much changes here.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
 1 file changed, 36 insertions(+), 45 deletions(-)
  

Comments

Jan Kara Feb. 2, 2024, 12:03 p.m. UTC | #1
On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot device a total ordering on an unbounded number
> of elements with only a few subclasses.
> 
> the replacement is to define lock ordering with a proper comparison
> function.
> 
> fs/pipe.c was already doing everything correctly otherwise, nothing
> much changes here.
> 
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

I had to digest for a while what this new lockdep lock ordering feature is
about. I have one pending question - what is the motivation of this
conversion of pipe code? AFAIU we don't have any problems with lockdep
annotations on pipe->mutex because there are always only two subclasses?

								Honza

> ---
>  fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
>  1 file changed, 36 insertions(+), 45 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index f1adbfe743d4..50c8a8596b52 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
>   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
>   */
>  
> -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> +#define cmp_int(l, r)		((l > r) - (l < r))
> +
> +#ifdef CONFIG_PROVE_LOCKING
> +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> +			    const struct lockdep_map *b)
>  {
> -	if (pipe->files)
> -		mutex_lock_nested(&pipe->mutex, subclass);
> +	return cmp_int((unsigned long) a, (unsigned long) b);
>  }
> +#endif
>  
>  void pipe_lock(struct pipe_inode_info *pipe)
>  {
> -	/*
> -	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
> -	 */
> -	pipe_lock_nested(pipe, I_MUTEX_PARENT);
> +	if (pipe->files)
> +		mutex_lock(&pipe->mutex);
>  }
>  EXPORT_SYMBOL(pipe_lock);
>  
> @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>  }
>  EXPORT_SYMBOL(pipe_unlock);
>  
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> -	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> -	mutex_unlock(&pipe->mutex);
> -}
> -
>  void pipe_double_lock(struct pipe_inode_info *pipe1,
>  		      struct pipe_inode_info *pipe2)
>  {
>  	BUG_ON(pipe1 == pipe2);
>  
> -	if (pipe1 < pipe2) {
> -		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> -		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> -	} else {
> -		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> -		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> -	}
> +	if (pipe1 > pipe2)
> +		swap(pipe1, pipe2);
> +
> +	pipe_lock(pipe1);
> +	pipe_lock(pipe2);
>  }
>  
>  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		return 0;
>  
>  	ret = 0;
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	/*
>  	 * We only wake up writers if the pipe was full when we started
> @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  			ret = -EAGAIN;
>  			break;
>  		}
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  
>  		/*
>  		 * We only get here if we didn't actually read anything.
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
>  			return -ERESTARTSYS;
>  
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>  		wake_next_reader = true;
>  	}
>  	if (pipe_empty(pipe->head, pipe->tail))
>  		wake_next_reader = false;
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	if (was_full)
>  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  	if (unlikely(total_len == 0))
>  		return 0;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	if (!pipe->readers) {
>  		send_sig(SIGPIPE, current, 0);
> @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  		 * after waiting we need to re-check whether the pipe
>  		 * become empty while we dropped the lock.
>  		 */
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  		if (was_empty)
>  			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		was_empty = pipe_empty(pipe->head, pipe->tail);
>  		wake_next_writer = true;
>  	}
>  out:
>  	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
>  		wake_next_writer = false;
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	/*
>  	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
> @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  
>  	switch (cmd) {
>  	case FIONREAD:
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		count = 0;
>  		head = pipe->head;
>  		tail = pipe->tail;
> @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			count += pipe->bufs[tail & mask].len;
>  			tail++;
>  		}
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  
>  		return put_user(count, (int __user *)arg);
>  
>  #ifdef CONFIG_WATCH_QUEUE
>  	case IOC_WATCH_QUEUE_SET_SIZE: {
>  		int ret;
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		ret = watch_queue_set_size(pipe, arg);
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  		return ret;
>  	}
>  
> @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
>  {
>  	struct pipe_inode_info *pipe = file->private_data;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  	if (file->f_mode & FMODE_READ)
>  		pipe->readers--;
>  	if (file->f_mode & FMODE_WRITE)
> @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
>  	}
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	put_pipe_info(inode, pipe);
>  	return 0;
> @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
>  	struct pipe_inode_info *pipe = filp->private_data;
>  	int retval = 0;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  	if (filp->f_mode & FMODE_READ)
>  		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
>  	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
>  			/* this can happen only if on == T */
>  			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
>  	}
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return retval;
>  }
>  
> @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
>  		pipe->nr_accounted = pipe_bufs;
>  		pipe->user = user;
>  		mutex_init(&pipe->mutex);
> +		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
>  		return pipe;
>  	}
>  
> @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	filp->private_data = pipe;
>  	/* OK, we have a pipe and it's pinned down */
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	/* We can only do regular read/write on fifos */
>  	stream_open(inode, filp);
> @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	}
>  
>  	/* Ok! */
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return 0;
>  
>  err_rd:
> @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	goto err;
>  
>  err:
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	put_pipe_info(inode, pipe);
>  	return ret;
> @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
>  	if (!pipe)
>  		return -EBADF;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	switch (cmd) {
>  	case F_SETPIPE_SZ:
> @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
>  		break;
>  	}
>  
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return ret;
>  }
>  
> -- 
> 2.43.0
>
  
Sedat Dilek Feb. 2, 2024, 12:25 p.m. UTC | #2
On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> >
> > the replacement is to define lock ordering with a proper comparison
> > function.
> >
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> >
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
>
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?
>
>                                                                 Honza

Hi,

"Numbers talk - Bullshit walks." (Linus Torvalds)

In things of pipes - I normally benchmark like this (example):

root# cat /dev/sdc | pipebench > /dev/null

Do you have numbers for your patch-series?

Thanks.

BG,
-Sedat-

[1] https://packages.debian.org/pipebench

>
> > ---
> >  fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
> >  1 file changed, 36 insertions(+), 45 deletions(-)
> >
> > diff --git a/fs/pipe.c b/fs/pipe.c
> > index f1adbfe743d4..50c8a8596b52 100644
> > --- a/fs/pipe.c
> > +++ b/fs/pipe.c
> > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
> >   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
> >   */
> >
> > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> > +#define cmp_int(l, r)                ((l > r) - (l < r))
> > +
> > +#ifdef CONFIG_PROVE_LOCKING
> > +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> > +                         const struct lockdep_map *b)
> >  {
> > -     if (pipe->files)
> > -             mutex_lock_nested(&pipe->mutex, subclass);
> > +     return cmp_int((unsigned long) a, (unsigned long) b);
> >  }
> > +#endif
> >
> >  void pipe_lock(struct pipe_inode_info *pipe)
> >  {
> > -     /*
> > -      * pipe_lock() nests non-pipe inode locks (for writing to a file)
> > -      */
> > -     pipe_lock_nested(pipe, I_MUTEX_PARENT);
> > +     if (pipe->files)
> > +             mutex_lock(&pipe->mutex);
> >  }
> >  EXPORT_SYMBOL(pipe_lock);
> >
> > @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> >  }
> >  EXPORT_SYMBOL(pipe_unlock);
> >
> > -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> > -{
> > -     mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> > -}
> > -
> > -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> > -{
> > -     mutex_unlock(&pipe->mutex);
> > -}
> > -
> >  void pipe_double_lock(struct pipe_inode_info *pipe1,
> >                     struct pipe_inode_info *pipe2)
> >  {
> >       BUG_ON(pipe1 == pipe2);
> >
> > -     if (pipe1 < pipe2) {
> > -             pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> > -             pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> > -     } else {
> > -             pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> > -             pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> > -     }
> > +     if (pipe1 > pipe2)
> > +             swap(pipe1, pipe2);
> > +
> > +     pipe_lock(pipe1);
> > +     pipe_lock(pipe2);
> >  }
> >
> >  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >               return 0;
> >
> >       ret = 0;
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       /*
> >        * We only wake up writers if the pipe was full when we started
> > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >                       ret = -EAGAIN;
> >                       break;
> >               }
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >
> >               /*
> >                * We only get here if we didn't actually read anything.
> > @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> >               if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
> >                       return -ERESTARTSYS;
> >
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> >               wake_next_reader = true;
> >       }
> >       if (pipe_empty(pipe->head, pipe->tail))
> >               wake_next_reader = false;
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       if (was_full)
> >               wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> >       if (unlikely(total_len == 0))
> >               return 0;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       if (!pipe->readers) {
> >               send_sig(SIGPIPE, current, 0);
> > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> >                * after waiting we need to re-check whether the pipe
> >                * become empty while we dropped the lock.
> >                */
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >               if (was_empty)
> >                       wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> >               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> >               wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               was_empty = pipe_empty(pipe->head, pipe->tail);
> >               wake_next_writer = true;
> >       }
> >  out:
> >       if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
> >               wake_next_writer = false;
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       /*
> >        * If we do do a wakeup event, we do a 'sync' wakeup, because we
> > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >
> >       switch (cmd) {
> >       case FIONREAD:
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               count = 0;
> >               head = pipe->head;
> >               tail = pipe->tail;
> > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >                       count += pipe->bufs[tail & mask].len;
> >                       tail++;
> >               }
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >
> >               return put_user(count, (int __user *)arg);
> >
> >  #ifdef CONFIG_WATCH_QUEUE
> >       case IOC_WATCH_QUEUE_SET_SIZE: {
> >               int ret;
> > -             __pipe_lock(pipe);
> > +             mutex_lock(&pipe->mutex);
> >               ret = watch_queue_set_size(pipe, arg);
> > -             __pipe_unlock(pipe);
> > +             mutex_unlock(&pipe->mutex);
> >               return ret;
> >       }
> >
> > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
> >  {
> >       struct pipe_inode_info *pipe = file->private_data;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >       if (file->f_mode & FMODE_READ)
> >               pipe->readers--;
> >       if (file->f_mode & FMODE_WRITE)
> > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
> >               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> >               kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> >       }
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       put_pipe_info(inode, pipe);
> >       return 0;
> > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> >       struct pipe_inode_info *pipe = filp->private_data;
> >       int retval = 0;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >       if (filp->f_mode & FMODE_READ)
> >               retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
> >       if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> >                       /* this can happen only if on == T */
> >                       fasync_helper(-1, filp, 0, &pipe->fasync_readers);
> >       }
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >       return retval;
> >  }
> >
> > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
> >               pipe->nr_accounted = pipe_bufs;
> >               pipe->user = user;
> >               mutex_init(&pipe->mutex);
> > +             lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
> >               return pipe;
> >       }
> >
> > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> >       filp->private_data = pipe;
> >       /* OK, we have a pipe and it's pinned down */
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       /* We can only do regular read/write on fifos */
> >       stream_open(inode, filp);
> > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> >       }
> >
> >       /* Ok! */
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >       return 0;
> >
> >  err_rd:
> > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> >       goto err;
> >
> >  err:
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >
> >       put_pipe_info(inode, pipe);
> >       return ret;
> > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> >       if (!pipe)
> >               return -EBADF;
> >
> > -     __pipe_lock(pipe);
> > +     mutex_lock(&pipe->mutex);
> >
> >       switch (cmd) {
> >       case F_SETPIPE_SZ:
> > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> >               break;
> >       }
> >
> > -     __pipe_unlock(pipe);
> > +     mutex_unlock(&pipe->mutex);
> >       return ret;
> >  }
> >
> > --
> > 2.43.0
> >
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
>
  
Kent Overstreet Feb. 2, 2024, 12:47 p.m. UTC | #3
On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> > 
> > the replacement is to define lock ordering with a proper comparison
> > function.
> > 
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> > 
> > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> 
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?

It's one of the easier conversions to do, and ideally /all/ users of
subclasses would go away.

Start with the easier ones, figure out those patterns, then the
harder...
  
Jan Kara Feb. 5, 2024, 9:53 a.m. UTC | #4
On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot device a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > >
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > >
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > >
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> >
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
> >
> >                                                                 Honza
> 
> Hi,
> 
> "Numbers talk - Bullshit walks." (Linus Torvalds)
> 
> In things of pipes - I normally benchmark like this (example):
> 
> root# cat /dev/sdc | pipebench > /dev/null
> 
> Do you have numbers for your patch-series?

Sedat AFAIU this patch is not about performance at all but rather about
lockdep instrumentation... But maybe I'm missing your point?

								Honza
  
Sedat Dilek Feb. 5, 2024, 9:59 a.m. UTC | #5
On Mon, Feb 5, 2024 at 10:54 AM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> > On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > > ordering, but we cannot device a total ordering on an unbounded number
> > > > of elements with only a few subclasses.
> > > >
> > > > the replacement is to define lock ordering with a proper comparison
> > > > function.
> > > >
> > > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > > much changes here.
> > > >
> > > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > > Cc: Christian Brauner <brauner@kernel.org>
> > > > Cc: Jan Kara <jack@suse.cz>
> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > >
> > > I had to digest for a while what this new lockdep lock ordering feature is
> > > about. I have one pending question - what is the motivation of this
> > > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > > annotations on pipe->mutex because there are always only two subclasses?
> > >
> > >                                                                 Honza
> >
> > Hi,
> >
> > "Numbers talk - Bullshit walks." (Linus Torvalds)
> >
> > In things of pipes - I normally benchmark like this (example):
> >
> > root# cat /dev/sdc | pipebench > /dev/null
> >
> > Do you have numbers for your patch-series?
>
> Sedat AFAIU this patch is not about performance at all but rather about
> lockdep instrumentation... But maybe I'm missing your point?
>

Sorry, I missed the point, Jan.

-Sedat-
  
Jan Kara Feb. 5, 2024, 10:09 a.m. UTC | #6
On Fri 02-02-24 07:47:50, Kent Overstreet wrote:
> On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot device a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > > 
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > > 
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > > 
> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > 
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
> 
> It's one of the easier conversions to do, and ideally /all/ users of
> subclasses would go away.
> 
> Start with the easier ones, figure out those patterns, then the
> harder...

I see, thanks for explanation. So in the pipes case I actually like that
the patch makes the locking less obfuscated with lockdep details (to which
I'm mostly used to but still ;)). So feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

for this patch. I'm not 100% convinced it will be always possible to
replace subclasses with the new ordering mechanism but I guess time will
show.

								Honza
  

Patch

diff --git a/fs/pipe.c b/fs/pipe.c
index f1adbfe743d4..50c8a8596b52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@  static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
  */
 
-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r)		((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+			    const struct lockdep_map *b)
 {
-	if (pipe->files)
-		mutex_lock_nested(&pipe->mutex, subclass);
+	return cmp_int((unsigned long) a, (unsigned long) b);
 }
+#endif
 
 void pipe_lock(struct pipe_inode_info *pipe)
 {
-	/*
-	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
-	 */
-	pipe_lock_nested(pipe, I_MUTEX_PARENT);
+	if (pipe->files)
+		mutex_lock(&pipe->mutex);
 }
 EXPORT_SYMBOL(pipe_lock);
 
@@ -98,28 +100,16 @@  void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
 	BUG_ON(pipe1 == pipe2);
 
-	if (pipe1 < pipe2) {
-		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
-	} else {
-		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
-	}
+	if (pipe1 > pipe2)
+		swap(pipe1, pipe2);
+
+	pipe_lock(pipe1);
+	pipe_lock(pipe2);
 }
 
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		return 0;
 
 	ret = 0;
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	/*
 	 * We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			ret = -EAGAIN;
 			break;
 		}
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 
 		/*
 		 * We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 		wake_next_reader = true;
 	}
 	if (pipe_empty(pipe->head, pipe->tail))
 		wake_next_reader = false;
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	if (was_full)
 		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	if (unlikely(total_len == 0))
 		return 0;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		 * after waiting we need to re-check whether the pipe
 		 * become empty while we dropped the lock.
 		 */
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 		if (was_empty)
 			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		was_empty = pipe_empty(pipe->head, pipe->tail);
 		wake_next_writer = true;
 	}
 out:
 	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
 		wake_next_writer = false;
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	/*
 	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@  static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case FIONREAD:
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		count = 0;
 		head = pipe->head;
 		tail = pipe->tail;
@@ -639,16 +629,16 @@  static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			count += pipe->bufs[tail & mask].len;
 			tail++;
 		}
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 
 		return put_user(count, (int __user *)arg);
 
 #ifdef CONFIG_WATCH_QUEUE
 	case IOC_WATCH_QUEUE_SET_SIZE: {
 		int ret;
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		ret = watch_queue_set_size(pipe, arg);
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 		return ret;
 	}
 
@@ -734,7 +724,7 @@  pipe_release(struct inode *inode, struct file *file)
 {
 	struct pipe_inode_info *pipe = file->private_data;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
 	if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@  pipe_release(struct inode *inode, struct file *file)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	put_pipe_info(inode, pipe);
 	return 0;
@@ -759,7 +749,7 @@  pipe_fasync(int fd, struct file *filp, int on)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@  pipe_fasync(int fd, struct file *filp, int on)
 			/* this can happen only if on == T */
 			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 	}
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return retval;
 }
 
@@ -834,6 +824,7 @@  struct pipe_inode_info *alloc_pipe_info(void)
 		pipe->nr_accounted = pipe_bufs;
 		pipe->user = user;
 		mutex_init(&pipe->mutex);
+		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
 		return pipe;
 	}
 
@@ -1144,7 +1135,7 @@  static int fifo_open(struct inode *inode, struct file *filp)
 	filp->private_data = pipe;
 	/* OK, we have a pipe and it's pinned down */
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	/* We can only do regular read/write on fifos */
 	stream_open(inode, filp);
@@ -1214,7 +1205,7 @@  static int fifo_open(struct inode *inode, struct file *filp)
 	}
 
 	/* Ok! */
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return 0;
 
 err_rd:
@@ -1230,7 +1221,7 @@  static int fifo_open(struct inode *inode, struct file *filp)
 	goto err;
 
 err:
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	put_pipe_info(inode, pipe);
 	return ret;
@@ -1411,7 +1402,7 @@  long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 	if (!pipe)
 		return -EBADF;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	switch (cmd) {
 	case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@  long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 		break;
 	}
 
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return ret;
 }