diff mbox series

fs/aio: fix uaf in sys_io_cancel

Message ID	tencent_DC4C9767C786D2D2FDC64F099FAEFDEEC106@qq.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel+bounces-89730-ouuuleilei=gmail.com@vger.kernel.org designates 139.178.88.99 as permitted sender) client-ip=139.178.88.99; Message-ID: <tencent_DC4C9767C786D2D2FDC64F099FAEFDEEC106@qq.com> From: Edward Adam Davis <eadavis@qq.com> To: syzbot+b91eb2ed18f599dd3c31@syzkaller.appspotmail.com Cc: bcrl@kvack.org, brauner@kernel.org, bvanassche@acm.org, jack@suse.cz, linux-aio@kvack.org, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, syzkaller-bugs@googlegroups.com, viro@zeniv.linux.org.uk Subject: [PATCH] fs/aio: fix uaf in sys_io_cancel Date: Sun, 3 Mar 2024 20:21:00 +0800 In-Reply-To: <0000000000006945730612bc9173@google.com> References: <0000000000006945730612bc9173@google.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-getmail-retrieved-from-mailbox: INBOX
Series	fs/aio: fix uaf in sys_io_cancel \| fs/aio: fix uaf in sys_io_cancel

Commit Message

Edward Adam Davis March 3, 2024, 12:21 p.m. UTC

  The aio poll work aio_poll_complete_work() need to be synchronized with syscall
io_cancel(). Otherwise, when poll work executes first, syscall may access the
released aio_kiocb object.

Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again")
Reported-and-tested-by: syzbot+b91eb2ed18f599dd3c31@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
---
 fs/aio.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

Comments

Bart Van Assche March 4, 2024, 4:15 p.m. UTC | #1

On 3/3/24 04:21, Edward Adam Davis wrote:
> The aio poll work aio_poll_complete_work() need to be synchronized with syscall
> io_cancel(). Otherwise, when poll work executes first, syscall may access the
> released aio_kiocb object.
> 
> Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again")
> Reported-and-tested-by: syzbot+b91eb2ed18f599dd3c31@syzkaller.appspotmail.com
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
> ---
>   fs/aio.c | 5 ++---
>   1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 28223f511931..0fed22ed9eb8 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1762,9 +1762,8 @@ static void aio_poll_complete_work(struct work_struct *work)
>   	} /* else, POLLFREE has freed the waitqueue, so we must complete */
>   	list_del_init(&iocb->ki_list);
>   	iocb->ki_res.res = mangle_poll(mask);
> -	spin_unlock_irq(&ctx->ctx_lock);
> -
>   	iocb_put(iocb);
> +	spin_unlock_irq(&ctx->ctx_lock);
>   }
>   
>   /* assumes we are called with irqs disabled */
> @@ -2198,7 +2197,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>   			break;
>   		}
>   	}
> -	spin_unlock_irq(&ctx->ctx_lock);
>   
>   	/*
>   	 * The result argument is no longer used - the io_event is always
> @@ -2206,6 +2204,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>   	 */
>   	if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
>   		aio_complete_rw(&kiocb->rw, -EINTR);
> +	spin_unlock_irq(&ctx->ctx_lock);
>   
>   	percpu_ref_put(&ctx->users);

I'm not enthusiast about the above patch because it increases the amount
of code executed with the ctx_lock held. Wouldn't something like the
untested patch below be a better solution?

Thanks,

Bart.


diff --git a/fs/aio.c b/fs/aio.c
index 28223f511931..c6fb10321e48 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2177,6 +2177,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
struct iocb __user *, iocb,
  	struct kioctx *ctx;
  	struct aio_kiocb *kiocb;
  	int ret = -EINVAL;
+	bool is_cancelled_rw = false;
  	u32 key;
  	u64 obj = (u64)(unsigned long)iocb;

@@ -2193,6 +2194,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
struct iocb __user *, iocb,
  	/* TODO: use a hash or array, this sucks. */
  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
  		if (kiocb->ki_res.obj == obj) {
+			is_cancelled_rw = kiocb->rw.ki_flags & IOCB_AIO_RW;
  			ret = kiocb->ki_cancel(&kiocb->rw);
  			list_del_init(&kiocb->ki_list);
  			break;
@@ -2204,7 +2206,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
struct iocb __user *, iocb,
  	 * The result argument is no longer used - the io_event is always
  	 * delivered via the ring buffer.
  	 */
-	if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
+	if (ret == 0 && is_cancelled_rw)
  		aio_complete_rw(&kiocb->rw, -EINTR);

  	percpu_ref_put(&ctx->users);

Benjamin LaHaise March 4, 2024, 5:03 p.m. UTC | #2

On Mon, Mar 04, 2024 at 08:15:15AM -0800, Bart Van Assche wrote:
> On 3/3/24 04:21, Edward Adam Davis wrote:
> >The aio poll work aio_poll_complete_work() need to be synchronized with 
> >syscall
> >io_cancel(). Otherwise, when poll work executes first, syscall may access 
> >the
> >released aio_kiocb object.
> >
> >Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again")
> >Reported-and-tested-by: 
> >syzbot+b91eb2ed18f599dd3c31@syzkaller.appspotmail.com
> >Signed-off-by: Edward Adam Davis <eadavis@qq.com>
> >---
> >  fs/aio.c | 5 ++---
> >  1 file changed, 2 insertions(+), 3 deletions(-)
> >
> >diff --git a/fs/aio.c b/fs/aio.c
> >index 28223f511931..0fed22ed9eb8 100644
> >--- a/fs/aio.c
> >+++ b/fs/aio.c
> >@@ -1762,9 +1762,8 @@ static void aio_poll_complete_work(struct 
> >work_struct *work)
> >  	} /* else, POLLFREE has freed the waitqueue, so we must complete */
> >  	list_del_init(&iocb->ki_list);
> >  	iocb->ki_res.res = mangle_poll(mask);
> >-	spin_unlock_irq(&ctx->ctx_lock);
> >-
> >  	iocb_put(iocb);
> >+	spin_unlock_irq(&ctx->ctx_lock);
> >  }
> >  
> >  /* assumes we are called with irqs disabled */
> >@@ -2198,7 +2197,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
> >struct iocb __user *, iocb,
> >  			break;
> >  		}
> >  	}
> >-	spin_unlock_irq(&ctx->ctx_lock);
> >  
> >  	/*
> >  	 * The result argument is no longer used - the io_event is always
> >@@ -2206,6 +2204,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
> >struct iocb __user *, iocb,
> >  	 */
> >  	if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
> >  		aio_complete_rw(&kiocb->rw, -EINTR);
> >+	spin_unlock_irq(&ctx->ctx_lock);
> >  
> >  	percpu_ref_put(&ctx->users);

This is just so wrong there aren't even words to describe it.  I
recommending reverting all of Bart's patches since they were not reviewed
by anyone with a sufficient level of familiarity with fs/aio.c to get it
right.

		-ben

> I'm not enthusiast about the above patch because it increases the amount
> of code executed with the ctx_lock held. Wouldn't something like the
> untested patch below be a better solution?
> 
> Thanks,
> 
> Bart.
> 
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 28223f511931..c6fb10321e48 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -2177,6 +2177,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
> struct iocb __user *, iocb,
>  	struct kioctx *ctx;
>  	struct aio_kiocb *kiocb;
>  	int ret = -EINVAL;
> +	bool is_cancelled_rw = false;
>  	u32 key;
>  	u64 obj = (u64)(unsigned long)iocb;
> 
> @@ -2193,6 +2194,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
> struct iocb __user *, iocb,
>  	/* TODO: use a hash or array, this sucks. */
>  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
>  		if (kiocb->ki_res.obj == obj) {
> +			is_cancelled_rw = kiocb->rw.ki_flags & IOCB_AIO_RW;
>  			ret = kiocb->ki_cancel(&kiocb->rw);
>  			list_del_init(&kiocb->ki_list);
>  			break;
> @@ -2204,7 +2206,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, 
> struct iocb __user *, iocb,
>  	 * The result argument is no longer used - the io_event is always
>  	 * delivered via the ring buffer.
>  	 */
> -	if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
> +	if (ret == 0 && is_cancelled_rw)
>  		aio_complete_rw(&kiocb->rw, -EINTR);
> 
>  	percpu_ref_put(&ctx->users);
> 
>

Bart Van Assche March 4, 2024, 5:15 p.m. UTC | #3

On 3/4/24 09:03, Benjamin LaHaise wrote:
> This is just so wrong there aren't even words to describe it.  I
> recommending reverting all of Bart's patches since they were not reviewed
> by anyone with a sufficient level of familiarity with fs/aio.c to get it
> right.

Where were you while my patches were posted for review on the fsdevel
mailing list and before these were sent to Linus?

A revert request should include a detailed explanation of why the revert
should happen. Just claiming that something is wrong is not sufficient
to motivate a revert.

Bart.

Benjamin LaHaise March 4, 2024, 5:31 p.m. UTC | #4

On Mon, Mar 04, 2024 at 09:15:47AM -0800, Bart Van Assche wrote:
> On 3/4/24 09:03, Benjamin LaHaise wrote:
> >This is just so wrong there aren't even words to describe it.  I
> >recommending reverting all of Bart's patches since they were not reviewed
> >by anyone with a sufficient level of familiarity with fs/aio.c to get it
> >right.
> 
> Where were you while my patches were posted for review on the fsdevel
> mailing list and before these were sent to Linus?

That doesn't negate the need for someone with experience in a given
subsystem to sign off on the patches.  There are at least 2 other people I
would trust to properly review these patches, and none of their signoffs
are present either.

> A revert request should include a detailed explanation of why the revert
> should happen. Just claiming that something is wrong is not sufficient
> to motivate a revert.

A revert is justified when a series of patches is buggy and had
insufficient review prior to merging.  Mainline is not the place to be
testing half baked changes.  Getting cancellation semantics and locking
right is *VERY HARD*, and the results of getting it wrong are very subtle
and user exploitable.  Using the "a kernel warning hit" approach for work
on cancellation is very much a sign that the patches were half baked.

What testing did you do on your patch series?  The commit messages show
nothing of interest regarding testing.  Why are you touching the kiocb
after ownership has already been passed on to another entity?  This is as
bad as touching memory that has been freed.  You clearly do not understand
how that code works.

		-ben

Bart Van Assche March 4, 2024, 5:40 p.m. UTC | #5

On 3/4/24 09:31, Benjamin LaHaise wrote:
> A revert is justified when a series of patches is buggy and had
> insufficient review prior to merging.

That's not how Linux kernel development works. If a bug can get fixed
easily, a fix is preferred instead of reverting + reapplying a patch.

> Using the "a kernel warning hit" approach for work on cancellation is
> very much a sign that the patches were half baked.
Is there perhaps a misunderstanding? My patches fix a kernel warning and
did not introduce any new WARN*() statements.

> Why are you touching the kiocb after ownership has already been
> passed on to another entity?
Touching the kiocb after ownership has been passed is the result of an
oversight. Whether or not kiocb->ki_cancel() transfers ownership depends
on the I/O type. The use-after-free was not introduced on purpose.

Bart.

Benjamin LaHaise March 4, 2024, 5:47 p.m. UTC | #6

On Mon, Mar 04, 2024 at 09:40:35AM -0800, Bart Van Assche wrote:
> On 3/4/24 09:31, Benjamin LaHaise wrote:
> >A revert is justified when a series of patches is buggy and had
> >insufficient review prior to merging.
> 
> That's not how Linux kernel development works. If a bug can get fixed
> easily, a fix is preferred instead of reverting + reapplying a patch.

Your original "fix" is not right, and it wasn't properly tested.  Commit
54cbc058d86beca3515c994039b5c0f0a34f53dd needs to be reverted.

> >Using the "a kernel warning hit" approach for work on cancellation is
> >very much a sign that the patches were half baked.
> Is there perhaps a misunderstanding? My patches fix a kernel warning and
> did not introduce any new WARN*() statements.

The change that introduced that callback by you was incorrect and should
be reverted.

> >Why are you touching the kiocb after ownership has already been
> >passed on to another entity?
> Touching the kiocb after ownership has been passed is the result of an
> oversight. Whether or not kiocb->ki_cancel() transfers ownership depends
> on the I/O type. The use-after-free was not introduced on purpose.

Your fix is still incorrect.  You're still touching memory that you don't
own.  The event should be generated via the ->ki_cancel method, not in the
io_cancel() syscall.

		-ben

> Bart.
> 
>

diff mbox series

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 28223f511931..0fed22ed9eb8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1762,9 +1762,8 @@  static void aio_poll_complete_work(struct work_struct *work)
 	} /* else, POLLFREE has freed the waitqueue, so we must complete */
 	list_del_init(&iocb->ki_list);
 	iocb->ki_res.res = mangle_poll(mask);
-	spin_unlock_irq(&ctx->ctx_lock);
-
 	iocb_put(iocb);
+	spin_unlock_irq(&ctx->ctx_lock);
 }
 
 /* assumes we are called with irqs disabled */
@@ -2198,7 +2197,6 @@  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 			break;
 		}
 	}
-	spin_unlock_irq(&ctx->ctx_lock);
 
 	/*
 	 * The result argument is no longer used - the io_event is always
@@ -2206,6 +2204,7 @@  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	 */
 	if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
 		aio_complete_rw(&kiocb->rw, -EINTR);
+	spin_unlock_irq(&ctx->ctx_lock);
 
 	percpu_ref_put(&ctx->users);