diff mbox series

umh: fix UAF when the process is being killed

Message ID	20221115140233.21981-1-schspa@gmail.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; From: Schspa Shi <schspa@gmail.com> To: mcgrof@kernel.org Cc: linux-kernel@vger.kernel.org, Schspa Shi <schspa@gmail.com>, syzbot+10d19d528d9755d9af22@syzkaller.appspotmail.com, syzbot+70d5d5d83d03db2c813d@syzkaller.appspotmail.com, syzbot+83cb0411d0fcf0a30fc1@syzkaller.appspotmail.com Subject: [PATCH] umh: fix UAF when the process is being killed Date: Tue, 15 Nov 2022 22:02:33 +0800 Message-Id: <20221115140233.21981-1-schspa@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	umh: fix UAF when the process is being killed \| umh: fix UAF when the process is being killed

Commit Message

Schspa Shi Nov. 15, 2022, 2:02 p.m. UTC

  When the process is killed, wait_for_completion_state will return with
-ERESTARTSYS, and the completion variable in the stack will be freed.
If the user-mode thread is complete at the same time, there will be a UAF.

Please refer to the following scenarios.
            T1                                  T2
------------------------------------------------------------------
call_usermodehelper_exec
                                   call_usermodehelper_exec_async
                                   << do something >>
                                   umh_complete(sub_info);
                                   comp = xchg(&sub_info->complete, NULL);
                                   /* we got the completion */
                                   << context switch >>

    << Being killed >>
	retval = wait_for_completion_state(sub_info->complete, state);
	if (!retval)
		goto wait_done;

	if (wait & UMH_KILLABLE) {
		/* umh_complete() will see NULL and free sub_info */
		if (xchg(&sub_info->complete, NULL))
			goto unlock;
        << we can't got the completion >>
	}
	....
unlock:
	helper_unlock();
	return retval;
}

/**
 * the completion variable in stack is end of life cycle.
 * and maybe freed due to process is recycled.
 */
                                   --------UAF here----------
                                   if (comp)
                                       complete(comp);

To fix it, we can put the completion variable in the subprocess_info
variable.

Reported-by: syzbot+10d19d528d9755d9af22@syzkaller.appspotmail.com
Reported-by: syzbot+70d5d5d83d03db2c813d@syzkaller.appspotmail.com
Reported-by: syzbot+83cb0411d0fcf0a30fc1@syzkaller.appspotmail.com

Signed-off-by: Schspa Shi <schspa@gmail.com>
---
 include/linux/umh.h | 1 +
 kernel/umh.c        | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

Comments

Schspa Shi Dec. 5, 2022, 11:38 a.m. UTC | #1

Schspa Shi <schspa@gmail.com> writes:

> When the process is killed, wait_for_completion_state will return with
> -ERESTARTSYS, and the completion variable in the stack will be freed.
> If the user-mode thread is complete at the same time, there will be a UAF.
>
> Please refer to the following scenarios.
>             T1                                  T2
> ------------------------------------------------------------------
> call_usermodehelper_exec
>                                    call_usermodehelper_exec_async
>                                    << do something >>
>                                    umh_complete(sub_info);
>                                    comp = xchg(&sub_info->complete, NULL);
>                                    /* we got the completion */
>                                    << context switch >>
>
>     << Being killed >>
> 	retval = wait_for_completion_state(sub_info->complete, state);
> 	if (!retval)
> 		goto wait_done;
>
> 	if (wait & UMH_KILLABLE) {
> 		/* umh_complete() will see NULL and free sub_info */
> 		if (xchg(&sub_info->complete, NULL))
> 			goto unlock;
>         << we can't got the completion >>
> 	}
> 	....
> unlock:
> 	helper_unlock();
> 	return retval;
> }
>
> /**
>  * the completion variable in stack is end of life cycle.
>  * and maybe freed due to process is recycled.
>  */
>                                    --------UAF here----------
>                                    if (comp)
>                                        complete(comp);
>
> To fix it, we can put the completion variable in the subprocess_info
> variable.
>
> Reported-by: syzbot+10d19d528d9755d9af22@syzkaller.appspotmail.com
> Reported-by: syzbot+70d5d5d83d03db2c813d@syzkaller.appspotmail.com
> Reported-by: syzbot+83cb0411d0fcf0a30fc1@syzkaller.appspotmail.com
>
> Signed-off-by: Schspa Shi <schspa@gmail.com>
> ---
>  include/linux/umh.h | 1 +
>  kernel/umh.c        | 6 +++---
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/umh.h b/include/linux/umh.h
> index 5d1f6129b847..801f7efbc825 100644
> --- a/include/linux/umh.h
> +++ b/include/linux/umh.h
> @@ -20,6 +20,7 @@ struct file;
>  struct subprocess_info {
>  	struct work_struct work;
>  	struct completion *complete;
> +	struct completion done;
>  	const char *path;
>  	char **argv;
>  	char **envp;
> diff --git a/kernel/umh.c b/kernel/umh.c
> index 850631518665..3ed39956c777 100644
> --- a/kernel/umh.c
> +++ b/kernel/umh.c
> @@ -380,6 +380,7 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
>  	sub_info->cleanup = cleanup;
>  	sub_info->init = init;
>  	sub_info->data = data;
> +	init_completion(&sub_info->done);
>    out:
>  	return sub_info;
>  }
> @@ -405,7 +406,6 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
>  int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>  {
>  	unsigned int state = TASK_UNINTERRUPTIBLE;
> -	DECLARE_COMPLETION_ONSTACK(done);
>  	int retval = 0;
>  
>  	if (!sub_info->path) {
> @@ -431,7 +431,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>  	 * This makes it possible to use umh_complete to free
>  	 * the data structure in case of UMH_NO_WAIT.
>  	 */
> -	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
> +	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &sub_info->done;
>  	sub_info->wait = wait;
>  
>  	queue_work(system_unbound_wq, &sub_info->work);
> @@ -444,7 +444,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>  	if (wait & UMH_FREEZABLE)
>  		state |= TASK_FREEZABLE;
>  
> -	retval = wait_for_completion_state(&done, state);
> +	retval = wait_for_completion_state(sub_info->complete, state);
>  	if (!retval)
>  		goto wait_done;

Hi Luis Chamberlain:

Could you help to review this patch? I'm not sure why we define the
amount of completion here on the stack. But this UAF can be fixed by
moving the completion variable to the heap.

Luis Chamberlain Dec. 12, 2022, 5:10 a.m. UTC | #2

On Mon, Dec 05, 2022 at 07:38:21PM +0800, Schspa Shi wrote:
> 
> Schspa Shi <schspa@gmail.com> writes:
> 
> > When the process is killed, wait_for_completion_state will return with
> > -ERESTARTSYS, and the completion variable in the stack will be freed.

There is no free'ing here, it's just not availabel anymore, which is
different.

> > If the user-mode thread is complete at the same time, there will be a UAF.
> >
> > Please refer to the following scenarios.
> >             T1                                  T2
> > ------------------------------------------------------------------
> > call_usermodehelper_exec
> >                                    call_usermodehelper_exec_async
> >                                    << do something >>
> >                                    umh_complete(sub_info);
> >                                    comp = xchg(&sub_info->complete, NULL);
> >                                    /* we got the completion */
> >                                    << context switch >>
> >
> >     << Being killed >>
> > 	retval = wait_for_completion_state(sub_info->complete, state);
> > 	if (!retval)
> > 		goto wait_done;
> >
> > 	if (wait & UMH_KILLABLE) {
> > 		/* umh_complete() will see NULL and free sub_info */
> > 		if (xchg(&sub_info->complete, NULL))
> > 			goto unlock;
> >         << we can't got the completion >>

I'm sorry I don't understand what you tried to say here, we can't got?

> > 	}
> > 	....
> > unlock:
> > 	helper_unlock();
> > 	return retval;
> > }
> >
> > /**
> >  * the completion variable in stack is end of life cycle.
> >  * and maybe freed due to process is recycled.
> >  */
> >                                    --------UAF here----------
> >                                    if (comp)
> >                                        complete(comp);
> >
> > To fix it, we can put the completion variable in the subprocess_info
> > variable.
> >
> > Reported-by: syzbot+10d19d528d9755d9af22@syzkaller.appspotmail.com
> > Reported-by: syzbot+70d5d5d83d03db2c813d@syzkaller.appspotmail.com
> > Reported-by: syzbot+83cb0411d0fcf0a30fc1@syzkaller.appspotmail.com
> >
> > Signed-off-by: Schspa Shi <schspa@gmail.com>
> > ---
> >  include/linux/umh.h | 1 +
> >  kernel/umh.c        | 6 +++---
> >  2 files changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/linux/umh.h b/include/linux/umh.h
> > index 5d1f6129b847..801f7efbc825 100644
> > --- a/include/linux/umh.h
> > +++ b/include/linux/umh.h
> > @@ -20,6 +20,7 @@ struct file;
> >  struct subprocess_info {
> >  	struct work_struct work;
> >  	struct completion *complete;
> > +	struct completion done;
> >  	const char *path;
> >  	char **argv;
> >  	char **envp;
> > diff --git a/kernel/umh.c b/kernel/umh.c
> > index 850631518665..3ed39956c777 100644
> > --- a/kernel/umh.c
> > +++ b/kernel/umh.c
> > @@ -380,6 +380,7 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
> >  	sub_info->cleanup = cleanup;
> >  	sub_info->init = init;
> >  	sub_info->data = data;
> > +	init_completion(&sub_info->done);
> >    out:
> >  	return sub_info;
> >  }
> > @@ -405,7 +406,6 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
> >  int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
> >  {
> >  	unsigned int state = TASK_UNINTERRUPTIBLE;
> > -	DECLARE_COMPLETION_ONSTACK(done);
> >  	int retval = 0;
> >  
> >  	if (!sub_info->path) {
> > @@ -431,7 +431,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
> >  	 * This makes it possible to use umh_complete to free
> >  	 * the data structure in case of UMH_NO_WAIT.
> >  	 */
> > -	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
> > +	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &sub_info->done;
> >  	sub_info->wait = wait;
> >  
> >  	queue_work(system_unbound_wq, &sub_info->work);
> > @@ -444,7 +444,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
> >  	if (wait & UMH_FREEZABLE)
> >  		state |= TASK_FREEZABLE;
> >  
> > -	retval = wait_for_completion_state(&done, state);
> > +	retval = wait_for_completion_state(sub_info->complete, state);
> >  	if (!retval)
> >  		goto wait_done;
> 
> Hi Luis Chamberlain:
> 
> Could you help to review this patch? I'm not sure why we define the
> amount of completion here on the stack. But this UAF can be fixed by
> moving the completion variable to the heap.

It would seem to me that if this is an issue other areas would have
similar races as well, so I was hard pressed about the approach / fix.

Wouldn't something like this be a bit more explicit about ensuring
we don't let the work item race beyond?

diff --git a/kernel/umh.c b/kernel/umh.c
index 850631518665..55fc698115a7 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -447,6 +447,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 	retval = wait_for_completion_state(&done, state);
 	if (!retval)
 		goto wait_done;
+	else if (retval == -ERESTARTSYS)
+		cancel_work_sync(&sub_info->work);
 
 	if (wait & UMH_KILLABLE) {
 		/* umh_complete() will see NULL and free sub_info */

Schspa Shi Dec. 12, 2022, 11:04 a.m. UTC | #3

Luis Chamberlain <mcgrof@kernel.org> writes:

> On Mon, Dec 05, 2022 at 07:38:21PM +0800, Schspa Shi wrote:
>> 
>> Schspa Shi <schspa@gmail.com> writes:
>> 
>> > When the process is killed, wait_for_completion_state will return with
>> > -ERESTARTSYS, and the completion variable in the stack will be freed.
>
> There is no free'ing here, it's just not availabel anymore, which is
> different.
>

No, the whole thread stack will be freed when the process died. There
will be some cases where it will be released. It will be more accurate
to modify it to be unavailable.

>> > If the user-mode thread is complete at the same time, there will be a UAF.
>> >
>> > Please refer to the following scenarios.
>> >             T1                                  T2
>> > ------------------------------------------------------------------
>> > call_usermodehelper_exec
>> >                                    call_usermodehelper_exec_async
>> >                                    << do something >>
>> >                                    umh_complete(sub_info);
>> >                                    comp = xchg(&sub_info->complete, NULL);
>> >                                    /* we got the completion */
>> >                                    << context switch >>

The sub_info->complete will be set to NULL.

>> >
>> >     << Being killed >>
>> > 	retval = wait_for_completion_state(sub_info->complete, state);
>> > 	if (!retval)
>> > 		goto wait_done;
>> >
>> > 	if (wait & UMH_KILLABLE) {
>> > 		/* umh_complete() will see NULL and free sub_info */
>> > 		if (xchg(&sub_info->complete, NULL))
>> > 			goto unlock;
>> >         << we can't got the completion >>
>
> I'm sorry I don't understand what you tried to say here, we can't got?
>

In this scenario, the sub_info->complete will be NULL, at the
call_usermodehelper_exec_async, and we will go to the unlock branch here.

>> > 	}
>> > 	....
>> > unlock:
>> > 	helper_unlock();
>> > 	return retval;
>> > }
>> >
>> > /**
>> >  * the completion variable in stack is end of life cycle.
>> >  * and maybe freed due to process is recycled.
>> >  */
>> >                                    --------UAF here----------
>> >                                    if (comp)
>> >                                        complete(comp);
>> >
>> > To fix it, we can put the completion variable in the subprocess_info
>> > variable.
>> >
>> > Reported-by: syzbot+10d19d528d9755d9af22@syzkaller.appspotmail.com
>> > Reported-by: syzbot+70d5d5d83d03db2c813d@syzkaller.appspotmail.com
>> > Reported-by: syzbot+83cb0411d0fcf0a30fc1@syzkaller.appspotmail.com
>> >
>> > Signed-off-by: Schspa Shi <schspa@gmail.com>
>> > ---
>> >  include/linux/umh.h | 1 +
>> >  kernel/umh.c        | 6 +++---
>> >  2 files changed, 4 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/include/linux/umh.h b/include/linux/umh.h
>> > index 5d1f6129b847..801f7efbc825 100644
>> > --- a/include/linux/umh.h
>> > +++ b/include/linux/umh.h
>> > @@ -20,6 +20,7 @@ struct file;
>> >  struct subprocess_info {
>> >  	struct work_struct work;
>> >  	struct completion *complete;
>> > +	struct completion done;
>> >  	const char *path;
>> >  	char **argv;
>> >  	char **envp;
>> > diff --git a/kernel/umh.c b/kernel/umh.c
>> > index 850631518665..3ed39956c777 100644
>> > --- a/kernel/umh.c
>> > +++ b/kernel/umh.c
>> > @@ -380,6 +380,7 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
>> >  	sub_info->cleanup = cleanup;
>> >  	sub_info->init = init;
>> >  	sub_info->data = data;
>> > +	init_completion(&sub_info->done);
>> >    out:
>> >  	return sub_info;
>> >  }
>> > @@ -405,7 +406,6 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
>> >  int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>> >  {
>> >  	unsigned int state = TASK_UNINTERRUPTIBLE;
>> > -	DECLARE_COMPLETION_ONSTACK(done);
>> >  	int retval = 0;
>> >  
>> >  	if (!sub_info->path) {
>> > @@ -431,7 +431,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>> >  	 * This makes it possible to use umh_complete to free
>> >  	 * the data structure in case of UMH_NO_WAIT.
>> >  	 */
>> > -	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
>> > +	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &sub_info->done;
>> >  	sub_info->wait = wait;
>> >  
>> >  	queue_work(system_unbound_wq, &sub_info->work);
>> > @@ -444,7 +444,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>> >  	if (wait & UMH_FREEZABLE)
>> >  		state |= TASK_FREEZABLE;
>> >  
>> > -	retval = wait_for_completion_state(&done, state);
>> > +	retval = wait_for_completion_state(sub_info->complete, state);
>> >  	if (!retval)
>> >  		goto wait_done;
>> 
>> Hi Luis Chamberlain:
>> 
>> Could you help to review this patch? I'm not sure why we define the
>> amount of completion here on the stack. But this UAF can be fixed by
>> moving the completion variable to the heap.
>
> It would seem to me that if this is an issue other areas would have
> similar races as well, so I was hard pressed about the approach / fix.
>

I think other modules will have similar bugs, but this is a limitation
on the use of the DECLARE_COMPLETION_ONSTACK macro, and it has been
specifically stated in the completion's documentation.

There is the description from completion's documentation:

Note that when using completion objects as local variables you must be
acutely aware of the short life time of the function stack: the function
must not return to a calling context until all activities (such as waiting
threads) have ceased and the completion object is completely unused.

> Wouldn't something like this be a bit more explicit about ensuring
> we don't let the work item race beyond?
>
> diff --git a/kernel/umh.c b/kernel/umh.c
> index 850631518665..55fc698115a7 100644
> --- a/kernel/umh.c
> +++ b/kernel/umh.c
> @@ -447,6 +447,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>  	retval = wait_for_completion_state(&done, state);
>  	if (!retval)
>  		goto wait_done;
> +	else if (retval == -ERESTARTSYS)
> +		cancel_work_sync(&sub_info->work);
>

I think this modification will make UMH_KILLABLE useless, we have to
wait for this task to complete, even if it is killed.

>  	if (wait & UMH_KILLABLE) {
>  		/* umh_complete() will see NULL and free sub_info */

Schspa Shi Dec. 12, 2022, 1:38 p.m. UTC | #4

Schspa Shi <schspa@gmail.com> writes:

> Luis Chamberlain <mcgrof@kernel.org> writes:
>
>> On Mon, Dec 05, 2022 at 07:38:21PM +0800, Schspa Shi wrote:
>>> 
>>> Schspa Shi <schspa@gmail.com> writes:
>>> 
>>> > When the process is killed, wait_for_completion_state will return with
>>> > -ERESTARTSYS, and the completion variable in the stack will be freed.
>>
>> There is no free'ing here, it's just not availabel anymore, which is
>> different.
>>
>
> No, the whole thread stack will be freed when the process died. There
> will be some cases where it will be released. It will be more accurate
> to modify it to be unavailable.
>
>>> > If the user-mode thread is complete at the same time, there will be a UAF.
>>> >
>>> > Please refer to the following scenarios.
>>> >             T1                                  T2
>>> > ------------------------------------------------------------------
>>> > call_usermodehelper_exec
>>> >                                    call_usermodehelper_exec_async
>>> >                                    << do something >>
>>> >                                    umh_complete(sub_info);
>>> >                                    comp = xchg(&sub_info->complete, NULL);
>>> >                                    /* we got the completion */
>>> >                                    << context switch >>
>
> The sub_info->complete will be set to NULL.
>
>>> >
>>> >     << Being killed >>
>>> > 	retval = wait_for_completion_state(sub_info->complete, state);
>>> > 	if (!retval)
>>> > 		goto wait_done;
>>> >
>>> > 	if (wait & UMH_KILLABLE) {
>>> > 		/* umh_complete() will see NULL and free sub_info */
>>> > 		if (xchg(&sub_info->complete, NULL))
>>> > 			goto unlock;
>>> >         << we can't got the completion >>
>>
>> I'm sorry I don't understand what you tried to say here, we can't got?
>>
>
> In this scenario, the sub_info->complete will be NULL, at the
> call_usermodehelper_exec_async, and we will go to the unlock branch here.
>
>>> > 	}
>>> > 	....
>>> > unlock:
>>> > 	helper_unlock();
>>> > 	return retval;
>>> > }
>>> >
>>> > /**
>>> >  * the completion variable in stack is end of life cycle.
>>> >  * and maybe freed due to process is recycled.
>>> >  */
>>> >                                    --------UAF here----------
>>> >                                    if (comp)
>>> >                                        complete(comp);
>>> >
>>> > To fix it, we can put the completion variable in the subprocess_info
>>> > variable.
>>> >
>>> > Reported-by: syzbot+10d19d528d9755d9af22@syzkaller.appspotmail.com
>>> > Reported-by: syzbot+70d5d5d83d03db2c813d@syzkaller.appspotmail.com
>>> > Reported-by: syzbot+83cb0411d0fcf0a30fc1@syzkaller.appspotmail.com
>>> >
>>> > Signed-off-by: Schspa Shi <schspa@gmail.com>
>>> > ---
>>> >  include/linux/umh.h | 1 +
>>> >  kernel/umh.c        | 6 +++---
>>> >  2 files changed, 4 insertions(+), 3 deletions(-)
>>> >
>>> > diff --git a/include/linux/umh.h b/include/linux/umh.h
>>> > index 5d1f6129b847..801f7efbc825 100644
>>> > --- a/include/linux/umh.h
>>> > +++ b/include/linux/umh.h
>>> > @@ -20,6 +20,7 @@ struct file;
>>> >  struct subprocess_info {
>>> >  	struct work_struct work;
>>> >  	struct completion *complete;
>>> > +	struct completion done;
>>> >  	const char *path;
>>> >  	char **argv;
>>> >  	char **envp;
>>> > diff --git a/kernel/umh.c b/kernel/umh.c
>>> > index 850631518665..3ed39956c777 100644
>>> > --- a/kernel/umh.c
>>> > +++ b/kernel/umh.c
>>> > @@ -380,6 +380,7 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
>>> >  	sub_info->cleanup = cleanup;
>>> >  	sub_info->init = init;
>>> >  	sub_info->data = data;
>>> > +	init_completion(&sub_info->done);
>>> >    out:
>>> >  	return sub_info;
>>> >  }
>>> > @@ -405,7 +406,6 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
>>> >  int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>> >  {
>>> >  	unsigned int state = TASK_UNINTERRUPTIBLE;
>>> > -	DECLARE_COMPLETION_ONSTACK(done);
>>> >  	int retval = 0;
>>> >  
>>> >  	if (!sub_info->path) {
>>> > @@ -431,7 +431,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>> >  	 * This makes it possible to use umh_complete to free
>>> >  	 * the data structure in case of UMH_NO_WAIT.
>>> >  	 */
>>> > -	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
>>> > +	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &sub_info->done;
>>> >  	sub_info->wait = wait;
>>> >  
>>> >  	queue_work(system_unbound_wq, &sub_info->work);
>>> > @@ -444,7 +444,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>> >  	if (wait & UMH_FREEZABLE)
>>> >  		state |= TASK_FREEZABLE;
>>> >  
>>> > -	retval = wait_for_completion_state(&done, state);
>>> > +	retval = wait_for_completion_state(sub_info->complete, state);
>>> >  	if (!retval)
>>> >  		goto wait_done;
>>> 
>>> Hi Luis Chamberlain:
>>> 
>>> Could you help to review this patch? I'm not sure why we define the
>>> amount of completion here on the stack. But this UAF can be fixed by
>>> moving the completion variable to the heap.
>>
>> It would seem to me that if this is an issue other areas would have
>> similar races as well, so I was hard pressed about the approach / fix.
>>
>
> I think other modules will have similar bugs, but this is a limitation
> on the use of the DECLARE_COMPLETION_ONSTACK macro, and it has been
> specifically stated in the completion's documentation.
>
> There is the description from completion's documentation:
>
> Note that when using completion objects as local variables you must be
> acutely aware of the short life time of the function stack: the function
> must not return to a calling context until all activities (such as waiting
> threads) have ceased and the completion object is completely unused.
>
>> Wouldn't something like this be a bit more explicit about ensuring
>> we don't let the work item race beyond?
>>
>> diff --git a/kernel/umh.c b/kernel/umh.c
>> index 850631518665..55fc698115a7 100644
>> --- a/kernel/umh.c
>> +++ b/kernel/umh.c
>> @@ -447,6 +447,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>  	retval = wait_for_completion_state(&done, state);
>>  	if (!retval)
>>  		goto wait_done;
>> +	else if (retval == -ERESTARTSYS)
>> +		cancel_work_sync(&sub_info->work);
>>
>
> I think this modification will make UMH_KILLABLE useless, we have to
> wait for this task to complete, even if it is killed.
>
>>  	if (wait & UMH_KILLABLE) {
>>  		/* umh_complete() will see NULL and free sub_info */

Hi Luis Chamberlain:

I checked the code from __kthread_create_on_node, and we can fix this
with the following change too.

I'd like to upload a V2 patch with the new solution if you prefer the
following way.

diff --git a/kernel/umh.c b/kernel/umh.c
index 850631518665..8023f11fcfc0 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
                /* umh_complete() will see NULL and free sub_info */
                if (xchg(&sub_info->complete, NULL))
                        goto unlock;
+               /*
+                * kthreadd (or new kernel thread) will call complete()
+                * shortly.
+                */
+               wait_for_completion(&done);
        }

 wait_done:

Luis Chamberlain Dec. 13, 2022, 11:03 p.m. UTC | #5

On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
> I'd like to upload a V2 patch with the new solution if you prefer the
> following way.
> 
> diff --git a/kernel/umh.c b/kernel/umh.c
> index 850631518665..8023f11fcfc0 100644
> --- a/kernel/umh.c
> +++ b/kernel/umh.c
> @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>                 /* umh_complete() will see NULL and free sub_info */
>                 if (xchg(&sub_info->complete, NULL))
>                         goto unlock;
> +               /*
> +                * kthreadd (or new kernel thread) will call complete()
> +                * shortly.
> +                */
> +               wait_for_completion(&done);
>         }

Yes much better. Did you verify it fixes the splat found by the bots?

  Luis

Schspa Shi Dec. 14, 2022, 2:28 a.m. UTC | #6

Luis Chamberlain <mcgrof@kernel.org> writes:

> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
>> I'd like to upload a V2 patch with the new solution if you prefer the
>> following way.
>> 
>> diff --git a/kernel/umh.c b/kernel/umh.c
>> index 850631518665..8023f11fcfc0 100644
>> --- a/kernel/umh.c
>> +++ b/kernel/umh.c
>> @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>                 /* umh_complete() will see NULL and free sub_info */
>>                 if (xchg(&sub_info->complete, NULL))
>>                         goto unlock;
>> +               /*
>> +                * kthreadd (or new kernel thread) will call complete()
>> +                * shortly.
>> +                */
>> +               wait_for_completion(&done);
>>         }
>
> Yes much better. Did you verify it fixes the splat found by the bots?
>

Yes, it will fix it.

>   Luis

Luis Chamberlain Dec. 14, 2022, 7:57 p.m. UTC | #7

Peter, Ingo, Steven would like you're review.

On Tue, Dec 13, 2022 at 03:03:53PM -0800, Luis Chamberlain wrote:
> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
> > I'd like to upload a V2 patch with the new solution if you prefer the
> > following way.
> > 
> > diff --git a/kernel/umh.c b/kernel/umh.c
> > index 850631518665..8023f11fcfc0 100644
> > --- a/kernel/umh.c
> > +++ b/kernel/umh.c
> > @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
> >                 /* umh_complete() will see NULL and free sub_info */
> >                 if (xchg(&sub_info->complete, NULL))
> >                         goto unlock;
> > +               /*
> > +                * kthreadd (or new kernel thread) will call complete()
> > +                * shortly.
> > +                */
> > +               wait_for_completion(&done);
> >         }
> 
> Yes much better. Did you verify it fixes the splat found by the bots?

Wait, I'm not sure yet why this would fix it... I first started thinking
that this may be a good example of a Coccinelle SmPL rule, something like:

	DECLARE_COMPLETION_ONSTACK(done);
	foo *foo;
	...
	foo->completion = &done;
	...
	queue_work(system_unbound_wq, &foo->work);
	....
	ret = wait_for_completion_state(&done, state);
	...
	if (!ret)
		S
	...
	+wait_for_completion(&done);

But that is pretty complex, and while it may be useful to know how many
patterns we have like this, it begs the question if generalizing this
inside the callers is best for -ERESTARTSYS condition is best. What
do folks think?

The rationale here is that if you queue stuff and give access to the
completion variable but its on-stack obviously you can end up with the
queued stuff complete() on a on-stack variable. The issue seems to
be that wait_for_completion_state() for -ERESTARTSYS still means
that the already scheduled queue'd work is *about* to run and
the process with the completion on-stack completed. So we race with
the end of the routine and the completion on-stack.

It makes me wonder if wait_for_completion() above really is doing
something more, if it is just helping with timing and is still error
prone.

The queued work will try the the completion as follows:

static void umh_complete(struct subprocess_info *sub_info)
{
	struct completion *comp = xchg(&sub_info->complete, NULL);              
	/*
	 * See call_usermodehelper_exec(). If xchg() returns NULL
	 * we own sub_info, the UMH_KILLABLE caller has gone away
	 * or the caller used UMH_NO_WAIT.
	 */
	if (comp)
		complete(comp);
	else
		call_usermodehelper_freeinfo(sub_info);
}

So the race is getting -ERESTARTSYS on the process with completion
on-stack and the above running complete(comp). Why would sprinkling
wait_for_completion(&done) *after* wait_for_completion_state(&done, state)
fix this UAF?
												    }   
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index d57a5c1c1cd9..aa7031faca04 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -205,8 +205,10 @@ int __sched wait_for_completion_interruptible(struct completion *x)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
 
-	if (t == -ERESTARTSYS)
+	if (t == -ERESTARTSYS) {
+		wait_for_completion(x);
 		return t;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(wait_for_completion_interruptible);
@@ -243,8 +245,10 @@ int __sched wait_for_completion_killable(struct completion *x)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
 
-	if (t == -ERESTARTSYS)
+	if (t == -ERESTARTSYS) {
+		wait_for_completion(x);
 		return t;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(wait_for_completion_killable);
@@ -253,8 +257,10 @@ int __sched wait_for_completion_state(struct completion *x, unsigned int state)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
 
-	if (t == -ERESTARTSYS)
+	if (t == -ERESTARTSYS) {
+		wait_for_completion(x);
 		return t;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(wait_for_completion_state);

Schspa Shi Dec. 15, 2022, 6:16 a.m. UTC | #8

Luis Chamberlain <mcgrof@kernel.org> writes:

> Peter, Ingo, Steven would like you're review.
>
> On Tue, Dec 13, 2022 at 03:03:53PM -0800, Luis Chamberlain wrote:
>> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
>> > I'd like to upload a V2 patch with the new solution if you prefer the
>> > following way.
>> > 
>> > diff --git a/kernel/umh.c b/kernel/umh.c
>> > index 850631518665..8023f11fcfc0 100644
>> > --- a/kernel/umh.c
>> > +++ b/kernel/umh.c
>> > @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>> >                 /* umh_complete() will see NULL and free sub_info */
>> >                 if (xchg(&sub_info->complete, NULL))
>> >                         goto unlock;
>> > +               /*
>> > +                * kthreadd (or new kernel thread) will call complete()
>> > +                * shortly.
>> > +                */
>> > +               wait_for_completion(&done);
>> >         }
>> 
>> Yes much better. Did you verify it fixes the splat found by the bots?
>
> Wait, I'm not sure yet why this would fix it... I first started thinking
> that this may be a good example of a Coccinelle SmPL rule, something like:
>
> 	DECLARE_COMPLETION_ONSTACK(done);
> 	foo *foo;
> 	...
> 	foo->completion = &done;
> 	...
> 	queue_work(system_unbound_wq, &foo->work);
> 	....
> 	ret = wait_for_completion_state(&done, state);
> 	...
> 	if (!ret)
> 		S
> 	...
> 	+wait_for_completion(&done);
>
> But that is pretty complex, and while it may be useful to know how many
> patterns we have like this, it begs the question if generalizing this
> inside the callers is best for -ERESTARTSYS condition is best. What
> do folks think?
>
> The rationale here is that if you queue stuff and give access to the
> completion variable but its on-stack obviously you can end up with the
> queued stuff complete() on a on-stack variable. The issue seems to
> be that wait_for_completion_state() for -ERESTARTSYS still means
> that the already scheduled queue'd work is *about* to run and
> the process with the completion on-stack completed. So we race with
> the end of the routine and the completion on-stack.
>
> It makes me wonder if wait_for_completion() above really is doing
> something more, if it is just helping with timing and is still error
> prone.
>
> The queued work will try the the completion as follows:
>
> static void umh_complete(struct subprocess_info *sub_info)
> {
> 	struct completion *comp = xchg(&sub_info->complete, NULL);              
> 	/*
> 	 * See call_usermodehelper_exec(). If xchg() returns NULL
> 	 * we own sub_info, the UMH_KILLABLE caller has gone away
> 	 * or the caller used UMH_NO_WAIT.
> 	 */
> 	if (comp)
> 		complete(comp);
> 	else
> 		call_usermodehelper_freeinfo(sub_info);
> }
>
> So the race is getting -ERESTARTSYS on the process with completion
> on-stack and the above running complete(comp). Why would sprinkling
> wait_for_completion(&done) *after* wait_for_completion_state(&done, state)
> fix this UAF?

The wait_for_completion(&done) is added when xchg(&sub_info->complete,
NULL) return NULL. When it returns NULL, it means the umh_complete was
using the completion variable at the same time and will call complete
in a very short time.

Add wait_for_completion *after* wait_for_completion_state will make the
interruptible/timeout version API not working anymore.

> 												    }   
> diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
> index d57a5c1c1cd9..aa7031faca04 100644
> --- a/kernel/sched/completion.c
> +++ b/kernel/sched/completion.c
> @@ -205,8 +205,10 @@ int __sched wait_for_completion_interruptible(struct completion *x)
>  {
>  	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
>  
> -	if (t == -ERESTARTSYS)
> +	if (t == -ERESTARTSYS) {
> +		wait_for_completion(x);
>  		return t;
> +	}
>  	return 0;
>  }
>  EXPORT_SYMBOL(wait_for_completion_interruptible);
> @@ -243,8 +245,10 @@ int __sched wait_for_completion_killable(struct completion *x)
>  {
>  	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
>  
> -	if (t == -ERESTARTSYS)
> +	if (t == -ERESTARTSYS) {
> +		wait_for_completion(x);
>  		return t;
> +	}
>  	return 0;
>  }
>  EXPORT_SYMBOL(wait_for_completion_killable);
> @@ -253,8 +257,10 @@ int __sched wait_for_completion_state(struct completion *x, unsigned int state)
>  {
>  	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
>  
> -	if (t == -ERESTARTSYS)
> +	if (t == -ERESTARTSYS) {
> +		wait_for_completion(x);
>  		return t;
> +	}
>  	return 0;
>  }
>  EXPORT_SYMBOL(wait_for_completion_state);

If we want to make it a generic fix, syntactic sugar can be added to
simplify usage for users.

Consider the following patch.

diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index d57a5c1c1cd9..67b7d02c0098 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -341,3 +341,33 @@ bool completion_done(struct completion *x)
        return true;
 }
 EXPORT_SYMBOL(completion_done);
+
+void complete_on_stack(struct completion **x)
+{
+       struct completion *comp = xchg(*x, NULL);
+
+       if (comp)
+               complete(comp);
+}
+EXPORT_SYMBOL(complete_on_stack);
+
+int __sched wait_for_completion_state_on_stack(struct completion **x,
+                                       unsigned int state)
+{
+       struct completion *comp = *x;
+       int retval;
+
+       retval = wait_for_completion_state(comp, state);
+       if (retval) {
+               if (xchg(*x, NULL))
+                       return retval;
+
+               /*
+                * complete_on_stack will call complete shortly.
+                */
+               wait_for_completion(comp);
+       }
+
+       return retval;
+}
+EXPORT_SYMBOL(wait_for_completion_state_on_stack);

Schspa Shi Dec. 22, 2022, 5:45 a.m. UTC | #9

Schspa Shi <schspa@gmail.com> writes:

> Luis Chamberlain <mcgrof@kernel.org> writes:
>
>> Peter, Ingo, Steven would like you're review.
>>
>> On Tue, Dec 13, 2022 at 03:03:53PM -0800, Luis Chamberlain wrote:
>>> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
>>> > I'd like to upload a V2 patch with the new solution if you prefer the
>>> > following way.
>>> > 
>>> > diff --git a/kernel/umh.c b/kernel/umh.c
>>> > index 850631518665..8023f11fcfc0 100644
>>> > --- a/kernel/umh.c
>>> > +++ b/kernel/umh.c
>>> > @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>> >                 /* umh_complete() will see NULL and free sub_info */
>>> >                 if (xchg(&sub_info->complete, NULL))
>>> >                         goto unlock;
>>> > +               /*
>>> > +                * kthreadd (or new kernel thread) will call complete()
>>> > +                * shortly.
>>> > +                */
>>> > +               wait_for_completion(&done);
>>> >         }
>>> 
>>> Yes much better. Did you verify it fixes the splat found by the bots?
>>
>> Wait, I'm not sure yet why this would fix it... I first started thinking
>> that this may be a good example of a Coccinelle SmPL rule, something like:
>>
>> 	DECLARE_COMPLETION_ONSTACK(done);
>> 	foo *foo;
>> 	...
>> 	foo->completion = &done;
>> 	...
>> 	queue_work(system_unbound_wq, &foo->work);
>> 	....
>> 	ret = wait_for_completion_state(&done, state);
>> 	...
>> 	if (!ret)
>> 		S
>> 	...
>> 	+wait_for_completion(&done);
>>
>> But that is pretty complex, and while it may be useful to know how many
>> patterns we have like this, it begs the question if generalizing this
>> inside the callers is best for -ERESTARTSYS condition is best. What
>> do folks think?
>>
>> The rationale here is that if you queue stuff and give access to the
>> completion variable but its on-stack obviously you can end up with the
>> queued stuff complete() on a on-stack variable. The issue seems to
>> be that wait_for_completion_state() for -ERESTARTSYS still means
>> that the already scheduled queue'd work is *about* to run and
>> the process with the completion on-stack completed. So we race with
>> the end of the routine and the completion on-stack.
>>
>> It makes me wonder if wait_for_completion() above really is doing
>> something more, if it is just helping with timing and is still error
>> prone.
>>
>> The queued work will try the the completion as follows:
>>
>> static void umh_complete(struct subprocess_info *sub_info)
>> {
>> 	struct completion *comp = xchg(&sub_info->complete, NULL);              
>> 	/*
>> 	 * See call_usermodehelper_exec(). If xchg() returns NULL
>> 	 * we own sub_info, the UMH_KILLABLE caller has gone away
>> 	 * or the caller used UMH_NO_WAIT.
>> 	 */
>> 	if (comp)
>> 		complete(comp);
>> 	else
>> 		call_usermodehelper_freeinfo(sub_info);
>> }
>>
>> So the race is getting -ERESTARTSYS on the process with completion
>> on-stack and the above running complete(comp). Why would sprinkling
>> wait_for_completion(&done) *after* wait_for_completion_state(&done, state)
>> fix this UAF?
>
> The wait_for_completion(&done) is added when xchg(&sub_info->complete,
> NULL) return NULL. When it returns NULL, it means the umh_complete was
> using the completion variable at the same time and will call complete
> in a very short time.
>
Hi Luis:

Is there any further progress on this problem? Does the above
explanation answer your doubts?

> Add wait_for_completion *after* wait_for_completion_state will make the
> interruptible/timeout version API not working anymore.
>
>> 												    }   
>> diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
>> index d57a5c1c1cd9..aa7031faca04 100644
>> --- a/kernel/sched/completion.c
>> +++ b/kernel/sched/completion.c
>> @@ -205,8 +205,10 @@ int __sched wait_for_completion_interruptible(struct completion *x)
>>  {
>>  	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
>>  
>> -	if (t == -ERESTARTSYS)
>> +	if (t == -ERESTARTSYS) {
>> +		wait_for_completion(x);
>>  		return t;
>> +	}
>>  	return 0;
>>  }
>>  EXPORT_SYMBOL(wait_for_completion_interruptible);
>> @@ -243,8 +245,10 @@ int __sched wait_for_completion_killable(struct completion *x)
>>  {
>>  	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
>>  
>> -	if (t == -ERESTARTSYS)
>> +	if (t == -ERESTARTSYS) {
>> +		wait_for_completion(x);
>>  		return t;
>> +	}
>>  	return 0;
>>  }
>>  EXPORT_SYMBOL(wait_for_completion_killable);
>> @@ -253,8 +257,10 @@ int __sched wait_for_completion_state(struct completion *x, unsigned int state)
>>  {
>>  	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
>>  
>> -	if (t == -ERESTARTSYS)
>> +	if (t == -ERESTARTSYS) {
>> +		wait_for_completion(x);
>>  		return t;
>> +	}
>>  	return 0;
>>  }
>>  EXPORT_SYMBOL(wait_for_completion_state);
>
> If we want to make it a generic fix, syntactic sugar can be added to
> simplify usage for users.
>
> Consider the following patch.
>
> diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
> index d57a5c1c1cd9..67b7d02c0098 100644
> --- a/kernel/sched/completion.c
> +++ b/kernel/sched/completion.c
> @@ -341,3 +341,33 @@ bool completion_done(struct completion *x)
>         return true;
>  }
>  EXPORT_SYMBOL(completion_done);
> +
> +void complete_on_stack(struct completion **x)
> +{
> +       struct completion *comp = xchg(*x, NULL);
> +
> +       if (comp)
> +               complete(comp);
> +}
> +EXPORT_SYMBOL(complete_on_stack);
> +
> +int __sched wait_for_completion_state_on_stack(struct completion **x,
> +                                       unsigned int state)
> +{
> +       struct completion *comp = *x;
> +       int retval;
> +
> +       retval = wait_for_completion_state(comp, state);
> +       if (retval) {
> +               if (xchg(*x, NULL))
> +                       return retval;
> +
> +               /*
> +                * complete_on_stack will call complete shortly.
> +                */
> +               wait_for_completion(comp);
> +       }
> +
> +       return retval;
> +}
> +EXPORT_SYMBOL(wait_for_completion_state_on_stack);

Luis Chamberlain Dec. 22, 2022, 6:16 a.m. UTC | #10

On Thu, Dec 22, 2022 at 01:45:46PM +0800, Schspa Shi wrote:
> 
> Schspa Shi <schspa@gmail.com> writes:
> 
> > Luis Chamberlain <mcgrof@kernel.org> writes:
> >
> >> Peter, Ingo, Steven would like you're review.
> >>
> >> On Tue, Dec 13, 2022 at 03:03:53PM -0800, Luis Chamberlain wrote:
> >>> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
> >>> > I'd like to upload a V2 patch with the new solution if you prefer the
> >>> > following way.
> >>> > 
> >>> > diff --git a/kernel/umh.c b/kernel/umh.c
> >>> > index 850631518665..8023f11fcfc0 100644
> >>> > --- a/kernel/umh.c
> >>> > +++ b/kernel/umh.c
> >>> > @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
> >>> >                 /* umh_complete() will see NULL and free sub_info */
> >>> >                 if (xchg(&sub_info->complete, NULL))
> >>> >                         goto unlock;
> >>> > +               /*
> >>> > +                * kthreadd (or new kernel thread) will call complete()
> >>> > +                * shortly.
> >>> > +                */
> >>> > +               wait_for_completion(&done);
> >>> >         }
> >>> 
> >>> Yes much better. Did you verify it fixes the splat found by the bots?
> >>
> >> Wait, I'm not sure yet why this would fix it... I first started thinking
> >> that this may be a good example of a Coccinelle SmPL rule, something like:
> >>
> >> 	DECLARE_COMPLETION_ONSTACK(done);
> >> 	foo *foo;
> >> 	...
> >> 	foo->completion = &done;
> >> 	...
> >> 	queue_work(system_unbound_wq, &foo->work);
> >> 	....
> >> 	ret = wait_for_completion_state(&done, state);
> >> 	...
> >> 	if (!ret)
> >> 		S
> >> 	...
> >> 	+wait_for_completion(&done);
> >>
> >> But that is pretty complex, and while it may be useful to know how many
> >> patterns we have like this, it begs the question if generalizing this
> >> inside the callers is best for -ERESTARTSYS condition is best. What
> >> do folks think?
> >>
> >> The rationale here is that if you queue stuff and give access to the
> >> completion variable but its on-stack obviously you can end up with the
> >> queued stuff complete() on a on-stack variable. The issue seems to
> >> be that wait_for_completion_state() for -ERESTARTSYS still means
> >> that the already scheduled queue'd work is *about* to run and
> >> the process with the completion on-stack completed. So we race with
> >> the end of the routine and the completion on-stack.
> >>
> >> It makes me wonder if wait_for_completion() above really is doing
> >> something more, if it is just helping with timing and is still error
> >> prone.
> >>
> >> The queued work will try the the completion as follows:
> >>
> >> static void umh_complete(struct subprocess_info *sub_info)
> >> {
> >> 	struct completion *comp = xchg(&sub_info->complete, NULL);              
> >> 	/*
> >> 	 * See call_usermodehelper_exec(). If xchg() returns NULL
> >> 	 * we own sub_info, the UMH_KILLABLE caller has gone away
> >> 	 * or the caller used UMH_NO_WAIT.
> >> 	 */
> >> 	if (comp)
> >> 		complete(comp);
> >> 	else
> >> 		call_usermodehelper_freeinfo(sub_info);
> >> }
> >>
> >> So the race is getting -ERESTARTSYS on the process with completion
> >> on-stack and the above running complete(comp). Why would sprinkling
> >> wait_for_completion(&done) *after* wait_for_completion_state(&done, state)
> >> fix this UAF?
> >
> > The wait_for_completion(&done) is added when xchg(&sub_info->complete,
> > NULL) return NULL. When it returns NULL, it means the umh_complete was
> > using the completion variable at the same time and will call complete
> > in a very short time.
> >
> Hi Luis:
> 
> Is there any further progress on this problem? Does the above
> explanation answer your doubts?

I think it would be useful to proove your work for you to either
hunt with SmPL coccinelle a similar flaw / how rampant this issue
is and then also try to create the same UAF there and prove how
your changes fixes it.

  Luis

Schspa Shi Dec. 22, 2022, 6:50 a.m. UTC | #11

Luis Chamberlain <mcgrof@kernel.org> writes:

> On Thu, Dec 22, 2022 at 01:45:46PM +0800, Schspa Shi wrote:
>> 
>> Schspa Shi <schspa@gmail.com> writes:
>> 
>> > Luis Chamberlain <mcgrof@kernel.org> writes:
>> >
>> >> Peter, Ingo, Steven would like you're review.
>> >>
>> >> On Tue, Dec 13, 2022 at 03:03:53PM -0800, Luis Chamberlain wrote:
>> >>> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
>> >>> > I'd like to upload a V2 patch with the new solution if you prefer the
>> >>> > following way.
>> >>> > 
>> >>> > diff --git a/kernel/umh.c b/kernel/umh.c
>> >>> > index 850631518665..8023f11fcfc0 100644
>> >>> > --- a/kernel/umh.c
>> >>> > +++ b/kernel/umh.c
>> >>> > @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>> >>> >                 /* umh_complete() will see NULL and free sub_info */
>> >>> >                 if (xchg(&sub_info->complete, NULL))
>> >>> >                         goto unlock;
>> >>> > +               /*
>> >>> > +                * kthreadd (or new kernel thread) will call complete()
>> >>> > +                * shortly.
>> >>> > +                */
>> >>> > +               wait_for_completion(&done);
>> >>> >         }
>> >>> 
>> >>> Yes much better. Did you verify it fixes the splat found by the bots?
>> >>
>> >> Wait, I'm not sure yet why this would fix it... I first started thinking
>> >> that this may be a good example of a Coccinelle SmPL rule, something like:
>> >>
>> >> 	DECLARE_COMPLETION_ONSTACK(done);
>> >> 	foo *foo;
>> >> 	...
>> >> 	foo->completion = &done;
>> >> 	...
>> >> 	queue_work(system_unbound_wq, &foo->work);
>> >> 	....
>> >> 	ret = wait_for_completion_state(&done, state);
>> >> 	...
>> >> 	if (!ret)
>> >> 		S
>> >> 	...
>> >> 	+wait_for_completion(&done);
>> >>
>> >> But that is pretty complex, and while it may be useful to know how many
>> >> patterns we have like this, it begs the question if generalizing this
>> >> inside the callers is best for -ERESTARTSYS condition is best. What
>> >> do folks think?
>> >>
>> >> The rationale here is that if you queue stuff and give access to the
>> >> completion variable but its on-stack obviously you can end up with the
>> >> queued stuff complete() on a on-stack variable. The issue seems to
>> >> be that wait_for_completion_state() for -ERESTARTSYS still means
>> >> that the already scheduled queue'd work is *about* to run and
>> >> the process with the completion on-stack completed. So we race with
>> >> the end of the routine and the completion on-stack.
>> >>
>> >> It makes me wonder if wait_for_completion() above really is doing
>> >> something more, if it is just helping with timing and is still error
>> >> prone.
>> >>
>> >> The queued work will try the the completion as follows:
>> >>
>> >> static void umh_complete(struct subprocess_info *sub_info)
>> >> {
>> >> 	struct completion *comp = xchg(&sub_info->complete, NULL);              
>> >> 	/*
>> >> 	 * See call_usermodehelper_exec(). If xchg() returns NULL
>> >> 	 * we own sub_info, the UMH_KILLABLE caller has gone away
>> >> 	 * or the caller used UMH_NO_WAIT.
>> >> 	 */
>> >> 	if (comp)
>> >> 		complete(comp);
>> >> 	else
>> >> 		call_usermodehelper_freeinfo(sub_info);
>> >> }
>> >>
>> >> So the race is getting -ERESTARTSYS on the process with completion
>> >> on-stack and the above running complete(comp). Why would sprinkling
>> >> wait_for_completion(&done) *after* wait_for_completion_state(&done, state)
>> >> fix this UAF?
>> >
>> > The wait_for_completion(&done) is added when xchg(&sub_info->complete,
>> > NULL) return NULL. When it returns NULL, it means the umh_complete was
>> > using the completion variable at the same time and will call complete
>> > in a very short time.
>> >
>> Hi Luis:
>> 
>> Is there any further progress on this problem? Does the above
>> explanation answer your doubts?
>
> I think it would be useful to proove your work for you to either
> hunt with SmPL coccinelle a similar flaw / how rampant this issue
> is and then also try to create the same UAF there and prove how
> your changes fixes it.
>

OK, but it will take some time.

>   Luis

Schspa Shi Dec. 22, 2022, 11:56 a.m. UTC | #12

Schspa Shi <schspa@gmail.com> writes:

> Luis Chamberlain <mcgrof@kernel.org> writes:
>
>> On Thu, Dec 22, 2022 at 01:45:46PM +0800, Schspa Shi wrote:
>>> 
>>> Schspa Shi <schspa@gmail.com> writes:
>>> 
>>> > Luis Chamberlain <mcgrof@kernel.org> writes:
>>> >
>>> >> Peter, Ingo, Steven would like you're review.
>>> >>
>>> >> On Tue, Dec 13, 2022 at 03:03:53PM -0800, Luis Chamberlain wrote:
>>> >>> On Mon, Dec 12, 2022 at 09:38:31PM +0800, Schspa Shi wrote:
>>> >>> > I'd like to upload a V2 patch with the new solution if you prefer the
>>> >>> > following way.
>>> >>> > 
>>> >>> > diff --git a/kernel/umh.c b/kernel/umh.c
>>> >>> > index 850631518665..8023f11fcfc0 100644
>>> >>> > --- a/kernel/umh.c
>>> >>> > +++ b/kernel/umh.c
>>> >>> > @@ -452,6 +452,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
>>> >>> >                 /* umh_complete() will see NULL and free sub_info */
>>> >>> >                 if (xchg(&sub_info->complete, NULL))
>>> >>> >                         goto unlock;
>>> >>> > +               /*
>>> >>> > +                * kthreadd (or new kernel thread) will call complete()
>>> >>> > +                * shortly.
>>> >>> > +                */
>>> >>> > +               wait_for_completion(&done);
>>> >>> >         }
>>> >>> 
>>> >>> Yes much better. Did you verify it fixes the splat found by the bots?
>>> >>
>>> >> Wait, I'm not sure yet why this would fix it... I first started thinking
>>> >> that this may be a good example of a Coccinelle SmPL rule, something like:
>>> >>
>>> >> 	DECLARE_COMPLETION_ONSTACK(done);
>>> >> 	foo *foo;
>>> >> 	...
>>> >> 	foo->completion = &done;
>>> >> 	...
>>> >> 	queue_work(system_unbound_wq, &foo->work);
>>> >> 	....
>>> >> 	ret = wait_for_completion_state(&done, state);
>>> >> 	...
>>> >> 	if (!ret)
>>> >> 		S
>>> >> 	...
>>> >> 	+wait_for_completion(&done);
>>> >>
>>> >> But that is pretty complex, and while it may be useful to know how many
>>> >> patterns we have like this, it begs the question if generalizing this
>>> >> inside the callers is best for -ERESTARTSYS condition is best. What
>>> >> do folks think?
>>> >>
>>> >> The rationale here is that if you queue stuff and give access to the
>>> >> completion variable but its on-stack obviously you can end up with the
>>> >> queued stuff complete() on a on-stack variable. The issue seems to
>>> >> be that wait_for_completion_state() for -ERESTARTSYS still means
>>> >> that the already scheduled queue'd work is *about* to run and
>>> >> the process with the completion on-stack completed. So we race with
>>> >> the end of the routine and the completion on-stack.
>>> >>
>>> >> It makes me wonder if wait_for_completion() above really is doing
>>> >> something more, if it is just helping with timing and is still error
>>> >> prone.
>>> >>
>>> >> The queued work will try the the completion as follows:
>>> >>
>>> >> static void umh_complete(struct subprocess_info *sub_info)
>>> >> {
>>> >> 	struct completion *comp = xchg(&sub_info->complete, NULL);              
>>> >> 	/*
>>> >> 	 * See call_usermodehelper_exec(). If xchg() returns NULL
>>> >> 	 * we own sub_info, the UMH_KILLABLE caller has gone away
>>> >> 	 * or the caller used UMH_NO_WAIT.
>>> >> 	 */
>>> >> 	if (comp)
>>> >> 		complete(comp);
>>> >> 	else
>>> >> 		call_usermodehelper_freeinfo(sub_info);
>>> >> }
>>> >>
>>> >> So the race is getting -ERESTARTSYS on the process with completion
>>> >> on-stack and the above running complete(comp). Why would sprinkling
>>> >> wait_for_completion(&done) *after* wait_for_completion_state(&done, state)
>>> >> fix this UAF?
>>> >
>>> > The wait_for_completion(&done) is added when xchg(&sub_info->complete,
>>> > NULL) return NULL. When it returns NULL, it means the umh_complete was
>>> > using the completion variable at the same time and will call complete
>>> > in a very short time.
>>> >
>>> Hi Luis:
>>> 
>>> Is there any further progress on this problem? Does the above
>>> explanation answer your doubts?
>>
>> I think it would be useful to proove your work for you to either
>> hunt with SmPL coccinelle a similar flaw / how rampant this issue
>> is and then also try to create the same UAF there and prove how
>> your changes fixes it.
>>
>
> OK, but it will take some time.

Hi Luis:

I have made a kernel module to prove this fix. Please check this at:
Link: https://github.com/schspa/code_snippet/tree/master/kernel_module/completion

There is both bad & ok test case in the README.org.

>
>>   Luis

Schspa Shi Dec. 22, 2022, 12:09 p.m. UTC | #13

Attaching the full test program in case anyone wants to add some
comments.

/*
 * complete-uaf.c --- UAF test for complete
 *
 * Copyright (C) 2022, Schspa Shi, all rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#define pr_fmt(fmt) "complete-uaf-test:" fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/completion.h>

struct test_work {
	struct completion *complete;
	struct pid *caller_pid;
	unsigned long delay_time;
	int id;
};

#define MAX_WAIT_TIMEOUT (50)

static atomic_t test_instance_count;

static bool use_fix = false;
module_param(use_fix, bool, 0444);
MODULE_PARM_DESC(use_fix, "Use fix");

static void mdelay_with_yield(unsigned long timeout_ms)
{
	unsigned long start = jiffies;

	do {
		yield();
	} while (jiffies_to_msecs(jiffies - start) < timeout_ms);

	return;
}

static void test_work_complete(struct test_work *workdata)
{
	struct completion *comp = xchg(&workdata->complete, NULL);

	/* Sleep for 1 millisecond to simulate preemption */
	msleep(100);
	if (comp)
		complete(comp);

	kfree(workdata);
}

static int completion_thread(void *data)
{
	struct test_work *workdata = data;

	mdelay_with_yield(workdata->delay_time);

	/* Simulate an external kill signal */
	kill_pid(workdata->caller_pid, SIGKILL, 1);

	test_work_complete(workdata);

	return 0;
}

static int complete_uaf_test_proc_show(struct seq_file *m, void *v)
{
	struct task_struct *thread;
	DECLARE_COMPLETION_ONSTACK(done);
	struct test_work *workdata;
	int retval;
	int id;

	workdata = kzalloc(sizeof(*workdata), GFP_KERNEL);
        if (!workdata) {
		return -ENOMEM;
	}

	id = atomic_inc_return(&test_instance_count);

	workdata->complete = &done;
	workdata->id = id;
	workdata->delay_time = get_random_u32() % (MAX_WAIT_TIMEOUT);
	workdata->caller_pid = get_pid(task_tgid(current));

	thread = kthread_run(completion_thread, workdata,
			"complete_uaf_test_kthread-%d", workdata->id);
	if (IS_ERR(thread)) {
		seq_printf(m, "kthread create failed with status %ld",
			PTR_ERR(thread));
		kfree(workdata);
		return PTR_ERR(thread);
	}

	retval = wait_for_completion_killable(&done);
	if (retval) {
		if (xchg(&workdata->complete, NULL))
			goto exit;

		if (use_fix) {
			wait_for_completion(&done);
		}
	}

	seq_printf(m, "test %d success\n", id);
exit:
	return 0;
}

static int __init complete_uaf_test_init(void)
{
	proc_create_single("complete_uaf_test", 0, NULL,
			complete_uaf_test_proc_show);

	return 0;
}

module_init(complete_uaf_test_init);
MODULE_AUTHOR("Schspa <schspa@gmail.com>");
MODULE_LICENSE("GPL v2");

Luis Chamberlain Dec. 23, 2022, 3:01 p.m. UTC | #14

On Thu, Dec 22, 2022 at 08:09:38PM +0800, Schspa Shi wrote:
> 
> Attaching the full test program in case anyone wants to add some
> comments.

Good stuff.

That looks like a kernel sefltest. So you can just add it as an
initial selftest for completion so lib/test_completion.c and extend
lib/Kconfig.debug for a new kconfig symbol for it, and then just add
a script on tools/testing/selftets/completion/ with a simple makefile
which references a script which just calls modprobe. You can look at
tools/testing/selftests/kmod/ for an example.

But I still think you may want an SmPL Coccinelle grammer patch to hunt
down other users with this pattern. The beneefit is that then you can
use the same Coccinelle patch to also then *fix* the issue in other
places.

The current uaf on umh is not something I'm terribly concerned to be
exploited in the wild. I don't think other use cases would be easier,
but, all this work would close the gap completely.

Thanks for doing this.

  Luis

Schspa Shi Jan. 13, 2023, 5:42 a.m. UTC | #15

Luis Chamberlain <mcgrof@kernel.org> writes:

> On Thu, Dec 22, 2022 at 08:09:38PM +0800, Schspa Shi wrote:
>> 
>> Attaching the full test program in case anyone wants to add some
>> comments.
>
> Good stuff.
>
> That looks like a kernel sefltest. So you can just add it as an
> initial selftest for completion so lib/test_completion.c and extend
> lib/Kconfig.debug for a new kconfig symbol for it, and then just add
> a script on tools/testing/selftets/completion/ with a simple makefile
> which references a script which just calls modprobe. You can look at
> tools/testing/selftests/kmod/ for an example.

OK, but I want to know, is it enough to add only positive examples for
the test items here? Do we need a reverse example to prove that the
previous writing is wrong?

>
> But I still think you may want an SmPL Coccinelle grammer patch to hunt
> down other users with this pattern. The beneefit is that then you can
> use the same Coccinelle patch to also then *fix* the issue in other
> places.
>

Yes, I'm learning about SmPL, and I'll add this syntax patch later to
find more problems.

> The current uaf on umh is not something I'm terribly concerned to be
> exploited in the wild. I don't think other use cases would be easier,
> but, all this work would close the gap completely.
>
> Thanks for doing this.
>
>   Luis

Luis Chamberlain Jan. 24, 2023, 5:39 p.m. UTC | #16

On Fri, Jan 13, 2023 at 01:42:05PM +0800, Schspa Shi wrote:
> 
> Luis Chamberlain <mcgrof@kernel.org> writes:
> 
> > On Thu, Dec 22, 2022 at 08:09:38PM +0800, Schspa Shi wrote:
> >> 
> >> Attaching the full test program in case anyone wants to add some
> >> comments.
> >
> > Good stuff.
> >
> > That looks like a kernel sefltest. So you can just add it as an
> > initial selftest for completion so lib/test_completion.c and extend
> > lib/Kconfig.debug for a new kconfig symbol for it, and then just add
> > a script on tools/testing/selftets/completion/ with a simple makefile
> > which references a script which just calls modprobe. You can look at
> > tools/testing/selftests/kmod/ for an example.
> 
> OK, but I want to know, is it enough to add only positive examples for
> the test items here? Do we need a reverse example to prove that the
> previous writing is wrong?

That would mean adding code which would cause a UAF, perhaps useful if
disabled by default.

> > But I still think you may want an SmPL Coccinelle grammer patch to hunt
> > down other users with this pattern. The beneefit is that then you can
> > use the same Coccinelle patch to also then *fix* the issue in other
> > places.
> >
> 
> Yes, I'm learning about SmPL, and I'll add this syntax patch later to
> find more problems.

Great thanks.

  Luis

diff mbox series

Patch

diff --git a/include/linux/umh.h b/include/linux/umh.h
index 5d1f6129b847..801f7efbc825 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -20,6 +20,7 @@  struct file;
 struct subprocess_info {
 	struct work_struct work;
 	struct completion *complete;
+	struct completion done;
 	const char *path;
 	char **argv;
 	char **envp;
diff --git a/kernel/umh.c b/kernel/umh.c
index 850631518665..3ed39956c777 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -380,6 +380,7 @@  struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
 	sub_info->cleanup = cleanup;
 	sub_info->init = init;
 	sub_info->data = data;
+	init_completion(&sub_info->done);
   out:
 	return sub_info;
 }
@@ -405,7 +406,6 @@  EXPORT_SYMBOL(call_usermodehelper_setup);
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
 	unsigned int state = TASK_UNINTERRUPTIBLE;
-	DECLARE_COMPLETION_ONSTACK(done);
 	int retval = 0;
 
 	if (!sub_info->path) {
@@ -431,7 +431,7 @@  int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 	 * This makes it possible to use umh_complete to free
 	 * the data structure in case of UMH_NO_WAIT.
 	 */
-	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
+	sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &sub_info->done;
 	sub_info->wait = wait;
 
 	queue_work(system_unbound_wq, &sub_info->work);
@@ -444,7 +444,7 @@  int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 	if (wait & UMH_FREEZABLE)
 		state |= TASK_FREEZABLE;
 
-	retval = wait_for_completion_state(&done, state);
+	retval = wait_for_completion_state(sub_info->complete, state);
 	if (!retval)
 		goto wait_done;