[v4,02/13] locking/ww_mutex: Remove wakeups from under mutex::wait_lock

Message ID 20230601055846.2349566-3-jstultz@google.com
State New
Headers
Series Generalized Priority Inheritance via Proxy Execution v3 |

Commit Message

John Stultz June 1, 2023, 5:58 a.m. UTC
  From: Peter Zijlstra <peterz@infradead.org>

In preparation to nest mutex::wait_lock under rq::lock we need to remove
wakeups from under it.

Cc: Joel Fernandes <joelaf@google.com>
Cc: Qais Yousef <qyousef@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Youssef Esmat <youssefesmat@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: kernel-team@android.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Connor O'Brien <connoro@google.com>
Signed-off-by: John Stultz <jstultz@google.com>
---
v2:
* Move wake_q_init() as suggested by Waiman Long
---
 include/linux/ww_mutex.h  |  3 +++
 kernel/locking/mutex.c    |  8 ++++++++
 kernel/locking/ww_mutex.h | 10 ++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)
  

Comments

Dietmar Eggemann June 23, 2023, 12:21 p.m. UTC | #1
Hi John,

On 01/06/2023 07:58, John Stultz wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> 
> In preparation to nest mutex::wait_lock under rq::lock we need to remove
> wakeups from under it.

[...]

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Connor O'Brien <connoro@google.com>
> Signed-off-by: John Stultz <jstultz@google.com>
> ---
> v2:
> * Move wake_q_init() as suggested by Waiman Long
> ---
>  include/linux/ww_mutex.h  |  3 +++
>  kernel/locking/mutex.c    |  8 ++++++++
>  kernel/locking/ww_mutex.h | 10 ++++++++--
>  3 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
> index bb763085479a..9335b2202017 100644
> --- a/include/linux/ww_mutex.h
> +++ b/include/linux/ww_mutex.h
> @@ -19,6 +19,7 @@
>  
>  #include <linux/mutex.h>
>  #include <linux/rtmutex.h>
> +#include <linux/sched/wake_q.h>
>  
>  #if defined(CONFIG_DEBUG_MUTEXES) || \
>     (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES))
> @@ -58,6 +59,7 @@ struct ww_acquire_ctx {
>  	unsigned int acquired;
>  	unsigned short wounded;
>  	unsigned short is_wait_die;
> +	struct wake_q_head wake_q;

you told me that there is already an issue in this patch even w/o PE
when running `insmod /lib/modules/test-ww_mutex.ko`.

The issue is related to Connor's version (1):

https://lkml.kernel.org/r/20221003214501.2050087-2-connoro@google.com

  struct ww_acquire_ctx {

	  struct wake_q_head wake_q;


  __mutex_lock_common()

    if (ww_ctx)
        ww_ctx_wake(ww_ctx)

          wake_up_q(&ww_ctx->wake_q);
          wake_q_init(&ww_ctx->wake_q);


Juri's version (2):

https://lkml.kernel.org/r/20181009092434.26221-3-juri.lelli@redhat.com

  __mutex_lock_common()

      DEFINE_WAKE_Q(wake_q) <-- !!!

      __ww_mutex_check_waiters(..., wake_q)

        __ww_mutex_die(..., wake_q)

          wake_q_add(wake_q, waiter->task)

      wake_up_q(&wake_q)


`insmod /lib/modules/test-ww_mutex.ko` runs fine with (2) but not with
(1) (both w/o the remaining PE patches).

So to test the PE issues we talked about already which come with `[PATCH
v4 09/13] sched: Add proxy execution` and CONFIG_PROXY_EXEC=y we need to
fix (1) or go back to (2).

I haven't found any clues why (2) was changed to (1) so far.

[...]
  
John Stultz June 24, 2023, 12:41 a.m. UTC | #2
On Fri, Jun 23, 2023 at 5:21 AM Dietmar Eggemann
<dietmar.eggemann@arm.com> wrote:
>
> Hi John,
>
> On 01/06/2023 07:58, John Stultz wrote:
> > From: Peter Zijlstra <peterz@infradead.org>
> >
> > In preparation to nest mutex::wait_lock under rq::lock we need to remove
> > wakeups from under it.
>
> [...]
>
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Signed-off-by: Connor O'Brien <connoro@google.com>
> > Signed-off-by: John Stultz <jstultz@google.com>
> > ---
> > v2:
> > * Move wake_q_init() as suggested by Waiman Long
> > ---
> >  include/linux/ww_mutex.h  |  3 +++
> >  kernel/locking/mutex.c    |  8 ++++++++
> >  kernel/locking/ww_mutex.h | 10 ++++++++--
> >  3 files changed, 19 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
> > index bb763085479a..9335b2202017 100644
> > --- a/include/linux/ww_mutex.h
> > +++ b/include/linux/ww_mutex.h
> > @@ -19,6 +19,7 @@
> >
> >  #include <linux/mutex.h>
> >  #include <linux/rtmutex.h>
> > +#include <linux/sched/wake_q.h>
> >
> >  #if defined(CONFIG_DEBUG_MUTEXES) || \
> >     (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES))
> > @@ -58,6 +59,7 @@ struct ww_acquire_ctx {
> >       unsigned int acquired;
> >       unsigned short wounded;
> >       unsigned short is_wait_die;
> > +     struct wake_q_head wake_q;
>
> you told me that there is already an issue in this patch even w/o PE
> when running `insmod /lib/modules/test-ww_mutex.ko`.
>
> The issue is related to Connor's version (1):
>
> https://lkml.kernel.org/r/20221003214501.2050087-2-connoro@google.com
>
>   struct ww_acquire_ctx {
>
>           struct wake_q_head wake_q;
>
>
>   __mutex_lock_common()
>
>     if (ww_ctx)
>         ww_ctx_wake(ww_ctx)
>
>           wake_up_q(&ww_ctx->wake_q);
>           wake_q_init(&ww_ctx->wake_q);
>
>
> Juri's version (2):
>
> https://lkml.kernel.org/r/20181009092434.26221-3-juri.lelli@redhat.com
>
>   __mutex_lock_common()
>
>       DEFINE_WAKE_Q(wake_q) <-- !!!
>
>       __ww_mutex_check_waiters(..., wake_q)
>
>         __ww_mutex_die(..., wake_q)
>
>           wake_q_add(wake_q, waiter->task)
>
>       wake_up_q(&wake_q)
>
>
> `insmod /lib/modules/test-ww_mutex.ko` runs fine with (2) but not with
> (1) (both w/o the remaining PE patches).
>
> So to test the PE issues we talked about already which come with `[PATCH
> v4 09/13] sched: Add proxy execution` and CONFIG_PROXY_EXEC=y we need to
> fix (1) or go back to (2).
>
> I haven't found any clues why (2) was changed to (1) so far.

Right. I don't have context for why, but moving the wake_q to the
ww_ctx does seem to break the wake_q assumptions, and results in lost
wakeups.

In my current tree, I've switched back to Juri's older version of the
patch, but adding one fix from Connor's, and with that the patch
doesn't run into this trouble.

That said, I still am catching and debugging problems with later
patches in the series, which has required breaking up the core proxy
change into much finer grained changes so I can debug what's going
wrong.  My v5 patch set will reflect this.

thanks
-john
  

Patch

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index bb763085479a..9335b2202017 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -19,6 +19,7 @@ 
 
 #include <linux/mutex.h>
 #include <linux/rtmutex.h>
+#include <linux/sched/wake_q.h>
 
 #if defined(CONFIG_DEBUG_MUTEXES) || \
    (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES))
@@ -58,6 +59,7 @@  struct ww_acquire_ctx {
 	unsigned int acquired;
 	unsigned short wounded;
 	unsigned short is_wait_die;
+	struct wake_q_head wake_q;
 #ifdef DEBUG_WW_MUTEXES
 	unsigned int done_acquire;
 	struct ww_class *ww_class;
@@ -137,6 +139,7 @@  static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
 	ctx->acquired = 0;
 	ctx->wounded = false;
 	ctx->is_wait_die = ww_class->is_wait_die;
+	wake_q_init(&ctx->wake_q);
 #ifdef DEBUG_WW_MUTEXES
 	ctx->ww_class = ww_class;
 	ctx->done_acquire = 0;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d973fe6041bf..1582756914df 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -676,6 +676,8 @@  __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		}
 
 		raw_spin_unlock(&lock->wait_lock);
+		if (ww_ctx)
+			ww_ctx_wake(ww_ctx);
 		schedule_preempt_disabled();
 
 		first = __mutex_waiter_is_first(lock, &waiter);
@@ -725,6 +727,8 @@  __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 		ww_mutex_lock_acquired(ww, ww_ctx);
 
 	raw_spin_unlock(&lock->wait_lock);
+	if (ww_ctx)
+		ww_ctx_wake(ww_ctx);
 	preempt_enable();
 	return 0;
 
@@ -736,6 +740,8 @@  __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 	raw_spin_unlock(&lock->wait_lock);
 	debug_mutex_free_waiter(&waiter);
 	mutex_release(&lock->dep_map, ip);
+	if (ww_ctx)
+		ww_ctx_wake(ww_ctx);
 	preempt_enable();
 	return ret;
 }
@@ -946,9 +952,11 @@  static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 	if (owner & MUTEX_FLAG_HANDOFF)
 		__mutex_handoff(lock, next);
 
+	preempt_disable();
 	raw_spin_unlock(&lock->wait_lock);
 
 	wake_up_q(&wake_q);
+	preempt_enable();
 }
 
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 56f139201f24..e49ea5336473 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -161,6 +161,12 @@  static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock)
 
 #endif /* WW_RT */
 
+void ww_ctx_wake(struct ww_acquire_ctx *ww_ctx)
+{
+	wake_up_q(&ww_ctx->wake_q);
+	wake_q_init(&ww_ctx->wake_q);
+}
+
 /*
  * Wait-Die:
  *   The newer transactions are killed when:
@@ -284,7 +290,7 @@  __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
 #ifndef WW_RT
 		debug_mutex_wake_waiter(lock, waiter);
 #endif
-		wake_up_process(waiter->task);
+		wake_q_add(&ww_ctx->wake_q, waiter->task);
 	}
 
 	return true;
@@ -331,7 +337,7 @@  static bool __ww_mutex_wound(struct MUTEX *lock,
 		 * wakeup pending to re-read the wounded state.
 		 */
 		if (owner != current)
-			wake_up_process(owner);
+			wake_q_add(&ww_ctx->wake_q, owner);
 
 		return true;
 	}