[RFC,net-next,3/4] net: add flag to indicate NAPI/GRO is running right now

Message ID 20230629152305.905962-4-aleksander.lobakin@intel.com
State New
Headers
Series net: page_pool: a couple assorted optimizations |

Commit Message

Alexander Lobakin June 29, 2023, 3:23 p.m. UTC
  Currently, there's no easy way to check if a NAPI polling cycle is
running and on which CPU, although this might come very handy in
several cases.
Commit 8c48eea3adf3 ("page_pool: allow caching from safely localized
NAPI") added napi_struct::list_owner, BUT it's set when the NAPI is
*scheduled*. `->list_owner == smp_processor_id()` doesn't mean we're
inside the corresponding polling loop.
Introduce new NAPI state flag, NAPI{,F}_STATE_RUNNING. Set it right
before calling to ->poll() and clear after all napi_gro_flush() and
gro_normal_list() are done. They are run in the same context and, in
fact, are part of the receive routine.
When `packets == budget`, napi_complete_done() is not called, so in
that case it's safe to clear the flag after ->poll() ends. Otherwise,
however, napi_complete_done() can lead to reenabling interrupts and
scheduling the NAPI already on another CPU. In that case, clear the
flag in napi_complete_done() itself.
The main usecase for the flag is as follows:

	if (test_bit(NAPI_STATE_RUNNING, &n->state) &&
	    READ_ONCE(n->list_owner) == smp_processor_id())
		/* you're inside n->poll() or the following GRO
		 * processing context
		 */

IOW, when the condition is true, feel free to use resources protected
by this NAPI, such as page_pools covered by it, percpu NAPI caches etc.
Just make sure interrupts are enabled.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 23 +++++++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)
  

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b828c7a75be2..db3aea863ea9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -389,6 +389,7 @@  enum {
 	NAPI_STATE_PREFER_BUSY_POLL,	/* prefer busy-polling over softirq processing*/
 	NAPI_STATE_THREADED,		/* The poll is performed inside its own thread*/
 	NAPI_STATE_SCHED_THREADED,	/* Napi is currently scheduled in threaded mode */
+	NAPI_STATE_RUNNING,		/* This NAPI or GRO is running on ::list_owner */
 };
 
 enum {
@@ -402,6 +403,7 @@  enum {
 	NAPIF_STATE_PREFER_BUSY_POLL	= BIT(NAPI_STATE_PREFER_BUSY_POLL),
 	NAPIF_STATE_THREADED		= BIT(NAPI_STATE_THREADED),
 	NAPIF_STATE_SCHED_THREADED	= BIT(NAPI_STATE_SCHED_THREADED),
+	NAPIF_STATE_RUNNING		= BIT(NAPI_STATE_RUNNING),
 };
 
 enum gro_result {
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..7f0d23c9e25e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6039,7 +6039,8 @@  bool napi_complete_done(struct napi_struct *n, int work_done)
 
 		new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
 			      NAPIF_STATE_SCHED_THREADED |
-			      NAPIF_STATE_PREFER_BUSY_POLL);
+			      NAPIF_STATE_PREFER_BUSY_POLL |
+			      NAPIF_STATE_RUNNING);
 
 		/* If STATE_MISSED was set, leave STATE_SCHED set,
 		 * because we will call napi->poll() one more time.
@@ -6128,6 +6129,7 @@  static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
 	/* All we really want here is to re-enable device interrupts.
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
+	set_bit(NAPI_STATE_RUNNING, &napi->state);
 	rc = napi->poll(napi, budget);
 	/* We can't gro_normal_list() here, because napi->poll() might have
 	 * rearmed the napi (napi_complete_done()) in which case it could
@@ -6135,8 +6137,10 @@  static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
 	 */
 	trace_napi_poll(napi, rc, budget);
 	netpoll_poll_unlock(have_poll_lock);
-	if (rc == budget)
+	if (rc == budget) {
 		__busy_poll_stop(napi, skip_schedule);
+		clear_bit(NAPI_STATE_RUNNING, &napi->state);
+	}
 	local_bh_enable();
 }
 
@@ -6186,9 +6190,11 @@  void napi_busy_loop(unsigned int napi_id,
 			have_poll_lock = netpoll_poll_lock(napi);
 			napi_poll = napi->poll;
 		}
+		set_bit(NAPI_STATE_RUNNING, &napi->state);
 		work = napi_poll(napi, budget);
 		trace_napi_poll(napi, work, budget);
 		gro_normal_list(napi);
+		clear_bit(NAPI_STATE_RUNNING, &napi->state);
 count:
 		if (work > 0)
 			__NET_ADD_STATS(dev_net(napi->dev),
@@ -6457,6 +6463,7 @@  static int __napi_poll(struct napi_struct *n, bool *repoll)
 	 */
 	work = 0;
 	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+		set_bit(NAPI_STATE_RUNNING, &n->state);
 		work = n->poll(n, weight);
 		trace_napi_poll(n, work, weight);
 	}
@@ -6466,7 +6473,7 @@  static int __napi_poll(struct napi_struct *n, bool *repoll)
 				n->poll, work, weight);
 
 	if (likely(work < weight))
-		return work;
+		goto out;
 
 	/* Drivers must not modify the NAPI state if they
 	 * consume the entire weight.  In such cases this code
@@ -6475,7 +6482,7 @@  static int __napi_poll(struct napi_struct *n, bool *repoll)
 	 */
 	if (unlikely(napi_disable_pending(n))) {
 		napi_complete(n);
-		return work;
+		goto out;
 	}
 
 	/* The NAPI context has more processing work, but busy-polling
@@ -6488,7 +6495,7 @@  static int __napi_poll(struct napi_struct *n, bool *repoll)
 			 */
 			napi_schedule(n);
 		}
-		return work;
+		goto out;
 	}
 
 	if (n->gro_bitmask) {
@@ -6506,11 +6513,15 @@  static int __napi_poll(struct napi_struct *n, bool *repoll)
 	if (unlikely(!list_empty(&n->poll_list))) {
 		pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
 			     n->dev ? n->dev->name : "backlog");
-		return work;
+		goto out;
 	}
 
 	*repoll = true;
 
+out:
+	if (READ_ONCE(n->list_owner) == smp_processor_id())
+		clear_bit(NAPI_STATE_RUNNING, &n->state);
+
 	return work;
 }