[net-next,v1,02/16] net: page_pool: create hooks for custom page providers

Message ID 20231208005250.2910004-3-almasrymina@google.com
State New
Headers
Series Device Memory TCP |

Commit Message

Mina Almasry Dec. 8, 2023, 12:52 a.m. UTC
  From: Jakub Kicinski <kuba@kernel.org>

The page providers which try to reuse the same pages will
need to hold onto the ref, even if page gets released from
the pool - as in releasing the page from the pp just transfers
the "ownership" reference from pp to the provider, and provider
will wait for other references to be gone before feeding this
page back into the pool.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mina Almasry <almasrymina@google.com>

---

This is implemented by Jakub in his RFC:
https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com/T/

I take no credit for the idea or implementation; I only added minor
edits to make this workable with device memory TCP, and removed some
hacky test code. This is a critical dependency of device memory TCP
and thus I'm pulling it into this series to make it revewable and
mergable.

RFC v3 -> v1
- Removed unusued mem_provider. (Yunsheng).
- Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub).

---
 include/net/page_pool/types.h | 12 ++++++++++
 net/core/page_pool.c          | 43 +++++++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 5 deletions(-)
  

Comments

Ilias Apalodimas Dec. 12, 2023, 8:07 a.m. UTC | #1
Hi Mina,

Apologies for not participating in the party earlier.

On Fri, 8 Dec 2023 at 02:52, Mina Almasry <almasrymina@google.com> wrote:
>
> From: Jakub Kicinski <kuba@kernel.org>
>
> The page providers which try to reuse the same pages will
> need to hold onto the ref, even if page gets released from
> the pool - as in releasing the page from the pp just transfers
> the "ownership" reference from pp to the provider, and provider
> will wait for other references to be gone before feeding this
> page back into the pool.
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> Signed-off-by: Mina Almasry <almasrymina@google.com>
>
> ---
>
> This is implemented by Jakub in his RFC:
> https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com/T/
>
> I take no credit for the idea or implementation; I only added minor
> edits to make this workable with device memory TCP, and removed some
> hacky test code. This is a critical dependency of device memory TCP
> and thus I'm pulling it into this series to make it revewable and
> mergable.
>
> RFC v3 -> v1
> - Removed unusued mem_provider. (Yunsheng).
> - Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub).
>
> ---
>  include/net/page_pool/types.h | 12 ++++++++++
>  net/core/page_pool.c          | 43 +++++++++++++++++++++++++++++++----
>  2 files changed, 50 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index ac286ea8ce2d..0e9fa79a5ef1 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -51,6 +51,7 @@ struct pp_alloc_cache {
>   * @dev:       device, for DMA pre-mapping purposes
>   * @netdev:    netdev this pool will serve (leave as NULL if none or multiple)
>   * @napi:      NAPI which is the sole consumer of pages, otherwise NULL
> + * @queue:     struct netdev_rx_queue this page_pool is being created for.
>   * @dma_dir:   DMA mapping direction
>   * @max_len:   max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
>   * @offset:    DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
> @@ -63,6 +64,7 @@ struct page_pool_params {
>                 int             nid;
>                 struct device   *dev;
>                 struct napi_struct *napi;
> +               struct netdev_rx_queue *queue;
>                 enum dma_data_direction dma_dir;
>                 unsigned int    max_len;
>                 unsigned int    offset;
> @@ -125,6 +127,13 @@ struct page_pool_stats {
>  };
>  #endif
>
> +struct memory_provider_ops {
> +       int (*init)(struct page_pool *pool);
> +       void (*destroy)(struct page_pool *pool);
> +       struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp);
> +       bool (*release_page)(struct page_pool *pool, struct page *page);
> +};
> +
>  struct page_pool {
>         struct page_pool_params_fast p;
>
> @@ -174,6 +183,9 @@ struct page_pool {
>          */
>         struct ptr_ring ring;
>
> +       void *mp_priv;
> +       const struct memory_provider_ops *mp_ops;
> +
>  #ifdef CONFIG_PAGE_POOL_STATS
>         /* recycle stats are per-cpu to avoid locking */
>         struct page_pool_recycle_stats __percpu *recycle_stats;
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index ca1b3b65c9b5..f5c84d2a4510 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -25,6 +25,8 @@
>
>  #include "page_pool_priv.h"
>
> +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);

We could add the existing page pool mechanisms as another 'provider',
but I assume this is coded like this for performance reasons (IOW skip
the expensive ptr call for the default case?)

> +
>  #define DEFER_TIME (msecs_to_jiffies(1000))
>  #define DEFER_WARN_INTERVAL (60 * HZ)
>
> @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool,
>                           const struct page_pool_params *params)
>  {
>         unsigned int ring_qsize = 1024; /* Default */
> +       int err;
>
>         memcpy(&pool->p, &params->fast, sizeof(pool->p));
>         memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
> @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool,
>         /* Driver calling page_pool_create() also call page_pool_destroy() */
>         refcount_set(&pool->user_cnt, 1);
>
> +       if (pool->mp_ops) {
> +               err = pool->mp_ops->init(pool);
> +               if (err) {
> +                       pr_warn("%s() mem-provider init failed %d\n",
> +                               __func__, err);
> +                       goto free_ptr_ring;
> +               }
> +
> +               static_branch_inc(&page_pool_mem_providers);
> +       }
> +
>         if (pool->p.flags & PP_FLAG_DMA_MAP)
>                 get_device(pool->p.dev);
>
>         return 0;
> +
> +free_ptr_ring:
> +       ptr_ring_cleanup(&pool->ring, NULL);
> +       return err;
>  }
>
>  static void page_pool_uninit(struct page_pool *pool)
> @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
>                 return page;
>
>         /* Slow-path: cache empty, do real allocation */
> -       page = __page_pool_alloc_pages_slow(pool, gfp);
> +       if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)

Why do we need && pool->mp_ops? On the init function, we only bump
page_pool_mem_providers if the ops are there

> +               page = pool->mp_ops->alloc_pages(pool, gfp);
> +       else
> +               page = __page_pool_alloc_pages_slow(pool, gfp);
>         return page;
>  }
>  EXPORT_SYMBOL(page_pool_alloc_pages);
> @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
>  void page_pool_return_page(struct page_pool *pool, struct page *page)
>  {
>         int count;
> +       bool put;
>
> -       __page_pool_release_page_dma(pool, page);
> -
> -       page_pool_clear_pp_info(page);
> +       put = true;
> +       if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)

ditto

> +               put = pool->mp_ops->release_page(pool, page);
> +       else
> +               __page_pool_release_page_dma(pool, page);
>
>         /* This may be the last page returned, releasing the pool, so
>          * it is not safe to reference pool afterwards.
> @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page)
>         count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
>         trace_page_pool_state_release(pool, page, count);
>
> -       put_page(page);
> +       if (put) {
> +               page_pool_clear_pp_info(page);
> +               put_page(page);
> +       }
>         /* An optimization would be to call __free_pages(page, pool->p.order)
>          * knowing page is not part of page-cache (thus avoiding a
>          * __page_cache_release() call).
> @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool)
>
>         page_pool_unlist(pool);
>         page_pool_uninit(pool);
> +
> +       if (pool->mp_ops) {

Same here. Using a mix of pool->mp_ops and page_pool_mem_providers
will work, but since we always check the ptr on init, can't we simply
rely on page_pool_mem_providers for the rest of the code?

Thanks
/Ilias
> +               pool->mp_ops->destroy(pool);
> +               static_branch_dec(&page_pool_mem_providers);
> +       }
> +
>         kfree(pool);
>  }
>
> --
> 2.43.0.472.g3155946c3a-goog
>
  
Mina Almasry Dec. 12, 2023, 2:47 p.m. UTC | #2
On Tue, Dec 12, 2023 at 12:07 AM Ilias Apalodimas
<ilias.apalodimas@linaro.org> wrote:
>
> Hi Mina,
>
> Apologies for not participating in the party earlier.
>

No worries, thanks for looking.

> On Fri, 8 Dec 2023 at 02:52, Mina Almasry <almasrymina@google.com> wrote:
> >
> > From: Jakub Kicinski <kuba@kernel.org>
> >
> > The page providers which try to reuse the same pages will
> > need to hold onto the ref, even if page gets released from
> > the pool - as in releasing the page from the pp just transfers
> > the "ownership" reference from pp to the provider, and provider
> > will wait for other references to be gone before feeding this
> > page back into the pool.
> >
> > Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> > Signed-off-by: Mina Almasry <almasrymina@google.com>
> >
> > ---
> >
> > This is implemented by Jakub in his RFC:
> > https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com/T/
> >
> > I take no credit for the idea or implementation; I only added minor
> > edits to make this workable with device memory TCP, and removed some
> > hacky test code. This is a critical dependency of device memory TCP
> > and thus I'm pulling it into this series to make it revewable and
> > mergable.
> >
> > RFC v3 -> v1
> > - Removed unusued mem_provider. (Yunsheng).
> > - Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub).
> >
> > ---
> >  include/net/page_pool/types.h | 12 ++++++++++
> >  net/core/page_pool.c          | 43 +++++++++++++++++++++++++++++++----
> >  2 files changed, 50 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> > index ac286ea8ce2d..0e9fa79a5ef1 100644
> > --- a/include/net/page_pool/types.h
> > +++ b/include/net/page_pool/types.h
> > @@ -51,6 +51,7 @@ struct pp_alloc_cache {
> >   * @dev:       device, for DMA pre-mapping purposes
> >   * @netdev:    netdev this pool will serve (leave as NULL if none or multiple)
> >   * @napi:      NAPI which is the sole consumer of pages, otherwise NULL
> > + * @queue:     struct netdev_rx_queue this page_pool is being created for.
> >   * @dma_dir:   DMA mapping direction
> >   * @max_len:   max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
> >   * @offset:    DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
> > @@ -63,6 +64,7 @@ struct page_pool_params {
> >                 int             nid;
> >                 struct device   *dev;
> >                 struct napi_struct *napi;
> > +               struct netdev_rx_queue *queue;
> >                 enum dma_data_direction dma_dir;
> >                 unsigned int    max_len;
> >                 unsigned int    offset;
> > @@ -125,6 +127,13 @@ struct page_pool_stats {
> >  };
> >  #endif
> >
> > +struct memory_provider_ops {
> > +       int (*init)(struct page_pool *pool);
> > +       void (*destroy)(struct page_pool *pool);
> > +       struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp);
> > +       bool (*release_page)(struct page_pool *pool, struct page *page);
> > +};
> > +
> >  struct page_pool {
> >         struct page_pool_params_fast p;
> >
> > @@ -174,6 +183,9 @@ struct page_pool {
> >          */
> >         struct ptr_ring ring;
> >
> > +       void *mp_priv;
> > +       const struct memory_provider_ops *mp_ops;
> > +
> >  #ifdef CONFIG_PAGE_POOL_STATS
> >         /* recycle stats are per-cpu to avoid locking */
> >         struct page_pool_recycle_stats __percpu *recycle_stats;
> > diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> > index ca1b3b65c9b5..f5c84d2a4510 100644
> > --- a/net/core/page_pool.c
> > +++ b/net/core/page_pool.c
> > @@ -25,6 +25,8 @@
> >
> >  #include "page_pool_priv.h"
> >
> > +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
>
> We could add the existing page pool mechanisms as another 'provider',
> but I assume this is coded like this for performance reasons (IOW skip
> the expensive ptr call for the default case?)
>

Correct, it's done like this for performance reasons.

> > +
> >  #define DEFER_TIME (msecs_to_jiffies(1000))
> >  #define DEFER_WARN_INTERVAL (60 * HZ)
> >
> > @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool,
> >                           const struct page_pool_params *params)
> >  {
> >         unsigned int ring_qsize = 1024; /* Default */
> > +       int err;
> >
> >         memcpy(&pool->p, &params->fast, sizeof(pool->p));
> >         memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
> > @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool,
> >         /* Driver calling page_pool_create() also call page_pool_destroy() */
> >         refcount_set(&pool->user_cnt, 1);
> >
> > +       if (pool->mp_ops) {
> > +               err = pool->mp_ops->init(pool);
> > +               if (err) {
> > +                       pr_warn("%s() mem-provider init failed %d\n",
> > +                               __func__, err);
> > +                       goto free_ptr_ring;
> > +               }
> > +
> > +               static_branch_inc(&page_pool_mem_providers);
> > +       }
> > +
> >         if (pool->p.flags & PP_FLAG_DMA_MAP)
> >                 get_device(pool->p.dev);
> >
> >         return 0;
> > +
> > +free_ptr_ring:
> > +       ptr_ring_cleanup(&pool->ring, NULL);
> > +       return err;
> >  }
> >
> >  static void page_pool_uninit(struct page_pool *pool)
> > @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
> >                 return page;
> >
> >         /* Slow-path: cache empty, do real allocation */
> > -       page = __page_pool_alloc_pages_slow(pool, gfp);
> > +       if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
>
> Why do we need && pool->mp_ops? On the init function, we only bump
> page_pool_mem_providers if the ops are there
>

Note that page_pool_mem_providers is a static variable (not part of
the page_pool struct), so if you have 2 page_pools on the system, one
using devmem and one not, we need to check pool->mp_ops to make sure
this page_pool is using a memory provider.

> > +               page = pool->mp_ops->alloc_pages(pool, gfp);
> > +       else
> > +               page = __page_pool_alloc_pages_slow(pool, gfp);
> >         return page;
> >  }
> >  EXPORT_SYMBOL(page_pool_alloc_pages);
> > @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
> >  void page_pool_return_page(struct page_pool *pool, struct page *page)
> >  {
> >         int count;
> > +       bool put;
> >
> > -       __page_pool_release_page_dma(pool, page);
> > -
> > -       page_pool_clear_pp_info(page);
> > +       put = true;
> > +       if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
>
> ditto
>
> > +               put = pool->mp_ops->release_page(pool, page);
> > +       else
> > +               __page_pool_release_page_dma(pool, page);
> >
> >         /* This may be the last page returned, releasing the pool, so
> >          * it is not safe to reference pool afterwards.
> > @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page)
> >         count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
> >         trace_page_pool_state_release(pool, page, count);
> >
> > -       put_page(page);
> > +       if (put) {
> > +               page_pool_clear_pp_info(page);
> > +               put_page(page);
> > +       }
> >         /* An optimization would be to call __free_pages(page, pool->p.order)
> >          * knowing page is not part of page-cache (thus avoiding a
> >          * __page_cache_release() call).
> > @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool)
> >
> >         page_pool_unlist(pool);
> >         page_pool_uninit(pool);
> > +
> > +       if (pool->mp_ops) {
>
> Same here. Using a mix of pool->mp_ops and page_pool_mem_providers
> will work, but since we always check the ptr on init, can't we simply
> rely on page_pool_mem_providers for the rest of the code?
>
> Thanks
> /Ilias
> > +               pool->mp_ops->destroy(pool);
> > +               static_branch_dec(&page_pool_mem_providers);
> > +       }
> > +
> >         kfree(pool);
> >  }
> >
> > --
> > 2.43.0.472.g3155946c3a-goog
> >



--
Thanks,
Mina
  

Patch

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index ac286ea8ce2d..0e9fa79a5ef1 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -51,6 +51,7 @@  struct pp_alloc_cache {
  * @dev:	device, for DMA pre-mapping purposes
  * @netdev:	netdev this pool will serve (leave as NULL if none or multiple)
  * @napi:	NAPI which is the sole consumer of pages, otherwise NULL
+ * @queue:	struct netdev_rx_queue this page_pool is being created for.
  * @dma_dir:	DMA mapping direction
  * @max_len:	max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
  * @offset:	DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
@@ -63,6 +64,7 @@  struct page_pool_params {
 		int		nid;
 		struct device	*dev;
 		struct napi_struct *napi;
+		struct netdev_rx_queue *queue;
 		enum dma_data_direction dma_dir;
 		unsigned int	max_len;
 		unsigned int	offset;
@@ -125,6 +127,13 @@  struct page_pool_stats {
 };
 #endif
 
+struct memory_provider_ops {
+	int (*init)(struct page_pool *pool);
+	void (*destroy)(struct page_pool *pool);
+	struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp);
+	bool (*release_page)(struct page_pool *pool, struct page *page);
+};
+
 struct page_pool {
 	struct page_pool_params_fast p;
 
@@ -174,6 +183,9 @@  struct page_pool {
 	 */
 	struct ptr_ring ring;
 
+	void *mp_priv;
+	const struct memory_provider_ops *mp_ops;
+
 #ifdef CONFIG_PAGE_POOL_STATS
 	/* recycle stats are per-cpu to avoid locking */
 	struct page_pool_recycle_stats __percpu *recycle_stats;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ca1b3b65c9b5..f5c84d2a4510 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -25,6 +25,8 @@ 
 
 #include "page_pool_priv.h"
 
+static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
@@ -174,6 +176,7 @@  static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
 {
 	unsigned int ring_qsize = 1024; /* Default */
+	int err;
 
 	memcpy(&pool->p, &params->fast, sizeof(pool->p));
 	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
@@ -234,10 +237,25 @@  static int page_pool_init(struct page_pool *pool,
 	/* Driver calling page_pool_create() also call page_pool_destroy() */
 	refcount_set(&pool->user_cnt, 1);
 
+	if (pool->mp_ops) {
+		err = pool->mp_ops->init(pool);
+		if (err) {
+			pr_warn("%s() mem-provider init failed %d\n",
+				__func__, err);
+			goto free_ptr_ring;
+		}
+
+		static_branch_inc(&page_pool_mem_providers);
+	}
+
 	if (pool->p.flags & PP_FLAG_DMA_MAP)
 		get_device(pool->p.dev);
 
 	return 0;
+
+free_ptr_ring:
+	ptr_ring_cleanup(&pool->ring, NULL);
+	return err;
 }
 
 static void page_pool_uninit(struct page_pool *pool)
@@ -519,7 +537,10 @@  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
 		return page;
 
 	/* Slow-path: cache empty, do real allocation */
-	page = __page_pool_alloc_pages_slow(pool, gfp);
+	if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+		page = pool->mp_ops->alloc_pages(pool, gfp);
+	else
+		page = __page_pool_alloc_pages_slow(pool, gfp);
 	return page;
 }
 EXPORT_SYMBOL(page_pool_alloc_pages);
@@ -576,10 +597,13 @@  void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
 void page_pool_return_page(struct page_pool *pool, struct page *page)
 {
 	int count;
+	bool put;
 
-	__page_pool_release_page_dma(pool, page);
-
-	page_pool_clear_pp_info(page);
+	put = true;
+	if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+		put = pool->mp_ops->release_page(pool, page);
+	else
+		__page_pool_release_page_dma(pool, page);
 
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
@@ -587,7 +611,10 @@  void page_pool_return_page(struct page_pool *pool, struct page *page)
 	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
 	trace_page_pool_state_release(pool, page, count);
 
-	put_page(page);
+	if (put) {
+		page_pool_clear_pp_info(page);
+		put_page(page);
+	}
 	/* An optimization would be to call __free_pages(page, pool->p.order)
 	 * knowing page is not part of page-cache (thus avoiding a
 	 * __page_cache_release() call).
@@ -857,6 +884,12 @@  static void __page_pool_destroy(struct page_pool *pool)
 
 	page_pool_unlist(pool);
 	page_pool_uninit(pool);
+
+	if (pool->mp_ops) {
+		pool->mp_ops->destroy(pool);
+		static_branch_dec(&page_pool_mem_providers);
+	}
+
 	kfree(pool);
 }