[RFC,02/10] dma-buf: add support for NET_RX pages

Message ID 20230710223304.1174642-3-almasrymina@google.com
State New
Headers
Series Device Memory TCP |

Commit Message

Mina Almasry July 10, 2023, 10:32 p.m. UTC
  Use the paged attachment mappings support to create NET_RX pages.
NET_RX pages are pages that can be used in the networking receive path:

Bind the pages to the driver's rx queues specified by the create_flags
param, and create a gen_pool to hold the free pages available for the
driver to allocate.

Signed-off-by: Mina Almasry <almasrymina@google.com>
---
 drivers/dma-buf/dma-buf.c    | 174 +++++++++++++++++++++++++++++++++++
 include/linux/dma-buf.h      |  20 ++++
 include/linux/netdevice.h    |   1 +
 include/uapi/linux/dma-buf.h |   2 +
 4 files changed, 197 insertions(+)
  

Patch

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 50b1d813cf5c..acb86bf406f4 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -27,6 +27,7 @@ 
 #include <linux/dma-resv.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
+#include <linux/netdevice.h>
 #include <linux/pseudo_fs.h>
 
 #include <uapi/linux/dma-buf.h>
@@ -1681,6 +1682,8 @@  static void dma_buf_pages_destroy(struct percpu_ref *ref)
 	pci_dev_put(priv->pci_dev);
 }
 
+const struct dma_buf_pages_type_ops net_rx_ops;
+
 static long dma_buf_create_pages(struct file *file,
 				 struct dma_buf_create_pages_info *create_info)
 {
@@ -1793,6 +1796,9 @@  static long dma_buf_create_pages(struct file *file,
 	priv->create_flags = create_info->create_flags;
 
 	switch (priv->type) {
+	case DMA_BUF_PAGES_NET_RX:
+		priv->type_ops = &net_rx_ops;
+		break;
 	default:
 		err = -EINVAL;
 		goto out_put_new_file;
@@ -1966,3 +1972,171 @@  static void __exit dma_buf_deinit(void)
 	dma_buf_uninit_sysfs_statistics();
 }
 __exitcall(dma_buf_deinit);
+
+/********************************
+ *	dma_buf_pages_net_rx	*
+ ********************************/
+
+void dma_buf_pages_net_rx_release(struct dma_buf_pages *priv, struct file *file)
+{
+	struct netdev_rx_queue *rxq;
+	unsigned long xa_idx;
+
+	xa_for_each(&priv->net_rx.bound_rxq_list, xa_idx, rxq)
+		if (rxq->dmabuf_pages == file)
+			rxq->dmabuf_pages = NULL;
+}
+
+static int dev_is_class(struct device *dev, void *class)
+{
+	if (dev->class != NULL && !strcmp(dev->class->name, class))
+		return 1;
+
+	return 0;
+}
+
+int dma_buf_pages_net_rx_init(struct dma_buf_pages *priv, struct file *file)
+{
+	struct netdev_rx_queue *rxq;
+	struct net_device *netdev;
+	int xa_id, err, rxq_idx;
+	struct device *device;
+
+	priv->net_rx.page_pool =
+		gen_pool_create(PAGE_SHIFT, dev_to_node(&priv->pci_dev->dev));
+
+	if (!priv->net_rx.page_pool)
+		return -ENOMEM;
+
+	/*
+	 * We start with PAGE_SIZE instead of 0 since gen_pool_alloc_*() returns
+	 * NULL on error
+	 */
+	err = gen_pool_add_virt(priv->net_rx.page_pool, PAGE_SIZE, 0,
+				PAGE_SIZE * priv->num_pages,
+				dev_to_node(&priv->pci_dev->dev));
+	if (err)
+		goto out_destroy_pool;
+
+	xa_init_flags(&priv->net_rx.bound_rxq_list, XA_FLAGS_ALLOC);
+
+	device = device_find_child(&priv->pci_dev->dev, "net", dev_is_class);
+	if (!device) {
+		err = -ENODEV;
+		goto out_destroy_xarray;
+	}
+
+	netdev = to_net_dev(device);
+	if (!netdev) {
+		err = -ENODEV;
+		goto out_put_dev;
+	}
+
+	for (rxq_idx = 0; rxq_idx < (sizeof(priv->create_flags) * 8);
+	     rxq_idx++) {
+		if (!(priv->create_flags & (1ULL << rxq_idx)))
+			continue;
+
+		if (rxq_idx >= netdev->num_rx_queues) {
+			err = -ERANGE;
+			goto out_release_rx;
+		}
+
+		rxq = __netif_get_rx_queue(netdev, rxq_idx);
+
+		err = xa_alloc(&priv->net_rx.bound_rxq_list, &xa_id, rxq,
+			       xa_limit_32b, GFP_KERNEL);
+		if (err)
+			goto out_release_rx;
+
+		/* We previously have done a dma_buf_attach(), which validates
+		 * that the net_device we're trying to attach to can reach the
+		 * dmabuf, so we don't need to check here as well.
+		 */
+		rxq->dmabuf_pages = file;
+	}
+	put_device(device);
+	return 0;
+
+out_release_rx:
+	dma_buf_pages_net_rx_release(priv, file);
+out_put_dev:
+	put_device(device);
+out_destroy_xarray:
+	xa_destroy(&priv->net_rx.bound_rxq_list);
+out_destroy_pool:
+	gen_pool_destroy(priv->net_rx.page_pool);
+	return err;
+}
+
+void dma_buf_pages_net_rx_free(struct dma_buf_pages *priv)
+{
+	xa_destroy(&priv->net_rx.bound_rxq_list);
+	gen_pool_destroy(priv->net_rx.page_pool);
+}
+
+static unsigned long dma_buf_page_to_gen_pool_addr(struct page *page)
+{
+	struct dma_buf_pages *priv;
+	struct dev_pagemap *pgmap;
+	unsigned long offset;
+
+	pgmap = page->pgmap;
+	priv = container_of(pgmap, struct dma_buf_pages, pgmap);
+	offset = page - priv->pages;
+	/* Offset + 1 is due to the fact that we want to avoid 0 virt address
+	 * returned from the gen_pool. The gen_pool returns 0 on error, and virt
+	 * address 0 is indistinguishable from an error.
+	 */
+	return (offset + 1) << PAGE_SHIFT;
+}
+
+static struct page *
+dma_buf_gen_pool_addr_to_page(unsigned long addr, struct dma_buf_pages *priv)
+{
+	/* - 1 is due to the fact that we want to avoid 0 virt address
+	 * returned from the gen_pool. See comment in dma_buf_create_pages()
+	 * for details.
+	 */
+	unsigned long offset = (addr >> PAGE_SHIFT) - 1;
+	return &priv->pages[offset];
+}
+
+void dma_buf_page_free_net_rx(struct dma_buf_pages *priv, struct page *page)
+{
+	unsigned long addr = dma_buf_page_to_gen_pool_addr(page);
+
+	if (gen_pool_has_addr(priv->net_rx.page_pool, addr, PAGE_SIZE))
+		gen_pool_free(priv->net_rx.page_pool, addr, PAGE_SIZE);
+}
+
+const struct dma_buf_pages_type_ops net_rx_ops = {
+	.dma_buf_pages_init		= dma_buf_pages_net_rx_init,
+	.dma_buf_pages_release		= dma_buf_pages_net_rx_release,
+	.dma_buf_pages_destroy		= dma_buf_pages_net_rx_free,
+	.dma_buf_page_free		= dma_buf_page_free_net_rx,
+};
+
+struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv)
+{
+	unsigned long gen_pool_addr;
+	struct page *pg;
+
+	if (!(priv->type & DMA_BUF_PAGES_NET_RX))
+		return NULL;
+
+	gen_pool_addr = gen_pool_alloc(priv->net_rx.page_pool, PAGE_SIZE);
+	if (!gen_pool_addr)
+		return NULL;
+
+	if (!PAGE_ALIGNED(gen_pool_addr)) {
+		net_err_ratelimited("dmabuf page pool allocation not aligned");
+		gen_pool_free(priv->net_rx.page_pool, gen_pool_addr, PAGE_SIZE);
+		return NULL;
+	}
+
+	pg = dma_buf_gen_pool_addr_to_page(gen_pool_addr, priv);
+
+	percpu_ref_get(&priv->pgmap.ref);
+	return pg;
+}
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 5789006180ea..e8e66d6407d0 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -22,6 +22,9 @@ 
 #include <linux/fs.h>
 #include <linux/dma-fence.h>
 #include <linux/wait.h>
+#include <linux/genalloc.h>
+#include <linux/xarray.h>
+#include <net/page_pool.h>
 
 struct device;
 struct dma_buf;
@@ -552,6 +555,11 @@  struct dma_buf_pages_type_ops {
 				  struct page *page);
 };
 
+struct dma_buf_pages_net_rx {
+	struct gen_pool *page_pool;
+	struct xarray bound_rxq_list;
+};
+
 struct dma_buf_pages {
 	/* fields for dmabuf */
 	struct dma_buf *dmabuf;
@@ -568,6 +576,10 @@  struct dma_buf_pages {
 	unsigned int type;
 	const struct dma_buf_pages_type_ops *type_ops;
 	__u64 create_flags;
+
+	union {
+		struct dma_buf_pages_net_rx net_rx;
+	};
 };
 
 /**
@@ -671,6 +683,8 @@  static inline bool is_dma_buf_pages_file(struct file *file)
 	return file->f_op == &dma_buf_pages_fops;
 }
 
+struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv);
+
 static inline bool is_dma_buf_page(struct page *page)
 {
 	return (is_zone_device_page(page) && page->pgmap &&
@@ -718,6 +732,12 @@  static inline int dma_buf_map_sg(struct device *dev, struct scatterlist *sg,
 {
 	return 0;
 }
+
+static inline struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv)
+{
+	return NULL;
+}
+
 #endif
 
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c2f0c6002a84..7a087ffa9baa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -796,6 +796,7 @@  struct netdev_rx_queue {
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool            *pool;
 #endif
+	struct file __rcu		*dmabuf_pages;
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index d0f63a2ab7e4..b392cef9d3c6 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -186,6 +186,8 @@  struct dma_buf_create_pages_info {
 	__u64 create_flags;
 };
 
+#define DMA_BUF_PAGES_NET_RX		(1 << 0)
+
 #define DMA_BUF_CREATE_PAGES	_IOW(DMA_BUF_BASE, 4, struct dma_buf_create_pages_info)
 
 #endif