public inbox for [email protected]
 help / color / mirror / Atom feed
From: David Wei <[email protected]>
To: Jens Axboe <[email protected]>, Pavel Begunkov <[email protected]>
Cc: [email protected], [email protected],
	Mina Almasry <[email protected]>,
	Jakub Kicinski <[email protected]>
Subject: [PATCH 10/11] netdev/bnxt: add data pool and use it in BNXT driver
Date: Fri, 25 Aug 2023 18:19:53 -0700	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

From: David Wei <[email protected]>

This patch adds a thin wrapper called data pool that wraps the existing
page pool and the newly added ZC pool.

There is one struct netdev_rx_queue per logical RX queue. This patch
adds ZC ifq, page pool, and uarg (set during skb construction) to a
netdev_rx_queue. The data pool wrapper uses the ZC pool if an ifq is
present and falls back to the page pool otherwise.

The BNXT driver is modified to use data pool in order to support ZC RX.
A setup function bnxt_zc_rx() is added; it is called on the
XDP_SETUP_ZC_RX XDP command and sets fields in netdev_rx_queue. Calls to
get/put bufs from the page pool are replaced w/ calls to the data pool.

Signed-off-by: David Wei <[email protected]>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 59 ++++++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  4 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |  3 +
 include/linux/netdevice.h                     |  4 +
 include/net/data_pool.h                       | 96 +++++++++++++++++++
 5 files changed, 149 insertions(+), 17 deletions(-)
 create mode 100644 include/net/data_pool.h

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 48f584f78561..5c1dabaf07f9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -57,6 +57,7 @@
 #include <net/page_pool.h>
 #include <linux/align.h>
 #include <net/netdev_queues.h>
+#include <net/data_pool.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -724,7 +725,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 
 	__netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
 				   bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
-				   READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING);
+				   READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
 }
 
 static struct page *__bnxt_alloc_rx_64k_page(struct bnxt *bp, dma_addr_t *mapping,
@@ -738,13 +739,7 @@ static struct page *__bnxt_alloc_rx_64k_page(struct bnxt *bp, dma_addr_t *mappin
 	if (!page)
 		return NULL;
 
-	*mapping = dma_map_page_attrs(&bp->pdev->dev, page, offset,
-				      BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
-				      DMA_ATTR_WEAK_ORDERING);
-	if (dma_mapping_error(&bp->pdev->dev, *mapping)) {
-		page_pool_recycle_direct(rxr->page_pool, page);
-		return NULL;
-	}
+	*mapping = page_pool_get_dma_addr(page);
 
 	if (page_offset)
 		*page_offset = offset;
@@ -757,19 +752,14 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
 					 struct bnxt_rx_ring_info *rxr,
 					 gfp_t gfp)
 {
-	struct device *dev = &bp->pdev->dev;
 	struct page *page;
 
-	page = page_pool_dev_alloc_pages(rxr->page_pool);
+	page = data_pool_alloc_page(rxr->rx_queue);
 	if (!page)
 		return NULL;
 
-	*mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
-				      DMA_ATTR_WEAK_ORDERING);
-	if (dma_mapping_error(dev, *mapping)) {
-		page_pool_recycle_direct(rxr->page_pool, page);
-		return NULL;
-	}
+	*mapping = data_pool_get_dma_addr(rxr->rx_queue, page);
+
 	return page;
 }
 
@@ -1787,6 +1777,8 @@ static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
 		return;
 	}
 	skb_record_rx_queue(skb, bnapi->index);
+	if (bnapi->rx_ring->rx_queue->zc_ifq)
+		skb_zcopy_init(skb, bnapi->rx_ring->rx_queue->zc_uarg);
 	skb_mark_for_recycle(skb);
 	napi_gro_receive(&bnapi->napi, skb);
 }
@@ -3016,7 +3008,7 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
 		rx_agg_buf->page = NULL;
 		__clear_bit(i, rxr->rx_agg_bmap);
 
-		page_pool_recycle_direct(rxr->page_pool, page);
+		data_pool_put_page(rxr->rx_queue, page);
 	}
 
 skip_rx_agg_free:
@@ -3225,6 +3217,8 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
 
 		page_pool_destroy(rxr->page_pool);
 		rxr->page_pool = NULL;
+		rxr->rx_queue->page_pool = NULL;
+		rxr->rx_queue->zc_ifq = NULL;
 
 		kfree(rxr->rx_agg_bmap);
 		rxr->rx_agg_bmap = NULL;
@@ -3251,12 +3245,16 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
 	pp.dma_dir = DMA_BIDIRECTIONAL;
 	if (PAGE_SIZE > BNXT_RX_PAGE_SIZE)
 		pp.flags |= PP_FLAG_PAGE_FRAG;
+	pp.flags |= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+	pp.max_len = PAGE_SIZE;
 
 	rxr->page_pool = page_pool_create(&pp);
+	data_pool_set_page_pool(bp->dev, rxr->bnapi->index, rxr->page_pool);
 	if (IS_ERR(rxr->page_pool)) {
 		int err = PTR_ERR(rxr->page_pool);
 
 		rxr->page_pool = NULL;
+		data_pool_set_page_pool(bp->dev, rxr->bnapi->index, NULL);
 		return err;
 	}
 	return 0;
@@ -4620,6 +4618,8 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
 				rxr->rx_agg_ring_struct.ring_mem.flags =
 					BNXT_RMEM_RING_PTE_FLAG;
 			}
+
+			rxr->rx_queue = data_pool_get_rx_queue(bp->dev, bp->bnapi[i]->index);
 			rxr->bnapi = bp->bnapi[i];
 			bp->bnapi[i]->rx_ring = &bp->rx_ring[i];
 		}
@@ -13904,6 +13904,31 @@ void bnxt_print_device_info(struct bnxt *bp)
 	pcie_print_link_status(bp->pdev);
 }
 
+/* XDP_SETUP_ZC_RX: restart a running NIC with zc_rx.{ifq,uarg} bound. */
+int bnxt_zc_rx(struct bnxt *bp, struct netdev_bpf *xdp)
+{
+	struct netdev_rx_queue *rxq;
+	int rc = 0;
+
+	rxq = data_pool_get_rx_queue(bp->dev, xdp->zc_rx.queue_id);
+	if (!rxq || xdp->zc_rx.queue_id >= bp->rx_nr_rings)
+		return -EINVAL;
+
+	bnxt_rtnl_lock_sp(bp);
+	if (netif_running(bp->dev)) {
+		bnxt_ulp_stop(bp);
+		bnxt_close_nic(bp, true, false);
+		rxq->queue_id = xdp->zc_rx.queue_id;
+		rxq->zc_ifq = xdp->zc_rx.ifq;
+		rxq->zc_uarg = xdp->zc_rx.uarg;
+		/* Propagate a failed re-open rather than reporting success. */
+		rc = bnxt_open_nic(bp, true, false);
+		bnxt_ulp_start(bp, rc);
+	}
+	bnxt_rtnl_unlock_sp(bp);
+	return rc;
+}
+
 static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct net_device *dev;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 9fd85ebd8ae8..554c0abc0d44 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -949,6 +949,7 @@ struct bnxt_rx_ring_info {
 	struct bnxt_ring_struct	rx_agg_ring_struct;
 	struct xdp_rxq_info	xdp_rxq;
 	struct page_pool	*page_pool;
+	struct netdev_rx_queue	*rx_queue;
 };
 
 struct bnxt_rx_sw_stats {
@@ -2454,4 +2455,7 @@ int bnxt_get_port_parent_id(struct net_device *dev,
 void bnxt_dim_work(struct work_struct *work);
 int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi);
 void bnxt_print_device_info(struct bnxt *bp);
+
+int bnxt_zc_rx(struct bnxt *bp, struct netdev_bpf *xdp);
+
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 4efa5fe6972b..1387f0e1fff5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -455,6 +455,9 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		rc = bnxt_xdp_set(bp, xdp->prog);
 		break;
+	case XDP_SETUP_ZC_RX:
+		return bnxt_zc_rx(bp, xdp);
+		break;
 	default:
 		rc = -EINVAL;
 		break;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bf133cbee721..994237e92cbc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -789,6 +789,10 @@ struct netdev_rx_queue {
 	struct kobject			kobj;
 	struct net_device		*dev;
 	netdevice_tracker		dev_tracker;
+	unsigned int			queue_id;
+	struct page_pool		*page_pool;
+	struct io_zc_rx_ifq		*zc_ifq;
+	struct ubuf_info		*zc_uarg;
 
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool            *pool;
diff --git a/include/net/data_pool.h b/include/net/data_pool.h
new file mode 100644
index 000000000000..84c96aa1c542
--- /dev/null
+++ b/include/net/data_pool.h
@@ -0,0 +1,96 @@
+#ifndef _DATA_POOL_H
+#define _DATA_POOL_H
+
+#include <linux/io_uring.h>
+#include <linux/io_uring_types.h>
+#include <linux/mm_types.h>
+#include <linux/netdevice.h>
+
+/* Return RX queue @q_idx of @dev, NULL if @q_idx >= dev->num_rx_queues. */
+static inline struct netdev_rx_queue *
+data_pool_get_rx_queue(struct net_device *dev, unsigned int q_idx)
+{
+	return q_idx < dev->num_rx_queues ?
+		__netif_get_rx_queue(dev, q_idx) : NULL;
+}
+
+/* Record @pool on RX queue @q_idx of @dev; -EINVAL if no such queue. */
+static inline int data_pool_set_page_pool(struct net_device *dev,
+					  unsigned int q_idx,
+					  struct page_pool *pool)
+{
+	struct netdev_rx_queue *queue = data_pool_get_rx_queue(dev, q_idx);
+
+	if (queue) {
+		queue->page_pool = pool;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/* Record ZC @ifq on RX queue @q_idx of @dev; -EINVAL if no such queue. */
+static inline int data_pool_set_zc_ifq(struct net_device *dev,
+					unsigned int q_idx,
+					struct io_zc_rx_ifq *ifq)
+{
+	struct netdev_rx_queue *queue = data_pool_get_rx_queue(dev, q_idx);
+
+	if (queue) {
+		queue->zc_ifq = ifq;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/* Get one RX page: from the ZC ifq when one is set, else the page pool. */
+static inline struct page *data_pool_alloc_page(struct netdev_rx_queue *rxq)
+{
+	struct io_zc_rx_buf *zc_buf;
+
+	if (!rxq->zc_ifq)
+		return page_pool_dev_alloc_pages(rxq->page_pool);
+
+	zc_buf = io_zc_rx_get_buf(rxq->zc_ifq);
+	/* Callers deal in pages, so hand back the page behind the ZC buf. */
+	return zc_buf ? zc_buf->page : NULL;
+}
+
+/* Apply @bias to @page: ZC buf refcount, or page_pool_fragment_page(). */
+static inline void data_pool_fragment_page(struct netdev_rx_queue *rxq,
+					   struct page *page,
+					   unsigned long bias)
+{
+	if (!rxq->zc_ifq) {
+		page_pool_fragment_page(page, bias);
+		return;
+	}
+	atomic_set(&io_zc_rx_buf_from_page(rxq->zc_ifq, page)->refcount,
+		   bias);
+}
+
+/* Give @page back; pp_magic bits 0-1 hold page flags, so mask them out. */
+static inline void data_pool_put_page(struct netdev_rx_queue *rxq,
+				      struct page *page)
+{
+	if (rxq->zc_ifq) {
+		io_zc_rx_put_buf(rxq->zc_ifq,
+				 io_zc_rx_buf_from_page(rxq->zc_ifq, page));
+		return;
+	}
+	WARN_ON_ONCE((page->pp_magic & ~0x3UL) != PP_SIGNATURE);
+	page_pool_recycle_direct(rxq->page_pool, page);
+}
+
+/* DMA address of @page, read from its ZC buf or from the page pool. */
+static inline dma_addr_t data_pool_get_dma_addr(struct netdev_rx_queue *rxq,
+						struct page *page)
+{
+	struct io_zc_rx_buf *zc_buf;
+
+	if (!rxq->zc_ifq)
+		return page_pool_get_dma_addr(page);
+	zc_buf = io_zc_rx_buf_from_page(rxq->zc_ifq, page);
+	return io_zc_rx_buf_dma(zc_buf);
+}
+
+#endif
-- 
2.39.3


  parent reply	other threads:[~2023-08-26  1:22 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-26  1:19 [RFC RESEND 00/11] Zero copy network RX using io_uring David Wei
2023-08-26  1:19 ` [PATCH 01/11] io_uring: add interface queue David Wei
2023-08-26  1:19 ` [PATCH 02/11] io_uring: add mmap support for shared ifq ringbuffers David Wei
2023-08-26  1:19 ` [PATCH 03/11] netdev: add XDP_SETUP_ZC_RX command David Wei
2023-08-26  2:21   ` David Ahern
2023-08-26 21:37     ` David Wei
2023-08-26  1:19 ` [PATCH 04/11] io_uring: setup ZC for an RX queue when registering an ifq David Wei
2023-08-26  2:26   ` David Ahern
2023-08-26 22:00     ` David Wei
2023-08-26  1:19 ` [PATCH 05/11] io_uring: add ZC buf and pool David Wei
2023-08-26  1:19 ` [PATCH 06/11] io_uring: add ZC pool API David Wei
2023-08-26  1:19 ` [PATCH 07/11] skbuff: add SKBFL_FIXED_FRAG and skb_fixed() David Wei
2023-08-26  1:19 ` [PATCH 08/11] io_uring: allocate a uarg for freeing zero copy skbs David Wei
2023-08-26  1:19 ` [PATCH 09/11] io_uring: delay ZC pool destruction David Wei
2023-08-26  1:19 ` David Wei [this message]
2023-08-26  1:19 ` [PATCH 11/11] io_uring: add io_recvzc request David Wei
2023-10-22 19:06 ` [RFC RESEND 00/11] Zero copy network RX using io_uring Gal Pressman
2023-10-23  3:35   ` David Wei
  -- strict thread matches above, loose matches on Subject: below --
2023-08-25 22:55 [RFC PATCH " David Wei
2023-08-25 22:55 ` [PATCH 10/11] netdev/bnxt: add data pool and use it in BNXT driver David Wei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox