public inbox for [email protected]
 help / color / mirror / Atom feed
From: David Wei <[email protected]>
To: [email protected], [email protected]
Cc: Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>,
	Jakub Kicinski <[email protected]>, Paolo Abeni <[email protected]>,
	"David S. Miller" <[email protected]>,
	Eric Dumazet <[email protected]>,
	Jesper Dangaard Brouer <[email protected]>,
	David Ahern <[email protected]>,
	Mina Almasry <[email protected]>,
	Stanislav Fomichev <[email protected]>,
	Joe Damato <[email protected]>,
	Pedro Tammela <[email protected]>
Subject: [PATCH net-next v10 20/22] io_uring/zcrx: add copy fallback
Date: Wed,  8 Jan 2025 14:06:41 -0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

From: Pavel Begunkov <[email protected]>

There are scenarios in which the zerocopy path receives a kernel buffer
instead of a net_iov and needs to copy it to the user, whether because
of mis-steering or simply because the skb has a linear part. In this
case, grab a net_iov, copy the data into it and return it to the user
as usual.

At the moment the user doesn't get any indication of whether a copy
took place; reporting that is left for follow-up work.

Reviewed-by: Jens Axboe <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: David Wei <[email protected]>
---
 io_uring/zcrx.c | 121 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 115 insertions(+), 6 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0c737ab9058d..b5ce336fc78d 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -7,6 +7,7 @@
 #include <linux/io_uring.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/skbuff_ref.h>
 
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
@@ -143,6 +144,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
 	atomic_inc(io_get_user_counter(niov));
 }
 
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+	return area->pages[net_iov_idx(niov)];
+}
+
 static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
 {
 	struct netdev_rx_queue *rxq;
@@ -165,6 +173,7 @@ static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
 	ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq);
 	if (ret)
 		goto fail;
+
 	return 0;
 fail:
 	rxq->mp_params.mp_ops = NULL;
@@ -473,6 +482,11 @@ static void io_zcrx_return_niov(struct net_iov *niov)
 {
 	netmem_ref netmem = net_iov_to_netmem(niov);
 
+	if (!niov->pp) {
+		/* copy fallback allocated niovs */
+		io_zcrx_return_niov_freelist(niov);
+		return;
+	}
 	page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false);
 }
 
@@ -700,13 +714,93 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	return true;
 }
 
+static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+{
+	struct net_iov *niov = NULL;
+
+	spin_lock_bh(&area->freelist_lock);
+	if (area->free_count)
+		niov = __io_zcrx_get_free_niov(area);
+	spin_unlock_bh(&area->freelist_lock);
+
+	if (niov)
+		page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+	return niov;
+}
+
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+				  void *src_base, struct page *src_page,
+				  unsigned int src_offset, size_t len)
+{
+	struct io_zcrx_area *area = ifq->area;
+	size_t copied = 0;
+	int ret = 0;
+
+	while (len) {
+		size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+		const int dst_off = 0;
+		struct net_iov *niov;
+		struct page *dst_page;
+		void *dst_addr;
+
+		niov = io_zcrx_alloc_fallback(area);
+		if (!niov) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		dst_page = io_zcrx_iov_page(niov);
+		dst_addr = kmap_local_page(dst_page);
+		if (src_page)
+			src_base = kmap_local_page(src_page);
+
+		memcpy(dst_addr, src_base + src_offset, copy_size);
+
+		if (src_page)
+			kunmap_local(src_base);
+		kunmap_local(dst_addr);
+
+		if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+			io_zcrx_return_niov(niov);
+			ret = -ENOSPC;
+			break;
+		}
+
+		io_zcrx_get_niov_uref(niov);
+		src_offset += copy_size;
+		len -= copy_size;
+		copied += copy_size;
+	}
+
+	return copied ? copied : ret;
+}
+
+static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+			     const skb_frag_t *frag, int off, int len)
+{
+	struct page *page = skb_frag_page(frag);
+	u32 p_off, p_len, t, copied = 0;
+	int ret = 0;
+
+	off += skb_frag_off(frag);
+
+	skb_frag_foreach_page(frag, off, len,
+			      page, p_off, p_len, t) {
+		ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len);
+		if (ret < 0)
+			return copied ? copied : ret;
+		copied += ret;
+	}
+	return copied;
+}
+
 static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 			     const skb_frag_t *frag, int off, int len)
 {
 	struct net_iov *niov;
 
 	if (unlikely(!skb_frag_is_net_iov(frag)))
-		return -EOPNOTSUPP;
+		return io_zcrx_copy_frag(req, ifq, frag, off, len);
 
 	niov = netmem_to_net_iov(frag->netmem);
 	if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
@@ -733,18 +827,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
 	struct io_zcrx_ifq *ifq = args->ifq;
 	struct io_kiocb *req = args->req;
 	struct sk_buff *frag_iter;
-	unsigned start, start_off;
+	unsigned start, start_off = offset;
 	int i, copy, end, off;
 	int ret = 0;
 
 	if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
 		return -EAGAIN;
 
-	start = skb_headlen(skb);
-	start_off = offset;
+	if (unlikely(offset < skb_headlen(skb))) {
+		ssize_t copied;
+		size_t to_copy;
 
-	if (offset < start)
-		return -EOPNOTSUPP;
+		to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+		copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
+					    offset, to_copy);
+		if (copied < 0) {
+			ret = copied;
+			goto out;
+		}
+		offset += copied;
+		len -= copied;
+		if (!len)
+			goto out;
+		if (offset != skb_headlen(skb))
+			goto out;
+	}
+
+	start = skb_headlen(skb);
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		const skb_frag_t *frag;
-- 
2.43.5


  parent reply	other threads:[~2025-01-08 22:07 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-08 22:06 [PATCH net-next v10 00/22] io_uring zero copy rx David Wei
2025-01-08 22:06 ` [PATCH net-next v10 01/22] net: make page_pool_ref_netmem work with net iovs David Wei
2025-01-16  0:30   ` Jakub Kicinski
2025-01-16  2:12     ` Pavel Begunkov
2025-01-16  2:48       ` Jakub Kicinski
2025-01-16 16:45         ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 02/22] net: page_pool: don't cast mp param to devmem David Wei
2025-01-08 22:06 ` [PATCH net-next v10 03/22] net: prefix devmem specific helpers David Wei
2025-01-08 22:06 ` [PATCH net-next v10 04/22] net: generalise net_iov chunk owners David Wei
2025-01-16  0:31   ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 05/22] net: page pool: export page_pool_set_dma_addr_netmem() David Wei
2025-01-16  0:35   ` Jakub Kicinski
2025-01-16  0:39     ` Jakub Kicinski
2025-01-16  2:12       ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 06/22] net: page_pool: create hooks for custom memory providers David Wei
2025-01-16  0:44   ` Jakub Kicinski
2025-01-16  2:25     ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 07/22] netdev: add io_uring memory provider info David Wei
2025-01-16  0:45   ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 08/22] net: page_pool: add callback for mp info printing David Wei
2025-01-16  0:46   ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 09/22] net: page_pool: add a mp hook to unregister_netdevice* David Wei
2025-01-08 22:06 ` [PATCH net-next v10 10/22] net: prepare for non devmem TCP memory providers David Wei
2025-01-08 22:06 ` [PATCH net-next v10 11/22] net: page_pool: add memory provider helpers David Wei
2025-01-16  0:49   ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 12/22] io_uring/zcrx: add interface queue and refill queue David Wei
2025-01-08 22:06 ` [PATCH net-next v10 13/22] io_uring/zcrx: add io_zcrx_area David Wei
2025-01-08 22:06 ` [PATCH net-next v10 14/22] io_uring/zcrx: grab a net device David Wei
2025-01-16  1:06   ` Jakub Kicinski
2025-01-16  2:33     ` Pavel Begunkov
2025-01-16  3:12       ` Jakub Kicinski
2025-01-16 16:46         ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 15/22] io_uring/zcrx: implement zerocopy receive pp memory provider David Wei
2025-01-13 22:32   ` Jens Axboe
2025-01-08 22:06 ` [PATCH net-next v10 16/22] io_uring/zcrx: dma-map area for the device David Wei
2025-01-08 22:06 ` [PATCH net-next v10 17/22] io_uring/zcrx: add io_recvzc request David Wei
2025-01-08 22:06 ` [PATCH net-next v10 18/22] io_uring/zcrx: set pp memory provider for an rx queue David Wei
2025-01-16  1:12   ` Jakub Kicinski
2025-01-16  2:27     ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 19/22] io_uring/zcrx: throttle receive requests David Wei
2025-01-08 22:06 ` David Wei [this message]
2025-01-08 22:06 ` [PATCH net-next v10 21/22] net: add documentation for io_uring zcrx David Wei
2025-01-08 22:06 ` [PATCH net-next v10 22/22] io_uring/zcrx: add selftest David Wei
2025-01-09 17:33   ` Stanislav Fomichev
2025-01-09 17:50     ` David Wei
2025-01-13 21:32       ` Pavel Begunkov
2025-01-14  0:11         ` Stanislav Fomichev
2025-01-16  0:53   ` Jakub Kicinski
2025-01-16 22:58     ` David Wei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox