From: David Wei <[email protected]>
To: [email protected], [email protected]
Cc: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
Jakub Kicinski <[email protected]>, Paolo Abeni <[email protected]>,
"David S. Miller" <[email protected]>,
Eric Dumazet <[email protected]>,
Jesper Dangaard Brouer <[email protected]>,
David Ahern <[email protected]>,
Mina Almasry <[email protected]>,
Stanislav Fomichev <[email protected]>,
Joe Damato <[email protected]>,
Pedro Tammela <[email protected]>
Subject: [PATCH net-next v10 20/22] io_uring/zcrx: add copy fallback
Date: Wed, 8 Jan 2025 14:06:41 -0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
From: Pavel Begunkov <[email protected]>
There are scenarios in which the zerocopy path gets a kernel buffer
instead of a net_iov and needs to copy it to the user, whether because
of mis-steering or simply because the skb has a linear part.
In this case, grab a net_iov, copy the data into it and return it to the
user as usual.
At the moment the user doesn't get any indication of whether a copy
took place; that is left for follow-up work.
Reviewed-by: Jens Axboe <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: David Wei <[email protected]>
---
io_uring/zcrx.c | 121 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 115 insertions(+), 6 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0c737ab9058d..b5ce336fc78d 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -7,6 +7,7 @@
#include <linux/io_uring.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
+#include <linux/skbuff_ref.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
@@ -143,6 +144,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
atomic_inc(io_get_user_counter(niov));
}
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov) /* return the page stored for @niov in its owning area */
+{
+ struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+ return area->pages[net_iov_idx(niov)]; /* area->pages[] is indexed by net_iov_idx(niov) */
+}
+
static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
{
struct netdev_rx_queue *rxq;
@@ -165,6 +173,7 @@ static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq);
if (ret)
goto fail;
+
return 0;
fail:
rxq->mp_params.mp_ops = NULL;
@@ -473,6 +482,11 @@ static void io_zcrx_return_niov(struct net_iov *niov)
{
netmem_ref netmem = net_iov_to_netmem(niov);
+ if (!niov->pp) {
+ /* copy fallback allocated niovs */
+ io_zcrx_return_niov_freelist(niov);
+ return;
+ }
page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false);
}
@@ -700,13 +714,93 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
return true;
}
+static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area) /* get a niov from @area for the copy fallback; returns NULL when area->free_count is zero */
+{
+ struct net_iov *niov = NULL;
+
+ spin_lock_bh(&area->freelist_lock); /* free_count check and the get are done under freelist_lock */
+ if (area->free_count)
+ niov = __io_zcrx_get_free_niov(area);
+ spin_unlock_bh(&area->freelist_lock);
+
+ if (niov)
+ page_pool_fragment_netmem(net_iov_to_netmem(niov), 1); /* NOTE(review): presumably initialises the niov's page-pool fragment count to 1 -- confirm */
+ return niov;
+}
+
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+ void *src_base, struct page *src_page,
+ unsigned int src_offset, size_t len) /* copy @len bytes from @src_page (if non-NULL) or @src_base, at @src_offset, into fallback niovs, queueing one CQE per niov */
+{ /* returns the number of bytes copied, or a negative errno if nothing was copied */
+ struct io_zcrx_area *area = ifq->area;
+ size_t copied = 0;
+ int ret = 0;
+
+ while (len) {
+ size_t copy_size = min_t(size_t, PAGE_SIZE, len); /* at most PAGE_SIZE bytes go into each niov */
+ const int dst_off = 0; /* data is always copied to the start of the destination page */
+ struct net_iov *niov;
+ struct page *dst_page;
+ void *dst_addr;
+
+ niov = io_zcrx_alloc_fallback(area);
+ if (!niov) { /* no niov available from the area */
+ ret = -ENOMEM;
+ break;
+ }
+
+ dst_page = io_zcrx_iov_page(niov);
+ dst_addr = kmap_local_page(dst_page);
+ if (src_page)
+ src_base = kmap_local_page(src_page); /* a page source replaces @src_base with a temporary mapping */
+
+ memcpy(dst_addr, src_base + src_offset, copy_size);
+
+ if (src_page)
+ kunmap_local(src_base); /* unmapped in reverse order of the mappings above */
+ kunmap_local(dst_addr);
+
+ if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+ io_zcrx_return_niov(niov); /* CQE was not queued: hand the niov back before bailing out */
+ ret = -ENOSPC;
+ break;
+ }
+
+ io_zcrx_get_niov_uref(niov); /* taken only after the CQE was queued successfully */
+ src_offset += copy_size;
+ len -= copy_size;
+ copied += copy_size;
+ }
+
+ return copied ? copied : ret; /* partial progress takes precedence over the error code */
+}
+
+static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+ const skb_frag_t *frag, int off, int len) /* copy @len bytes at @off within @frag through io_zcrx_copy_chunk(); returns bytes copied or a negative errno */
+{
+ struct page *page = skb_frag_page(frag);
+ u32 p_off, p_len, t, copied = 0;
+ int ret = 0;
+
+ off += skb_frag_off(frag); /* add the frag's own offset to the caller-supplied @off */
+
+ skb_frag_foreach_page(frag, off, len,
+ page, p_off, p_len, t) { /* each iteration supplies page, p_off and p_len for one chunk */
+ ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len); /* NULL src_base: the source is the page */
+ if (ret < 0)
+ return copied ? copied : ret; /* report bytes already copied, if any, instead of the error */
+ copied += ret;
+ }
+ return copied;
+}
+
static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
const skb_frag_t *frag, int off, int len)
{
struct net_iov *niov;
if (unlikely(!skb_frag_is_net_iov(frag)))
- return -EOPNOTSUPP;
+ return io_zcrx_copy_frag(req, ifq, frag, off, len);
niov = netmem_to_net_iov(frag->netmem);
if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
@@ -733,18 +827,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
struct io_zcrx_ifq *ifq = args->ifq;
struct io_kiocb *req = args->req;
struct sk_buff *frag_iter;
- unsigned start, start_off;
+ unsigned start, start_off = offset;
int i, copy, end, off;
int ret = 0;
if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
return -EAGAIN;
- start = skb_headlen(skb);
- start_off = offset;
+ if (unlikely(offset < skb_headlen(skb))) {
+ ssize_t copied;
+ size_t to_copy;
- if (offset < start)
- return -EOPNOTSUPP;
+ to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+ copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
+ offset, to_copy);
+ if (copied < 0) {
+ ret = copied;
+ goto out;
+ }
+ offset += copied;
+ len -= copied;
+ if (!len)
+ goto out;
+ if (offset != skb_headlen(skb))
+ goto out;
+ }
+
+ start = skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
const skb_frag_t *frag;
--
2.43.5
next prev parent reply other threads:[~2025-01-08 22:07 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-08 22:06 [PATCH net-next v10 00/22] io_uring zero copy rx David Wei
2025-01-08 22:06 ` [PATCH net-next v10 01/22] net: make page_pool_ref_netmem work with net iovs David Wei
2025-01-16 0:30 ` Jakub Kicinski
2025-01-16 2:12 ` Pavel Begunkov
2025-01-16 2:48 ` Jakub Kicinski
2025-01-16 16:45 ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 02/22] net: page_pool: don't cast mp param to devmem David Wei
2025-01-08 22:06 ` [PATCH net-next v10 03/22] net: prefix devmem specific helpers David Wei
2025-01-08 22:06 ` [PATCH net-next v10 04/22] net: generalise net_iov chunk owners David Wei
2025-01-16 0:31 ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 05/22] net: page pool: export page_pool_set_dma_addr_netmem() David Wei
2025-01-16 0:35 ` Jakub Kicinski
2025-01-16 0:39 ` Jakub Kicinski
2025-01-16 2:12 ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 06/22] net: page_pool: create hooks for custom memory providers David Wei
2025-01-16 0:44 ` Jakub Kicinski
2025-01-16 2:25 ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 07/22] netdev: add io_uring memory provider info David Wei
2025-01-16 0:45 ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 08/22] net: page_pool: add callback for mp info printing David Wei
2025-01-16 0:46 ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 09/22] net: page_pool: add a mp hook to unregister_netdevice* David Wei
2025-01-08 22:06 ` [PATCH net-next v10 10/22] net: prepare for non devmem TCP memory providers David Wei
2025-01-08 22:06 ` [PATCH net-next v10 11/22] net: page_pool: add memory provider helpers David Wei
2025-01-16 0:49 ` Jakub Kicinski
2025-01-08 22:06 ` [PATCH net-next v10 12/22] io_uring/zcrx: add interface queue and refill queue David Wei
2025-01-08 22:06 ` [PATCH net-next v10 13/22] io_uring/zcrx: add io_zcrx_area David Wei
2025-01-08 22:06 ` [PATCH net-next v10 14/22] io_uring/zcrx: grab a net device David Wei
2025-01-16 1:06 ` Jakub Kicinski
2025-01-16 2:33 ` Pavel Begunkov
2025-01-16 3:12 ` Jakub Kicinski
2025-01-16 16:46 ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 15/22] io_uring/zcrx: implement zerocopy receive pp memory provider David Wei
2025-01-13 22:32 ` Jens Axboe
2025-01-08 22:06 ` [PATCH net-next v10 16/22] io_uring/zcrx: dma-map area for the device David Wei
2025-01-08 22:06 ` [PATCH net-next v10 17/22] io_uring/zcrx: add io_recvzc request David Wei
2025-01-08 22:06 ` [PATCH net-next v10 18/22] io_uring/zcrx: set pp memory provider for an rx queue David Wei
2025-01-16 1:12 ` Jakub Kicinski
2025-01-16 2:27 ` Pavel Begunkov
2025-01-08 22:06 ` [PATCH net-next v10 19/22] io_uring/zcrx: throttle receive requests David Wei
2025-01-08 22:06 ` David Wei [this message]
2025-01-08 22:06 ` [PATCH net-next v10 21/22] net: add documentation for io_uring zcrx David Wei
2025-01-08 22:06 ` [PATCH net-next v10 22/22] io_uring/zcrx: add selftest David Wei
2025-01-09 17:33 ` Stanislav Fomichev
2025-01-09 17:50 ` David Wei
2025-01-13 21:32 ` Pavel Begunkov
2025-01-14 0:11 ` Stanislav Fomichev
2025-01-16 0:53 ` Jakub Kicinski
2025-01-16 22:58 ` David Wei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox