From: David Wei <[email protected]>
To: [email protected], [email protected]
Cc: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
Jakub Kicinski <[email protected]>, Paolo Abeni <[email protected]>,
"David S. Miller" <[email protected]>,
Eric Dumazet <[email protected]>,
Jesper Dangaard Brouer <[email protected]>,
David Ahern <[email protected]>,
Mina Almasry <[email protected]>,
Stanislav Fomichev <[email protected]>,
Joe Damato <[email protected]>,
Pedro Tammela <[email protected]>
Subject: [PATCH net-next v9 18/20] io_uring/zcrx: add copy fallback
Date: Tue, 17 Dec 2024 16:37:44 -0800
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
From: Pavel Begunkov <[email protected]>
There are scenarios in which the zerocopy path can get a kernel buffer
instead of a net_iov and needs to copy it to the user, whether because
of mis-steering or simply because the data landed in the skb's linear
part. In that case, grab a net_iov from the area's freelist, copy the
data into it, and return it to the user as normal.

At the moment the user gets no indication of whether a copy took place;
that is left for follow-up work.
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: David Wei <[email protected]>
---
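A note on the chunking in io_zcrx_copy_chunk(): data is consumed in at
most PAGE_SIZE pieces, one freelist niov per piece, and partial progress
is reported when allocation or CQE posting fails midway. The standalone
sketch below mirrors that loop in plain userspace C; all names in it
(copy_chunked, emit, CHUNK_SIZE) are illustrative stand-ins, not kernel
or io_uring API.

	#include <stdlib.h>
	#include <string.h>
	#include <sys/types.h>

	#define CHUNK_SIZE 4096		/* stand-in for PAGE_SIZE */

	/*
	 * Mirror of the io_zcrx_copy_chunk() loop: split @len bytes of
	 * @src into CHUNK_SIZE pieces, hand each piece to @emit (which
	 * stands in for io_zcrx_queue_cqe()), and report partial
	 * progress on failure, as the patch does with
	 * "return copied ? copied : ret".
	 */
	static ssize_t copy_chunked(const char *src, size_t len,
				    int (*emit)(void *buf, size_t n))
	{
		size_t copied = 0;

		while (len) {
			size_t copy_size = len < CHUNK_SIZE ? len : CHUNK_SIZE;
			/* malloc() stands in for popping a niov off the freelist */
			char *buf = malloc(CHUNK_SIZE);

			if (!buf)
				return copied ? (ssize_t)copied : -1; /* -ENOMEM */

			memcpy(buf, src + copied, copy_size);
			if (emit(buf, copy_size)) {
				free(buf);
				return copied ? (ssize_t)copied : -1; /* -ENOSPC */
			}
			copied += copy_size;
			len -= copy_size;
		}
		return copied;
	}

Note the asymmetry the sketch preserves: a failure on the first chunk
surfaces the error, while a failure after some progress returns the
bytes already queued, leaving the caller to handle the remainder.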
io_uring/zcrx.c | 123 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 117 insertions(+), 6 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index ffa388fbb1e4..92b4d91f97f7 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -7,6 +7,7 @@
#include <linux/io_uring.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
+#include <linux/skbuff_ref.h>
#include <net/page_pool/helpers.h>
#include <net/netlink.h>
@@ -123,6 +124,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
atomic_inc(io_get_user_counter(niov));
}
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+ struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+ return area->pages[net_iov_idx(niov)];
+}
+
static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
{
struct netdev_rx_queue *rxq;
@@ -145,6 +153,7 @@ static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq);
if (ret)
goto fail;
+
return 0;
fail:
rxq->mp_params.mp_ops = NULL;
@@ -453,6 +462,11 @@ static void io_zcrx_return_niov(struct net_iov *niov)
{
netmem_ref netmem = net_iov_to_netmem(niov);
+ if (!niov->pp) {
+ /* copy fallback allocated niovs */
+ io_zcrx_return_niov_freelist(niov);
+ return;
+ }
page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false);
}
@@ -668,13 +682,95 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
return true;
}
+static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+{
+ struct net_iov *niov = NULL;
+
+ spin_lock_bh(&area->freelist_lock);
+ if (area->free_count)
+ niov = __io_zcrx_get_free_niov(area);
+ spin_unlock_bh(&area->freelist_lock);
+
+ if (niov) {
+ page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+ page_pool_clear_pp_info(net_iov_to_netmem(niov));
+ }
+ return niov;
+}
+
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+ void *src_base, struct page *src_page,
+ unsigned int src_offset, size_t len)
+{
+ struct io_zcrx_area *area = ifq->area;
+ size_t copied = 0;
+ int ret = 0;
+
+ while (len) {
+ size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+ const int dst_off = 0;
+ struct net_iov *niov;
+ struct page *dst_page;
+ void *dst_addr;
+
+ niov = io_zcrx_alloc_fallback(area);
+ if (!niov) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ dst_page = io_zcrx_iov_page(niov);
+ dst_addr = kmap_local_page(dst_page);
+ if (src_page)
+ src_base = kmap_local_page(src_page);
+
+ memcpy(dst_addr, src_base + src_offset, copy_size);
+
+ if (src_page)
+ kunmap_local(src_base);
+ kunmap_local(dst_addr);
+
+ if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+ io_zcrx_return_niov(niov);
+ ret = -ENOSPC;
+ break;
+ }
+
+ io_zcrx_get_niov_uref(niov);
+ src_offset += copy_size;
+ len -= copy_size;
+ copied += copy_size;
+ }
+
+ return copied ? copied : ret;
+}
+
+static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+ const skb_frag_t *frag, int off, int len)
+{
+ struct page *page = skb_frag_page(frag);
+ u32 p_off, p_len, t, copied = 0;
+ int ret = 0;
+
+ off += skb_frag_off(frag);
+
+ skb_frag_foreach_page(frag, off, len,
+ page, p_off, p_len, t) {
+ ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len);
+ if (ret < 0)
+ return copied ? copied : ret;
+ copied += ret;
+ }
+ return copied;
+}
+
static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
const skb_frag_t *frag, int off, int len)
{
struct net_iov *niov;
if (unlikely(!skb_frag_is_net_iov(frag)))
- return -EOPNOTSUPP;
+ return io_zcrx_copy_frag(req, ifq, frag, off, len);
niov = netmem_to_net_iov(frag->netmem);
if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
@@ -701,18 +797,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
struct io_zcrx_ifq *ifq = args->ifq;
struct io_kiocb *req = args->req;
struct sk_buff *frag_iter;
- unsigned start, start_off;
+ unsigned start, start_off = offset;
int i, copy, end, off;
int ret = 0;
if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
return -EAGAIN;
- start = skb_headlen(skb);
- start_off = offset;
+ if (unlikely(offset < skb_headlen(skb))) {
+ ssize_t copied;
+ size_t to_copy;
- if (offset < start)
- return -EOPNOTSUPP;
+ to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+ copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
+ offset, to_copy);
+ if (copied < 0) {
+ ret = copied;
+ goto out;
+ }
+ offset += copied;
+ len -= copied;
+ if (!len)
+ goto out;
+ if (offset != skb_headlen(skb))
+ goto out;
+ }
+
+ start = skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
const skb_frag_t *frag;
--
2.43.5