From: David Wei <[email protected]>
To: [email protected], [email protected]
Cc: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
Jakub Kicinski <[email protected]>, Paolo Abeni <[email protected]>,
"David S. Miller" <[email protected]>,
Eric Dumazet <[email protected]>,
Jesper Dangaard Brouer <[email protected]>,
David Ahern <[email protected]>,
Mina Almasry <[email protected]>,
Willem de Bruijn <[email protected]>,
Dragos Tatulea <[email protected]>
Subject: [PATCH 17/20] io_uring/zcrx: copy fallback to ring buffers
Date: Tue, 7 Nov 2023 13:40:42 -0800
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
From: Pavel Begunkov <[email protected]>
The copy fallback is currently limited to the spinlock-protected ->freelist,
but we also want to be able to grab buffers from the refill queue, which is
NAPI protected. Use the new napi_execute() helper to inject a function call
into the NAPI context.

TODO: the way we set the napi_id via io_zc_rx_set_napi() in drivers later in
the series is not reliable; we should catch all netif_napi_del() calls and
update the id.
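
For readers who haven't reached patch 16/20 ("net: execute custom callback
from napi"): napi_execute() takes a napi_id, a bool (*)(void *) callback and
an opaque data pointer, and runs the callback from that NAPI instance's
(softirq) context. A rough sketch of the pattern; example_cb, example_state
and the napi_id variable are illustrative names, not part of the series:

        struct example_state {
                int count;              /* opaque state for the callback */
        };

        /* Invoked in the NAPI context that owns napi_id, so it can safely
         * touch napi-protected state such as the refill queue.
         */
        static bool example_cb(void *data)
        {
                struct example_state *st = data;

                /* ... napi-protected work using st ... */
                return true;    /* completion status, as in io_napi_refill() */
        }

        /* Caller side, from task context: */
        struct example_state st = { .count = 1 };

        napi_execute(napi_id, example_cb, &st);

io_napi_refill() below follows exactly this shape.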
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: David Wei <[email protected]>
---
 include/linux/io_uring.h |  1 +
 io_uring/zc_rx.c         | 45 ++++++++++++++++++++++++++++++++++++++--
 io_uring/zc_rx.h         |  1 +
 3 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index fb88e000c156..bf886d6de4e0 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -75,6 +75,7 @@ struct io_zc_rx_buf *io_zc_rx_get_buf(struct io_zc_rx_ifq *ifq);
 struct io_zc_rx_buf *io_zc_rx_buf_from_page(struct io_zc_rx_ifq *ifq,
                                             struct page *page);
 void io_zc_rx_put_buf(struct io_zc_rx_ifq *ifq, struct io_zc_rx_buf *buf);
+void io_zc_rx_set_napi(struct io_zc_rx_ifq *ifq, unsigned napi_id);
 
 static inline dma_addr_t io_zc_rx_buf_dma(struct io_zc_rx_buf *buf)
 {
diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
index c2ed600f0951..14328024a550 100644
--- a/io_uring/zc_rx.c
+++ b/io_uring/zc_rx.c
@@ -7,6 +7,7 @@
 #include <linux/netdevice.h>
 #include <linux/nospec.h>
 #include <net/tcp.h>
+#include <net/busy_poll.h>
 #include <uapi/linux/io_uring.h>
@@ -41,6 +42,11 @@ struct io_zc_rx_pool {
 	u32 freelist[];
 };
 
+struct io_zc_refill_data {
+	struct io_zc_rx_ifq *ifq;
+	unsigned count;
+};
+
 static inline u32 io_zc_rx_cqring_entries(struct io_zc_rx_ifq *ifq)
 {
 	struct io_rbuf_ring *ring = ifq->ring;
@@ -244,6 +250,12 @@ static void io_zc_rx_destroy_ifq(struct io_zc_rx_ifq *ifq)
 	kfree(ifq);
 }
 
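+/*
+ * Called by drivers to associate an ifq with the NAPI instance that feeds
+ * it; see the TODO in the commit message about catching netif_napi_del().
+ */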
+void io_zc_rx_set_napi(struct io_zc_rx_ifq *ifq, unsigned napi_id)
+{
+	ifq->napi_id = napi_id;
+}
+EXPORT_SYMBOL(io_zc_rx_set_napi);
+
 static void io_zc_rx_destroy_pool_work(struct work_struct *work)
 {
 	struct io_zc_rx_pool *pool = container_of(
@@ -498,14 +510,43 @@ static void io_zc_rx_refill_cache(struct io_zc_rx_ifq *ifq, int count)
 	pool->cache_count += filled;
 }
 
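+/*
+ * Runs in NAPI context via napi_execute(): if the task-side cache is empty,
+ * top it up from the napi-protected refill queue, then move up to rd->count
+ * buffer ids into the spinlock-protected ->freelist.
+ */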
+static bool io_napi_refill(void *data)
+{
+	struct io_zc_refill_data *rd = data;
+	struct io_zc_rx_ifq *ifq = rd->ifq;
+	struct io_zc_rx_pool *pool = ifq->pool;
+	int i, count = rd->count;
+
+	lockdep_assert_no_hardirq();
+
+	if (!pool->cache_count)
+		io_zc_rx_refill_cache(ifq, POOL_REFILL_COUNT);
+
+	spin_lock_bh(&pool->freelist_lock);
+	for (i = 0; i < count && pool->cache_count; i++) {
+		u32 pgid;
+
+		pgid = pool->cache[--pool->cache_count];
+		pool->freelist[pool->free_count++] = pgid;
+	}
+	spin_unlock_bh(&pool->freelist_lock);
+	return true;
+}
+
 static struct io_zc_rx_buf *io_zc_get_buf_task_safe(struct io_zc_rx_ifq *ifq)
 {
 	struct io_zc_rx_pool *pool = ifq->pool;
 	struct io_zc_rx_buf *buf = NULL;
 	u32 pgid;
 
-	if (!READ_ONCE(pool->free_count))
-		return NULL;
+	if (!READ_ONCE(pool->free_count)) {
+		struct io_zc_refill_data rd = {
+			.ifq = ifq,
+			.count = 1,
+		};
+
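+		/* Ask the NAPI context to top up ->freelist for us. */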
+		napi_execute(ifq->napi_id, io_napi_refill, &rd);
+	}
 
 	spin_lock_bh(&pool->freelist_lock);
 	if (pool->free_count) {
diff --git a/io_uring/zc_rx.h b/io_uring/zc_rx.h
index fac32089e699..fd8828e4bd7a 100644
--- a/io_uring/zc_rx.h
+++ b/io_uring/zc_rx.h
@@ -20,6 +20,7 @@ struct io_zc_rx_ifq {
 	u32 cached_rq_head;
 	u32 cached_cq_tail;
 	void *pool;
+	unsigned int napi_id;
 
 	unsigned nr_sockets;
 	struct file *sockets[IO_ZC_MAX_IFQ_SOCKETS];
--
2.39.3