public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected], [email protected]
Cc: Jens Axboe <[email protected]>,
	[email protected], "David S . Miller" <[email protected]>,
	Jakub Kicinski <[email protected]>,
	David Ahern <[email protected]>,
	Eric Dumazet <[email protected]>,
	Willem de Bruijn <[email protected]>
Subject: [RFC 6/6] io_uring/notif: implement notification stacking
Date: Fri, 12 Apr 2024 13:55:27 +0100	[thread overview]
Message-ID: <3e2ef5f6d39c4631f5bae86b503a5397d6707563.1712923998.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

The network stack allows only one ubuf_info per skb, and unlike
MSG_ZEROCOPY, each io_uring zerocopy send will carry a separate
ubuf_info. That means that send requests can't reuse a previosly
allocated skb and need to get one more or more of new ones. That's fine
for large sends, but otherwise it would spam the stack with lots of skbs
carrying just a little data each.

To help with that implement linking notification (i.e. an io_uring wrapper
around ubuf_info) into a list. Each is refcounted by skbs and the stack
as usual. additionally all non head entries keep a reference to the
head, which they put down when their refcount hits 0. When the head have
no more users, it'll efficiently put all notifications in a batch.

As mentioned previously about ->io_link_skb, the callback implementation
always allows to bind to an skb without a ubuf_info.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 io_uring/notif.c | 71 +++++++++++++++++++++++++++++++++++++++++++-----
 io_uring/notif.h |  4 +++
 2 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/io_uring/notif.c b/io_uring/notif.c
index 26680176335f..d58cdc01e691 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -9,18 +9,28 @@
 #include "notif.h"
 #include "rsrc.h"
 
+static const struct ubuf_info_ops io_ubuf_ops;
+
 static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
 {
 	struct io_notif_data *nd = io_notif_to_data(notif);
 
-	if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used))
-		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+	do {
+		notif = cmd_to_io_kiocb(nd);
 
-	if (nd->account_pages && notif->ctx->user) {
-		__io_unaccount_mem(notif->ctx->user, nd->account_pages);
-		nd->account_pages = 0;
-	}
-	io_req_task_complete(notif, ts);
+		lockdep_assert(refcount_read(&nd->uarg.refcnt) == 0);
+
+		if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used))
+			notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+
+		if (nd->account_pages && notif->ctx->user) {
+			__io_unaccount_mem(notif->ctx->user, nd->account_pages);
+			nd->account_pages = 0;
+		}
+
+		nd = nd->next;
+		io_req_task_complete(notif, ts);
+	} while (nd);
 }
 
 void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
@@ -39,12 +49,56 @@ void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
 	if (!refcount_dec_and_test(&uarg->refcnt))
 		return;
 
+	if (nd->head != nd) {
+		io_tx_ubuf_complete(skb, &nd->head->uarg, success);
+		return;
+	}
 	notif->io_task_work.func = io_notif_tw_complete;
 	__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
 }
 
+static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+	struct io_notif_data *nd, *prev_nd;
+	struct io_kiocb *prev_notif, *notif;
+	struct ubuf_info *prev_uarg = skb_zcopy(skb);
+
+	nd = container_of(uarg, struct io_notif_data, uarg);
+	notif = cmd_to_io_kiocb(nd);
+
+	if (!prev_uarg) {
+		net_zcopy_get(&nd->uarg);
+		skb_zcopy_init(skb, &nd->uarg);
+		return 0;
+	}
+	/* handle it separately as we can't link a notif to itself */
+	if (unlikely(prev_uarg == &nd->uarg))
+		return 0;
+	/* we can't join two links together, just request a fresh skb */
+	if (unlikely(nd->head != nd || nd->next))
+		return -EEXIST;
+	/* don't mix zc providers */
+	if (unlikely(prev_uarg->ops != &io_ubuf_ops))
+		return -EEXIST;
+
+	prev_nd = container_of(prev_uarg, struct io_notif_data, uarg);
+	prev_notif = cmd_to_io_kiocb(nd);
+
+	/* make sure all noifications can be finished in the same task_work */
+	if (unlikely(notif->ctx != prev_notif->ctx ||
+		     notif->task != prev_notif->task))
+		return -EEXIST;
+
+	nd->head = prev_nd->head;
+	nd->next = prev_nd->next;
+	prev_nd->next = nd;
+	net_zcopy_get(&nd->head->uarg);
+	return 0;
+}
+
 static const struct ubuf_info_ops io_ubuf_ops = {
 	.complete = io_tx_ubuf_complete,
+	.link_skb = io_link_skb,
 };
 
 struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
@@ -65,6 +119,9 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
 	nd = io_notif_to_data(notif);
 	nd->zc_report = false;
 	nd->account_pages = 0;
+	nd->next = NULL;
+	nd->head = nd;
+
 	nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
 	nd->uarg.ops = &io_ubuf_ops;
 	refcount_set(&nd->uarg.refcnt, 1);
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 394e1d33daa6..6d2e8b674b43 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -14,6 +14,10 @@ struct io_notif_data {
 	struct file		*file;
 	struct ubuf_info	uarg;
 	unsigned long		account_pages;
+
+	struct io_notif_data	*next;
+	struct io_notif_data	*head;
+
 	bool			zc_report;
 	bool			zc_used;
 	bool			zc_copied;
-- 
2.44.0


  parent reply	other threads:[~2024-04-12 12:55 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-12 12:55 [RFC 0/6] implement io_uring notification (ubuf_info) stacking Pavel Begunkov
2024-04-12 12:55 ` [RFC 1/6] net: extend ubuf_info callback to ops structure Pavel Begunkov
2024-04-13 17:17   ` David Ahern
2024-04-14 17:07   ` Willem de Bruijn
2024-04-15  0:07     ` Pavel Begunkov
2024-04-15 15:06       ` Willem de Bruijn
2024-04-15 18:55         ` Pavel Begunkov
2024-04-15 19:01           ` Willem de Bruijn
2024-04-16 14:50       ` David Ahern
2024-04-16 15:31         ` Pavel Begunkov
2024-04-12 12:55 ` [RFC 2/6] net: add callback for setting a ubuf_info to skb Pavel Begunkov
2024-04-13 17:18   ` David Ahern
2024-04-12 12:55 ` [RFC 3/6] io_uring/notif: refactor io_tx_ubuf_complete() Pavel Begunkov
2024-04-12 12:55 ` [RFC 4/6] io_uring/notif: remove ctx var from io_notif_tw_complete Pavel Begunkov
2024-04-12 12:55 ` [RFC 5/6] io_uring/notif: simplify io_notif_flush() Pavel Begunkov
2024-04-12 12:55 ` Pavel Begunkov [this message]
2024-04-14 17:10   ` [RFC 6/6] io_uring/notif: implement notification stacking Willem de Bruijn
2024-04-14 23:55     ` Pavel Begunkov
2024-04-15 15:15       ` Willem de Bruijn
2024-04-15 18:51         ` Pavel Begunkov
2024-04-15 19:02           ` Willem de Bruijn
2024-04-12 13:44 ` [RFC 0/6] implement io_uring notification (ubuf_info) stacking Jens Axboe
2024-04-12 14:52 ` Jens Axboe
2024-04-13 17:17 ` David Ahern
2024-04-15  0:08   ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3e2ef5f6d39c4631f5bae86b503a5397d6707563.1712923998.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox