public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected], [email protected],
	[email protected]
Cc: Jakub Kicinski <[email protected]>,
	Jonathan Lemon <[email protected]>,
	"David S . Miller" <[email protected]>,
	Willem de Bruijn <[email protected]>,
	Eric Dumazet <[email protected]>,
	David Ahern <[email protected]>, Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>
Subject: [RFC v2 18/19] io_uring: task_work for notification delivery
Date: Tue, 21 Dec 2021 15:35:40 +0000	[thread overview]
Message-ID: <33b943a2409dc1c4ad845ea0bebb76ecad723ef6.1640029579.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Workqueues are way too heavy for tx notification delivery. We still
need some non-irq context because ->completion_lock is not irq-safe, so
use task_work instead. As expected, for test cases with real hardware
that juggle lots of notifications the performance is drastically
better, e.g. the profile percentage of the relevant parts drops from
30% to less than 3%.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 57 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8cfa8ea161e4..ee496b463462 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -330,11 +330,16 @@ struct io_submit_state {
 
 struct io_tx_notifier {
 	struct ubuf_info	uarg;
-	struct work_struct	commit_work;
 	struct percpu_ref	*fixed_rsrc_refs;
 	u64			tag;
 	u32			seq;
 	struct list_head	cache_node;
+	struct task_struct	*task;
+
+	union {
+		struct callback_head	task_work;
+		struct work_struct	commit_work;
+	};
 };
 
 struct io_tx_ctx {
@@ -1965,19 +1970,17 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
 	return __io_fill_cqe(ctx, user_data, res, cflags);
 }
 
-static void io_zc_tx_work_callback(struct work_struct *work)
+static void io_zc_tx_notifier_finish(struct callback_head *cb)
 {
-	struct io_tx_notifier *notifier = container_of(work, struct io_tx_notifier,
-						       commit_work);
+	struct io_tx_notifier *notifier = container_of(cb, struct io_tx_notifier,
+						       task_work);
 	struct io_ring_ctx *ctx = notifier->uarg.ctx;
 	struct percpu_ref *rsrc_refs = notifier->fixed_rsrc_refs;
 
 	spin_lock(&ctx->completion_lock);
 	io_fill_cqe_aux(ctx, notifier->tag, notifier->seq, 0);
-
 	list_add(&notifier->cache_node, &ctx->ubuf_list_locked);
 	ctx->ubuf_locked_nr++;
-
 	io_commit_cqring(ctx);
 	spin_unlock(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
@@ -1985,6 +1988,14 @@ static void io_zc_tx_work_callback(struct work_struct *work)
 	percpu_ref_put(rsrc_refs);
 }
 
+static void io_zc_tx_work_callback(struct work_struct *work)
+{
+	struct io_tx_notifier *notifier = container_of(work, struct io_tx_notifier,
+						       commit_work);
+
+	io_zc_tx_notifier_finish(&notifier->task_work);
+}
+
 static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
 					  struct ubuf_info *uarg,
 					  bool success)
@@ -1994,21 +2005,39 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
 
 	if (!refcount_dec_and_test(&uarg->refcnt))
 		return;
+	if (unlikely(!notifier->task))
+		goto fallback;
 
-	if (in_interrupt()) {
-		INIT_WORK(&notifier->commit_work, io_zc_tx_work_callback);
-		queue_work(system_unbound_wq, &notifier->commit_work);
-	} else {
-		io_zc_tx_work_callback(&notifier->commit_work);
+	put_task_struct(notifier->task);
+	notifier->task = NULL;
+
+	if (!in_interrupt()) {
+		io_zc_tx_notifier_finish(&notifier->task_work);
+		return;
 	}
+
+	init_task_work(&notifier->task_work, io_zc_tx_notifier_finish);
+	if (likely(!task_work_add(notifier->task, &notifier->task_work,
+				  TWA_SIGNAL)))
+		return;
+
+fallback:
+	INIT_WORK(&notifier->commit_work, io_zc_tx_work_callback);
+	queue_work(system_unbound_wq, &notifier->commit_work);
 }
 
-static void io_tx_kill_notification(struct io_tx_ctx *tx_ctx)
+static inline void __io_tx_kill_notification(struct io_tx_ctx *tx_ctx)
 {
 	io_uring_tx_zerocopy_callback(NULL, &tx_ctx->notifier->uarg, true);
 	tx_ctx->notifier = NULL;
 }
 
+static inline void io_tx_kill_notification(struct io_tx_ctx *tx_ctx)
+{
+	tx_ctx->notifier->task = get_task_struct(current);
+	__io_tx_kill_notification(tx_ctx);
+}
+
 static void io_notifier_splice(struct io_ring_ctx *ctx)
 {
 	spin_lock(&ctx->completion_lock);
@@ -2058,7 +2087,7 @@ static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
 	} else {
 		gfp_t gfp_flags = GFP_ATOMIC|GFP_KERNEL_ACCOUNT;
 
-		notifier = kmalloc(sizeof(*notifier), gfp_flags);
+		notifier = kzalloc(sizeof(*notifier), gfp_flags);
 		if (!notifier)
 			return NULL;
 		ctx->nr_tx_ctx++;
@@ -9502,7 +9531,7 @@ static void io_sqe_tx_ctx_kill_ubufs(struct io_ring_ctx *ctx)
 		tx_ctx = &ctx->tx_ctxs[i];
 
 		if (tx_ctx->notifier)
-			io_tx_kill_notification(tx_ctx);
+			__io_tx_kill_notification(tx_ctx);
 	}
 }
 
-- 
2.34.1


  parent reply	other threads:[~2021-12-21 15:36 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-21 15:35 [RFC v2 00/19] io_uring zerocopy tx Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 01/19] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 02/19] skbuff: pass a struct ubuf_info in msghdr Pavel Begunkov
2022-01-11 13:51   ` Hao Xu
2022-01-11 15:50     ` Pavel Begunkov
2022-01-12  3:39       ` Hao Xu
2022-01-12 16:53         ` Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 03/19] net: add zerocopy_sg_from_iter for bvec Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 04/19] net: optimise page get/free for bvec zc Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 05/19] net: don't track pfmemalloc for zc registered mem Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 06/19] ipv4/udp: add support msgdr::msg_ubuf Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 07/19] ipv6/udp: " Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 08/19] ipv4: avoid partial copy for zc Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 09/19] ipv6: " Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 10/19] io_uring: add send notifiers registration Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 11/19] io_uring: infrastructure for send zc notifications Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 12/19] io_uring: wire send zc request type Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 13/19] io_uring: add an option to flush zc notifications Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 14/19] io_uring: opcode independent fixed buf import Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 15/19] io_uring: sendzc with fixed buffers Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 16/19] io_uring: cache struct ubuf_info Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 17/19] io_uring: unclog ctx refs waiting with zc notifiers Pavel Begunkov
2021-12-21 15:35 ` Pavel Begunkov [this message]
2021-12-21 15:35 ` [RFC v2 19/19] io_uring: optimise task referencing by notifiers Pavel Begunkov
2021-12-21 15:43 ` [RFC v2 00/19] io_uring zerocopy tx Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=33b943a2409dc1c4ad845ea0bebb76ecad723ef6.1640029579.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox