From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH 07/16] io_uring: merge iopoll and normal completion paths
Date: Tue, 15 Aug 2023 18:31:36 +0100
Message-ID: <efd1e68f819f3507a4983af1dd718381b4cfe9f5.1692119257.git.asml.silence@gmail.com>
In-Reply-To: <[email protected]>

io_do_iopoll() and io_submit_flush_completions() are pretty similar:
both fill CQEs and then free a list of requests. Instead of duplicating
that logic, make iopoll use __io_submit_flush_completions(), which also
helps with inlining and other optimisations.

For that, we need to first find all completed iopoll requests, splice
them off the iopoll list, and only then pass the resulting list down.
This adds one extra list traversal, which should be fine as the
requests will stay hot in cache.
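
In rough, userspace-only C the pattern looks like this (all names are
illustrative stand-ins, not the kernel's actual types or helpers):

/*
 * Sketch: find the completed prefix of a singly linked request list,
 * cut it off, and hand the batch to one common flush routine.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct req {
	int id;
	bool completed;
	struct req *next;
};

/* Stand-in for __io_submit_flush_completions(): fill a "CQE" for
 * each request, then free the whole batch in the same pass. */
static void flush_completions(struct req *head)
{
	while (head) {
		struct req *next = head->next;

		printf("post CQE for req %d\n", head->id);
		free(head);
		head = next;
	}
}

int main(void)
{
	struct req *list = NULL, **tail = &list;
	struct req *prev = NULL, *cur;
	int i;

	/* Build a mock iopoll list; the first two requests completed. */
	for (i = 0; i < 4; i++) {
		struct req *r = malloc(sizeof(*r));

		r->id = i;
		r->completed = (i < 2);
		r->next = NULL;
		*tail = r;
		tail = &r->next;
	}

	/* Traversal 1: find the contiguous completed prefix. */
	for (cur = list; cur && cur->completed; cur = cur->next)
		prev = cur;

	if (prev) {
		struct req *batch = list;

		/* Cut the prefix off and pass it down; traversal 2
		 * happens inside flush_completions(). */
		list = cur;
		prev->next = NULL;
		flush_completions(batch);
	}

	/* 'list' keeps only the still-inflight requests; free them. */
	while (list) {
		cur = list->next;
		free(list);
		list = cur;
	}
	return 0;
}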

CQ locking is already conditional; introduce ->lockless_cq and skip
locking for IOPOLL, as it's already protected by ->uring_lock.
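
As a simplified userspace sketch (a pthread mutex stands in for
->completion_lock and plain bools mirror the ctx bits; illustrative
only, not the kernel's code):

#include <pthread.h>
#include <stdbool.h>

struct ctx {
	bool task_complete;	/* CQEs posted only by submitter task */
	bool lockless_cq;	/* task_complete or IOPOLL */
	bool syscall_iopoll;	/* IOPOLL ring polled via syscall */
	pthread_mutex_t completion_lock;
};

static void cq_lock(struct ctx *ctx)
{
	/* IOPOLL completions are already serialised by ->uring_lock
	 * (held by the caller), so skip the completion lock. */
	if (!ctx->lockless_cq)
		pthread_mutex_lock(&ctx->completion_lock);
}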

We also add a wakeup optimisation for IOPOLL to __io_cq_unlock_post(),
so it works just like io_cqring_ev_posted_iopoll().
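
Continuing the sketch above, the unlock+post side then looks roughly
like this (commit/wake helpers stubbed out; again illustrative, the
real code is in the __io_cq_unlock_post() hunk below):

static void commit_cqring(struct ctx *ctx) { /* publish CQ tail */ }
static void commit_cqring_flush(struct ctx *ctx) { /* eventfd, etc. */ }
static void cqring_wake(struct ctx *ctx) { /* wake CQ waiters */ }

static void cq_unlock_post(struct ctx *ctx)
{
	commit_cqring(ctx);
	if (!ctx->task_complete) {
		if (!ctx->lockless_cq)
			pthread_mutex_unlock(&ctx->completion_lock);
		/* IOPOLL rings only need a wakeup when paired with
		 * SQPOLL, which is all io_cqring_ev_posted_iopoll()
		 * ever did. */
		if (!ctx->syscall_iopoll)
			cqring_wake(ctx);
	}
	commit_cqring_flush(ctx);
}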

Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 18 ++++++++++++------
 io_uring/io_uring.h            |  2 +-
 io_uring/rw.c                  | 24 +++++-------------------
 4 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9795eda529f7..c0c03d8059df 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -205,6 +205,7 @@ struct io_ring_ctx {
 		unsigned int		has_evfd: 1;
 		/* all CQEs should be posted only by the submitter task */
 		unsigned int		task_complete: 1;
+		unsigned int		lockless_cq: 1;
 		unsigned int		syscall_iopoll: 1;
 		unsigned int		poll_activated: 1;
 		unsigned int		drain_disabled: 1;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 8d27d2a2e893..204c6a31c5d1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -147,7 +147,6 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 bool cancel_all);
 
 static void io_queue_sqe(struct io_kiocb *req);
-static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
 
 struct kmem_cache *req_cachep;
 
@@ -616,7 +615,7 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 
 static inline void __io_cq_lock(struct io_ring_ctx *ctx)
 {
-	if (!ctx->task_complete)
+	if (!ctx->lockless_cq)
 		spin_lock(&ctx->completion_lock);
 }
 
@@ -630,8 +629,11 @@ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
 {
 	io_commit_cqring(ctx);
 	if (!ctx->task_complete) {
-		spin_unlock(&ctx->completion_lock);
-		io_cqring_wake(ctx);
+		if (!ctx->lockless_cq)
+			spin_unlock(&ctx->completion_lock);
+		/* IOPOLL rings only need to wake up if it's also SQPOLL */
+		if (!ctx->syscall_iopoll)
+			io_cqring_wake(ctx);
 	}
 	io_commit_cqring_flush(ctx);
 }
@@ -1485,7 +1487,8 @@ void io_queue_next(struct io_kiocb *req)
 		io_req_task_queue(nxt);
 }
 
-void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
+static void io_free_batch_list(struct io_ring_ctx *ctx,
+			       struct io_wq_work_node *node)
 	__must_hold(&ctx->uring_lock)
 {
 	do {
@@ -1522,7 +1525,7 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
 	} while (node);
 }
 
-static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
+void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 	__must_hold(&ctx->uring_lock)
 {
 	struct io_submit_state *state = &ctx->submit_state;
@@ -3836,6 +3839,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 	    !(ctx->flags & IORING_SETUP_SQPOLL))
 		ctx->task_complete = true;
 
+	if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
+		ctx->lockless_cq = true;
+
 	/*
 	 * lazy poll_wq activation relies on ->task_complete for synchronisation
 	 * purposes, see io_activate_pollwq()
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 2960e35b32a5..07fd185064d2 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -72,7 +72,7 @@ int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
 int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
-void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
+void __io_submit_flush_completions(struct io_ring_ctx *ctx);
 int io_req_prep_async(struct io_kiocb *req);
 
 struct io_wq_work *io_wq_free_work(struct io_wq_work *work);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 20140d3505f1..0a1e515f0510 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -983,13 +983,6 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	return ret;
 }
 
-static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
-{
-	if (ctx->flags & IORING_SETUP_SQPOLL)
-		io_cqring_wake(ctx);
-	io_commit_cqring_flush(ctx);
-}
-
 void io_rw_fail(struct io_kiocb *req)
 {
 	int res;
@@ -1060,24 +1053,17 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (!smp_load_acquire(&req->iopoll_completed))
 			break;
 		nr_events++;
-		if (unlikely(req->flags & REQ_F_CQE_SKIP))
-			continue;
-
 		req->cqe.flags = io_put_kbuf(req, 0);
-		if (unlikely(!io_fill_cqe_req(ctx, req))) {
-			spin_lock(&ctx->completion_lock);
-			io_req_cqe_overflow(req);
-			spin_unlock(&ctx->completion_lock);
-		}
 	}
-
 	if (unlikely(!nr_events))
 		return 0;
 
-	io_commit_cqring(ctx);
-	io_cqring_ev_posted_iopoll(ctx);
 	pos = start ? start->next : ctx->iopoll_list.first;
 	wq_list_cut(&ctx->iopoll_list, prev, start);
-	io_free_batch_list(ctx, pos);
+
+	if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
+		return 0;
+	ctx->submit_state.compl_reqs.first = pos;
+	__io_submit_flush_completions(ctx);
 	return nr_events;
 }
-- 
2.41.0



Thread overview: 23+ messages
2023-08-15 17:31 [RFC 00/16] caching and SQ/CQ optimisations Pavel Begunkov
2023-08-15 17:31 ` [PATCH 01/16] io_uring: improve cqe !tracing hot path Pavel Begunkov
2023-08-15 17:31 ` [PATCH 02/16] io_uring: cqe init hardening Pavel Begunkov
2023-08-19 15:03   ` Jens Axboe
2023-08-24 16:28     ` Pavel Begunkov
2023-08-24 16:49       ` Jens Axboe
2023-08-15 17:31 ` [PATCH 03/16] io_uring: simplify big_cqe handling Pavel Begunkov
2023-08-15 17:31 ` [PATCH 04/16] io_uring: refactor __io_get_cqe() Pavel Begunkov
2023-08-15 17:31 ` [PATCH 05/16] io_uring: optimise extra io_get_cqe null check Pavel Begunkov
2023-08-15 17:31 ` [PATCH 06/16] io_uring: reorder cqring_flush and wakeups Pavel Begunkov
2023-08-15 17:31 ` Pavel Begunkov [this message]
2023-08-15 17:31 ` [PATCH 08/16] io_uring: compact SQ/CQ heads/tails Pavel Begunkov
2023-08-19 15:05   ` Jens Axboe
2023-08-24 16:29     ` Pavel Begunkov
2023-08-15 17:31 ` [PATCH 09/16] io_uring: add option to remove SQ indirection Pavel Begunkov
2023-08-19 15:06   ` Jens Axboe
2023-08-15 17:31 ` [PATCH 10/16] io_uring: static_key for !IORING_SETUP_NO_SQARRAY Pavel Begunkov
2023-08-15 17:31 ` [PATCH 11/16] io_uring: move non aligned field to the end Pavel Begunkov
2023-08-15 17:31 ` [PATCH 12/16] io_uring: banish non-hot data to end of io_ring_ctx Pavel Begunkov
2023-08-15 17:31 ` [PATCH 13/16] io_uring: separate task_work/waiting cache line Pavel Begunkov
2023-08-15 17:31 ` [PATCH 14/16] io_uring: move multishot cqe cache in ctx Pavel Begunkov
2023-08-15 17:31 ` [PATCH 15/16] io_uring: move iopoll ctx fields around Pavel Begunkov
2023-08-15 17:31 ` [PATCH 16/16] io_uring: force inline io_fill_cqe_req Pavel Begunkov
