public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH for-next v2 03/12] io_uring: skip overflow CQE posting for dying ring
Date: Wed,  7 Dec 2022 03:53:28 +0000	[thread overview]
Message-ID: <26d13751155a735a3029e24f8d9ca992f810419d.1670384893.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

After io_ring_ctx_wait_and_kill() is called there should be no users
poking into the rings, so there is no need to post CQEs. Instead of
trying to post overflowed CQEs into the CQ, drop them. Also, do the
dropping in a loop in io_ring_exit_work(): this reduces the number of
contexts it can be executed from, and even when the exit work struggles
to quiesce the ring we won't leave memory allocated for longer than
needed.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 io_uring/io_uring.c | 45 +++++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 62372a641add..5c0b3ba6059e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -611,12 +611,30 @@ void io_cq_unlock_post(struct io_ring_ctx *ctx)
 }
 
 /* Returns true if there are no backlogged entries after the flush */
-static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+static void io_cqring_overflow_kill(struct io_ring_ctx *ctx)
+{
+	struct io_overflow_cqe *ocqe;
+	LIST_HEAD(list);
+
+	io_cq_lock(ctx);
+	list_splice_init(&ctx->cq_overflow_list, &list);
+	clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+	io_cq_unlock(ctx);
+
+	while (!list_empty(&list)) {
+		ocqe = list_first_entry(&list, struct io_overflow_cqe, list);
+		list_del(&ocqe->list);
+		kfree(ocqe);
+	}
+}
+
+/* Returns true if there are no backlogged entries after the flush */
+static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx)
 {
 	bool all_flushed;
 	size_t cqe_size = sizeof(struct io_uring_cqe);
 
-	if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
+	if (__io_cqring_events(ctx) == ctx->cq_entries)
 		return false;
 
 	if (ctx->flags & IORING_SETUP_CQE32)
@@ -627,15 +645,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		struct io_uring_cqe *cqe = io_get_cqe_overflow(ctx, true);
 		struct io_overflow_cqe *ocqe;
 
-		if (!cqe && !force)
+		if (!cqe)
 			break;
 		ocqe = list_first_entry(&ctx->cq_overflow_list,
 					struct io_overflow_cqe, list);
-		if (cqe)
-			memcpy(cqe, &ocqe->cqe, cqe_size);
-		else
-			io_account_cq_overflow(ctx);
-
+		memcpy(cqe, &ocqe->cqe, cqe_size);
 		list_del(&ocqe->list);
 		kfree(ocqe);
 	}
@@ -658,7 +672,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
 		/* iopoll syncs against uring_lock, not completion_lock */
 		if (ctx->flags & IORING_SETUP_IOPOLL)
 			mutex_lock(&ctx->uring_lock);
-		ret = __io_cqring_overflow_flush(ctx, false);
+		ret = __io_cqring_overflow_flush(ctx);
 		if (ctx->flags & IORING_SETUP_IOPOLL)
 			mutex_unlock(&ctx->uring_lock);
 	}
@@ -1478,7 +1492,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 	check_cq = READ_ONCE(ctx->check_cq);
 	if (unlikely(check_cq)) {
 		if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
-			__io_cqring_overflow_flush(ctx, false);
+			__io_cqring_overflow_flush(ctx);
 		/*
 		 * Similarly do not spin if we have not informed the user of any
 		 * dropped CQE.
@@ -2646,8 +2660,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		__io_sqe_buffers_unregister(ctx);
 	if (ctx->file_data)
 		__io_sqe_files_unregister(ctx);
-	if (ctx->rings)
-		__io_cqring_overflow_flush(ctx, true);
+	io_cqring_overflow_kill(ctx);
 	io_eventfd_unregister(ctx);
 	io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free);
 	io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
@@ -2788,6 +2801,12 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 	 * as nobody else will be looking for them.
 	 */
 	do {
+		if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) {
+			mutex_lock(&ctx->uring_lock);
+			io_cqring_overflow_kill(ctx);
+			mutex_unlock(&ctx->uring_lock);
+		}
+
 		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
 			io_move_task_work_from_local(ctx);
 
@@ -2853,8 +2872,6 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 
 	mutex_lock(&ctx->uring_lock);
 	percpu_ref_kill(&ctx->refs);
-	if (ctx->rings)
-		__io_cqring_overflow_flush(ctx, true);
 	xa_for_each(&ctx->personalities, index, creds)
 		io_unregister_personality(ctx, index);
 	if (ctx->rings)
-- 
2.38.1


  parent reply	other threads:[~2022-12-07  3:54 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-07  3:53 [PATCH for-next v2 00/12] CQ locking optimisation Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 01/12] io_uring: dont remove file from msg_ring reqs Pavel Begunkov
2022-12-07 13:52   ` Jens Axboe
2022-12-07 21:12     ` Pavel Begunkov
2022-12-07 21:23       ` Jens Axboe
2022-12-07  3:53 ` [PATCH for-next v2 02/12] io_uring: improve io_double_lock_ctx fail handling Pavel Begunkov
2022-12-07  3:53 ` Pavel Begunkov [this message]
2022-12-07  3:53 ` [PATCH for-next v2 04/12] io_uring: don't check overflow flush failures Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 05/12] io_uring: complete all requests in task context Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 06/12] io_uring: force multishot CQEs into " Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 07/12] io_uring: use tw for putting rsrc Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 08/12] io_uring: never run tw and fallback in parallel Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 09/12] io_uring: get rid of double locking Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 10/12] io_uring: extract a io_msg_install_complete helper Pavel Begunkov
2022-12-07  3:53 ` [PATCH for-next v2 11/12] io_uring: do msg_ring in target task via tw Pavel Begunkov
2022-12-07 15:31   ` Jens Axboe
2022-12-07 15:51     ` Jens Axboe
2022-12-07 21:18       ` Pavel Begunkov
2022-12-07 21:22         ` Jens Axboe
2022-12-07  3:53 ` [PATCH for-next v2 12/12] io_uring: skip spinlocking for ->task_complete Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=26d13751155a735a3029e24f8d9ca992f810419d.1670384893.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox