From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH for-next 4/7] io_uring: hide eventfd assumptions in eventfd paths
Date: Sun, 19 Jun 2022 12:26:07 +0100
Message-ID: <8ac7fbe5ad880990ef498fa09f8de70390836f97.1655637157.git.asml.silence@gmail.com>
In-Reply-To: <[email protected]>
Some io_uring-eventfd users assume that there won't be spurious wakeups.
That assumption has to be honoured by all io_cqring_ev_posted() callers,
which is inconvenient and occasionally leads to problems, but it must be
preserved so as not to break userspace.
Instead of making the callers track whether a CQE was posted, hide that
bookkeeping inside io_eventfd_signal(): it remembers the ->cached_cq_tail
value it saw last time and triggers the eventfd only when ->cached_cq_tail
has changed since then.
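Stripped to its core, the change compares a monotonic produce counter
against its value at the previous signal (a minimal standalone sketch of
the idea; the actual patch below keeps this state in the ctx and takes
->completion_lock around it):

	/* Minimal sketch: signal only when the counter has advanced. */
	struct evfd_dedup {
		unsigned int tail;      /* bumped once per posted CQE */
		unsigned int last_tail; /* tail seen at the last signal */
	};

	static bool should_signal(struct evfd_dedup *d)
	{
		/*
		 * Plain equality is safe across unsigned wraparound as
		 * long as fewer than UINT_MAX CQEs are posted between
		 * two calls.
		 */
		bool skip = (d->tail == d->last_tail);

		d->last_tail = d->tail;
		return !skip;
	}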
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 2 ++
io_uring/io_uring.c | 44 ++++++++++++++++++++--------------
io_uring/timeout.c | 3 +--
3 files changed, 29 insertions(+), 20 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 779c72da5b8f..327bc7f0808d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -315,6 +315,8 @@ struct io_ring_ctx {
struct list_head defer_list;
unsigned sq_thread_idle;
+ /* protected by ->completion_lock */
+ unsigned evfd_last_cq_tail;
};
enum {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 31beb9ccbf12..0875cc649e23 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -473,6 +473,22 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
static void io_eventfd_signal(struct io_ring_ctx *ctx)
{
struct io_ev_fd *ev_fd;
+ bool skip;
+
+ spin_lock(&ctx->completion_lock);
+ /*
+ * Eventfd should only get triggered when at least one event has been
+ * posted. Some applications rely on the eventfd notification count only
+ * changing IFF a new CQE has been added to the CQ ring. There's no
+ * dependency on 1:1 relationship between how many times this function is
+ * called (and hence the eventfd count) and number of CQEs posted to the
+ * CQ ring.
+ */
+ skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+ if (skip)
+ return;
rcu_read_lock();
/*
@@ -511,13 +527,6 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
io_eventfd_signal(ctx);
}
-/*
- * This should only get called when at least one event has been posted.
- * Some applications rely on the eventfd notification count only changing
- * IFF a new CQE has been added to the CQ ring. There's no depedency on
- * 1:1 relationship between how many times this function is called (and
- * hence the eventfd count) and number of CQEs posted to the CQ ring.
- */
void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
@@ -530,7 +539,7 @@ void io_cqring_ev_posted(struct io_ring_ctx *ctx)
/* Returns true if there are no backlogged entries after the flush */
static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
- bool all_flushed, posted;
+ bool all_flushed;
size_t cqe_size = sizeof(struct io_uring_cqe);
if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
@@ -539,7 +548,6 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
if (ctx->flags & IORING_SETUP_CQE32)
cqe_size <<= 1;
- posted = false;
spin_lock(&ctx->completion_lock);
while (!list_empty(&ctx->cq_overflow_list)) {
struct io_uring_cqe *cqe = io_get_cqe(ctx);
@@ -554,7 +562,6 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
else
io_account_cq_overflow(ctx);
- posted = true;
list_del(&ocqe->list);
kfree(ocqe);
}
@@ -567,8 +574,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
- if (posted)
- io_cqring_ev_posted(ctx);
+ io_cqring_ev_posted(ctx);
return all_flushed;
}
@@ -758,8 +764,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx,
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
- if (filled)
- io_cqring_ev_posted(ctx);
+ io_cqring_ev_posted(ctx);
return filled;
}
@@ -940,14 +945,12 @@ __cold void io_free_req(struct io_kiocb *req)
static void __io_req_find_next_prep(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- bool posted;
spin_lock(&ctx->completion_lock);
- posted = io_disarm_next(req);
+ io_disarm_next(req);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
- if (posted)
- io_cqring_ev_posted(ctx);
+ io_cqring_ev_posted(ctx);
}
static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
@@ -2431,6 +2434,11 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
kfree(ev_fd);
return ret;
}
+
+ spin_lock(&ctx->completion_lock);
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+
ev_fd->eventfd_async = eventfd_async;
ctx->has_evfd = true;
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 557c637af158..4938c1cdcbcd 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -628,7 +628,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
spin_unlock_irq(&ctx->timeout_lock);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
- if (canceled != 0)
- io_cqring_ev_posted(ctx);
+ io_cqring_ev_posted(ctx);
return canceled != 0;
}
--
2.36.1