From: Dylan Yudaken <[email protected]>
To: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
<[email protected]>
Cc: <[email protected]>, Dylan Yudaken <[email protected]>
Subject: [PATCH for-next v2 6/6] io_uring: signal registered eventfd to process deferred task work
Date: Tue, 16 Aug 2022 08:37:28 -0700
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

Some workloads rely on a registered eventfd (via
io_uring_register_eventfd(3)) in order to wake up and process the
io_uring.

In the case of a ring set up with IORING_SETUP_DEFER_TASKRUN, that
eventfd also needs to be signalled when there is task work to run.

This changes the old behaviour, which assumed that one eventfd signal
implied at least one CQE. The change only applies when the new flag is
set, so existing users will not notice. The relaxation should be
expected with IORING_SETUP_DEFER_TASKRUN, as not every task work item
is guaranteed to produce a CQE.
Signed-off-by: Dylan Yudaken <[email protected]>
---
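As a reference for the intended userspace flow, here is a minimal
sketch (not part of the patch) of a ring that registers an eventfd and
blocks on it. It assumes liburing's io_uring_queue_init_params(),
io_uring_register_eventfd() and io_uring_get_events() helpers, and
that IORING_SETUP_DEFER_TASKRUN is combined with
IORING_SETUP_SINGLE_ISSUER; treat it as an illustration under those
assumptions rather than a tested program:

#include <liburing.h>
#include <sys/eventfd.h>
#include <unistd.h>
#include <stdint.h>

static int wait_via_eventfd(void)
{
        struct io_uring_params p = { 0 };
        struct io_uring ring;
        uint64_t count;
        int efd;

        /* Completion task work is deferred until this task runs it */
        p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
        if (io_uring_queue_init_params(8, &ring, &p) < 0)
                return -1;

        efd = eventfd(0, 0);
        if (efd < 0 || io_uring_register_eventfd(&ring, efd) < 0)
                return -1;

        /* ... submit requests with io_uring_submit() ... */

        /*
         * Block on the eventfd. With this patch the kernel also
         * signals it when deferred task work is queued, so a wakeup
         * no longer implies that at least one CQE has been posted.
         */
        if (read(efd, &count, sizeof(count)) != sizeof(count))
                return -1;

        io_uring_get_events(&ring);     /* run the deferred task work */
        return 0;
}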
 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 75 ++++++++++++++++++++++++----------
 2 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index d56ff2185168..42494176434a 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -184,6 +184,7 @@ struct io_ev_fd {
struct eventfd_ctx *cq_ev_fd;
unsigned int eventfd_async: 1;
struct rcu_head rcu;
+ atomic_t refs;
};
struct io_alloc_cache {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2eb9ad9edef3..65ea8ccd63e8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -478,33 +478,33 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
}
}
+
+static inline void __io_eventfd_put(struct io_ev_fd *ev_fd)
+{
+ if (atomic_dec_and_test(&ev_fd->refs)) {
+ eventfd_ctx_put(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+ }
+}
+
+static void io_eventfd_signal_put(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_signal(ev_fd->cq_ev_fd, 1);
+ __io_eventfd_put(ev_fd);
+}
+
static void io_eventfd_put(struct rcu_head *rcu)
{
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
- eventfd_ctx_put(ev_fd->cq_ev_fd);
- kfree(ev_fd);
+ __io_eventfd_put(ev_fd);
}
static void io_eventfd_signal(struct io_ring_ctx *ctx)
{
- struct io_ev_fd *ev_fd;
- bool skip;
-
- spin_lock(&ctx->completion_lock);
- /*
- * Eventfd should only get triggered when at least one event has been
- * posted. Some applications rely on the eventfd notification count only
- * changing IFF a new CQE has been added to the CQ ring. There's no
- * depedency on 1:1 relationship between how many times this function is
- * called (and hence the eventfd count) and number of CQEs posted to the
- * CQ ring.
- */
- skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
- if (skip)
- return;
+ struct io_ev_fd *ev_fd = NULL;
rcu_read_lock();
/*
@@ -522,13 +522,43 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
goto out;
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
goto out;
+ if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+ goto out;
- if (!ev_fd->eventfd_async || io_wq_current_is_worker())
+ if (likely(eventfd_signal_allowed())) {
eventfd_signal(ev_fd->cq_ev_fd, 1);
+ } else {
+ atomic_inc(&ev_fd->refs);
+ call_rcu(&ev_fd->rcu, io_eventfd_signal_put);
+ }
+
out:
rcu_read_unlock();
}
+static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
+{
+ bool skip;
+
+ spin_lock(&ctx->completion_lock);
+
+ /*
+ * Eventfd should only get triggered when at least one event has been
+ * posted. Some applications rely on the eventfd notification count
+ * only changing IFF a new CQE has been added to the CQ ring. There's
+ * no dependency on a 1:1 relationship between how many times this
+ * function is called (and hence the eventfd count) and number of CQEs
+ * posted to the CQ ring.
+ */
+ skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+ if (skip)
+ return;
+
+ io_eventfd_signal(ctx);
+}
+
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
if (ctx->off_timeout_used || ctx->drain_active) {
@@ -540,7 +570,7 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
spin_unlock(&ctx->completion_lock);
}
if (ctx->has_evfd)
- io_eventfd_signal(ctx);
+ io_eventfd_flush_signal(ctx);
}
static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
@@ -1066,6 +1096,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+ if (ctx->has_evfd)
+ io_eventfd_signal(ctx);
io_cqring_wake(ctx);
}
@@ -2443,6 +2475,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
ev_fd->eventfd_async = eventfd_async;
ctx->has_evfd = true;
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
+ atomic_set(&ev_fd->refs, 1);
return 0;
}
--
2.30.2
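
For context on the new refcount: the signalling path pins the io_ev_fd
across an RCU grace period when it cannot signal inline. A
self-contained sketch of that pattern, with hypothetical names
mirroring the helpers this patch adds, looks like:

struct ev {
        struct eventfd_ctx *cq_ev_fd;
        atomic_t refs;                  /* set to 1 at register time */
        struct rcu_head rcu;
};

static void ev_put(struct ev *ev)
{
        /* free only once unregister and in-flight signals are done */
        if (atomic_dec_and_test(&ev->refs)) {
                eventfd_ctx_put(ev->cq_ev_fd);
                kfree(ev);
        }
}

static void ev_signal_rcu(struct rcu_head *rcu)
{
        struct ev *ev = container_of(rcu, struct ev, rcu);

        eventfd_signal(ev->cq_ev_fd, 1);
        ev_put(ev);                     /* drop the ref taken below */
}

static void ev_signal(struct ev *ev)
{
        if (eventfd_signal_allowed()) {
                eventfd_signal(ev->cq_ev_fd, 1);
        } else {
                /* pin ev until the RCU callback has signalled it */
                atomic_inc(&ev->refs);
                call_rcu(&ev->rcu, ev_signal_rcu);
        }
}

The reference taken before call_rcu() is what the
atomic_set(&ev_fd->refs, 1) in io_eventfd_register() pairs with:
unregister drops the initial reference, and each deferred signal holds
its own until it has run.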