From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 2/2] io_uring/eventfd: move eventfd handling to separate file
Date: Mon, 3 Jun 2024 12:03:18 -0600 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
This is pretty nicely abstracted already, but let's move it to a separate
file rather than have it in the main io_uring file. With that, we can
also move the io_ev_fd struct and enum out of global scope.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring_types.h | 8 --
io_uring/Makefile | 6 +-
io_uring/eventfd.c | 160 +++++++++++++++++++++++++++++++++
io_uring/eventfd.h | 8 ++
io_uring/io_uring.c | 82 +----------------
io_uring/io_uring.h | 6 --
io_uring/register.c | 56 +-----------
7 files changed, 173 insertions(+), 153 deletions(-)
create mode 100644 io_uring/eventfd.c
create mode 100644 io_uring/eventfd.h
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 91224bbcfa73..a2227ab7fd16 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -211,14 +211,6 @@ struct io_submit_state {
struct blk_plug plug;
};
-struct io_ev_fd {
- struct eventfd_ctx *cq_ev_fd;
- unsigned int eventfd_async: 1;
- struct rcu_head rcu;
- atomic_t refs;
- atomic_t ops;
-};
-
struct io_alloc_cache {
void **entries;
unsigned int nr_cached;
diff --git a/io_uring/Makefile b/io_uring/Makefile
index fc1b23c524e8..61923e11c767 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -4,9 +4,9 @@
obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
tctx.o filetable.o rw.o net.o poll.o \
- uring_cmd.o openclose.o sqpoll.o \
- xattr.o nop.o fs.o splice.o sync.o \
- msg_ring.o advise.o openclose.o \
+ eventfd.o uring_cmd.o openclose.o \
+ sqpoll.o xattr.o nop.o fs.o splice.o \
+ sync.o msg_ring.o advise.o openclose.o \
epoll.o statx.o timeout.o fdinfo.o \
cancel.o waitid.o register.o \
truncate.o memmap.o
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
new file mode 100644
index 000000000000..b9384503a2b7
--- /dev/null
+++ b/io_uring/eventfd.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/eventfd.h>
+#include <linux/eventpoll.h>
+#include <linux/io_uring.h>
+#include <linux/io_uring_types.h>
+
+#include "io-wq.h"
+#include "eventfd.h"
+
+struct io_ev_fd {
+ struct eventfd_ctx *cq_ev_fd;
+ unsigned int eventfd_async: 1;
+ struct rcu_head rcu;
+ atomic_t refs;
+ atomic_t ops;
+};
+
+enum {
+ IO_EVENTFD_OP_SIGNAL_BIT,
+};
+
+static void io_eventfd_free(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_ctx_put(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+}
+
+static void io_eventfd_do_signal(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+
+ if (atomic_dec_and_test(&ev_fd->refs))
+ io_eventfd_free(rcu);
+}
+
+void io_eventfd_signal(struct io_ring_ctx *ctx)
+{
+ struct io_ev_fd *ev_fd = NULL;
+
+ if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+ return;
+
+ guard(rcu)();
+
+ /*
+ * rcu_dereference ctx->io_ev_fd once and use it for both for checking
+ * and eventfd_signal
+ */
+ ev_fd = rcu_dereference(ctx->io_ev_fd);
+
+ /*
+ * Check again if ev_fd exists incase an io_eventfd_unregister call
+ * completed between the NULL check of ctx->io_ev_fd at the start of
+ * the function and rcu_read_lock.
+ */
+ if (unlikely(!ev_fd))
+ return;
+ if (!atomic_inc_not_zero(&ev_fd->refs))
+ return;
+ if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+ goto out;
+
+ if (likely(eventfd_signal_allowed())) {
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+ } else {
+ if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+ call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+ return;
+ }
+ }
+out:
+ if (atomic_dec_and_test(&ev_fd->refs))
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
+void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
+{
+ bool skip;
+
+ spin_lock(&ctx->completion_lock);
+
+ /*
+ * Eventfd should only get triggered when at least one event has been
+ * posted. Some applications rely on the eventfd notification count
+ * only changing IFF a new CQE has been added to the CQ ring. There's
+ * no depedency on 1:1 relationship between how many times this
+ * function is called (and hence the eventfd count) and number of CQEs
+ * posted to the CQ ring.
+ */
+ skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+ if (skip)
+ return;
+
+ io_eventfd_signal(ctx);
+}
+
+int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned int eventfd_async)
+{
+ struct io_ev_fd *ev_fd;
+ __s32 __user *fds = arg;
+ int fd;
+
+ ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+ lockdep_is_held(&ctx->uring_lock));
+ if (ev_fd)
+ return -EBUSY;
+
+ if (copy_from_user(&fd, fds, sizeof(*fds)))
+ return -EFAULT;
+
+ ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
+ if (!ev_fd)
+ return -ENOMEM;
+
+ ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
+ if (IS_ERR(ev_fd->cq_ev_fd)) {
+ int ret = PTR_ERR(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+ return ret;
+ }
+
+ spin_lock(&ctx->completion_lock);
+ ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
+
+ ev_fd->eventfd_async = eventfd_async;
+ ctx->has_evfd = true;
+ atomic_set(&ev_fd->refs, 1);
+ atomic_set(&ev_fd->ops, 0);
+ rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
+ return 0;
+}
+
+int io_eventfd_unregister(struct io_ring_ctx *ctx)
+{
+ struct io_ev_fd *ev_fd;
+
+ ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+ lockdep_is_held(&ctx->uring_lock));
+ if (ev_fd) {
+ ctx->has_evfd = false;
+ rcu_assign_pointer(ctx->io_ev_fd, NULL);
+ if (atomic_dec_and_test(&ev_fd->refs))
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
+ return 0;
+ }
+
+ return -ENXIO;
+}
diff --git a/io_uring/eventfd.h b/io_uring/eventfd.h
new file mode 100644
index 000000000000..d394f49c6321
--- /dev/null
+++ b/io_uring/eventfd.h
@@ -0,0 +1,8 @@
+
+struct io_ring_ctx;
+int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned int eventfd_async);
+int io_eventfd_unregister(struct io_ring_ctx *ctx);
+
+void io_eventfd_flush_signal(struct io_ring_ctx *ctx);
+void io_eventfd_signal(struct io_ring_ctx *ctx);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index b874836ee49d..96f6da0bf5cd 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -101,6 +101,7 @@
#include "poll.h"
#include "rw.h"
#include "alloc_cache.h"
+#include "eventfd.h"
#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
@@ -541,87 +542,6 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
}
}
-void io_eventfd_free(struct rcu_head *rcu)
-{
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
- eventfd_ctx_put(ev_fd->cq_ev_fd);
- kfree(ev_fd);
-}
-
-void io_eventfd_do_signal(struct rcu_head *rcu)
-{
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
-
- if (atomic_dec_and_test(&ev_fd->refs))
- io_eventfd_free(rcu);
-}
-
-static void io_eventfd_signal(struct io_ring_ctx *ctx)
-{
- struct io_ev_fd *ev_fd = NULL;
-
- if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
- return;
-
- guard(rcu)();
-
- /*
- * rcu_dereference ctx->io_ev_fd once and use it for both for checking
- * and eventfd_signal
- */
- ev_fd = rcu_dereference(ctx->io_ev_fd);
-
- /*
- * Check again if ev_fd exists incase an io_eventfd_unregister call
- * completed between the NULL check of ctx->io_ev_fd at the start of
- * the function and rcu_read_lock.
- */
- if (unlikely(!ev_fd))
- return;
- if (!atomic_inc_not_zero(&ev_fd->refs))
- return;
- if (ev_fd->eventfd_async && !io_wq_current_is_worker())
- goto out;
-
- if (likely(eventfd_signal_allowed())) {
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
- } else {
- if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
- call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
- return;
- }
- }
-out:
- if (atomic_dec_and_test(&ev_fd->refs))
- call_rcu(&ev_fd->rcu, io_eventfd_free);
-}
-
-static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-{
- bool skip;
-
- spin_lock(&ctx->completion_lock);
-
- /*
- * Eventfd should only get triggered when at least one event has been
- * posted. Some applications rely on the eventfd notification count
- * only changing IFF a new CQE has been added to the CQ ring. There's
- * no depedency on 1:1 relationship between how many times this
- * function is called (and hence the eventfd count) and number of CQEs
- * posted to the CQ ring.
- */
- skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
- if (skip)
- return;
-
- io_eventfd_signal(ctx);
-}
-
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
if (ctx->poll_activated)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 804cd55416e9..8518da64ada9 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -104,12 +104,6 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all);
-enum {
- IO_EVENTFD_OP_SIGNAL_BIT,
-};
-
-void io_eventfd_do_signal(struct rcu_head *rcu);
-void io_eventfd_free(struct rcu_head *rcu);
void io_activate_pollwq(struct io_ring_ctx *ctx);
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
diff --git a/io_uring/register.c b/io_uring/register.c
index e1e9d005718e..50e9cbf85f7d 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -27,65 +27,11 @@
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
+#include "eventfd.h"
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
-static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
- unsigned int eventfd_async)
-{
- struct io_ev_fd *ev_fd;
- __s32 __user *fds = arg;
- int fd;
-
- ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
- lockdep_is_held(&ctx->uring_lock));
- if (ev_fd)
- return -EBUSY;
-
- if (copy_from_user(&fd, fds, sizeof(*fds)))
- return -EFAULT;
-
- ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
- if (!ev_fd)
- return -ENOMEM;
-
- ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
- if (IS_ERR(ev_fd->cq_ev_fd)) {
- int ret = PTR_ERR(ev_fd->cq_ev_fd);
- kfree(ev_fd);
- return ret;
- }
-
- spin_lock(&ctx->completion_lock);
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
-
- ev_fd->eventfd_async = eventfd_async;
- ctx->has_evfd = true;
- atomic_set(&ev_fd->refs, 1);
- atomic_set(&ev_fd->ops, 0);
- rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
- return 0;
-}
-
-int io_eventfd_unregister(struct io_ring_ctx *ctx)
-{
- struct io_ev_fd *ev_fd;
-
- ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
- lockdep_is_held(&ctx->uring_lock));
- if (ev_fd) {
- ctx->has_evfd = false;
- rcu_assign_pointer(ctx->io_ev_fd, NULL);
- if (atomic_dec_and_test(&ev_fd->refs))
- call_rcu(&ev_fd->rcu, io_eventfd_free);
- return 0;
- }
-
- return -ENXIO;
-}
-
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
--
2.43.0
prev parent reply other threads:[~2024-06-03 18:04 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-03 18:03 [PATCHSET 0/2] eventfd handling cleanups Jens Axboe
2024-06-03 18:03 ` [PATCH 1/2] io_uring/eventfd: move to more idiomatic RCU free usage Jens Axboe
2024-06-03 18:03 ` Jens Axboe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox