From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, axboe@kernel.dk,
Martin KaFai Lau <martin.lau@linux.dev>,
bpf@vger.kernel.org,
Alexei Starovoitov <alexei.starovoitov@gmail.com>,
Andrii Nakryiko <andrii@kernel.org>,
ming.lei@redhat.com
Subject: [PATCH v3 06/10] io_uring/bpf: add handle events callback
Date: Thu, 13 Nov 2025 11:59:43 +0000
Message-ID: <5bcb51a2d391e7dfb732c5cdbd152dfbd021b51b.1763031077.git.asml.silence@gmail.com>
In-Reply-To: <cover.1763031077.git.asml.silence@gmail.com>
Add a struct_ops callback, handle_events, which is called from the CQ
waiting loop every time there is an event that might be interesting to
the program. The callback is passed the io_uring ctx as well as a loop
state, which the program can use to set the number of events it wants
to wait for and the timeout value.
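
For illustration, a minimal sketch of a BPF program implementing the
callback could look as follows. Beyond the callback signature and the
IOU_RES_* return codes, everything here (the SEC names, the map name,
and the way the target cq_tail is picked) is an assumption made for
the example, not something defined by this patch:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  SEC("struct_ops/loop")
  int BPF_PROG(handle_events, struct io_ring_ctx *ctx,
	       struct iou_loop_state *ls)
  {
	/* ask to be woken once 8 more CQEs have been posted */
	ls->cq_tail += 8;
	/* keep waiting; IOU_RES_STOP would return control to userspace */
	return IOU_RES_WAIT;
  }

  SEC(".struct_ops.link")
  struct io_uring_ops loop_ops = {
	.loop = (void *)handle_events,
  };

  char LICENSE[] SEC("license") = "GPL";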
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
io_uring/bpf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
io_uring/bpf.h | 26 ++++++++++++++++++
io_uring/io_uring.c | 15 ++++++++++-
3 files changed, 104 insertions(+), 1 deletion(-)
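
The userspace side is not part of this patch (registration is added
later in the series), but assuming a libbpf skeleton generated from
the sketch above, attaching could look roughly like this; the skeleton
name and how ring_fd is filled in are hypothetical:

  #include <bpf/libbpf.h>
  #include "iou_loop.skel.h"	/* hypothetical skeleton */

  static int attach_loop_ops(int ring_fd)
  {
	struct iou_loop *skel = iou_loop__open();
	struct bpf_link *link;

	if (!skel)
		return -1;
	/* ring_fd in struct io_uring_ops selects the ring to drive */
	skel->struct_ops.loop_ops->ring_fd = ring_fd;
	if (iou_loop__load(skel))
		return -1;
	link = bpf_map__attach_struct_ops(skel->maps.loop_ops);
	return link ? 0 : -1;	/* error cleanup trimmed for brevity */
  }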
diff --git a/io_uring/bpf.c b/io_uring/bpf.c
index 4cb5d25c9247..24dd2fe9134f 100644
--- a/io_uring/bpf.c
+++ b/io_uring/bpf.c
@@ -1,9 +1,18 @@
#include <linux/mutex.h>
+#include <linux/bpf_verifier.h>
#include "bpf.h"
#include "register.h"
+static const struct btf_type *loop_state_type;
+
+static int io_bpf_ops__loop(struct io_ring_ctx *ctx, struct iou_loop_state *ls)
+{
+ return IOU_RES_STOP;
+}
+
static struct io_uring_ops io_bpf_ops_stubs = {
+ .loop = io_bpf_ops__loop,
};
static bool bpf_io_is_valid_access(int off, int size,
@@ -25,6 +34,17 @@ static int bpf_io_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg, int off,
int size)
{
+ const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
+
+ if (t == loop_state_type) {
+ if (off >= offsetof(struct iou_loop_state, cq_tail) &&
+ off + size <= offsetofend(struct iou_loop_state, cq_tail))
+ return SCALAR_VALUE;
+ if (off >= offsetof(struct iou_loop_state, timeout) &&
+ off + size <= offsetofend(struct iou_loop_state, timeout))
+ return SCALAR_VALUE;
+ }
+
return -EACCES;
}
@@ -34,8 +54,25 @@ static const struct bpf_verifier_ops bpf_io_verifier_ops = {
.btf_struct_access = bpf_io_btf_struct_access,
};
+static const struct btf_type *
+io_lookup_struct_type(struct btf *btf, const char *name)
+{
+ s32 type_id;
+
+ type_id = btf_find_by_name_kind(btf, name, BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return NULL;
+ return btf_type_by_id(btf, type_id);
+}
+
static int bpf_io_init(struct btf *btf)
{
+ loop_state_type = io_lookup_struct_type(btf, "iou_loop_state");
+ if (!loop_state_type) {
+ pr_err("io_uring: Failed to locate iou_loop_state\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -91,3 +128,30 @@ static int __init io_uring_bpf_init(void)
return 0;
}
__initcall(io_uring_bpf_init);
+
+int io_run_cqwait_ops(struct io_ring_ctx *ctx, struct iou_loop_state *ls)
+{
+ int ret;
+
+ io_run_task_work();
+
+ guard(mutex)(&ctx->uring_lock);
+ if (unlikely(!ctx->bpf_ops))
+ return 1;
+
+ if (unlikely(task_sigpending(current)))
+ return -EINTR;
+
+ ret = ctx->bpf_ops->loop(ctx, ls);
+ if (ret == IOU_RES_STOP)
+ return 0;
+
+
+ if (io_local_work_pending(ctx)) {
+ unsigned nr_wait = ls->cq_tail - READ_ONCE(ctx->rings->cq.tail);
+ struct io_tw_state ts = {};
+
+ __io_run_local_work(ctx, ts, nr_wait, nr_wait);
+ }
+ return 1;
+}
diff --git a/io_uring/bpf.h b/io_uring/bpf.h
index 34a51a57103d..0b7246c4f05b 100644
--- a/io_uring/bpf.h
+++ b/io_uring/bpf.h
@@ -7,15 +7,41 @@
#include "io_uring.h"
+enum {
+ IOU_RES_WAIT,
+ IOU_RES_STOP,
+};
+
struct io_uring_ops {
+ int (*loop)(struct io_ring_ctx *ctx, struct iou_loop_state *ls);
+
+ __u32 ring_fd;
+ void *priv;
};
+static inline bool io_bpf_attached(struct io_ring_ctx *ctx)
+{
+ return IS_ENABLED(CONFIG_IO_URING_BPF) && ctx->bpf_ops != NULL;
+}
+
+static inline bool io_has_cqwait_ops(struct io_ring_ctx *ctx)
+{
+ return io_bpf_attached(ctx);
+}
+
+
#ifdef CONFIG_IO_URING_BPF
void io_unregister_bpf(struct io_ring_ctx *ctx);
+int io_run_cqwait_ops(struct io_ring_ctx *ctx, struct iou_loop_state *ls);
#else
static inline void io_unregister_bpf(struct io_ring_ctx *ctx)
{
}
+static inline int io_run_cqwait_ops(struct io_ring_ctx *ctx,
+ struct iou_loop_state *ls)
+{
+ return IOU_RES_STOP;
+}
#endif
#endif
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5b80987ebb2c..1d5e3dd6c608 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2633,6 +2633,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
ktime_t start_time;
int ret;
+
min_events = min_t(int, min_events, ctx->cq_entries);
if (!io_allowed_run_tw(ctx))
@@ -2644,8 +2645,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)))
io_cqring_do_overflow_flush(ctx);
- if (__io_cqring_events_user(ctx) >= min_events)
+
+ if (io_has_cqwait_ops(ctx)) {
+ if (ext_arg->min_time)
+ return -EINVAL;
+ } else if (__io_cqring_events_user(ctx) >= min_events) {
return 0;
+ }
init_waitqueue_func_entry(&iowq.wqe, io_wake_function);
iowq.wqe.private = current;
@@ -2706,6 +2712,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
__set_current_state(TASK_RUNNING);
atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
+ if (io_has_cqwait_ops(ctx)) {
+ ret = io_run_cqwait_ops(ctx, &iowq.ls);
+ if (ret <= 0)
+ break;
+ continue;
+ }
+
/*
* Run task_work after scheduling and before io_should_wake().
* If we got woken because of task_work being processed, run it
--
2.49.0