From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: io-uring@vger.kernel.org, Pavel Begunkov <asml.silence@gmail.com>,
Caleb Sander Mateos <csander@purestorage.com>,
Stefan Metzmacher <metze@samba.org>,
Ming Lei <ming.lei@redhat.com>
Subject: [PATCH V2 06/13] io_uring: bpf: implement struct_ops registration
Date: Tue, 6 Jan 2026 18:11:15 +0800 [thread overview]
Message-ID: <20260106101126.4064990-7-ming.lei@redhat.com> (raw)
In-Reply-To: <20260106101126.4064990-1-ming.lei@redhat.com>
Complete the BPF struct_ops registration mechanism by implementing
refcount-based lifecycle management:
- Add refcount field to struct uring_bpf_ops_kern for tracking active
requests
- Add wait_queue_head_t bpf_wq to struct io_ring_ctx for synchronizing
unregistration with in-flight requests
- Add ring_fd field to struct uring_bpf_ops (copied in
  uring_bpf_ops_init_member()) so (un)registration can locate the target
  ring via io_uring_register_get_file()
- Implement io_bpf_reg_unreg() to handle registration (refcount=1) and
  unregistration (wait for in-flight requests to complete)
- Update io_uring_bpf_prep() to increment refcount on successful prep and
  mark the request with REQ_F_NEED_CLEANUP (ensuring the cleanup hook runs
  and drops the reference), and to reject new requests when refcount is
  zero (unregistration in progress)
- Update io_uring_bpf_cleanup() to decrement refcount and wake up waiters
when it reaches zero
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
include/linux/io_uring_types.h | 2 +
io_uring/bpf_op.c | 104 ++++++++++++++++++++++++++++++++-
io_uring/bpf_op.h | 3 +
3 files changed, 106 insertions(+), 3 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 62ff38b3ce1e..b8eb9d8ba4ce 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -474,6 +474,8 @@ struct io_ring_ctx {
struct io_mapped_region ring_region;
/* used for optimised request parameter and wait argument passing */
struct io_mapped_region param_region;
+
+ wait_queue_head_t bpf_wq;
};
/*
diff --git a/io_uring/bpf_op.c b/io_uring/bpf_op.c
index f616416652e9..d6f146abe304 100644
--- a/io_uring/bpf_op.c
+++ b/io_uring/bpf_op.c
@@ -12,6 +12,7 @@
#include <linux/filter.h>
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
+#include "register.h"
#include "bpf_op.h"
static inline unsigned char uring_bpf_get_op(u32 op_flags)
@@ -29,7 +30,9 @@ int io_uring_bpf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct uring_bpf_data *data = io_kiocb_to_cmd(req, struct uring_bpf_data);
u32 opf = READ_ONCE(sqe->bpf_op_flags);
unsigned char bpf_op = uring_bpf_get_op(opf);
+ struct uring_bpf_ops_kern *ops_kern;
const struct uring_bpf_ops *ops;
+ int ret;
if (unlikely(!(req->ctx->flags & IORING_SETUP_BPF_OP)))
goto fail;
@@ -37,11 +40,20 @@ int io_uring_bpf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (bpf_op >= IO_RING_MAX_BPF_OPS)
return -EINVAL;
- ops = req->ctx->bpf_ops[bpf_op].ops;
+ ops_kern = &req->ctx->bpf_ops[bpf_op];
+ ops = ops_kern->ops;
+ if (!ops || !ops->prep_fn || !ops_kern->refcount)
+ goto fail;
+
data->opf = opf;
data->ops = ops;
- if (ops && ops->prep_fn)
- return ops->prep_fn(data, sqe);
+ ret = ops->prep_fn(data, sqe);
+ if (!ret) {
+ /* Only increment refcount on success (uring_lock already held) */
+ req->flags |= REQ_F_NEED_CLEANUP;
+ ops_kern->refcount++;
+ }
+ return ret;
fail:
return -EOPNOTSUPP;
}
@@ -78,9 +90,18 @@ void io_uring_bpf_cleanup(struct io_kiocb *req)
{
struct uring_bpf_data *data = io_kiocb_to_cmd(req, struct uring_bpf_data);
const struct uring_bpf_ops *ops = data->ops;
+ struct uring_bpf_ops_kern *ops_kern;
+ unsigned char bpf_op;
if (ops && ops->cleanup_fn)
ops->cleanup_fn(data);
+
+ bpf_op = uring_bpf_get_op(data->opf);
+ ops_kern = &req->ctx->bpf_ops[bpf_op];
+
+ /* Decrement refcount after cleanup (uring_lock already held) */
+ if (--ops_kern->refcount == 0)
+ wake_up(&req->ctx->bpf_wq);
}
static const struct btf_type *uring_bpf_data_type;
@@ -157,10 +178,82 @@ static int uring_bpf_ops_init_member(const struct btf_type *t,
*/
kuring_bpf_ops->id = uuring_bpf_ops->id;
return 1;
+ case offsetof(struct uring_bpf_ops, ring_fd):
+ kuring_bpf_ops->ring_fd = uuring_bpf_ops->ring_fd;
+ return 1;
}
return 0;
}
+static int io_bpf_reg_unreg(struct uring_bpf_ops *ops, bool reg)
+{
+ struct uring_bpf_ops_kern *ops_kern;
+ struct io_ring_ctx *ctx;
+ struct file *file;
+ int ret = -EINVAL;
+
+ if (ops->id >= IO_RING_MAX_BPF_OPS)
+ return -EINVAL;
+
+ file = io_uring_register_get_file(ops->ring_fd, false);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = file->private_data;
+ if (!(ctx->flags & IORING_SETUP_BPF_OP))
+ goto out;
+
+ ops_kern = &ctx->bpf_ops[ops->id];
+
+ mutex_lock(&ctx->uring_lock);
+ if (reg) {
+ /* Registration: set refcount to 1 and store ops */
+ if (ops_kern->ops) {
+ ret = -EBUSY;
+ } else {
+ ops_kern->ops = ops;
+ ops_kern->refcount = 1;
+ ret = 0;
+ }
+ } else {
+ /* Unregistration */
+ if (!ops_kern->ops) {
+ ret = -EINVAL;
+ } else {
+ ops_kern->refcount--;
+retry:
+ if (ops_kern->refcount == 0) {
+ ops_kern->ops = NULL;
+ ret = 0;
+ } else {
+ mutex_unlock(&ctx->uring_lock);
+ wait_event(ctx->bpf_wq, ops_kern->refcount == 0);
+ mutex_lock(&ctx->uring_lock);
+ goto retry;
+ }
+ }
+ }
+ mutex_unlock(&ctx->uring_lock);
+
+out:
+ fput(file);
+ return ret;
+}
+
+static int io_bpf_reg(void *kdata, struct bpf_link *link)
+{
+ struct uring_bpf_ops *ops = kdata;
+
+ return io_bpf_reg_unreg(ops, true);
+}
+
+static void io_bpf_unreg(void *kdata, struct bpf_link *link)
+{
+ struct uring_bpf_ops *ops = kdata;
+
+ io_bpf_reg_unreg(ops, false);
+}
+
static int io_bpf_prep_io(struct uring_bpf_data *data, const struct io_uring_sqe *sqe)
{
return 0;
@@ -191,6 +284,8 @@ static struct bpf_struct_ops bpf_uring_bpf_ops = {
.init = uring_bpf_ops_init,
.check_member = uring_bpf_ops_check_member,
.init_member = uring_bpf_ops_init_member,
+ .reg = io_bpf_reg,
+ .unreg = io_bpf_unreg,
.name = "uring_bpf_ops",
.cfi_stubs = &__bpf_uring_bpf_ops,
.owner = THIS_MODULE,
@@ -218,6 +313,8 @@ static const struct btf_kfunc_id_set uring_kfunc_set = {
int io_bpf_alloc(struct io_ring_ctx *ctx)
{
+ init_waitqueue_head(&ctx->bpf_wq);
+
if (!(ctx->flags & IORING_SETUP_BPF_OP))
return 0;
@@ -225,6 +322,7 @@ int io_bpf_alloc(struct io_ring_ctx *ctx)
sizeof(struct uring_bpf_ops_kern), GFP_KERNEL);
if (!ctx->bpf_ops)
return -ENOMEM;
+
return 0;
}
diff --git a/io_uring/bpf_op.h b/io_uring/bpf_op.h
index 99708140992f..9de0606f5d25 100644
--- a/io_uring/bpf_op.h
+++ b/io_uring/bpf_op.h
@@ -27,14 +27,17 @@ typedef void (*uring_bpf_cleanup_t)(struct uring_bpf_data *data);
struct uring_bpf_ops {
unsigned short id;
+ int ring_fd;
uring_bpf_prep_t prep_fn;
uring_bpf_issue_t issue_fn;
uring_bpf_fail_t fail_fn;
uring_bpf_cleanup_t cleanup_fn;
};
+/* TODO: manage it via `io_rsrc_node` */
struct uring_bpf_ops_kern {
const struct uring_bpf_ops *ops;
+ int refcount;
};
#ifdef CONFIG_IO_URING_BPF_OP
--
2.47.0
next prev parent reply other threads:[~2026-01-06 10:12 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-06 10:11 [PATCH v2 0/13] io_uring: add IORING_OP_BPF for extending io_uring Ming Lei
2026-01-06 10:11 ` [PATCH V2 01/13] io_uring: make io_import_fixed() global Ming Lei
2026-01-06 10:11 ` [PATCH V2 02/13] io_uring: refactor io_prep_reg_iovec() for BPF kfunc use Ming Lei
2026-01-06 10:11 ` [PATCH V2 03/13] io_uring: refactor io_import_reg_vec() " Ming Lei
2026-01-06 10:11 ` [PATCH V2 04/13] io_uring: prepare for extending io_uring with bpf Ming Lei
2026-01-06 10:11 ` [PATCH V2 05/13] io_uring: bpf: extend io_uring with bpf struct_ops Ming Lei
2026-01-06 10:11 ` Ming Lei [this message]
2026-01-06 10:11 ` [PATCH V2 07/13] io_uring: bpf: add BPF buffer descriptor for IORING_OP_BPF Ming Lei
2026-01-06 10:11 ` [PATCH V2 08/13] io_uring: bpf: add uring_bpf_memcpy() kfunc Ming Lei
2026-01-06 10:11 ` [PATCH V2 09/13] selftests/io_uring: update mini liburing Ming Lei
2026-01-06 10:11 ` [PATCH V2 10/13] selftests/io_uring: add BPF struct_ops and kfunc tests Ming Lei
2026-01-06 10:11 ` [PATCH V2 11/13] selftests/io_uring: add bpf_memcpy selftest for uring_bpf_memcpy() kfunc Ming Lei
2026-01-06 10:11 ` [PATCH V2 12/13] selftests/io_uring: add copy_user_to_fixed() and copy_fixed_to_user() bpf_memcpy tests Ming Lei
2026-01-06 10:11 ` [PATCH V2 13/13] selftests/io_uring: add copy_user_to_reg_vec() and copy_reg_vec_to_user() " Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260106101126.4064990-7-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=io-uring@vger.kernel.org \
--cc=metze@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox