From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected]
Subject: [RFC 2/3] io_uring/bpf: allow to register and run BPF programs
Date: Mon, 11 Nov 2024 01:50:45 +0000 [thread overview]
Message-ID: <cffec449e9f6a37b0701f2a8fdd37688db25be55.1731285516.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
Let the user register a BPF_PROG_TYPE_IOURING BPF program to a ring.
The program will be run in the waiting loop every time something
happens, i.e. the task was woken up by a task_work / signal / etc.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 4 +++
include/uapi/linux/io_uring.h | 9 +++++
io_uring/bpf.c | 63 ++++++++++++++++++++++++++++++++++
io_uring/bpf.h | 41 ++++++++++++++++++++++
io_uring/io_uring.c | 15 ++++++++
io_uring/register.c | 7 ++++
6 files changed, 139 insertions(+)
create mode 100644 io_uring/bpf.h
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index ad5001102c86..50cee0d3622e 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -8,6 +8,8 @@
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
+struct io_bpf_ctx;
+
enum {
/*
* A hint to not wake right away but delay until there are enough of
@@ -246,6 +248,8 @@ struct io_ring_ctx {
enum task_work_notify_mode notify_method;
unsigned sq_thread_idle;
+
+ struct io_bpf_ctx *bpf_ctx;
} ____cacheline_aligned_in_smp;
/* submission data */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ba373deb8406..f2c2fefc8514 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -634,6 +634,8 @@ enum io_uring_register_op {
/* register fixed io_uring_reg_wait arguments */
IORING_REGISTER_CQWAIT_REG = 34,
+ IORING_REGISTER_BPF = 35,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -905,6 +907,13 @@ enum io_uring_socket_op {
SOCKET_URING_OP_SETSOCKOPT,
};
+struct io_uring_bpf_reg { /* argument block copied from userspace for IORING_REGISTER_BPF */
+ __u64 prog_fd; /* fd of the BPF_PROG_TYPE_IOURING program to attach */
+ __u32 flags; /* must be zero, otherwise registration fails with -EINVAL */
+ __u32 resv1; /* reserved, must be zero */
+ __u64 resv2[2]; /* reserved, both entries must be zero */
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/io_uring/bpf.c b/io_uring/bpf.c
index 6eb0c47b4aa9..8b7c74761c63 100644
--- a/io_uring/bpf.c
+++ b/io_uring/bpf.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
+#include <linux/filter.h>
+
+#include "bpf.h"
static const struct bpf_func_proto *
io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -22,3 +25,63 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
.get_func_proto = io_bpf_func_proto,
.is_valid_access = io_bpf_is_valid_access,
};
+
+int io_run_bpf(struct io_ring_ctx *ctx) /* run the ring's registered BPF program once and return its result */
+{
+ struct io_bpf_ctx *bc = ctx->bpf_ctx; /* not NULL-checked here: callers in this patch test io_bpf_enabled() first */
+ int ret;
+
+ mutex_lock(&ctx->uring_lock); /* the program executes with uring_lock held */
+ ret = bpf_prog_run_pin_on_cpu(bc->prog, bc); /* the io_bpf_ctx pointer itself is passed as the program context */
+ mutex_unlock(&ctx->uring_lock);
+ return ret; /* the wait loop compares this value against IOU_BPF_RET_STOP */
+}
+
+int io_unregister_bpf(struct io_ring_ctx *ctx) /* detach and free the ring's BPF context, if any */
+{
+ struct io_bpf_ctx *bc = ctx->bpf_ctx;
+
+ if (!bc)
+ return -ENXIO; /* nothing was registered */
+ bpf_prog_put(bc->prog); /* drop the program reference taken in io_register_bpf() */
+ kfree(bc);
+ ctx->bpf_ctx = NULL; /* makes io_bpf_enabled() false again and allows a later registration */
+ return 0;
+}
+
+/*
+ * Attach a BPF_PROG_TYPE_IOURING program to @ctx.
+ *
+ * @arg points to a single struct io_uring_bpf_reg (@nr_args must be 1) whose
+ * flags and reserved fields are all zero. Only rings created with
+ * IORING_SETUP_DEFER_TASKRUN are accepted, and at most one program can be
+ * attached to a ring at a time.
+ *
+ * On success the program reference is stored in ctx->bpf_ctx and is dropped
+ * again by io_unregister_bpf().
+ *
+ * Returns 0 on success, -EOPNOTSUPP without DEFER_TASKRUN, -EINVAL for bad
+ * arguments, -EFAULT if @arg cannot be copied, -EBADF if prog_fd cannot be a
+ * file descriptor, -ENXIO if a program is already attached, -ENOMEM on
+ * allocation failure, or the error reported by bpf_prog_get_type().
+ */
+int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
+		    unsigned int nr_args)
+{
+	struct io_uring_bpf_reg __user *bpf_reg_usr = arg;
+	struct io_uring_bpf_reg bpf_reg;
+	struct io_bpf_ctx *bc;
+	struct bpf_prog *prog;
+
+	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
+		return -EOPNOTSUPP;
+
+	if (nr_args != 1)
+		return -EINVAL;
+	if (copy_from_user(&bpf_reg, bpf_reg_usr, sizeof(bpf_reg)))
+		return -EFAULT;
+	if (bpf_reg.flags || bpf_reg.resv1 ||
+	    bpf_reg.resv2[0] || bpf_reg.resv2[1])
+		return -EINVAL;
+	/*
+	 * prog_fd is 64 bits wide in the uapi struct but is narrowed when
+	 * handed to bpf_prog_get_type(). Reject values that cannot be an fd
+	 * instead of letting garbage high bits alias a valid descriptor.
+	 */
+	if (bpf_reg.prog_fd > INT_MAX)
+		return -EBADF;
+
+	if (ctx->bpf_ctx)
+		return -ENXIO;
+
+	bc = kzalloc(sizeof(*bc), GFP_KERNEL);
+	if (!bc)
+		return -ENOMEM;
+
+	prog = bpf_prog_get_type(bpf_reg.prog_fd, BPF_PROG_TYPE_IOURING);
+	if (IS_ERR(prog)) {
+		kfree(bc);
+		return PTR_ERR(prog);
+	}
+
+	/* publish only once fully initialised; io_bpf_enabled() keys off bpf_ctx */
+	bc->prog = prog;
+	ctx->bpf_ctx = bc;
+	return 0;
+}
diff --git a/io_uring/bpf.h b/io_uring/bpf.h
new file mode 100644
index 000000000000..2b4e555ff07a
--- /dev/null
+++ b/io_uring/bpf.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef IOU_BPF_H
+#define IOU_BPF_H
+
+#include <linux/io_uring/bpf.h>
+#include <linux/io_uring_types.h>
+
+struct bpf_prog;
+
+struct io_bpf_ctx { /* per-ring BPF state, pointed to by io_ring_ctx::bpf_ctx */
+ struct io_bpf_ctx_kern kern; /* first member; io_run_bpf() passes the enclosing io_bpf_ctx pointer as the program context */
+ struct bpf_prog *prog; /* program reference obtained in io_register_bpf(), put in io_unregister_bpf() */
+};
+
+static inline bool io_bpf_enabled(struct io_ring_ctx *ctx) /* does this ring have a BPF program attached? */
+{
+ return IS_ENABLED(CONFIG_BPF) && ctx->bpf_ctx != NULL; /* false whenever CONFIG_BPF is off or no context is registered */
+}
+
+#ifdef CONFIG_BPF
+int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned int nr_args);
+int io_unregister_bpf(struct io_ring_ctx *ctx);
+int io_run_bpf(struct io_ring_ctx *ctx);
+
+#else
+static inline int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg, /* stub used when CONFIG_BPF is not set */
+ unsigned int nr_args)
+{
+ return -EOPNOTSUPP; /* registration is unavailable without CONFIG_BPF */
+}
+static inline int io_unregister_bpf(struct io_ring_ctx *ctx) /* stub used when CONFIG_BPF is not set */
+{
+ return -EOPNOTSUPP; /* nothing can have been registered without CONFIG_BPF */
+}
+/*
+ * Stub used when CONFIG_BPF is not set. io_bpf_enabled() is always false in
+ * that configuration, so callers that check it first never get here; still
+ * return a defined value, matching the sibling stubs, instead of falling off
+ * the end of a non-void function.
+ */
+static inline int io_run_bpf(struct io_ring_ctx *ctx)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#endif
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f34fa1ead2cf..82599e2a888a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -104,6 +104,7 @@
#include "rw.h"
#include "alloc_cache.h"
#include "eventfd.h"
+#include "bpf.h"
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
IOSQE_IO_HARDLINK | IOSQE_ASYNC)
@@ -2834,6 +2835,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
io_napi_busy_loop(ctx, &iowq);
+ if (io_bpf_enabled(ctx)) {
+ ret = io_run_bpf(ctx);
+ if (ret == IOU_BPF_RET_STOP)
+ return 0;
+ }
+
trace_io_uring_cqring_wait(ctx, min_events);
do {
unsigned long check_cq;
@@ -2879,6 +2886,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
if (ret < 0)
break;
+ if (io_bpf_enabled(ctx)) {
+ ret = io_run_bpf(ctx);
+ if (ret == IOU_BPF_RET_STOP)
+ break;
+ continue;
+ }
+
check_cq = READ_ONCE(ctx->check_cq);
if (unlikely(check_cq)) {
/* let the caller flush overflows, retry */
@@ -3009,6 +3023,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_futex_cache_free(ctx);
io_destroy_buffers(ctx);
io_unregister_cqwait_reg(ctx);
+ io_unregister_bpf(ctx);
mutex_unlock(&ctx->uring_lock);
if (ctx->sq_creds)
put_cred(ctx->sq_creds);
diff --git a/io_uring/register.c b/io_uring/register.c
index 45edfc57963a..2a8efeacf2db 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -30,6 +30,7 @@
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"
+#include "bpf.h"
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
@@ -846,6 +847,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_cqwait_reg(ctx, arg);
break;
+ case IORING_REGISTER_BPF:
+ ret = -EINVAL;
+ if (!arg)
+ break;
+ ret = io_register_bpf(ctx, arg, nr_args);
+ break;
default:
ret = -EINVAL;
break;
--
2.46.0
next prev parent reply other threads:[~2024-11-11 1:50 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-11 1:50 [RFC 0/3] Add BPF for io_uring Pavel Begunkov
2024-11-11 1:50 ` [RFC 1/3] bpf/io_uring: add io_uring program type Pavel Begunkov
2024-11-11 1:50 ` Pavel Begunkov [this message]
2024-11-13 8:21 ` [RFC 2/3] io_uring/bpf: allow to register and run BPF programs Ming Lei
2024-11-13 13:09 ` Pavel Begunkov
2024-11-11 1:50 ` [RFC 3/3] io_uring/bpf: add kfuncs for " Pavel Begunkov
2024-11-13 8:13 ` [RFC 0/3] Add BPF for io_uring Ming Lei
2024-11-13 13:09 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cffec449e9f6a37b0701f2a8fdd37688db25be55.1731285516.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox