From: Ming Lei <[email protected]>
To: Pavel Begunkov <[email protected]>
Cc: [email protected], [email protected]
Subject: Re: [RFC 2/3] io_uring/bpf: allow to register and run BPF programs
Date: Wed, 13 Nov 2024 16:21:48 +0800 [thread overview]
Message-ID: <ZzRhnDXxkahNB0rx@fedora> (raw)
In-Reply-To: <cffec449e9f6a37b0701f2a8fdd37688db25be55.1731285516.git.asml.silence@gmail.com>
On Mon, Nov 11, 2024 at 01:50:45AM +0000, Pavel Begunkov wrote:
> Let the user register a BPF_PROG_TYPE_IOURING BPF program to a ring.
> The program will be run in the waiting loop every time something
> happens, i.e. the task was woken up by a task_work / signal / etc.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> include/linux/io_uring_types.h | 4 +++
> include/uapi/linux/io_uring.h | 9 +++++
> io_uring/bpf.c | 63 ++++++++++++++++++++++++++++++++++
> io_uring/bpf.h | 41 ++++++++++++++++++++++
> io_uring/io_uring.c | 15 ++++++++
> io_uring/register.c | 7 ++++
> 6 files changed, 139 insertions(+)
> create mode 100644 io_uring/bpf.h
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index ad5001102c86..50cee0d3622e 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -8,6 +8,8 @@
> #include <linux/llist.h>
> #include <uapi/linux/io_uring.h>
>
> +struct io_bpf_ctx;
> +
> enum {
> /*
> * A hint to not wake right away but delay until there are enough of
> @@ -246,6 +248,8 @@ struct io_ring_ctx {
>
> enum task_work_notify_mode notify_method;
> unsigned sq_thread_idle;
> +
> + struct io_bpf_ctx *bpf_ctx;
> } ____cacheline_aligned_in_smp;
>
> /* submission data */
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index ba373deb8406..f2c2fefc8514 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -634,6 +634,8 @@ enum io_uring_register_op {
> /* register fixed io_uring_reg_wait arguments */
> IORING_REGISTER_CQWAIT_REG = 34,
>
> + IORING_REGISTER_BPF = 35,
> +
> /* this goes last */
> IORING_REGISTER_LAST,
>
> @@ -905,6 +907,13 @@ enum io_uring_socket_op {
> SOCKET_URING_OP_SETSOCKOPT,
> };
>
> +struct io_uring_bpf_reg {
> + __u64 prog_fd;
> + __u32 flags;
> + __u32 resv1;
> + __u64 resv2[2];
> +};
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/io_uring/bpf.c b/io_uring/bpf.c
> index 6eb0c47b4aa9..8b7c74761c63 100644
> --- a/io_uring/bpf.c
> +++ b/io_uring/bpf.c
> @@ -1,6 +1,9 @@
> // SPDX-License-Identifier: GPL-2.0
>
> #include <linux/bpf.h>
> +#include <linux/filter.h>
> +
> +#include "bpf.h"
>
> static const struct bpf_func_proto *
> io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> @@ -22,3 +25,63 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
> .get_func_proto = io_bpf_func_proto,
> .is_valid_access = io_bpf_is_valid_access,
> };
> +
> +int io_run_bpf(struct io_ring_ctx *ctx)
> +{
> + struct io_bpf_ctx *bc = ctx->bpf_ctx;
> + int ret;
> +
> + mutex_lock(&ctx->uring_lock);
> + ret = bpf_prog_run_pin_on_cpu(bc->prog, bc);
> + mutex_unlock(&ctx->uring_lock);
> + return ret;
> +}
> +
> +int io_unregister_bpf(struct io_ring_ctx *ctx)
> +{
> + struct io_bpf_ctx *bc = ctx->bpf_ctx;
> +
> + if (!bc)
> + return -ENXIO;
> + bpf_prog_put(bc->prog);
> + kfree(bc);
> + ctx->bpf_ctx = NULL;
> + return 0;
> +}
> +
> +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned int nr_args)
> +{
> + struct __user io_uring_bpf_reg *bpf_reg_usr = arg;
> + struct io_uring_bpf_reg bpf_reg;
> + struct io_bpf_ctx *bc;
> + struct bpf_prog *prog;
> +
> + if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
> + return -EOPNOTSUPP;
> +
> + if (nr_args != 1)
> + return -EINVAL;
> + if (copy_from_user(&bpf_reg, bpf_reg_usr, sizeof(bpf_reg)))
> + return -EFAULT;
> + if (bpf_reg.flags || bpf_reg.resv1 ||
> + bpf_reg.resv2[0] || bpf_reg.resv2[1])
> + return -EINVAL;
> +
> + if (ctx->bpf_ctx)
> + return -ENXIO;
> +
> + bc = kzalloc(sizeof(*bc), GFP_KERNEL);
> + if (!bc)
> + return -ENOMEM;
> +
> + prog = bpf_prog_get_type(bpf_reg.prog_fd, BPF_PROG_TYPE_IOURING);
> + if (IS_ERR(prog)) {
> + kfree(bc);
> + return PTR_ERR(prog);
> + }
> +
> + bc->prog = prog;
> + ctx->bpf_ctx = bc;
> + return 0;
> +}
> diff --git a/io_uring/bpf.h b/io_uring/bpf.h
> new file mode 100644
> index 000000000000..2b4e555ff07a
> --- /dev/null
> +++ b/io_uring/bpf.h
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#ifndef IOU_BPF_H
> +#define IOU_BPF_H
> +
> +#include <linux/io_uring/bpf.h>
> +#include <linux/io_uring_types.h>
> +
> +struct bpf_prog;
> +
> +struct io_bpf_ctx {
> + struct io_bpf_ctx_kern kern;
> + struct bpf_prog *prog;
> +};
> +
> +static inline bool io_bpf_enabled(struct io_ring_ctx *ctx)
> +{
> + return IS_ENABLED(CONFIG_BPF) && ctx->bpf_ctx != NULL;
> +}
> +
> +#ifdef CONFIG_BPF
> +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned int nr_args);
> +int io_unregister_bpf(struct io_ring_ctx *ctx);
> +int io_run_bpf(struct io_ring_ctx *ctx);
> +
> +#else
> +static inline int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned int nr_args)
> +{
> + return -EOPNOTSUPP;
> +}
> +static inline int io_unregister_bpf(struct io_ring_ctx *ctx)
> +{
> + return -EOPNOTSUPP;
> +}
> +static inline int io_run_bpf(struct io_ring_ctx *ctx)
> +{
> +}
> +#endif
> +
> +#endif
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index f34fa1ead2cf..82599e2a888a 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -104,6 +104,7 @@
> #include "rw.h"
> #include "alloc_cache.h"
> #include "eventfd.h"
> +#include "bpf.h"
>
> #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
> IOSQE_IO_HARDLINK | IOSQE_ASYNC)
> @@ -2834,6 +2835,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
>
> io_napi_busy_loop(ctx, &iowq);
>
> + if (io_bpf_enabled(ctx)) {
> + ret = io_run_bpf(ctx);
> + if (ret == IOU_BPF_RET_STOP)
> + return 0;
> + }
> +
> trace_io_uring_cqring_wait(ctx, min_events);
> do {
> unsigned long check_cq;
> @@ -2879,6 +2886,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
> if (ret < 0)
> break;
>
> + if (io_bpf_enabled(ctx)) {
> + ret = io_run_bpf(ctx);
> + if (ret == IOU_BPF_RET_STOP)
> + break;
> + continue;
> + }
I believe using 'struct_ops' would be a much simpler way to run the prog and return the result.
Then you wouldn't need any bpf core changes or the bpf registration code.
Thanks,
Ming
next prev parent reply other threads:[~2024-11-13 8:22 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-11 1:50 [RFC 0/3] Add BPF for io_uring Pavel Begunkov
2024-11-11 1:50 ` [RFC 1/3] bpf/io_uring: add io_uring program type Pavel Begunkov
2024-11-11 1:50 ` [RFC 2/3] io_uring/bpf: allow to register and run BPF programs Pavel Begunkov
2024-11-13 8:21 ` Ming Lei [this message]
2024-11-13 13:09 ` Pavel Begunkov
2024-11-11 1:50 ` [RFC 3/3] io_uring/bpf: add kfuncs for " Pavel Begunkov
2024-11-13 8:13 ` [RFC 0/3] Add BPF for io_uring Ming Lei
2024-11-13 13:09 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZzRhnDXxkahNB0rx@fedora \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox