public inbox for [email protected]
 help / color / mirror / Atom feed
From: Ming Lei <[email protected]>
To: Pavel Begunkov <[email protected]>
Cc: [email protected], [email protected]
Subject: Re: [RFC 2/3] io_uring/bpf: allow to register and run BPF programs
Date: Wed, 13 Nov 2024 16:21:48 +0800	[thread overview]
Message-ID: <ZzRhnDXxkahNB0rx@fedora> (raw)
In-Reply-To: <cffec449e9f6a37b0701f2a8fdd37688db25be55.1731285516.git.asml.silence@gmail.com>

On Mon, Nov 11, 2024 at 01:50:45AM +0000, Pavel Begunkov wrote:
> Let the user register a BPF_PROG_TYPE_IOURING BPF program to a ring.
> The program will be run in the waiting loop every time something
> happens, i.e. the task was woken up by a task_work / signal / etc.
> 
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>  include/linux/io_uring_types.h |  4 +++
>  include/uapi/linux/io_uring.h  |  9 +++++
>  io_uring/bpf.c                 | 63 ++++++++++++++++++++++++++++++++++
>  io_uring/bpf.h                 | 41 ++++++++++++++++++++++
>  io_uring/io_uring.c            | 15 ++++++++
>  io_uring/register.c            |  7 ++++
>  6 files changed, 139 insertions(+)
>  create mode 100644 io_uring/bpf.h
> 
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index ad5001102c86..50cee0d3622e 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -8,6 +8,8 @@
>  #include <linux/llist.h>
>  #include <uapi/linux/io_uring.h>
>  
> +struct io_bpf_ctx;
> +
>  enum {
>  	/*
>  	 * A hint to not wake right away but delay until there are enough of
> @@ -246,6 +248,8 @@ struct io_ring_ctx {
>  
>  		enum task_work_notify_mode	notify_method;
>  		unsigned			sq_thread_idle;
> +
> +		struct io_bpf_ctx		*bpf_ctx;
>  	} ____cacheline_aligned_in_smp;
>  
>  	/* submission data */
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index ba373deb8406..f2c2fefc8514 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -634,6 +634,8 @@ enum io_uring_register_op {
>  	/* register fixed io_uring_reg_wait arguments */
>  	IORING_REGISTER_CQWAIT_REG		= 34,
>  
> +	IORING_REGISTER_BPF			= 35,
> +
>  	/* this goes last */
>  	IORING_REGISTER_LAST,
>  
> @@ -905,6 +907,13 @@ enum io_uring_socket_op {
>  	SOCKET_URING_OP_SETSOCKOPT,
>  };
>  
> +struct io_uring_bpf_reg {
> +	__u64		prog_fd;
> +	__u32		flags;
> +	__u32		resv1;
> +	__u64		resv2[2];
> +};
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/io_uring/bpf.c b/io_uring/bpf.c
> index 6eb0c47b4aa9..8b7c74761c63 100644
> --- a/io_uring/bpf.c
> +++ b/io_uring/bpf.c
> @@ -1,6 +1,9 @@
>  // SPDX-License-Identifier: GPL-2.0
>  
>  #include <linux/bpf.h>
> +#include <linux/filter.h>
> +
> +#include "bpf.h"
>  
>  static const struct bpf_func_proto *
>  io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> @@ -22,3 +25,63 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
>  	.get_func_proto			= io_bpf_func_proto,
>  	.is_valid_access		= io_bpf_is_valid_access,
>  };
> +
> +int io_run_bpf(struct io_ring_ctx *ctx)
> +{
> +	struct io_bpf_ctx *bc = ctx->bpf_ctx;
> +	int ret;
> +
> +	mutex_lock(&ctx->uring_lock);
> +	ret = bpf_prog_run_pin_on_cpu(bc->prog, bc);
> +	mutex_unlock(&ctx->uring_lock);
> +	return ret;
> +}
> +
> +int io_unregister_bpf(struct io_ring_ctx *ctx)
> +{
> +	struct io_bpf_ctx *bc = ctx->bpf_ctx;
> +
> +	if (!bc)
> +		return -ENXIO;
> +	bpf_prog_put(bc->prog);
> +	kfree(bc);
> +	ctx->bpf_ctx = NULL;
> +	return 0;
> +}
> +
> +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> +		    unsigned int nr_args)
> +{
> +	struct __user io_uring_bpf_reg *bpf_reg_usr = arg;
> +	struct io_uring_bpf_reg bpf_reg;
> +	struct io_bpf_ctx *bc;
> +	struct bpf_prog *prog;
> +
> +	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
> +		return -EOPNOTSUPP;
> +
> +	if (nr_args != 1)
> +		return -EINVAL;
> +	if (copy_from_user(&bpf_reg, bpf_reg_usr, sizeof(bpf_reg)))
> +		return -EFAULT;
> +	if (bpf_reg.flags || bpf_reg.resv1 ||
> +	    bpf_reg.resv2[0] || bpf_reg.resv2[1])
> +		return -EINVAL;
> +
> +	if (ctx->bpf_ctx)
> +		return -ENXIO;
> +
> +	bc = kzalloc(sizeof(*bc), GFP_KERNEL);
> +	if (!bc)
> +		return -ENOMEM;
> +
> +	prog = bpf_prog_get_type(bpf_reg.prog_fd, BPF_PROG_TYPE_IOURING);
> +	if (IS_ERR(prog)) {
> +		kfree(bc);
> +		return PTR_ERR(prog);
> +	}
> +
> +	bc->prog = prog;
> +	ctx->bpf_ctx = bc;
> +	return 0;
> +}
> diff --git a/io_uring/bpf.h b/io_uring/bpf.h
> new file mode 100644
> index 000000000000..2b4e555ff07a
> --- /dev/null
> +++ b/io_uring/bpf.h
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#ifndef IOU_BPF_H
> +#define IOU_BPF_H
> +
> +#include <linux/io_uring/bpf.h>
> +#include <linux/io_uring_types.h>
> +
> +struct bpf_prog;
> +
> +struct io_bpf_ctx {
> +	struct io_bpf_ctx_kern kern;
> +	struct bpf_prog *prog;
> +};
> +
> +static inline bool io_bpf_enabled(struct io_ring_ctx *ctx)
> +{
> +	return IS_ENABLED(CONFIG_BPF) && ctx->bpf_ctx != NULL;
> +}
> +
> +#ifdef CONFIG_BPF
> +int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> +		    unsigned int nr_args);
> +int io_unregister_bpf(struct io_ring_ctx *ctx);
> +int io_run_bpf(struct io_ring_ctx *ctx);
> +
> +#else
> +static inline int io_register_bpf(struct io_ring_ctx *ctx, void __user *arg,
> +				  unsigned int nr_args)
> +{
> +	return -EOPNOTSUPP;
> +}
> +static inline int io_unregister_bpf(struct io_ring_ctx *ctx)
> +{
> +	return -EOPNOTSUPP;
> +}
> +static inline int io_run_bpf(struct io_ring_ctx *ctx)
> +{
> +}
> +#endif
> +
> +#endif
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index f34fa1ead2cf..82599e2a888a 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -104,6 +104,7 @@
>  #include "rw.h"
>  #include "alloc_cache.h"
>  #include "eventfd.h"
> +#include "bpf.h"
>  
>  #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
>  			  IOSQE_IO_HARDLINK | IOSQE_ASYNC)
> @@ -2834,6 +2835,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
>  
>  	io_napi_busy_loop(ctx, &iowq);
>  
> +	if (io_bpf_enabled(ctx)) {
> +		ret = io_run_bpf(ctx);
> +		if (ret == IOU_BPF_RET_STOP)
> +			return 0;
> +	}
> +
>  	trace_io_uring_cqring_wait(ctx, min_events);
>  	do {
>  		unsigned long check_cq;
> @@ -2879,6 +2886,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
>  		if (ret < 0)
>  			break;
>  
> +		if (io_bpf_enabled(ctx)) {
> +			ret = io_run_bpf(ctx);
> +			if (ret == IOU_BPF_RET_STOP)
> +				break;
> +			continue;
> +		}

I believe 'struct_ops' is a much simpler way to run the prog and return the
result. Then you wouldn't need any bpf core changes or the bpf register code.


Thanks,
Ming


  reply	other threads:[~2024-11-13  8:22 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-11  1:50 [RFC 0/3] Add BPF for io_uring Pavel Begunkov
2024-11-11  1:50 ` [RFC 1/3] bpf/io_uring: add io_uring program type Pavel Begunkov
2024-11-11  1:50 ` [RFC 2/3] io_uring/bpf: allow to register and run BPF programs Pavel Begunkov
2024-11-13  8:21   ` Ming Lei [this message]
2024-11-13 13:09     ` Pavel Begunkov
2024-11-11  1:50 ` [RFC 3/3] io_uring/bpf: add kfuncs for " Pavel Begunkov
2024-11-13  8:13 ` [RFC 0/3] Add BPF for io_uring Ming Lei
2024-11-13 13:09   ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZzRhnDXxkahNB0rx@fedora \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox