From: Ming Lei <[email protected]>
To: Xiaoguang Wang <[email protected]>
Cc: [email protected], [email protected],
[email protected], [email protected], [email protected],
[email protected], [email protected]
Subject: Re: [UBLKSRV] Add ebpf support.
Date: Thu, 16 Feb 2023 16:28:55 +0800 [thread overview]
Message-ID: <Y+3pR991R9nrdg5Y@T590> (raw)
In-Reply-To: <[email protected]>
On Wed, Feb 15, 2023 at 08:46:18AM +0800, Xiaoguang Wang wrote:
> Signed-off-by: Xiaoguang Wang <[email protected]>
> ---
> bpf/ublk.bpf.c | 168 +++++++++++++++++++++++++++++++++++++++++
> include/ublk_cmd.h | 2 +
> include/ublksrv.h | 8 ++
> include/ublksrv_priv.h | 1 +
> include/ublksrv_tgt.h | 1 +
> lib/ublksrv.c | 4 +
> lib/ublksrv_cmd.c | 21 ++++++
> tgt_loop.cpp | 31 +++++++-
> ublksrv_tgt.cpp | 33 ++++++++
> 9 files changed, 268 insertions(+), 1 deletion(-)
> create mode 100644 bpf/ublk.bpf.c
>
> diff --git a/bpf/ublk.bpf.c b/bpf/ublk.bpf.c
> new file mode 100644
> index 0000000..80e79de
> --- /dev/null
> +++ b/bpf/ublk.bpf.c
> @@ -0,0 +1,168 @@
> +#include "vmlinux.h"
Where is vmlinux.h?
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_core_read.h>
> +
> +
> +static long (*bpf_ublk_queue_sqe)(void *ctx, struct io_uring_sqe *sqe,
> + u32 sqe_len, u32 fd) = (void *) 212;
> +
> +int target_fd = -1;
> +
> +struct sqe_key {
> + u16 q_id;
> + u16 tag;
> + u32 res;
> + u64 offset;
> +};
> +
> +struct sqe_data {
> + char data[128];
> +};
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_HASH);
> + __uint(max_entries, 8192);
> + __type(key, struct sqe_key);
> + __type(value, struct sqe_data);
> +} sqes_map SEC(".maps");
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(max_entries, 128);
> + __type(key, int);
> + __type(value, int);
> +} uring_fd_map SEC(".maps");
> +
> +static inline void io_uring_prep_rw(__u8 op, struct io_uring_sqe *sqe, int fd,
> + const void *addr, unsigned len,
> + __u64 offset)
> +{
> + sqe->opcode = op;
> + sqe->flags = 0;
> + sqe->ioprio = 0;
> + sqe->fd = fd;
> + sqe->off = offset;
> + sqe->addr = (unsigned long) addr;
> + sqe->len = len;
> + sqe->fsync_flags = 0;
> + sqe->buf_index = 0;
> + sqe->personality = 0;
> + sqe->splice_fd_in = 0;
> + sqe->addr3 = 0;
> + sqe->__pad2[0] = 0;
> +}
> +
> +static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
> +{
> + io_uring_prep_rw(IORING_OP_NOP, sqe, -1, 0, 0, 0);
> +}
> +
> +static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
> + void *buf, unsigned nbytes, off_t offset)
> +{
> + io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
> +}
> +
> +static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
> + const void *buf, unsigned nbytes, off_t offset)
> +{
> + io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
> +}
> +
> +/*
> +static u64 submit_sqe(struct bpf_map *map, void *key, void *value, void *data)
> +{
> + struct io_uring_sqe *sqe = (struct io_uring_sqe *)value;
> + struct ublk_bpf_ctx *ctx = ((struct callback_ctx *)data)->ctx;
> + struct sqe_key *skey = (struct sqe_key *)key;
> + char fmt[] ="submit sqe for req[qid:%u tag:%u]\n";
> + char fmt2[] ="submit sqe test prep\n";
> + u16 qid, tag;
> + int q_id = skey->q_id, *ring_fd;
> +
> + bpf_trace_printk(fmt2, sizeof(fmt2));
> + ring_fd = bpf_map_lookup_elem(&uring_fd_map, &q_id);
> + if (ring_fd) {
> + bpf_trace_printk(fmt, sizeof(fmt), qid, skey->tag);
> + bpf_ublk_queue_sqe(ctx, sqe, 128, *ring_fd);
> + bpf_map_delete_elem(map, key);
> + }
> + return 0;
> +}
> +*/
> +
> +static inline __u64 build_user_data(unsigned tag, unsigned op,
> + unsigned tgt_data, unsigned is_target_io,
> + unsigned is_bpf_io)
> +{
> + return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63 |
> + (__u64)is_bpf_io << 60;
> +}
> +
> +SEC("ublk.s/")
> +int ublk_io_prep_prog(struct ublk_bpf_ctx *ctx)
> +{
> + struct io_uring_sqe *sqe;
> + struct sqe_data sd = {0};
> + struct sqe_key key;
> + u16 q_id = ctx->q_id;
> + u8 op; // = ctx->op;
> + u32 nr_sectors = ctx->nr_sectors;
> + u64 start_sector = ctx->start_sector;
> + char fmt_1[] ="ublk_io_prep_prog %d %d\n";
> +
> + key.q_id = ctx->q_id;
> + key.tag = ctx->tag;
> + key.offset = 0;
> + key.res = 0;
> +
> + bpf_probe_read_kernel(&op, 1, &ctx->op);
> + bpf_trace_printk(fmt_1, sizeof(fmt_1), q_id, op);
> + sqe = (struct io_uring_sqe *)&sd;
> + if (op == REQ_OP_READ) {
> + char fmt[] ="add read sae\n";
> +
> + bpf_trace_printk(fmt, sizeof(fmt));
> + io_uring_prep_read(sqe, target_fd, 0, nr_sectors << 9,
> + start_sector << 9);
> + sqe->user_data = build_user_data(ctx->tag, op, 0, 1, 1);
> + bpf_map_update_elem(&sqes_map, &key, &sd, BPF_NOEXIST);
> + } else if (op == REQ_OP_WRITE) {
> + char fmt[] ="add write sae\n";
> +
> + bpf_trace_printk(fmt, sizeof(fmt));
> +
> + io_uring_prep_write(sqe, target_fd, 0, nr_sectors << 9,
> + start_sector << 9);
> + sqe->user_data = build_user_data(ctx->tag, op, 0, 1, 1);
> + bpf_map_update_elem(&sqes_map, &key, &sd, BPF_NOEXIST);
> + } else {
> + ;
> + }
> + return 0;
> +}
> +
> +SEC("ublk.s/")
> +int ublk_io_submit_prog(struct ublk_bpf_ctx *ctx)
> +{
> + struct io_uring_sqe *sqe;
> + char fmt[] ="submit sqe for req[qid:%u tag:%u]\n";
> + int q_id = ctx->q_id, *ring_fd;
> + struct sqe_key key;
> +
> + key.q_id = ctx->q_id;
> + key.tag = ctx->tag;
> + key.offset = 0;
> + key.res = 0;
> +
> + sqe = bpf_map_lookup_elem(&sqes_map, &key);
> + ring_fd = bpf_map_lookup_elem(&uring_fd_map, &q_id);
> + if (ring_fd) {
> + bpf_trace_printk(fmt, sizeof(fmt), key.q_id, key.tag);
> + bpf_ublk_queue_sqe(ctx, sqe, 128, *ring_fd);
> + bpf_map_delete_elem(&sqes_map, &key);
> + }
> + return 0;
> +}
> +
> +char LICENSE[] SEC("license") = "GPL";
> diff --git a/include/ublk_cmd.h b/include/ublk_cmd.h
> index f6238cc..893ba8c 100644
> --- a/include/ublk_cmd.h
> +++ b/include/ublk_cmd.h
> @@ -17,6 +17,8 @@
> #define UBLK_CMD_STOP_DEV 0x07
> #define UBLK_CMD_SET_PARAMS 0x08
> #define UBLK_CMD_GET_PARAMS 0x09
> +#define UBLK_CMD_REG_BPF_PROG 0x0a
> +#define UBLK_CMD_UNREG_BPF_PROG 0x0b
> #define UBLK_CMD_START_USER_RECOVERY 0x10
> #define UBLK_CMD_END_USER_RECOVERY 0x11
> #define UBLK_CMD_GET_DEV_INFO2 0x12
> diff --git a/include/ublksrv.h b/include/ublksrv.h
> index d38bd46..f5deddb 100644
> --- a/include/ublksrv.h
> +++ b/include/ublksrv.h
> @@ -106,6 +106,7 @@ struct ublksrv_tgt_info {
> unsigned int nr_fds;
> int fds[UBLKSRV_TGT_MAX_FDS];
> void *tgt_data;
> + void *tgt_bpf_obj;
>
> /*
> * Extra IO slots for each queue, target code can reserve some
> @@ -263,6 +264,8 @@ struct ublksrv_tgt_type {
> int (*init_queue)(const struct ublksrv_queue *, void **queue_data_ptr);
> void (*deinit_queue)(const struct ublksrv_queue *);
>
> + int (*init_queue_bpf)(const struct ublksrv_dev *dev, const struct ublksrv_queue *q);
> +
> unsigned long reserved[5];
> };
>
> @@ -318,6 +321,11 @@ extern void ublksrv_ctrl_prep_recovery(struct ublksrv_ctrl_dev *dev,
> const char *recovery_jbuf);
> extern const char *ublksrv_ctrl_get_recovery_jbuf(const struct ublksrv_ctrl_dev *dev);
>
> +extern void ublksrv_ctrl_set_bpf_obj_info(struct ublksrv_ctrl_dev *dev,
> + void *obj);
> +extern int ublksrv_ctrl_reg_bpf_prog(struct ublksrv_ctrl_dev *dev,
> + int io_prep_fd, int io_submit_fd);
> +
> /* ublksrv device ("/dev/ublkcN") level APIs */
> extern const struct ublksrv_dev *ublksrv_dev_init(const struct ublksrv_ctrl_dev *
> ctrl_dev);
> diff --git a/include/ublksrv_priv.h b/include/ublksrv_priv.h
> index 2996baa..8da8866 100644
> --- a/include/ublksrv_priv.h
> +++ b/include/ublksrv_priv.h
> @@ -42,6 +42,7 @@ struct ublksrv_ctrl_dev {
>
> const char *tgt_type;
> const struct ublksrv_tgt_type *tgt_ops;
> + void *bpf_obj;
>
> /*
> * default is UBLKSRV_RUN_DIR but can be specified via command line,
> diff --git a/include/ublksrv_tgt.h b/include/ublksrv_tgt.h
> index 234d31e..e0db7d9 100644
> --- a/include/ublksrv_tgt.h
> +++ b/include/ublksrv_tgt.h
> @@ -9,6 +9,7 @@
> #include <getopt.h>
> #include <string.h>
> #include <stdarg.h>
> +#include <limits.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <sys/ioctl.h>
> diff --git a/lib/ublksrv.c b/lib/ublksrv.c
> index 96bed95..110ccb3 100644
> --- a/lib/ublksrv.c
> +++ b/lib/ublksrv.c
> @@ -603,6 +603,9 @@ skip_alloc_buf:
> goto fail;
> }
>
> + if (dev->tgt.ops->init_queue_bpf)
> + dev->tgt.ops->init_queue_bpf(tdev, local_to_tq(q));
> +
> ublksrv_dev_init_io_cmds(dev, q);
>
> /*
> @@ -723,6 +726,7 @@ const struct ublksrv_dev *ublksrv_dev_init(const struct ublksrv_ctrl_dev *ctrl_d
> }
>
> tgt->fds[0] = dev->cdev_fd;
> + tgt->tgt_bpf_obj = ctrl_dev->bpf_obj;
>
> ret = ublksrv_tgt_init(dev, ctrl_dev->tgt_type, ctrl_dev->tgt_ops,
> ctrl_dev->tgt_argc, ctrl_dev->tgt_argv);
> diff --git a/lib/ublksrv_cmd.c b/lib/ublksrv_cmd.c
> index 0d7265d..0101cb9 100644
> --- a/lib/ublksrv_cmd.c
> +++ b/lib/ublksrv_cmd.c
> @@ -502,6 +502,27 @@ int ublksrv_ctrl_end_recovery(struct ublksrv_ctrl_dev *dev, int daemon_pid)
> return ret;
> }
>
> +int ublksrv_ctrl_reg_bpf_prog(struct ublksrv_ctrl_dev *dev,
> + int io_prep_fd, int io_submit_fd)
> +{
> + struct ublksrv_ctrl_cmd_data data = {
> + .cmd_op = UBLK_CMD_REG_BPF_PROG,
> + .flags = CTRL_CMD_HAS_DATA,
> + };
> + int ret;
> +
> + data.data[0] = io_prep_fd;
> + data.data[1] = io_submit_fd;
> +
> + ret = __ublksrv_ctrl_cmd(dev, &data);
> + return ret;
> +}
> +
> +void ublksrv_ctrl_set_bpf_obj_info(struct ublksrv_ctrl_dev *dev, void *obj)
> +{
> + dev->bpf_obj = obj;
> +}
> +
> const struct ublksrv_ctrl_dev_info *ublksrv_ctrl_get_dev_info(
> const struct ublksrv_ctrl_dev *dev)
> {
> diff --git a/tgt_loop.cpp b/tgt_loop.cpp
> index 79a65d3..b1568fe 100644
> --- a/tgt_loop.cpp
> +++ b/tgt_loop.cpp
> @@ -4,7 +4,11 @@
>
> #include <poll.h>
> #include <sys/epoll.h>
> +#include <linux/bpf.h>
> +#include <bpf/bpf.h>
> +#include <bpf/libbpf.h>
> #include "ublksrv_tgt.h"
> +#include "bpf/.tmp/ublk.skel.h"
Where is bpf/.tmp/ublk.skel.h?
>
> static bool backing_supports_discard(char *name)
> {
> @@ -88,6 +92,20 @@ static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
> return 0;
> }
>
> +static int loop_init_queue_bpf(const struct ublksrv_dev *dev,
> + const struct ublksrv_queue *q)
> +{
> + int ret, q_id, ring_fd;
> + const struct ublksrv_tgt_info *tgt = &dev->tgt;
> + struct ublk_bpf *obj = (struct ublk_bpf*)tgt->tgt_bpf_obj;
> +
> + q_id = q->q_id;
> + ring_fd = q->ring_ptr->ring_fd;
> + ret = bpf_map_update_elem(bpf_map__fd(obj->maps.uring_fd_map), &q_id,
> + &ring_fd, 0);
> + return ret;
> +}
> +
> static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
> *argv[])
> {
> @@ -125,6 +143,7 @@ static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
> },
> };
> bool can_discard = false;
> + struct ublk_bpf *bpf_obj;
>
> strcpy(tgt_json.name, "loop");
>
> @@ -218,6 +237,10 @@ static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
> jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
> } while (ret < 0);
>
> + if (tgt->tgt_bpf_obj) {
> + bpf_obj = (struct ublk_bpf *)tgt->tgt_bpf_obj;
> + bpf_obj->data->target_fd = tgt->fds[1];
> + }
> return 0;
> }
>
> @@ -252,9 +275,14 @@ static int loop_queue_tgt_io(const struct ublksrv_queue *q,
> const struct ublk_io_data *data, int tag)
> {
> const struct ublksrv_io_desc *iod = data->iod;
> - struct io_uring_sqe *sqe = io_uring_get_sqe(q->ring_ptr);
> + struct io_uring_sqe *sqe;
> unsigned ublk_op = ublksrv_get_op(iod);
>
> + /* ebpf prog will handle read/write requests. */
> + if ((ublk_op == UBLK_IO_OP_READ) || (ublk_op == UBLK_IO_OP_WRITE))
> + return 1;
> +
> + sqe = io_uring_get_sqe(q->ring_ptr);
> if (!sqe)
> return 0;
>
> @@ -374,6 +402,7 @@ struct ublksrv_tgt_type loop_tgt_type = {
> .type = UBLKSRV_TGT_TYPE_LOOP,
> .name = "loop",
> .recovery_tgt = loop_recovery_tgt,
> + .init_queue_bpf = loop_init_queue_bpf,
> };
>
> static void tgt_loop_init() __attribute__((constructor));
> diff --git a/ublksrv_tgt.cpp b/ublksrv_tgt.cpp
> index 5ed328d..d3796cf 100644
> --- a/ublksrv_tgt.cpp
> +++ b/ublksrv_tgt.cpp
> @@ -2,6 +2,7 @@
>
> #include "config.h"
> #include "ublksrv_tgt.h"
> +#include "bpf/.tmp/ublk.skel.h"
Same as above.
Thanks,
Ming
next prev parent reply other threads:[~2023-02-16 8:30 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-15 0:41 [RFC 0/3] Add io_uring & ebpf based methods to implement zero-copy for ublk Xiaoguang Wang
2023-02-15 0:41 ` [RFC 1/3] bpf: add UBLK program type Xiaoguang Wang
2023-02-15 0:41 ` [RFC 2/3] io_uring: enable io_uring to submit sqes located in kernel Xiaoguang Wang
2023-02-15 0:41 ` [RFC 3/3] ublk_drv: add ebpf support Xiaoguang Wang
2023-02-16 8:11 ` Ming Lei
2023-02-16 12:12 ` Xiaoguang Wang
2023-02-17 3:02 ` Ming Lei
2023-02-17 10:46 ` Ming Lei
2023-02-22 14:13 ` Xiaoguang Wang
2023-02-15 0:46 ` [UBLKSRV] Add " Xiaoguang Wang
2023-02-16 8:28 ` Ming Lei [this message]
2023-02-16 9:17 ` Xiaoguang Wang
2023-02-15 8:40 ` [RFC 0/3] Add io_uring & ebpf based methods to implement zero-copy for ublk Ziyang Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y+3pR991R9nrdg5Y@T590 \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox