From: Xiaoguang Wang <[email protected]>
To: [email protected], [email protected],
[email protected]
Cc: [email protected], [email protected], [email protected],
[email protected]
Subject: [UBLKSRV] Add ebpf support.
Date: Wed, 15 Feb 2023 08:46:18 +0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
Signed-off-by: Xiaoguang Wang <[email protected]>
---
bpf/ublk.bpf.c | 168 +++++++++++++++++++++++++++++++++++++++++
include/ublk_cmd.h | 2 +
include/ublksrv.h | 8 ++
include/ublksrv_priv.h | 1 +
include/ublksrv_tgt.h | 1 +
lib/ublksrv.c | 4 +
lib/ublksrv_cmd.c | 21 ++++++
tgt_loop.cpp | 31 +++++++-
ublksrv_tgt.cpp | 33 ++++++++
9 files changed, 268 insertions(+), 1 deletion(-)
create mode 100644 bpf/ublk.bpf.c
diff --git a/bpf/ublk.bpf.c b/bpf/ublk.bpf.c
new file mode 100644
index 0000000..80e79de
--- /dev/null
+++ b/bpf/ublk.bpf.c
@@ -0,0 +1,168 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+
+static long (*bpf_ublk_queue_sqe)(void *ctx, struct io_uring_sqe *sqe,
+ u32 sqe_len, u32 fd) = (void *) 212;
+
+int target_fd = -1;
+
+struct sqe_key {
+ u16 q_id;
+ u16 tag;
+ u32 res;
+ u64 offset;
+};
+
+struct sqe_data {
+ char data[128];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 8192);
+ __type(key, struct sqe_key);
+ __type(value, struct sqe_data);
+} sqes_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 128);
+ __type(key, int);
+ __type(value, int);
+} uring_fd_map SEC(".maps");
+
+static inline void io_uring_prep_rw(__u8 op, struct io_uring_sqe *sqe, int fd,
+ const void *addr, unsigned len,
+ __u64 offset)
+{
+ sqe->opcode = op;
+ sqe->flags = 0;
+ sqe->ioprio = 0;
+ sqe->fd = fd;
+ sqe->off = offset;
+ sqe->addr = (unsigned long) addr;
+ sqe->len = len;
+ sqe->fsync_flags = 0;
+ sqe->buf_index = 0;
+ sqe->personality = 0;
+ sqe->splice_fd_in = 0;
+ sqe->addr3 = 0;
+ sqe->__pad2[0] = 0;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+ io_uring_prep_rw(IORING_OP_NOP, sqe, -1, 0, 0, 0);
+}
+
+static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
+ void *buf, unsigned nbytes, off_t offset)
+{
+ io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
+ const void *buf, unsigned nbytes, off_t offset)
+{
+ io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
+}
+
+/*
+static u64 submit_sqe(struct bpf_map *map, void *key, void *value, void *data)
+{
+ struct io_uring_sqe *sqe = (struct io_uring_sqe *)value;
+ struct ublk_bpf_ctx *ctx = ((struct callback_ctx *)data)->ctx;
+ struct sqe_key *skey = (struct sqe_key *)key;
+ char fmt[] ="submit sqe for req[qid:%u tag:%u]\n";
+ char fmt2[] ="submit sqe test prep\n";
+ u16 qid, tag;
+ int q_id = skey->q_id, *ring_fd;
+
+ bpf_trace_printk(fmt2, sizeof(fmt2));
+ ring_fd = bpf_map_lookup_elem(&uring_fd_map, &q_id);
+ if (ring_fd) {
+ bpf_trace_printk(fmt, sizeof(fmt), skey->q_id, skey->tag);
+ bpf_ublk_queue_sqe(ctx, sqe, 128, *ring_fd);
+ bpf_map_delete_elem(map, key);
+ }
+ return 0;
+}
+*/
+
+static inline __u64 build_user_data(unsigned tag, unsigned op,
+ unsigned tgt_data, unsigned is_target_io,
+ unsigned is_bpf_io)
+{
+ return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63 |
+ (__u64)is_bpf_io << 60;
+}
+
+SEC("ublk.s/")
+int ublk_io_prep_prog(struct ublk_bpf_ctx *ctx)
+{
+ struct io_uring_sqe *sqe;
+ struct sqe_data sd = {0};
+ struct sqe_key key;
+ u16 q_id = ctx->q_id;
+ u8 op; // = ctx->op;
+ u32 nr_sectors = ctx->nr_sectors;
+ u64 start_sector = ctx->start_sector;
+ char fmt_1[] ="ublk_io_prep_prog %d %d\n";
+
+ key.q_id = ctx->q_id;
+ key.tag = ctx->tag;
+ key.offset = 0;
+ key.res = 0;
+
+ bpf_probe_read_kernel(&op, 1, &ctx->op);
+ bpf_trace_printk(fmt_1, sizeof(fmt_1), q_id, op);
+ sqe = (struct io_uring_sqe *)&sd;
+ if (op == REQ_OP_READ) {
+ char fmt[] ="add read sae\n";
+
+ bpf_trace_printk(fmt, sizeof(fmt));
+ io_uring_prep_read(sqe, target_fd, 0, nr_sectors << 9,
+ start_sector << 9);
+ sqe->user_data = build_user_data(ctx->tag, op, 0, 1, 1);
+ bpf_map_update_elem(&sqes_map, &key, &sd, BPF_NOEXIST);
+ } else if (op == REQ_OP_WRITE) {
+ char fmt[] ="add write sae\n";
+
+ bpf_trace_printk(fmt, sizeof(fmt));
+
+ io_uring_prep_write(sqe, target_fd, 0, nr_sectors << 9,
+ start_sector << 9);
+ sqe->user_data = build_user_data(ctx->tag, op, 0, 1, 1);
+ bpf_map_update_elem(&sqes_map, &key, &sd, BPF_NOEXIST);
+ } else {
+ ;
+ }
+ return 0;
+}
+
+SEC("ublk.s/")
+int ublk_io_submit_prog(struct ublk_bpf_ctx *ctx)
+{
+ struct io_uring_sqe *sqe;
+ char fmt[] ="submit sqe for req[qid:%u tag:%u]\n";
+ int q_id = ctx->q_id, *ring_fd;
+ struct sqe_key key;
+
+ key.q_id = ctx->q_id;
+ key.tag = ctx->tag;
+ key.offset = 0;
+ key.res = 0;
+
+ sqe = bpf_map_lookup_elem(&sqes_map, &key);
+ ring_fd = bpf_map_lookup_elem(&uring_fd_map, &q_id);
+ if (ring_fd) {
+ bpf_trace_printk(fmt, sizeof(fmt), key.q_id, key.tag);
+ bpf_ublk_queue_sqe(ctx, sqe, 128, *ring_fd);
+ bpf_map_delete_elem(&sqes_map, &key);
+ }
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/include/ublk_cmd.h b/include/ublk_cmd.h
index f6238cc..893ba8c 100644
--- a/include/ublk_cmd.h
+++ b/include/ublk_cmd.h
@@ -17,6 +17,8 @@
#define UBLK_CMD_STOP_DEV 0x07
#define UBLK_CMD_SET_PARAMS 0x08
#define UBLK_CMD_GET_PARAMS 0x09
+#define UBLK_CMD_REG_BPF_PROG 0x0a
+#define UBLK_CMD_UNREG_BPF_PROG 0x0b
#define UBLK_CMD_START_USER_RECOVERY 0x10
#define UBLK_CMD_END_USER_RECOVERY 0x11
#define UBLK_CMD_GET_DEV_INFO2 0x12
diff --git a/include/ublksrv.h b/include/ublksrv.h
index d38bd46..f5deddb 100644
--- a/include/ublksrv.h
+++ b/include/ublksrv.h
@@ -106,6 +106,7 @@ struct ublksrv_tgt_info {
unsigned int nr_fds;
int fds[UBLKSRV_TGT_MAX_FDS];
void *tgt_data;
+ void *tgt_bpf_obj;
/*
* Extra IO slots for each queue, target code can reserve some
@@ -263,6 +264,8 @@ struct ublksrv_tgt_type {
int (*init_queue)(const struct ublksrv_queue *, void **queue_data_ptr);
void (*deinit_queue)(const struct ublksrv_queue *);
+ int (*init_queue_bpf)(const struct ublksrv_dev *dev, const struct ublksrv_queue *q);
+
unsigned long reserved[5];
};
@@ -318,6 +321,11 @@ extern void ublksrv_ctrl_prep_recovery(struct ublksrv_ctrl_dev *dev,
const char *recovery_jbuf);
extern const char *ublksrv_ctrl_get_recovery_jbuf(const struct ublksrv_ctrl_dev *dev);
+extern void ublksrv_ctrl_set_bpf_obj_info(struct ublksrv_ctrl_dev *dev,
+ void *obj);
+extern int ublksrv_ctrl_reg_bpf_prog(struct ublksrv_ctrl_dev *dev,
+ int io_prep_fd, int io_submit_fd);
+
/* ublksrv device ("/dev/ublkcN") level APIs */
extern const struct ublksrv_dev *ublksrv_dev_init(const struct ublksrv_ctrl_dev *
ctrl_dev);
diff --git a/include/ublksrv_priv.h b/include/ublksrv_priv.h
index 2996baa..8da8866 100644
--- a/include/ublksrv_priv.h
+++ b/include/ublksrv_priv.h
@@ -42,6 +42,7 @@ struct ublksrv_ctrl_dev {
const char *tgt_type;
const struct ublksrv_tgt_type *tgt_ops;
+ void *bpf_obj;
/*
* default is UBLKSRV_RUN_DIR but can be specified via command line,
diff --git a/include/ublksrv_tgt.h b/include/ublksrv_tgt.h
index 234d31e..e0db7d9 100644
--- a/include/ublksrv_tgt.h
+++ b/include/ublksrv_tgt.h
@@ -9,6 +9,7 @@
#include <getopt.h>
#include <string.h>
#include <stdarg.h>
+#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
diff --git a/lib/ublksrv.c b/lib/ublksrv.c
index 96bed95..110ccb3 100644
--- a/lib/ublksrv.c
+++ b/lib/ublksrv.c
@@ -603,6 +603,9 @@ skip_alloc_buf:
goto fail;
}
+ if (dev->tgt.ops->init_queue_bpf)
+ dev->tgt.ops->init_queue_bpf(tdev, local_to_tq(q));
+
ublksrv_dev_init_io_cmds(dev, q);
/*
@@ -723,6 +726,7 @@ const struct ublksrv_dev *ublksrv_dev_init(const struct ublksrv_ctrl_dev *ctrl_d
}
tgt->fds[0] = dev->cdev_fd;
+ tgt->tgt_bpf_obj = ctrl_dev->bpf_obj;
ret = ublksrv_tgt_init(dev, ctrl_dev->tgt_type, ctrl_dev->tgt_ops,
ctrl_dev->tgt_argc, ctrl_dev->tgt_argv);
diff --git a/lib/ublksrv_cmd.c b/lib/ublksrv_cmd.c
index 0d7265d..0101cb9 100644
--- a/lib/ublksrv_cmd.c
+++ b/lib/ublksrv_cmd.c
@@ -502,6 +502,27 @@ int ublksrv_ctrl_end_recovery(struct ublksrv_ctrl_dev *dev, int daemon_pid)
return ret;
}
+int ublksrv_ctrl_reg_bpf_prog(struct ublksrv_ctrl_dev *dev,
+ int io_prep_fd, int io_submit_fd)
+{
+ struct ublksrv_ctrl_cmd_data data = {
+ .cmd_op = UBLK_CMD_REG_BPF_PROG,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+ int ret;
+
+ data.data[0] = io_prep_fd;
+ data.data[1] = io_submit_fd;
+
+ ret = __ublksrv_ctrl_cmd(dev, &data);
+ return ret;
+}
+
+void ublksrv_ctrl_set_bpf_obj_info(struct ublksrv_ctrl_dev *dev, void *obj)
+{
+ dev->bpf_obj = obj;
+}
+
const struct ublksrv_ctrl_dev_info *ublksrv_ctrl_get_dev_info(
const struct ublksrv_ctrl_dev *dev)
{
diff --git a/tgt_loop.cpp b/tgt_loop.cpp
index 79a65d3..b1568fe 100644
--- a/tgt_loop.cpp
+++ b/tgt_loop.cpp
@@ -4,7 +4,11 @@
#include <poll.h>
#include <sys/epoll.h>
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "ublksrv_tgt.h"
+#include "bpf/.tmp/ublk.skel.h"
static bool backing_supports_discard(char *name)
{
@@ -88,6 +92,20 @@ static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
return 0;
}
+static int loop_init_queue_bpf(const struct ublksrv_dev *dev,
+ const struct ublksrv_queue *q)
+{
+ const struct ublksrv_tgt_info *tgt = &dev->tgt;
+ struct ublk_bpf *obj = (struct ublk_bpf *)tgt->tgt_bpf_obj;
+ int q_id = q->q_id, ring_fd = q->ring_ptr->ring_fd;
+
+ /* no bpf object was loaded for this device: nothing to register */
+ if (!obj)
+ return 0;
+ return bpf_map_update_elem(bpf_map__fd(obj->maps.uring_fd_map), &q_id,
+ &ring_fd, 0);
+}
+
static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
*argv[])
{
@@ -125,6 +143,7 @@ static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
},
};
bool can_discard = false;
+ struct ublk_bpf *bpf_obj;
strcpy(tgt_json.name, "loop");
@@ -218,6 +237,10 @@ static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
} while (ret < 0);
+ if (tgt->tgt_bpf_obj) {
+ bpf_obj = (struct ublk_bpf *)tgt->tgt_bpf_obj;
+ bpf_obj->data->target_fd = tgt->fds[1];
+ }
return 0;
}
@@ -252,9 +275,14 @@ static int loop_queue_tgt_io(const struct ublksrv_queue *q,
const struct ublk_io_data *data, int tag)
{
const struct ublksrv_io_desc *iod = data->iod;
- struct io_uring_sqe *sqe = io_uring_get_sqe(q->ring_ptr);
+ struct io_uring_sqe *sqe;
unsigned ublk_op = ublksrv_get_op(iod);
+ /* ebpf prog will handle read/write requests. */
+ if ((ublk_op == UBLK_IO_OP_READ) || (ublk_op == UBLK_IO_OP_WRITE))
+ return 1;
+
+ sqe = io_uring_get_sqe(q->ring_ptr);
if (!sqe)
return 0;
@@ -374,6 +402,7 @@ struct ublksrv_tgt_type loop_tgt_type = {
.type = UBLKSRV_TGT_TYPE_LOOP,
.name = "loop",
.recovery_tgt = loop_recovery_tgt,
+ .init_queue_bpf = loop_init_queue_bpf,
};
static void tgt_loop_init() __attribute__((constructor));
diff --git a/ublksrv_tgt.cpp b/ublksrv_tgt.cpp
index 5ed328d..d3796cf 100644
--- a/ublksrv_tgt.cpp
+++ b/ublksrv_tgt.cpp
@@ -2,6 +2,7 @@
#include "config.h"
#include "ublksrv_tgt.h"
+#include "bpf/.tmp/ublk.skel.h"
/* per-task variable */
static pthread_mutex_t jbuf_lock;
@@ -575,6 +576,31 @@ static void ublksrv_tgt_set_params(struct ublksrv_ctrl_dev *cdev,
}
}
+static int ublksrv_tgt_load_bpf_prog(struct ublksrv_ctrl_dev *cdev)
+{
+ struct ublk_bpf *obj;
+ int ret, io_prep_fd, io_submit_fd;
+
+ obj = ublk_bpf__open();
+ if (!obj) {
+ fprintf(stderr, "failed to open BPF object\n");
+ return -1;
+ }
+ ret = ublk_bpf__load(obj);
+ if (ret) {
+ fprintf(stderr, "failed to load BPF object\n");
+ ublk_bpf__destroy(obj);
+ return -1;
+ }
+
+ io_prep_fd = bpf_program__fd(obj->progs.ublk_io_prep_prog);
+ io_submit_fd = bpf_program__fd(obj->progs.ublk_io_submit_prog);
+ ret = ublksrv_ctrl_reg_bpf_prog(cdev, io_prep_fd, io_submit_fd);
+ if (!ret)
+ ublksrv_ctrl_set_bpf_obj_info(cdev, obj);
+ return ret;
+}
+
static int cmd_dev_add(int argc, char *argv[])
{
static const struct option longopts[] = {
@@ -696,6 +722,13 @@ static int cmd_dev_add(int argc, char *argv[])
goto fail;
}
+ ret = ublksrv_tgt_load_bpf_prog(dev);
+ if (ret < 0) {
+ fprintf(stderr, "dev %d load bpf prog failed, ret %d\n",
+ data.dev_id, ret);
+ goto fail_stop_daemon;
+ }
+
{
const struct ublksrv_ctrl_dev_info *info =
ublksrv_ctrl_get_dev_info(dev);
--
2.31.1
next prev parent reply other threads:[~2023-02-15 0:46 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-15 0:41 [RFC 0/3] Add io_uring & ebpf based methods to implement zero-copy for ublk Xiaoguang Wang
2023-02-15 0:41 ` [RFC 1/3] bpf: add UBLK program type Xiaoguang Wang
2023-02-15 0:41 ` [RFC 2/3] io_uring: enable io_uring to submit sqes located in kernel Xiaoguang Wang
2023-02-15 0:41 ` [RFC 3/3] ublk_drv: add ebpf support Xiaoguang Wang
2023-02-16 8:11 ` Ming Lei
2023-02-16 12:12 ` Xiaoguang Wang
2023-02-17 3:02 ` Ming Lei
2023-02-17 10:46 ` Ming Lei
2023-02-22 14:13 ` Xiaoguang Wang
2023-02-15 0:46 ` Xiaoguang Wang [this message]
2023-02-16 8:28 ` [UBLKSRV] Add " Ming Lei
2023-02-16 9:17 ` Xiaoguang Wang
2023-02-15 8:40 ` [RFC 0/3] Add io_uring & ebpf based methods to implement zero-copy for ublk Ziyang Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230215004618.35503-1-xiaoguang.wang@linux.alibaba.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox