On 11/11/24 08:36PM, hexue wrote: >Add a test file for hybrid iopoll to make sure it works safe.Testcass >include basic read/write tests, and run in normal iopoll mode and >passthrough mode respectively. > >Signed-off-by: hexue >--- > man/io_uring_setup.2 | 10 +- > src/include/liburing/io_uring.h | 3 + > test/Makefile | 1 + i> test/iopoll-hybridpoll.c | 544 ++++++++++++++++++++++++++++++++ > 4 files changed, 557 insertions(+), 1 deletion(-) > create mode 100644 test/iopoll-hybridpoll.c > >diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2 >index 2f87783..8cfafdc 100644 >--- a/man/io_uring_setup.2 >+++ b/man/io_uring_setup.2 >@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the > parameter set to the desired number of polling queues. The polling queues > will be shared appropriately between the CPUs in the system, if the number > is less than the number of online CPU threads. >- >+.TP >+.B IORING_SETUP_HYBRID_IOPOLL >+This flag must setup with s// >+.B IORING_SETUP_IOPOLL >+flag. hybrid poll is a new s/hybrid/Hybrid "new" is probably not required here >+feature baed on iopoll, this could be a suboptimal solution when running s/baed/based s// >+on a single thread, it offers higher performance than IRQ and lower CPU s/<, it>/<. It> >+utilization than polling. Similarly, this feature also requires the devices >+to support polling configuration. This feature would work if a device doesn't have polled queues,right? The performance might be suboptimal in that case, but the userspace won't get any errors. 
> .TP > .B IORING_SETUP_SQPOLL > When this flag is specified, a kernel thread is created to perform >diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h >index 20bc570..d16364c 100644 >--- a/src/include/liburing/io_uring.h >+++ b/src/include/liburing/io_uring.h >@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit { > */ > #define IORING_SETUP_NO_SQARRAY (1U << 16) > >+/* Use hybrid poll in iopoll process */ >+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17) >+ > enum io_uring_op { > IORING_OP_NOP, > IORING_OP_READV, >diff --git a/test/Makefile b/test/Makefile >index dfbbcbe..ea9452c 100644 >--- a/test/Makefile >+++ b/test/Makefile >@@ -116,6 +116,7 @@ test_srcs := \ > iopoll.c \ > iopoll-leak.c \ > iopoll-overflow.c \ >+ iopoll-hybridpoll.c \ > io_uring_enter.c \ > io_uring_passthrough.c \ > io_uring_register.c \ >diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c >new file mode 100644 >index 0000000..d7c08ae >--- /dev/null >+++ b/test/iopoll-hybridpoll.c >@@ -0,0 +1,544 @@ >+/* SPDX-License-Identifier: MIT */ >+/* >+ * Description: basic read/write tests with >+ * hybrid polled IO, include iopoll and io_uring >+ * passthrough. 
>+ */ >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include "helpers.h" >+#include "liburing.h" >+#include "../src/syscall.h" >+#include "nvme.h" >+ >+#define FILE_SIZE (128 * 1024) >+#define BS 4096 >+#define BUFFERS (FILE_SIZE / BS) >+ >+static struct iovec *vecs; >+static int no_pt, no_iopoll; >+ >+static int fill_pattern(int tc) >+{ >+ unsigned int val, *ptr; >+ int i, j; >+ int u_in_buf = BS / sizeof(val); >+ >+ val = (tc / 2) * FILE_SIZE; >+ for (i = 0; i < BUFFERS; i++) { >+ ptr = vecs[i].iov_base; >+ for (j = 0; j < u_in_buf; j++) { >+ *ptr = val; >+ val++; >+ ptr++; >+ } >+ } >+ >+ return 0; >+} >+ >+static int verify_buf(int tc, void *buf, off_t off) >+{ >+ int i, u_in_buf = BS / sizeof(unsigned int); >+ unsigned int *ptr; >+ >+ off /= sizeof(unsigned int); >+ off += (tc / 2) * FILE_SIZE; >+ ptr = buf; >+ for (i = 0; i < u_in_buf; i++) { >+ if (off != *ptr) { >+ fprintf(stderr, "Found %u, wanted %llu\n", *ptr, >+ (unsigned long long) off); >+ return 1; >+ } >+ ptr++; >+ off++; >+ } >+ >+ return 0; >+} >+ >+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write, >+ int sqthread, int fixed, int nonvec) >+{ >+ struct io_uring_sqe *sqe; >+ struct io_uring_cqe *cqe; >+ struct nvme_uring_cmd *cmd; >+ int open_flags; >+ int do_fixed; >+ int i, ret, fd = -1; >+ off_t offset; >+ __u64 slba; >+ __u32 nlb; >+ >+ if (write) >+ open_flags = O_WRONLY; >+ else >+ open_flags = O_RDONLY; >+ >+ if (fixed) { >+ ret = t_register_buffers(ring, vecs, BUFFERS); >+ if (ret == T_SETUP_SKIP) >+ return 0; >+ if (ret != T_SETUP_OK) { >+ fprintf(stderr, "buffer reg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ >+ fd = open(file, open_flags); >+ if (fd < 0) { >+ if (errno == EACCES || errno == EPERM) >+ return T_EXIT_SKIP; >+ perror("file open"); >+ goto err; >+ } >+ >+ if (sqthread) { >+ ret = io_uring_register_files(ring, &fd, 1); >+ if (ret) { >+ fprintf(stderr, "file 
reg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ >+ if (write) >+ fill_pattern(tc); >+ >+ offset = 0; >+ for (i = 0; i < BUFFERS; i++) { >+ sqe = io_uring_get_sqe(ring); >+ if (!sqe) { >+ fprintf(stderr, "sqe get failed\n"); >+ goto err; >+ } >+ if (write) { >+ int use_fd = fd; >+ >+ do_fixed = fixed; >+ >+ if (sqthread) >+ use_fd = 0; >+ if (fixed && (i & 1)) >+ do_fixed = 0; >+ if (do_fixed) { >+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, >+ vecs[i].iov_len, >+ offset, i); >+ sqe->cmd_op = NVME_URING_CMD_IO; >+ } else if (nonvec) { >+ io_uring_prep_write(sqe, use_fd, vecs[i].iov_base, >+ vecs[i].iov_len, offset); >+ sqe->cmd_op = NVME_URING_CMD_IO; >+ } else { >+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, >+ offset); >+ sqe->cmd_op = NVME_URING_CMD_IO_VEC; >+ } >+ } else { >+ int use_fd = fd; >+ >+ do_fixed = fixed; >+ >+ if (sqthread) >+ use_fd = 0; >+ if (fixed && (i & 1)) >+ do_fixed = 0; >+ if (do_fixed) { >+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, >+ vecs[i].iov_len, >+ offset, i); >+ sqe->cmd_op = NVME_URING_CMD_IO; >+ } else if (nonvec) { >+ io_uring_prep_read(sqe, use_fd, vecs[i].iov_base, >+ vecs[i].iov_len, offset); >+ sqe->cmd_op = NVME_URING_CMD_IO; >+ } else { >+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, >+ offset); >+ sqe->cmd_op = NVME_URING_CMD_IO_VEC; >+ } >+ } >+ sqe->opcode = IORING_OP_URING_CMD; >+ if (do_fixed) >+ sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED; >+ sqe->user_data = ((uint64_t)offset << 32) | i; >+ if (sqthread) >+ sqe->flags |= IOSQE_FIXED_FILE; >+ >+ cmd = (struct nvme_uring_cmd *)sqe->cmd; >+ memset(cmd, 0, sizeof(struct nvme_uring_cmd)); >+ >+ cmd->opcode = write ? 
nvme_cmd_write : nvme_cmd_read; >+ >+ slba = offset >> lba_shift; >+ nlb = (BS >> lba_shift) - 1; >+ >+ /* cdw10 and cdw11 represent starting lba */ >+ cmd->cdw10 = slba & 0xffffffff; >+ cmd->cdw11 = slba >> 32; >+ /* cdw12 represent number of lba's for read/write */ >+ cmd->cdw12 = nlb; >+ if (do_fixed || nonvec) { >+ cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base; >+ cmd->data_len = vecs[i].iov_len; >+ } else { >+ cmd->addr = (__u64)(uintptr_t)&vecs[i]; >+ cmd->data_len = 1; >+ } >+ cmd->nsid = nsid; >+ >+ offset += BS; >+ } >+ >+ ret = io_uring_submit(ring); >+ if (ret != BUFFERS) { >+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); >+ goto err; >+ } >+ >+ for (i = 0; i < BUFFERS; i++) { >+ ret = io_uring_wait_cqe(ring, &cqe); >+ if (ret) { >+ fprintf(stderr, "wait_cqe=%d\n", ret); >+ goto err; >+ } >+ if (cqe->res != 0) { >+ if (!no_pt) { >+ no_pt = 1; >+ goto skip; >+ } >+ fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res); >+ goto err; >+ } >+ io_uring_cqe_seen(ring, cqe); >+ if (!write) { >+ int index = cqe->user_data & 0xffffffff; >+ void *buf = vecs[index].iov_base; >+ off_t voff = cqe->user_data >> 32; >+ >+ if (verify_buf(tc, buf, voff)) >+ goto err; >+ } >+ } >+ >+ if (fixed) { >+ ret = io_uring_unregister_buffers(ring); >+ if (ret) { >+ fprintf(stderr, "buffer unreg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ if (sqthread) { >+ ret = io_uring_unregister_files(ring); >+ if (ret) { >+ fprintf(stderr, "file unreg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ >+skip: >+ close(fd); >+ return 0; >+err: >+ if (fd != -1) >+ close(fd); >+ return 1; >+} >+ >+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread, >+ int fixed, int nonvec) >+{ >+ struct io_uring ring; >+ int ret, ring_flags = 0; >+ >+ ring_flags |= IORING_SETUP_SQE128; >+ ring_flags |= IORING_SETUP_CQE32; >+ ring_flags |= IORING_SETUP_HYBRID_IOPOLL; >+ >+ if (sqthread) >+ ring_flags |= IORING_SETUP_SQPOLL; >+ >+ ret = t_create_ring(64, &ring, 
ring_flags); >+ if (ret == T_SETUP_SKIP) >+ return 0; >+ if (ret != T_SETUP_OK) { >+ if (ret == -EINVAL) { >+ no_pt = 1; >+ return T_SETUP_SKIP; >+ } >+ fprintf(stderr, "ring create failed: %d\n", ret); >+ return 1; >+ } >+ >+ ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec); >+ io_uring_queue_exit(&ring); >+ >+ return ret; >+} >+ >+static int provide_buffers(struct io_uring *ring) >+{ >+ struct io_uring_sqe *sqe; >+ struct io_uring_cqe *cqe; >+ int ret, i; >+ >+ for (i = 0; i < BUFFERS; i++) { >+ sqe = io_uring_get_sqe(ring); >+ io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, >+ vecs[i].iov_len, 1, 1, i); >+ } >+ >+ ret = io_uring_submit(ring); >+ if (ret != BUFFERS) { >+ fprintf(stderr, "submit: %d\n", ret); >+ return 1; >+ } >+ >+ for (i = 0; i < BUFFERS; i++) { >+ ret = io_uring_wait_cqe(ring, &cqe); >+ if (cqe->res < 0) { >+ fprintf(stderr, "cqe->res=%d\n", cqe->res); >+ return 1; >+ } >+ io_uring_cqe_seen(ring, cqe); >+ } >+ >+ return 0; >+} >+ >+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread, >+ int fixed, int buf_select) >+{ >+ struct io_uring_sqe *sqe; >+ struct io_uring_cqe *cqe; >+ int open_flags; >+ int i, fd = -1, ret; >+ off_t offset; >+ >+ if (buf_select) { >+ write = 0; >+ fixed = 0; >+ } >+ if (buf_select && provide_buffers(ring)) >+ return 1; >+ >+ if (write) >+ open_flags = O_WRONLY; >+ else >+ open_flags = O_RDONLY; >+ open_flags |= O_DIRECT; >+ >+ if (fixed) { >+ ret = t_register_buffers(ring, vecs, BUFFERS); >+ if (ret == T_SETUP_SKIP) >+ return 0; >+ if (ret != T_SETUP_OK) { >+ fprintf(stderr, "buffer reg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ fd = open(file, open_flags); >+ if (fd < 0) { >+ if (errno == EINVAL || errno == EPERM || errno == EACCES) >+ return 0; >+ perror("file open"); >+ goto err; >+ } >+ if (sqthread) { >+ ret = io_uring_register_files(ring, &fd, 1); >+ if (ret) { >+ fprintf(stderr, "file reg failed: %d\n", ret); >+ goto err; >+ } 
>+ } >+ >+ offset = 0; >+ for (i = 0; i < BUFFERS; i++) { >+ sqe = io_uring_get_sqe(ring); >+ if (!sqe) { >+ fprintf(stderr, "sqe get failed\n"); >+ goto err; >+ } >+ offset = BS * (rand() % BUFFERS); >+ if (write) { >+ int do_fixed = fixed; >+ int use_fd = fd; >+ >+ if (sqthread) >+ use_fd = 0; >+ if (fixed && (i & 1)) >+ do_fixed = 0; >+ if (do_fixed) { >+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, >+ vecs[i].iov_len, >+ offset, i); >+ } else { >+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, >+ offset); >+ } >+ } else { >+ int do_fixed = fixed; >+ int use_fd = fd; >+ >+ if (sqthread) >+ use_fd = 0; >+ if (fixed && (i & 1)) >+ do_fixed = 0; >+ if (do_fixed) { >+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, >+ vecs[i].iov_len, >+ offset, i); >+ } else { >+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, >+ offset); >+ } >+ >+ } >+ if (sqthread) >+ sqe->flags |= IOSQE_FIXED_FILE; >+ if (buf_select) { >+ sqe->flags |= IOSQE_BUFFER_SELECT; >+ sqe->buf_group = buf_select; >+ sqe->user_data = i; >+ } >+ } >+ >+ ret = io_uring_submit(ring); >+ if (ret != BUFFERS) { >+ ret = io_uring_peek_cqe(ring, &cqe); >+ if (!ret && cqe->res == -EOPNOTSUPP) { >+ no_iopoll = 1; >+ io_uring_cqe_seen(ring, cqe); >+ goto out; >+ } >+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); >+ goto err; >+ } >+ >+ for (i = 0; i < BUFFERS; i++) { >+ ret = io_uring_wait_cqe(ring, &cqe); >+ if (ret) { >+ fprintf(stderr, "wait_cqe=%d\n", ret); >+ goto err; >+ } else if (cqe->res == -EOPNOTSUPP) { >+ fprintf(stdout, "File/device/fs doesn't support polled IO\n"); >+ no_iopoll = 1; >+ goto out; >+ } else if (cqe->res != BS) { >+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS); >+ goto err; >+ } >+ io_uring_cqe_seen(ring, cqe); >+ } >+ >+ if (fixed) { >+ ret = io_uring_unregister_buffers(ring); >+ if (ret) { >+ fprintf(stderr, "buffer unreg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ if (sqthread) { >+ ret = io_uring_unregister_files(ring); >+ if (ret) { >+ 
fprintf(stderr, "file unreg failed: %d\n", ret); >+ goto err; >+ } >+ } >+ >+out: >+ close(fd); >+ return 0; >+err: >+ if (fd != -1) >+ close(fd); >+ return 1; >+} >+ >+static int test_iopoll(const char *fname, int write, int sqthread, int fixed, >+ int buf_select, int defer) >+{ >+ struct io_uring ring; >+ int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL; >+ >+ if (no_iopoll) >+ return 0; >+ >+ if (defer) >+ ring_flags |= IORING_SETUP_SINGLE_ISSUER | >+ IORING_SETUP_DEFER_TASKRUN; >+ >+ ret = t_create_ring(64, &ring, ring_flags); >+ if (ret == T_SETUP_SKIP) >+ return 0; >+ if (ret != T_SETUP_OK) { >+ fprintf(stderr, "ring create failed: %d\n", ret); >+ return 1; >+ } >+ ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select); >+ io_uring_queue_exit(&ring); >+ return ret; >+} >+ >+int main(int argc, char *argv[]) >+{ >+ int i, ret; >+ char buf[256]; >+ char *fname; >+ >+ if (argc > 1) { >+ fname = argv[1]; >+ } else { >+ srand((unsigned)time(NULL)); >+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u", >+ (unsigned)rand(), (unsigned)getpid()); >+ fname = buf; >+ t_create_file(fname, FILE_SIZE); >+ } >+ >+ vecs = t_create_buffers(BUFFERS, BS); >+ >+ for (i = 0; i < 16; i++) { >+ int write = (i & 1) != 0; >+ int sqthread = (i & 2) != 0; >+ int fixed = (i & 4) != 0; >+ int buf_select = (i & 8) != 0; >+ int defer = (i & 16) != 0; >+ int nonvec = buf_select; >+ >+ ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer); >+ if (ret) { >+ fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n", >+ write, sqthread, fixed, buf_select, defer); >+ goto err; >+ } >+ if (no_iopoll) >+ break; >+ >+ ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec); >+ if (no_pt) >+ break; >+ if (ret) { >+ fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n", >+ write, sqthread, fixed, nonvec); >+ goto err; >+ } >+ } >+ >+ if (fname != argv[1]) >+ unlink(fname); >+ return ret; >+err: >+ if (fname != argv[1]) >+ 
unlink(fname); >+ return T_EXIT_FAIL; >+} This patch mostly looks fine. But the code here seems to be largely duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c". Can we consider adding the hybrid poll test as a part of the existing tests, as it seems that it would only require passing an extra flag during ring setup? >-- >2.34.1 >