* [PATCH liburing] test: add test cases for hybrid iopoll
[not found] <CGME20241111123656epcas5p20cac863708cd83d1fdbb523625665273@epcas5p2.samsung.com>
@ 2024-11-11 12:36 ` hexue
2024-11-12 10:44 ` Anuj Gupta
2024-11-13 3:32 ` Pavel Begunkov
0 siblings, 2 replies; 8+ messages in thread
From: hexue @ 2024-11-11 12:36 UTC (permalink / raw)
To: axboe, asml.silence; +Cc: io-uring, linux-kernel, hexue
Add a test file for hybrid iopoll to make sure it works safe.Testcass
include basic read/write tests, and run in normal iopoll mode and
passthrough mode respectively.
Signed-off-by: hexue <[email protected]>
---
man/io_uring_setup.2 | 10 +-
src/include/liburing/io_uring.h | 3 +
test/Makefile | 1 +
test/iopoll-hybridpoll.c | 544 ++++++++++++++++++++++++++++++++
4 files changed, 557 insertions(+), 1 deletion(-)
create mode 100644 test/iopoll-hybridpoll.c
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index 2f87783..8cfafdc 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
parameter set to the desired number of polling queues. The polling queues
will be shared appropriately between the CPUs in the system, if the number
is less than the number of online CPU threads.
-
+.TP
+.B IORING_SETUP_HYBRID_IOPOLL
+This flag must setup with
+.B IORING_SETUP_IOPOLL
+flag. hybrid poll is a new
+feature baed on iopoll, this could be a suboptimal solution when running
+on a single thread, it offers higher performance than IRQ and lower CPU
+utilization than polling. Similarly, this feature also requires the devices
+to support polling configuration.
.TP
.B IORING_SETUP_SQPOLL
When this flag is specified, a kernel thread is created to perform
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 20bc570..d16364c 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
*/
#define IORING_SETUP_NO_SQARRAY (1U << 16)
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
diff --git a/test/Makefile b/test/Makefile
index dfbbcbe..ea9452c 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -116,6 +116,7 @@ test_srcs := \
iopoll.c \
iopoll-leak.c \
iopoll-overflow.c \
+ iopoll-hybridpoll.c \
io_uring_enter.c \
io_uring_passthrough.c \
io_uring_register.c \
diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c
new file mode 100644
index 0000000..d7c08ae
--- /dev/null
+++ b/test/iopoll-hybridpoll.c
@@ -0,0 +1,544 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: basic read/write tests with
+ * hybrid polled IO, include iopoll and io_uring
+ * passthrough.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include "helpers.h"
+#include "liburing.h"
+#include "../src/syscall.h"
+#include "nvme.h"
+
+#define FILE_SIZE (128 * 1024)
+#define BS 4096
+#define BUFFERS (FILE_SIZE / BS)
+
+static struct iovec *vecs;
+static int no_pt, no_iopoll;
+
+static int fill_pattern(int tc)
+{
+ unsigned int val, *ptr;
+ int i, j;
+ int u_in_buf = BS / sizeof(val);
+
+ val = (tc / 2) * FILE_SIZE;
+ for (i = 0; i < BUFFERS; i++) {
+ ptr = vecs[i].iov_base;
+ for (j = 0; j < u_in_buf; j++) {
+ *ptr = val;
+ val++;
+ ptr++;
+ }
+ }
+
+ return 0;
+}
+
+static int verify_buf(int tc, void *buf, off_t off)
+{
+ int i, u_in_buf = BS / sizeof(unsigned int);
+ unsigned int *ptr;
+
+ off /= sizeof(unsigned int);
+ off += (tc / 2) * FILE_SIZE;
+ ptr = buf;
+ for (i = 0; i < u_in_buf; i++) {
+ if (off != *ptr) {
+ fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
+ (unsigned long long) off);
+ return 1;
+ }
+ ptr++;
+ off++;
+ }
+
+ return 0;
+}
+
+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write,
+ int sqthread, int fixed, int nonvec)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct nvme_uring_cmd *cmd;
+ int open_flags;
+ int do_fixed;
+ int i, ret, fd = -1;
+ off_t offset;
+ __u64 slba;
+ __u32 nlb;
+
+ if (write)
+ open_flags = O_WRONLY;
+ else
+ open_flags = O_RDONLY;
+
+ if (fixed) {
+ ret = t_register_buffers(ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+ fd = open(file, open_flags);
+ if (fd < 0) {
+ if (errno == EACCES || errno == EPERM)
+ return T_EXIT_SKIP;
+ perror("file open");
+ goto err;
+ }
+
+ if (sqthread) {
+ ret = io_uring_register_files(ring, &fd, 1);
+ if (ret) {
+ fprintf(stderr, "file reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+ if (write)
+ fill_pattern(tc);
+
+ offset = 0;
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ if (write) {
+ int use_fd = fd;
+
+ do_fixed = fixed;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else if (nonvec) {
+ io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len, offset);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else {
+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+ offset);
+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+ }
+ } else {
+ int use_fd = fd;
+
+ do_fixed = fixed;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else if (nonvec) {
+ io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len, offset);
+ sqe->cmd_op = NVME_URING_CMD_IO;
+ } else {
+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+ offset);
+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+ }
+ }
+ sqe->opcode = IORING_OP_URING_CMD;
+ if (do_fixed)
+ sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
+ sqe->user_data = ((uint64_t)offset << 32) | i;
+ if (sqthread)
+ sqe->flags |= IOSQE_FIXED_FILE;
+
+ cmd = (struct nvme_uring_cmd *)sqe->cmd;
+ memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+
+ cmd->opcode = write ? nvme_cmd_write : nvme_cmd_read;
+
+ slba = offset >> lba_shift;
+ nlb = (BS >> lba_shift) - 1;
+
+ /* cdw10 and cdw11 represent starting lba */
+ cmd->cdw10 = slba & 0xffffffff;
+ cmd->cdw11 = slba >> 32;
+ /* cdw12 represent number of lba's for read/write */
+ cmd->cdw12 = nlb;
+ if (do_fixed || nonvec) {
+ cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
+ cmd->data_len = vecs[i].iov_len;
+ } else {
+ cmd->addr = (__u64)(uintptr_t)&vecs[i];
+ cmd->data_len = 1;
+ }
+ cmd->nsid = nsid;
+
+ offset += BS;
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+ goto err;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ goto err;
+ }
+ if (cqe->res != 0) {
+ if (!no_pt) {
+ no_pt = 1;
+ goto skip;
+ }
+ fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
+ goto err;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ if (!write) {
+ int index = cqe->user_data & 0xffffffff;
+ void *buf = vecs[index].iov_base;
+ off_t voff = cqe->user_data >> 32;
+
+ if (verify_buf(tc, buf, voff))
+ goto err;
+ }
+ }
+
+ if (fixed) {
+ ret = io_uring_unregister_buffers(ring);
+ if (ret) {
+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+ if (sqthread) {
+ ret = io_uring_unregister_files(ring);
+ if (ret) {
+ fprintf(stderr, "file unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+skip:
+ close(fd);
+ return 0;
+err:
+ if (fd != -1)
+ close(fd);
+ return 1;
+}
+
+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
+ int fixed, int nonvec)
+{
+ struct io_uring ring;
+ int ret, ring_flags = 0;
+
+ ring_flags |= IORING_SETUP_SQE128;
+ ring_flags |= IORING_SETUP_CQE32;
+ ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
+
+ if (sqthread)
+ ring_flags |= IORING_SETUP_SQPOLL;
+
+ ret = t_create_ring(64, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ if (ret == -EINVAL) {
+ no_pt = 1;
+ return T_SETUP_SKIP;
+ }
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+
+ ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
+ io_uring_queue_exit(&ring);
+
+ return ret;
+}
+
+static int provide_buffers(struct io_uring *ring)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int ret, i;
+
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
+ vecs[i].iov_len, 1, 1, i);
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ fprintf(stderr, "submit: %d\n", ret);
+ return 1;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (cqe->res < 0) {
+ fprintf(stderr, "cqe->res=%d\n", cqe->res);
+ return 1;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ return 0;
+}
+
+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread,
+ int fixed, int buf_select)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ int open_flags;
+ int i, fd = -1, ret;
+ off_t offset;
+
+ if (buf_select) {
+ write = 0;
+ fixed = 0;
+ }
+ if (buf_select && provide_buffers(ring))
+ return 1;
+
+ if (write)
+ open_flags = O_WRONLY;
+ else
+ open_flags = O_RDONLY;
+ open_flags |= O_DIRECT;
+
+ if (fixed) {
+ ret = t_register_buffers(ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+ fd = open(file, open_flags);
+ if (fd < 0) {
+ if (errno == EINVAL || errno == EPERM || errno == EACCES)
+ return 0;
+ perror("file open");
+ goto err;
+ }
+ if (sqthread) {
+ ret = io_uring_register_files(ring, &fd, 1);
+ if (ret) {
+ fprintf(stderr, "file reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+ offset = 0;
+ for (i = 0; i < BUFFERS; i++) {
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ offset = BS * (rand() % BUFFERS);
+ if (write) {
+ int do_fixed = fixed;
+ int use_fd = fd;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ } else {
+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+ offset);
+ }
+ } else {
+ int do_fixed = fixed;
+ int use_fd = fd;
+
+ if (sqthread)
+ use_fd = 0;
+ if (fixed && (i & 1))
+ do_fixed = 0;
+ if (do_fixed) {
+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+ vecs[i].iov_len,
+ offset, i);
+ } else {
+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+ offset);
+ }
+
+ }
+ if (sqthread)
+ sqe->flags |= IOSQE_FIXED_FILE;
+ if (buf_select) {
+ sqe->flags |= IOSQE_BUFFER_SELECT;
+ sqe->buf_group = buf_select;
+ sqe->user_data = i;
+ }
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != BUFFERS) {
+ ret = io_uring_peek_cqe(ring, &cqe);
+ if (!ret && cqe->res == -EOPNOTSUPP) {
+ no_iopoll = 1;
+ io_uring_cqe_seen(ring, cqe);
+ goto out;
+ }
+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+ goto err;
+ }
+
+ for (i = 0; i < BUFFERS; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ goto err;
+ } else if (cqe->res == -EOPNOTSUPP) {
+ fprintf(stdout, "File/device/fs doesn't support polled IO\n");
+ no_iopoll = 1;
+ goto out;
+ } else if (cqe->res != BS) {
+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS);
+ goto err;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ if (fixed) {
+ ret = io_uring_unregister_buffers(ring);
+ if (ret) {
+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+ if (sqthread) {
+ ret = io_uring_unregister_files(ring);
+ if (ret) {
+ fprintf(stderr, "file unreg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
+out:
+ close(fd);
+ return 0;
+err:
+ if (fd != -1)
+ close(fd);
+ return 1;
+}
+
+static int test_iopoll(const char *fname, int write, int sqthread, int fixed,
+ int buf_select, int defer)
+{
+ struct io_uring ring;
+ int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
+
+ if (no_iopoll)
+ return 0;
+
+ if (defer)
+ ring_flags |= IORING_SETUP_SINGLE_ISSUER |
+ IORING_SETUP_DEFER_TASKRUN;
+
+ ret = t_create_ring(64, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+ ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
+ io_uring_queue_exit(&ring);
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, ret;
+ char buf[256];
+ char *fname;
+
+ if (argc > 1) {
+ fname = argv[1];
+ } else {
+ srand((unsigned)time(NULL));
+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+ (unsigned)rand(), (unsigned)getpid());
+ fname = buf;
+ t_create_file(fname, FILE_SIZE);
+ }
+
+ vecs = t_create_buffers(BUFFERS, BS);
+
+ for (i = 0; i < 16; i++) {
+ int write = (i & 1) != 0;
+ int sqthread = (i & 2) != 0;
+ int fixed = (i & 4) != 0;
+ int buf_select = (i & 8) != 0;
+ int defer = (i & 16) != 0;
+ int nonvec = buf_select;
+
+ ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer);
+ if (ret) {
+ fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n",
+ write, sqthread, fixed, buf_select, defer);
+ goto err;
+ }
+ if (no_iopoll)
+ break;
+
+ ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec);
+ if (no_pt)
+ break;
+ if (ret) {
+ fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n",
+ write, sqthread, fixed, nonvec);
+ goto err;
+ }
+ }
+
+ if (fname != argv[1])
+ unlink(fname);
+ return ret;
+err:
+ if (fname != argv[1])
+ unlink(fname);
+ return T_EXIT_FAIL;
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH liburing] test: add test cases for hybrid iopoll
2024-11-11 12:36 ` [PATCH liburing] test: add test cases for hybrid iopoll hexue
@ 2024-11-12 10:44 ` Anuj Gupta
2024-11-12 15:43 ` Jens Axboe
[not found] ` <CGME20241113062658epcas5p3d7234648ac86e7a16dab96c3c1d5dc98@epcas5p3.samsung.com>
2024-11-13 3:32 ` Pavel Begunkov
1 sibling, 2 replies; 8+ messages in thread
From: Anuj Gupta @ 2024-11-12 10:44 UTC (permalink / raw)
To: hexue; +Cc: axboe, asml.silence, io-uring, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 15593 bytes --]
On 11/11/24 08:36PM, hexue wrote:
>Add a test file for hybrid iopoll to make sure it works safe.Testcass
>include basic read/write tests, and run in normal iopoll mode and
>passthrough mode respectively.
>
>Signed-off-by: hexue <[email protected]>
>---
> man/io_uring_setup.2 | 10 +-
> src/include/liburing/io_uring.h | 3 +
> test/Makefile | 1 +
i> test/iopoll-hybridpoll.c | 544 ++++++++++++++++++++++++++++++++
> 4 files changed, 557 insertions(+), 1 deletion(-)
> create mode 100644 test/iopoll-hybridpoll.c
>
>diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
>index 2f87783..8cfafdc 100644
>--- a/man/io_uring_setup.2
>+++ b/man/io_uring_setup.2
>@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
> parameter set to the desired number of polling queues. The polling queues
> will be shared appropriately between the CPUs in the system, if the number
> is less than the number of online CPU threads.
>-
>+.TP
>+.B IORING_SETUP_HYBRID_IOPOLL
>+This flag must setup with
s/<must setup with>/<must be used with>
>+.B IORING_SETUP_IOPOLL
>+flag. hybrid poll is a new
s/hybrid/Hybrid
"new" is probably not required here
>+feature baed on iopoll, this could be a suboptimal solution when running
s/baed/based
s/<this could>/<which might>
>+on a single thread, it offers higher performance than IRQ and lower CPU
s/<, it>/<. It>
>+utilization than polling. Similarly, this feature also requires the devices
>+to support polling configuration.
This feature would work if a device doesn't have polled queues,right?
The performance might be suboptimal in that case, but the userspace won't
get any errors.
> .TP
> .B IORING_SETUP_SQPOLL
> When this flag is specified, a kernel thread is created to perform
>diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
>index 20bc570..d16364c 100644
>--- a/src/include/liburing/io_uring.h
>+++ b/src/include/liburing/io_uring.h
>@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
> */
> #define IORING_SETUP_NO_SQARRAY (1U << 16)
>
>+/* Use hybrid poll in iopoll process */
>+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
>+
> enum io_uring_op {
> IORING_OP_NOP,
> IORING_OP_READV,
>diff --git a/test/Makefile b/test/Makefile
>index dfbbcbe..ea9452c 100644
>--- a/test/Makefile
>+++ b/test/Makefile
>@@ -116,6 +116,7 @@ test_srcs := \
> iopoll.c \
> iopoll-leak.c \
> iopoll-overflow.c \
>+ iopoll-hybridpoll.c \
> io_uring_enter.c \
> io_uring_passthrough.c \
> io_uring_register.c \
>diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c
>new file mode 100644
>index 0000000..d7c08ae
>--- /dev/null
>+++ b/test/iopoll-hybridpoll.c
>@@ -0,0 +1,544 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Description: basic read/write tests with
>+ * hybrid polled IO, include iopoll and io_uring
>+ * passthrough.
>+ */
>+#include <errno.h>
>+#include <stdio.h>
>+#include <unistd.h>
>+#include <stdlib.h>
>+#include <string.h>
>+#include <fcntl.h>
>+#include <sys/types.h>
>+#include <poll.h>
>+#include <sys/eventfd.h>
>+#include <sys/resource.h>
>+#include "helpers.h"
>+#include "liburing.h"
>+#include "../src/syscall.h"
>+#include "nvme.h"
>+
>+#define FILE_SIZE (128 * 1024)
>+#define BS 4096
>+#define BUFFERS (FILE_SIZE / BS)
>+
>+static struct iovec *vecs;
>+static int no_pt, no_iopoll;
>+
>+static int fill_pattern(int tc)
>+{
>+ unsigned int val, *ptr;
>+ int i, j;
>+ int u_in_buf = BS / sizeof(val);
>+
>+ val = (tc / 2) * FILE_SIZE;
>+ for (i = 0; i < BUFFERS; i++) {
>+ ptr = vecs[i].iov_base;
>+ for (j = 0; j < u_in_buf; j++) {
>+ *ptr = val;
>+ val++;
>+ ptr++;
>+ }
>+ }
>+
>+ return 0;
>+}
>+
>+static int verify_buf(int tc, void *buf, off_t off)
>+{
>+ int i, u_in_buf = BS / sizeof(unsigned int);
>+ unsigned int *ptr;
>+
>+ off /= sizeof(unsigned int);
>+ off += (tc / 2) * FILE_SIZE;
>+ ptr = buf;
>+ for (i = 0; i < u_in_buf; i++) {
>+ if (off != *ptr) {
>+ fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
>+ (unsigned long long) off);
>+ return 1;
>+ }
>+ ptr++;
>+ off++;
>+ }
>+
>+ return 0;
>+}
>+
>+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write,
>+ int sqthread, int fixed, int nonvec)
>+{
>+ struct io_uring_sqe *sqe;
>+ struct io_uring_cqe *cqe;
>+ struct nvme_uring_cmd *cmd;
>+ int open_flags;
>+ int do_fixed;
>+ int i, ret, fd = -1;
>+ off_t offset;
>+ __u64 slba;
>+ __u32 nlb;
>+
>+ if (write)
>+ open_flags = O_WRONLY;
>+ else
>+ open_flags = O_RDONLY;
>+
>+ if (fixed) {
>+ ret = t_register_buffers(ring, vecs, BUFFERS);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ fprintf(stderr, "buffer reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+ fd = open(file, open_flags);
>+ if (fd < 0) {
>+ if (errno == EACCES || errno == EPERM)
>+ return T_EXIT_SKIP;
>+ perror("file open");
>+ goto err;
>+ }
>+
>+ if (sqthread) {
>+ ret = io_uring_register_files(ring, &fd, 1);
>+ if (ret) {
>+ fprintf(stderr, "file reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+ if (write)
>+ fill_pattern(tc);
>+
>+ offset = 0;
>+ for (i = 0; i < BUFFERS; i++) {
>+ sqe = io_uring_get_sqe(ring);
>+ if (!sqe) {
>+ fprintf(stderr, "sqe get failed\n");
>+ goto err;
>+ }
>+ if (write) {
>+ int use_fd = fd;
>+
>+ do_fixed = fixed;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else if (nonvec) {
>+ io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len, offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else {
>+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
>+ }
>+ } else {
>+ int use_fd = fd;
>+
>+ do_fixed = fixed;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else if (nonvec) {
>+ io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len, offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO;
>+ } else {
>+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ sqe->cmd_op = NVME_URING_CMD_IO_VEC;
>+ }
>+ }
>+ sqe->opcode = IORING_OP_URING_CMD;
>+ if (do_fixed)
>+ sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
>+ sqe->user_data = ((uint64_t)offset << 32) | i;
>+ if (sqthread)
>+ sqe->flags |= IOSQE_FIXED_FILE;
>+
>+ cmd = (struct nvme_uring_cmd *)sqe->cmd;
>+ memset(cmd, 0, sizeof(struct nvme_uring_cmd));
>+
>+ cmd->opcode = write ? nvme_cmd_write : nvme_cmd_read;
>+
>+ slba = offset >> lba_shift;
>+ nlb = (BS >> lba_shift) - 1;
>+
>+ /* cdw10 and cdw11 represent starting lba */
>+ cmd->cdw10 = slba & 0xffffffff;
>+ cmd->cdw11 = slba >> 32;
>+ /* cdw12 represent number of lba's for read/write */
>+ cmd->cdw12 = nlb;
>+ if (do_fixed || nonvec) {
>+ cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
>+ cmd->data_len = vecs[i].iov_len;
>+ } else {
>+ cmd->addr = (__u64)(uintptr_t)&vecs[i];
>+ cmd->data_len = 1;
>+ }
>+ cmd->nsid = nsid;
>+
>+ offset += BS;
>+ }
>+
>+ ret = io_uring_submit(ring);
>+ if (ret != BUFFERS) {
>+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
>+ goto err;
>+ }
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ ret = io_uring_wait_cqe(ring, &cqe);
>+ if (ret) {
>+ fprintf(stderr, "wait_cqe=%d\n", ret);
>+ goto err;
>+ }
>+ if (cqe->res != 0) {
>+ if (!no_pt) {
>+ no_pt = 1;
>+ goto skip;
>+ }
>+ fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
>+ goto err;
>+ }
>+ io_uring_cqe_seen(ring, cqe);
>+ if (!write) {
>+ int index = cqe->user_data & 0xffffffff;
>+ void *buf = vecs[index].iov_base;
>+ off_t voff = cqe->user_data >> 32;
>+
>+ if (verify_buf(tc, buf, voff))
>+ goto err;
>+ }
>+ }
>+
>+ if (fixed) {
>+ ret = io_uring_unregister_buffers(ring);
>+ if (ret) {
>+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+ if (sqthread) {
>+ ret = io_uring_unregister_files(ring);
>+ if (ret) {
>+ fprintf(stderr, "file unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+skip:
>+ close(fd);
>+ return 0;
>+err:
>+ if (fd != -1)
>+ close(fd);
>+ return 1;
>+}
>+
>+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
>+ int fixed, int nonvec)
>+{
>+ struct io_uring ring;
>+ int ret, ring_flags = 0;
>+
>+ ring_flags |= IORING_SETUP_SQE128;
>+ ring_flags |= IORING_SETUP_CQE32;
>+ ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
>+
>+ if (sqthread)
>+ ring_flags |= IORING_SETUP_SQPOLL;
>+
>+ ret = t_create_ring(64, &ring, ring_flags);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ if (ret == -EINVAL) {
>+ no_pt = 1;
>+ return T_SETUP_SKIP;
>+ }
>+ fprintf(stderr, "ring create failed: %d\n", ret);
>+ return 1;
>+ }
>+
>+ ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
>+ io_uring_queue_exit(&ring);
>+
>+ return ret;
>+}
>+
>+static int provide_buffers(struct io_uring *ring)
>+{
>+ struct io_uring_sqe *sqe;
>+ struct io_uring_cqe *cqe;
>+ int ret, i;
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ sqe = io_uring_get_sqe(ring);
>+ io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
>+ vecs[i].iov_len, 1, 1, i);
>+ }
>+
>+ ret = io_uring_submit(ring);
>+ if (ret != BUFFERS) {
>+ fprintf(stderr, "submit: %d\n", ret);
>+ return 1;
>+ }
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ ret = io_uring_wait_cqe(ring, &cqe);
>+ if (cqe->res < 0) {
>+ fprintf(stderr, "cqe->res=%d\n", cqe->res);
>+ return 1;
>+ }
>+ io_uring_cqe_seen(ring, cqe);
>+ }
>+
>+ return 0;
>+}
>+
>+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread,
>+ int fixed, int buf_select)
>+{
>+ struct io_uring_sqe *sqe;
>+ struct io_uring_cqe *cqe;
>+ int open_flags;
>+ int i, fd = -1, ret;
>+ off_t offset;
>+
>+ if (buf_select) {
>+ write = 0;
>+ fixed = 0;
>+ }
>+ if (buf_select && provide_buffers(ring))
>+ return 1;
>+
>+ if (write)
>+ open_flags = O_WRONLY;
>+ else
>+ open_flags = O_RDONLY;
>+ open_flags |= O_DIRECT;
>+
>+ if (fixed) {
>+ ret = t_register_buffers(ring, vecs, BUFFERS);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ fprintf(stderr, "buffer reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+ fd = open(file, open_flags);
>+ if (fd < 0) {
>+ if (errno == EINVAL || errno == EPERM || errno == EACCES)
>+ return 0;
>+ perror("file open");
>+ goto err;
>+ }
>+ if (sqthread) {
>+ ret = io_uring_register_files(ring, &fd, 1);
>+ if (ret) {
>+ fprintf(stderr, "file reg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+ offset = 0;
>+ for (i = 0; i < BUFFERS; i++) {
>+ sqe = io_uring_get_sqe(ring);
>+ if (!sqe) {
>+ fprintf(stderr, "sqe get failed\n");
>+ goto err;
>+ }
>+ offset = BS * (rand() % BUFFERS);
>+ if (write) {
>+ int do_fixed = fixed;
>+ int use_fd = fd;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ } else {
>+ io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ }
>+ } else {
>+ int do_fixed = fixed;
>+ int use_fd = fd;
>+
>+ if (sqthread)
>+ use_fd = 0;
>+ if (fixed && (i & 1))
>+ do_fixed = 0;
>+ if (do_fixed) {
>+ io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
>+ vecs[i].iov_len,
>+ offset, i);
>+ } else {
>+ io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
>+ offset);
>+ }
>+
>+ }
>+ if (sqthread)
>+ sqe->flags |= IOSQE_FIXED_FILE;
>+ if (buf_select) {
>+ sqe->flags |= IOSQE_BUFFER_SELECT;
>+ sqe->buf_group = buf_select;
>+ sqe->user_data = i;
>+ }
>+ }
>+
>+ ret = io_uring_submit(ring);
>+ if (ret != BUFFERS) {
>+ ret = io_uring_peek_cqe(ring, &cqe);
>+ if (!ret && cqe->res == -EOPNOTSUPP) {
>+ no_iopoll = 1;
>+ io_uring_cqe_seen(ring, cqe);
>+ goto out;
>+ }
>+ fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
>+ goto err;
>+ }
>+
>+ for (i = 0; i < BUFFERS; i++) {
>+ ret = io_uring_wait_cqe(ring, &cqe);
>+ if (ret) {
>+ fprintf(stderr, "wait_cqe=%d\n", ret);
>+ goto err;
>+ } else if (cqe->res == -EOPNOTSUPP) {
>+ fprintf(stdout, "File/device/fs doesn't support polled IO\n");
>+ no_iopoll = 1;
>+ goto out;
>+ } else if (cqe->res != BS) {
>+ fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS);
>+ goto err;
>+ }
>+ io_uring_cqe_seen(ring, cqe);
>+ }
>+
>+ if (fixed) {
>+ ret = io_uring_unregister_buffers(ring);
>+ if (ret) {
>+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+ if (sqthread) {
>+ ret = io_uring_unregister_files(ring);
>+ if (ret) {
>+ fprintf(stderr, "file unreg failed: %d\n", ret);
>+ goto err;
>+ }
>+ }
>+
>+out:
>+ close(fd);
>+ return 0;
>+err:
>+ if (fd != -1)
>+ close(fd);
>+ return 1;
>+}
>+
>+static int test_iopoll(const char *fname, int write, int sqthread, int fixed,
>+ int buf_select, int defer)
>+{
>+ struct io_uring ring;
>+ int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
>+
>+ if (no_iopoll)
>+ return 0;
>+
>+ if (defer)
>+ ring_flags |= IORING_SETUP_SINGLE_ISSUER |
>+ IORING_SETUP_DEFER_TASKRUN;
>+
>+ ret = t_create_ring(64, &ring, ring_flags);
>+ if (ret == T_SETUP_SKIP)
>+ return 0;
>+ if (ret != T_SETUP_OK) {
>+ fprintf(stderr, "ring create failed: %d\n", ret);
>+ return 1;
>+ }
>+ ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
>+ io_uring_queue_exit(&ring);
>+ return ret;
>+}
>+
>+int main(int argc, char *argv[])
>+{
>+ int i, ret;
>+ char buf[256];
>+ char *fname;
>+
>+ if (argc > 1) {
>+ fname = argv[1];
>+ } else {
>+ srand((unsigned)time(NULL));
>+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
>+ (unsigned)rand(), (unsigned)getpid());
>+ fname = buf;
>+ t_create_file(fname, FILE_SIZE);
>+ }
>+
>+ vecs = t_create_buffers(BUFFERS, BS);
>+
>+ for (i = 0; i < 16; i++) {
>+ int write = (i & 1) != 0;
>+ int sqthread = (i & 2) != 0;
>+ int fixed = (i & 4) != 0;
>+ int buf_select = (i & 8) != 0;
>+ int defer = (i & 16) != 0;
>+ int nonvec = buf_select;
>+
>+ ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer);
>+ if (ret) {
>+ fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n",
>+ write, sqthread, fixed, buf_select, defer);
>+ goto err;
>+ }
>+ if (no_iopoll)
>+ break;
>+
>+ ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec);
>+ if (no_pt)
>+ break;
>+ if (ret) {
>+ fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n",
>+ write, sqthread, fixed, nonvec);
>+ goto err;
>+ }
>+ }
>+
>+ if (fname != argv[1])
>+ unlink(fname);
>+ return ret;
>+err:
>+ if (fname != argv[1])
>+ unlink(fname);
>+ return T_EXIT_FAIL;
>+}
This patch mostly looks fine. But the code here seems to be largely
duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
Can we consider adding the hybrid poll test as a part of the existing
tests as it seems that it would only require passing a extra flag
during ring setup.
>--
>2.34.1
>
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH liburing] test: add test cases for hybrid iopoll
2024-11-12 10:44 ` Anuj Gupta
@ 2024-11-12 15:43 ` Jens Axboe
[not found] ` <CGME20241113062730epcas5p10bec3fe462b9b6e65d22a61c50902b78@epcas5p1.samsung.com>
[not found] ` <CGME20241113062658epcas5p3d7234648ac86e7a16dab96c3c1d5dc98@epcas5p3.samsung.com>
1 sibling, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2024-11-12 15:43 UTC (permalink / raw)
To: Anuj Gupta, hexue; +Cc: asml.silence, io-uring, linux-kernel
On 11/12/24 3:44 AM, Anuj Gupta wrote:
>> +utilization than polling. Similarly, this feature also requires the devices
>> +to support polling configuration.
> This feature would work if a device doesn't have polled queues,right?
> The performance might be suboptimal in that case, but the userspace won't
> get any errors.
We've traditionally been a mix of lax and strict on this. IMHO we should
return -EOPTNOTSUPP for IOPOLL (and IOPOLL|HYBRID) if polling isn't
configured correctly. I've seen way too many not realize that they need
to configure their nvme side for pollable queues for it to do what it
needs to do. If you don't and it's just allowed, then you don't really
get much of a win, you're just burning CPU.
Hence I do think that this should strongly recommend that the devices
support polling, that part is fine.
Agree with your other comments, thanks for reviewing it!
> This patch mostly looks fine. But the code here seems to be largely
> duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
> Can we consider adding the hybrid poll test as a part of the existing
> tests as it seems that it would only require passing a extra flag
> during ring setup.
Yeah I do think modifying test/iopoll.c to test all the same
configurations but with HYBRID added would be the way to go, rather than
duplicate all of this. Ditto for passthrough.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH liburing] test: add test cases for hybrid iopoll
2024-11-11 12:36 ` [PATCH liburing] test: add test cases for hybrid iopoll hexue
2024-11-12 10:44 ` Anuj Gupta
@ 2024-11-13 3:32 ` Pavel Begunkov
[not found] ` <CGME20241113062517epcas5p22b01ddf9f29123ddcd7ffc63f2ce9254@epcas5p2.samsung.com>
1 sibling, 1 reply; 8+ messages in thread
From: Pavel Begunkov @ 2024-11-13 3:32 UTC (permalink / raw)
To: hexue, axboe; +Cc: io-uring, linux-kernel
On 11/11/24 12:36, hexue wrote:
> Add a test file for hybrid iopoll to make sure it works safe.Testcass
> include basic read/write tests, and run in normal iopoll mode and
> passthrough mode respectively.
>
> Signed-off-by: hexue <[email protected]>
If it's not covered already please add tests for failure cases.
E.g. when SETUP_HYBRID_IOPOLL is set without SETUP_IOPOLL
> +static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
> + int fixed, int nonvec)
> +{
> + struct io_uring ring;
> + int ret, ring_flags = 0;
> +
> + ring_flags |= IORING_SETUP_SQE128;
> + ring_flags |= IORING_SETUP_CQE32;
> + ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
> +
> + if (sqthread)
> + ring_flags |= IORING_SETUP_SQPOLL;
Doesn't it also need IORING_SETUP_IOPOLL?
> +
> + ret = t_create_ring(64, &ring, ring_flags);
> + if (ret == T_SETUP_SKIP)
> + return 0;
> + if (ret != T_SETUP_OK) {
> + if (ret == -EINVAL) {
> + no_pt = 1;
> + return T_SETUP_SKIP;
> + }
> + fprintf(stderr, "ring create failed: %d\n", ret);
> + return 1;
> + }
> +
> + ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
> + io_uring_queue_exit(&ring);
> +
> + return ret;
> +}
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Re: [PATCH liburing] test: add test cases for hybrid iopoll
[not found] ` <CGME20241113062517epcas5p22b01ddf9f29123ddcd7ffc63f2ce9254@epcas5p2.samsung.com>
@ 2024-11-13 6:25 ` hexue
0 siblings, 0 replies; 8+ messages in thread
From: hexue @ 2024-11-13 6:25 UTC (permalink / raw)
To: asml.silence; +Cc: axboe, io-uring, linux-kernel
On 11/13/24 3:32, Pavel Begunkov wrote:
>On 11/11/24 12:36, hexue wrote:
>> Add a test file for hybrid iopoll to make sure it works safe.Testcass
>> include basic read/write tests, and run in normal iopoll mode and
>> passthrough mode respectively.
>>
>> Signed-off-by: hexue <[email protected]>
>If it's not covered already please add tests for failure cases.
>E.g. when SETUP_HYBRID_IOPOLL is set without SETUP_IOPOLL
I'll thinking about this part.
>> + ring_flags |= IORING_SETUP_SQE128;
>> + ring_flags |= IORING_SETUP_CQE32;
>> + ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
>> +
>> + if (sqthread)
>> + ring_flags |= IORING_SETUP_SQPOLL;
>Doesn't it also need IORING_SETUP_IOPOLL?
You're right, will fix this.
--
hexue
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Re: [PATCH liburing] test: add test cases for hybrid iopoll
[not found] ` <CGME20241113062658epcas5p3d7234648ac86e7a16dab96c3c1d5dc98@epcas5p3.samsung.com>
@ 2024-11-13 6:26 ` hexue
0 siblings, 0 replies; 8+ messages in thread
From: hexue @ 2024-11-13 6:26 UTC (permalink / raw)
To: anuj20.g; +Cc: asml.silence, axboe, io-uring, linux-kernel
On 11/12/24 3:44 AM, Anuj Gupta wrote:
>> +utilization than polling. Similarly, this feature also requires the devices
>> +to support polling configuration.
> This feature would work if a device doesn't have polled queues,right?
> The performance might be suboptimal in that case, but the userspace won't
> get any errors.
The usersapce will get error in this case. Hybrid must be dependent on iopoll,
if hybrid polling is set separately,it will not pass detection and an error will
be returned. Therefore, device end cnnfigured are required for polling.
......
> This patch mostly looks fine. But the code here seems to be largely
> duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
> Can we consider adding the hybrid poll test as a part of the existing
> tests as it seems that it would only require passing a extra flag
> during ring setup.
Make sense, I will try to do this in exsiting test file. Thank you very much.
--
hexue
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Re: [PATCH liburing] test: add test cases for hybrid iopoll
[not found] ` <CGME20241113062730epcas5p10bec3fe462b9b6e65d22a61c50902b78@epcas5p1.samsung.com>
@ 2024-11-13 6:27 ` hexue
0 siblings, 0 replies; 8+ messages in thread
From: hexue @ 2024-11-13 6:27 UTC (permalink / raw)
To: axboe; +Cc: anuj20.g, asml.silence, io-uring, linux-kernel
On 11/12/24 15:43, Jens Axboe wrote:
>On 11/12/24 3:44 AM, Anuj Gupta wrote:
>>> +utilization than polling. Similarly, this feature also requires the devices
>>> +to support polling configuration.
>> This feature would work if a device doesn't have polled queues,right?
>> The performance might be suboptimal in that case, but the userspace won't
>> get any errors.
>We've traditionally been a mix of lax and strict on this. IMHO we should
>return -EOPTNOTSUPP for IOPOLL (and IOPOLL|HYBRID) if polling isn't
>configured correctly. I've seen way too many not realize that they need
>to configure their nvme side for pollable queues for it to do what it
>needs to do. If you don't and it's just allowed, then you don't really
>get much of a win, you're just burning CPU.
>
>Hence I do think that this should strongly recommend that the devices
>support polling, that part is fine.
>
>Agree with your other comments, thanks for reviewing it!
>> This patch mostly looks fine. But the code here seems to be largely
>> duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
>> Can we consider adding the hybrid poll test as a part of the existing
>> tests as it seems that it would only require passing a extra flag
>> during ring setup.
>
>Yeah I do think modifying test/iopoll.c to test all the same
>configurations but with HYBRID added would be the way to go, rather than
>duplicate all of this. Ditto for passthrough.
Got it, will add it and submit v2 patch.
--
hexue
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Re: [PATCH liburing] test: add test cases for hybrid iopoll
[not found] ` <CGME20241115033450epcas5p10bdbbfa584b483d8822535d43da868d2@epcas5p1.samsung.com>
@ 2024-11-15 3:34 ` hexue
0 siblings, 0 replies; 8+ messages in thread
From: hexue @ 2024-11-15 3:34 UTC (permalink / raw)
To: axboe; +Cc: asml.silence, io-uring, linux-kernel
>On 11/13/24 10:03 PM, hexue wrote:
>> diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
>> index 2f87783..fa928fa 100644
...
>
>This flag must be used with
>
>> +.B IORING_SETUP_IOPOLL
>> +flag. hybrid poll is a new
>
>Like before, skip new. Think about what happens when someone reads this
>in 5 years time. What does new mean? Yes it may be new now, but docs
>are supposed to be timeless.
>
>> +feature baed on iopoll, this could be a suboptimal solution when running
>
>based on
>
>> +on a single thread, it offers higher performance than IRQ and lower CPU
>> +utilization than polling. Similarly, this feature also requires the devices
>> +to support polling configuration.
>
>This doesn't explain how it works. I'd say something like:
>
>Hybrid io polling differs from strict polling in that it will delay a
>bit before doing completion side polling, to avoid wasting too much CPU.
>Like IOPOLL, it requires that devices support polling.
Thanks, will change the description.
...
>> + return -EINVAL;
>> +
>The kernel should already do this, no point duplicating it in liburing.
>
>The test bits look much better now, way simpler. I'll just need to
>double check that they handle EINVAL on setup properly, and EOPNOTSUPP
>at completion time will turn off further testing of it. Did you run it
>on configurations where hybrid io polling will both fail at setup time,
>and at runtime (eg the latter where the kernel supports it, but the
>device/fs does not)?
Yes, I have run both of these error configurations. The running cases are:
hybrid poll without IORING_SETUP_IOPOLL and device with incorrect queue
configuration, EINVAL and EOPNOTSUPP are both identified.
--
hexue
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-11-15 3:35 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <CGME20241111123656epcas5p20cac863708cd83d1fdbb523625665273@epcas5p2.samsung.com>
2024-11-11 12:36 ` [PATCH liburing] test: add test cases for hybrid iopoll hexue
2024-11-12 10:44 ` Anuj Gupta
2024-11-12 15:43 ` Jens Axboe
[not found] ` <CGME20241113062730epcas5p10bec3fe462b9b6e65d22a61c50902b78@epcas5p1.samsung.com>
2024-11-13 6:27 ` hexue
[not found] ` <CGME20241113062658epcas5p3d7234648ac86e7a16dab96c3c1d5dc98@epcas5p3.samsung.com>
2024-11-13 6:26 ` hexue
2024-11-13 3:32 ` Pavel Begunkov
[not found] ` <CGME20241113062517epcas5p22b01ddf9f29123ddcd7ffc63f2ce9254@epcas5p2.samsung.com>
2024-11-13 6:25 ` hexue
2024-11-14 15:21 [PATCH liburing v2] " Jens Axboe
[not found] ` <CGME20241115033450epcas5p10bdbbfa584b483d8822535d43da868d2@epcas5p1.samsung.com>
2024-11-15 3:34 ` Re: [PATCH liburing] " hexue
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox