public inbox for [email protected]
* [PATCH liburing] test: add test cases for hybrid iopoll
       [not found] <CGME20241111123656epcas5p20cac863708cd83d1fdbb523625665273@epcas5p2.samsung.com>
@ 2024-11-11 12:36 ` hexue
  2024-11-12 10:44   ` Anuj Gupta
  2024-11-13  3:32   ` Pavel Begunkov
  0 siblings, 2 replies; 8+ messages in thread
From: hexue @ 2024-11-11 12:36 UTC (permalink / raw)
  To: axboe, asml.silence; +Cc: io-uring, linux-kernel, hexue

Add a test file for hybrid iopoll to make sure it works safely. Test cases
include basic read/write tests, run in normal iopoll mode and
passthrough mode respectively.

Signed-off-by: hexue <[email protected]>
---
 man/io_uring_setup.2            |  10 +-
 src/include/liburing/io_uring.h |   3 +
 test/Makefile                   |   1 +
 test/iopoll-hybridpoll.c        | 544 ++++++++++++++++++++++++++++++++
 4 files changed, 557 insertions(+), 1 deletion(-)
 create mode 100644 test/iopoll-hybridpoll.c

diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index 2f87783..8cfafdc 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
 parameter set to the desired number of polling queues. The polling queues
 will be shared appropriately between the CPUs in the system, if the number
 is less than the number of online CPU threads.
-
+.TP
+.B IORING_SETUP_HYBRID_IOPOLL
+This flag must setup with 
+.B IORING_SETUP_IOPOLL
+flag. hybrid poll is a new
+feature baed on iopoll, this could be a suboptimal solution when running
+on a single thread, it offers higher performance than IRQ and lower CPU
+utilization than polling. Similarly, this feature also requires the devices
+to support polling configuration.
 .TP
 .B IORING_SETUP_SQPOLL
 When this flag is specified, a kernel thread is created to perform
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 20bc570..d16364c 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_NO_SQARRAY		(1U << 16)
 
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL      (1U << 17)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
diff --git a/test/Makefile b/test/Makefile
index dfbbcbe..ea9452c 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -116,6 +116,7 @@ test_srcs := \
 	iopoll.c \
 	iopoll-leak.c \
 	iopoll-overflow.c \
+	iopoll-hybridpoll.c \
 	io_uring_enter.c \
 	io_uring_passthrough.c \
 	io_uring_register.c \
diff --git a/test/iopoll-hybridpoll.c b/test/iopoll-hybridpoll.c
new file mode 100644
index 0000000..d7c08ae
--- /dev/null
+++ b/test/iopoll-hybridpoll.c
@@ -0,0 +1,544 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: basic read/write tests with
+ * hybrid polled IO, including iopoll and io_uring
+ * passthrough.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include "helpers.h"
+#include "liburing.h"
+#include "../src/syscall.h"
+#include "nvme.h"
+
+#define FILE_SIZE	(128 * 1024)
+#define BS		4096
+#define BUFFERS		(FILE_SIZE / BS)
+
+static struct iovec *vecs;
+static int no_pt, no_iopoll;
+
+static int fill_pattern(int tc)
+{
+	unsigned int val, *ptr;
+	int i, j;
+	int u_in_buf = BS / sizeof(val);
+
+	val = (tc / 2) * FILE_SIZE;
+	for (i = 0; i < BUFFERS; i++) {
+		ptr = vecs[i].iov_base;
+		for (j = 0; j < u_in_buf; j++) {
+			*ptr = val;
+			val++;
+			ptr++;
+		}
+	}
+
+	return 0;
+}
+
+static int verify_buf(int tc, void *buf, off_t off)
+{
+	int i, u_in_buf = BS / sizeof(unsigned int);
+	unsigned int *ptr;
+
+	off /= sizeof(unsigned int);
+	off += (tc / 2) * FILE_SIZE;
+	ptr = buf;
+	for (i = 0; i < u_in_buf; i++) {
+		if (off != *ptr) {
+			fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
+					(unsigned long long) off);
+			return 1;
+		}
+		ptr++;
+		off++;
+	}
+
+	return 0;
+}
+
+static int __test_io_uring_passthrough_io(const char *file, struct io_uring *ring, int tc, int write,
+		     int sqthread, int fixed, int nonvec)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct nvme_uring_cmd *cmd;
+	int open_flags;
+	int do_fixed;
+	int i, ret, fd = -1;
+	off_t offset;
+	__u64 slba;
+	__u32 nlb;
+
+	if (write)
+		open_flags = O_WRONLY;
+	else
+		open_flags = O_RDONLY;
+
+	if (fixed) {
+		ret = t_register_buffers(ring, vecs, BUFFERS);
+		if (ret == T_SETUP_SKIP)
+			return 0;
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "buffer reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		if (errno == EACCES || errno == EPERM)
+			return T_EXIT_SKIP;
+		perror("file open");
+		goto err;
+	}
+
+	if (sqthread) {
+		ret = io_uring_register_files(ring, &fd, 1);
+		if (ret) {
+			fprintf(stderr, "file reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	if (write)
+		fill_pattern(tc);
+
+	offset = 0;
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		if (write) {
+			int use_fd = fd;
+
+			do_fixed = fixed;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else if (nonvec) {
+				io_uring_prep_write(sqe, use_fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else {
+				io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+								offset);
+				sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+			}	
+		} else {
+			int use_fd = fd;
+
+			do_fixed = fixed;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else if (nonvec) {
+				io_uring_prep_read(sqe, use_fd, vecs[i].iov_base,
+							vecs[i].iov_len, offset);
+				sqe->cmd_op = NVME_URING_CMD_IO;
+			} else {
+				io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+								offset);
+				sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+			}
+		}
+		sqe->opcode = IORING_OP_URING_CMD;
+		if (do_fixed)
+			sqe->uring_cmd_flags |= IORING_URING_CMD_FIXED;
+		sqe->user_data = ((uint64_t)offset << 32) | i;
+		if (sqthread)
+			sqe->flags |= IOSQE_FIXED_FILE;
+
+		cmd = (struct nvme_uring_cmd *)sqe->cmd;
+		memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+
+		cmd->opcode = write ? nvme_cmd_write : nvme_cmd_read;
+
+		slba = offset >> lba_shift;
+		nlb = (BS >> lba_shift) - 1;
+
+		/* cdw10 and cdw11 represent starting lba */
+		cmd->cdw10 = slba & 0xffffffff;
+		cmd->cdw11 = slba >> 32;
+		/* cdw12 represent number of lba's for read/write */
+		cmd->cdw12 = nlb;
+		if (do_fixed || nonvec) {
+			cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base;
+			cmd->data_len = vecs[i].iov_len;
+		} else {
+			cmd->addr = (__u64)(uintptr_t)&vecs[i];
+			cmd->data_len = 1;
+		}
+		cmd->nsid = nsid;
+
+		offset += BS;
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		}
+		if (cqe->res != 0) {
+			if (!no_pt) {
+				no_pt = 1;
+				goto skip;
+			}
+			fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res);
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+		if (!write) {
+			int index = cqe->user_data & 0xffffffff;
+			void *buf = vecs[index].iov_base;
+			off_t voff = cqe->user_data >> 32;
+
+			if (verify_buf(tc, buf, voff))
+				goto err;
+		}
+	}
+
+	if (fixed) {
+		ret = io_uring_unregister_buffers(ring);
+		if (ret) {
+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+	if (sqthread) {
+		ret = io_uring_unregister_files(ring);
+		if (ret) {
+			fprintf(stderr, "file unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+skip:
+	close(fd);
+	return 0;
+err:
+	if (fd != -1)
+		close(fd);
+	return 1;
+}
+
+static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
+		   int fixed, int nonvec)
+{
+	struct io_uring ring;
+	int ret, ring_flags = 0;
+
+	ring_flags |= IORING_SETUP_SQE128;
+	ring_flags |= IORING_SETUP_CQE32;
+	ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
+
+	if (sqthread)
+		ring_flags |= IORING_SETUP_SQPOLL;
+
+	ret = t_create_ring(64, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		if (ret == -EINVAL) {
+			no_pt = 1;
+			return T_SETUP_SKIP;
+		}
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
+	io_uring_queue_exit(&ring);
+
+	return ret;
+}
+
+static int provide_buffers(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int ret, i;
+
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
+						vecs[i].iov_len, 1, 1, i);
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return 1;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (cqe->res < 0) {
+			fprintf(stderr, "cqe->res=%d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int __test_iopoll_io(const char *file, struct io_uring *ring, int write, int sqthread,
+		     int fixed, int buf_select)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	int open_flags;
+	int i, fd = -1, ret;
+	off_t offset;
+
+	if (buf_select) {
+		write = 0;
+		fixed = 0;
+	}
+	if (buf_select && provide_buffers(ring))
+		return 1;
+
+	if (write)
+		open_flags = O_WRONLY;
+	else
+		open_flags = O_RDONLY;
+	open_flags |= O_DIRECT;
+
+	if (fixed) {
+		ret = t_register_buffers(ring, vecs, BUFFERS);
+		if (ret == T_SETUP_SKIP)
+			return 0;
+		if (ret != T_SETUP_OK) {
+			fprintf(stderr, "buffer reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+	fd = open(file, open_flags);
+	if (fd < 0) {
+		if (errno == EINVAL || errno == EPERM || errno == EACCES)
+			return 0;
+		perror("file open");
+		goto err;
+	}
+	if (sqthread) {
+		ret = io_uring_register_files(ring, &fd, 1);
+		if (ret) {
+			fprintf(stderr, "file reg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+	offset = 0;
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "sqe get failed\n");
+			goto err;
+		}
+		offset = BS * (rand() % BUFFERS);
+		if (write) {
+			int do_fixed = fixed;
+			int use_fd = fd;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+			} else {
+				io_uring_prep_writev(sqe, use_fd, &vecs[i], 1,
+								offset);
+			}
+		} else {
+			int do_fixed = fixed;
+			int use_fd = fd;
+
+			if (sqthread)
+				use_fd = 0;
+			if (fixed && (i & 1))
+				do_fixed = 0;
+			if (do_fixed) {
+				io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base,
+								vecs[i].iov_len,
+								offset, i);
+			} else {
+				io_uring_prep_readv(sqe, use_fd, &vecs[i], 1,
+								offset);
+			}
+
+		}
+		if (sqthread)
+			sqe->flags |= IOSQE_FIXED_FILE;
+		if (buf_select) {
+			sqe->flags |= IOSQE_BUFFER_SELECT;
+			sqe->buf_group = buf_select;
+			sqe->user_data = i;
+		}
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		ret = io_uring_peek_cqe(ring, &cqe);
+		if (!ret && cqe->res == -EOPNOTSUPP) {
+			no_iopoll = 1;
+			io_uring_cqe_seen(ring, cqe);
+			goto out;
+		}
+		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
+		goto err;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			goto err;
+		} else if (cqe->res == -EOPNOTSUPP) {
+			fprintf(stdout, "File/device/fs doesn't support polled IO\n");
+			no_iopoll = 1;
+			goto out;
+		} else if (cqe->res != BS) {
+			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS);
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (fixed) {
+		ret = io_uring_unregister_buffers(ring);
+		if (ret) {
+			fprintf(stderr, "buffer unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+	if (sqthread) {
+		ret = io_uring_unregister_files(ring);
+		if (ret) {
+			fprintf(stderr, "file unreg failed: %d\n", ret);
+			goto err;
+		}
+	}
+
+out:
+	close(fd);
+	return 0;
+err:
+	if (fd != -1)
+		close(fd);
+	return 1;
+}
+
+static int test_iopoll(const char *fname, int write, int sqthread, int fixed,
+		   int buf_select, int defer)
+{
+	struct io_uring ring;
+	int ret, ring_flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
+
+	if (no_iopoll)
+		return 0;
+
+	if (defer)
+		ring_flags |= IORING_SETUP_SINGLE_ISSUER |
+			      IORING_SETUP_DEFER_TASKRUN;
+
+	ret = t_create_ring(64, &ring, ring_flags);
+	if (ret == T_SETUP_SKIP)
+		return 0;
+	if (ret != T_SETUP_OK) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+	ret = __test_iopoll_io(fname, &ring, write, sqthread, fixed, buf_select);
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int i, ret;
+	char buf[256];
+	char *fname;
+
+	if (argc > 1) {
+		fname = argv[1];
+	} else {
+		srand((unsigned)time(NULL));
+		snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+			(unsigned)rand(), (unsigned)getpid());
+		fname = buf;
+		t_create_file(fname, FILE_SIZE);
+	}
+
+	vecs = t_create_buffers(BUFFERS, BS);
+
+	for (i = 0; i < 32; i++) {
+		int write = (i & 1) != 0;
+		int sqthread = (i & 2) != 0;
+		int fixed = (i & 4) != 0;
+		int buf_select = (i & 8) != 0;
+		int defer = (i & 16) != 0;
+		int nonvec = buf_select;
+
+		ret = test_iopoll(fname, write, sqthread, fixed, buf_select, defer);
+		if (ret) {
+			fprintf(stderr, "test_iopoll_io failed %d/%d/%d/%d/%d\n",
+				write, sqthread, fixed, buf_select, defer);
+			goto err;
+		}
+		if (no_iopoll)
+			break;
+
+		ret = test_io_uring_passthrough(fname, i, write, sqthread, fixed, nonvec);
+		if (no_pt)
+			break;
+		if (ret) {
+			fprintf(stderr, "test_io_uring_passthrough_io failed %d/%d/%d/%d\n",
+				write, sqthread, fixed, nonvec);
+			goto err;
+		}
+	}
+
+	if (fname != argv[1])
+		unlink(fname);
+	return ret;
+err:
+	if (fname != argv[1])
+		unlink(fname);
+	return T_EXIT_FAIL;
+}
-- 
2.34.1


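For context, the flag added above is requested at ring creation like any
other setup flag. A minimal sketch of how an application might opt in
(illustrative only, not part of the patch; names are made up, and it
assumes a kernel that recognizes IORING_SETUP_HYBRID_IOPOLL):

	#include <liburing.h>

	static int setup_hybrid_poll_ring(struct io_uring *ring)
	{
		/* HYBRID_IOPOLL is only valid together with IOPOLL */
		unsigned int flags = IORING_SETUP_IOPOLL |
				     IORING_SETUP_HYBRID_IOPOLL;

		/* kernels without the feature return -EINVAL here */
		return io_uring_queue_init(64, ring, flags);
	}

As with plain IORING_SETUP_IOPOLL, regular file and block IO issued on
such a ring still needs O_DIRECT.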

* Re: [PATCH liburing] test: add test cases for hybrid iopoll
  2024-11-11 12:36 ` hexue
@ 2024-11-12 10:44   ` Anuj Gupta
  2024-11-12 15:43     ` Jens Axboe
  2024-11-13  3:32   ` Pavel Begunkov
  1 sibling, 1 reply; 8+ messages in thread
From: Anuj Gupta @ 2024-11-12 10:44 UTC (permalink / raw)
  To: hexue; +Cc: axboe, asml.silence, io-uring, linux-kernel

On 11/11/24 08:36PM, hexue wrote:
>Add a test file for hybrid iopoll to make sure it works safely. Test cases
>include basic read/write tests, run in normal iopoll mode and
>passthrough mode respectively.
>
>Signed-off-by: hexue <[email protected]>
>---
> man/io_uring_setup.2            |  10 +-
> src/include/liburing/io_uring.h |   3 +
> test/Makefile                   |   1 +
> test/iopoll-hybridpoll.c        | 544 ++++++++++++++++++++++++++++++++
> 4 files changed, 557 insertions(+), 1 deletion(-)
> create mode 100644 test/iopoll-hybridpoll.c
>
>diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
>index 2f87783..8cfafdc 100644
>--- a/man/io_uring_setup.2
>+++ b/man/io_uring_setup.2
>@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
> parameter set to the desired number of polling queues. The polling queues
> will be shared appropriately between the CPUs in the system, if the number
> is less than the number of online CPU threads.
>-
>+.TP
>+.B IORING_SETUP_HYBRID_IOPOLL
>+This flag must setup with
s/<must setup with>/<must be used with>

>+.B IORING_SETUP_IOPOLL
>+flag. hybrid poll is a new
s/hybrid/Hybrid
"new" is probably not required here

>+feature baed on iopoll, this could be a suboptimal solution when running
s/baed/based
s/<this could>/<which might>

>+on a single thread, it offers higher performance than IRQ and lower CPU
s/<, it>/<. It>

>+utilization than polling. Similarly, this feature also requires the devices
>+to support polling configuration.
This feature would work if a device doesn't have polled queues, right?
The performance might be suboptimal in that case, but userspace won't
get any errors.

[...]

This patch mostly looks fine. But the code here seems to be largely
duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
Can we consider adding the hybrid poll test as a part of the existing
tests, as it seems that it would only require passing an extra flag
during ring setup?




* Re: [PATCH liburing] test: add test cases for hybrid iopoll
  2024-11-12 10:44   ` Anuj Gupta
@ 2024-11-12 15:43     ` Jens Axboe
  0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2024-11-12 15:43 UTC (permalink / raw)
  To: Anuj Gupta, hexue; +Cc: asml.silence, io-uring, linux-kernel

On 11/12/24 3:44 AM, Anuj Gupta wrote:
>> +utilization than polling. Similarly, this feature also requires the devices
>> +to support polling configuration.
> This feature would work if a device doesn't have polled queues,right?
> The performance might be suboptimal in that case, but the userspace won't
> get any errors.

We've traditionally been a mix of lax and strict on this. IMHO we should
return -EOPNOTSUPP for IOPOLL (and IOPOLL|HYBRID) if polling isn't
configured correctly. I've seen way too many not realize that they need
to configure their nvme side for pollable queues for it to do what it
needs to do. If you don't and it's just allowed, then you don't really
get much of a win, you're just burning CPU.

Hence I do think that this should strongly recommend that the devices
support polling, that part is fine.

Agree with your other comments, thanks for reviewing it!

> This patch mostly looks fine. But the code here seems to be largely
> duplicated from "test/io_uring_passthrough.c" and "test/iopoll.c".
> Can we consider adding the hybrid poll test as a part of the existing
> tests as it seems that it would only require passing a extra flag
> during ring setup.

Yeah I do think modifying test/iopoll.c to test all the same
configurations but with HYBRID added would be the way to go, rather than
duplicate all of this. Ditto for passthrough.

-- 
Jens Axboe

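To make the two failure points above concrete, a sketch of how a test
might tell them apart (illustrative, based only on the behavior described
in this thread; the actual submission of a polled read is elided):

	#include <liburing.h>

	/* returns 0 if hybrid poll ran, 1 on setup skip, 2 on runtime skip */
	static int probe_hybrid_poll(struct io_uring *ring)
	{
		struct io_uring_cqe *cqe;
		int ret;

		ret = io_uring_queue_init(64, ring, IORING_SETUP_IOPOLL |
						    IORING_SETUP_HYBRID_IOPOLL);
		if (ret == -EINVAL)
			return 1;	/* setup-time: kernel lacks the flag */

		/* ... submit a read against an O_DIRECT fd here ... */

		ret = io_uring_wait_cqe(ring, &cqe);
		if (!ret && cqe->res == -EOPNOTSUPP) {
			/* runtime: device/fs has no polled queues */
			io_uring_cqe_seen(ring, cqe);
			return 2;
		}
		return 0;
	}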

* Re: [PATCH liburing] test: add test cases for hybrid iopoll
  2024-11-11 12:36 ` hexue
  2024-11-12 10:44   ` Anuj Gupta
@ 2024-11-13  3:32   ` Pavel Begunkov
  1 sibling, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2024-11-13  3:32 UTC (permalink / raw)
  To: hexue, axboe; +Cc: io-uring, linux-kernel

On 11/11/24 12:36, hexue wrote:
> Add a test file for hybrid iopoll to make sure it works safely. Test cases
> include basic read/write tests, run in normal iopoll mode and
> passthrough mode respectively.
> 
> Signed-off-by: hexue <[email protected]>

If it's not covered already, please add tests for failure cases,
e.g. when SETUP_HYBRID_IOPOLL is set without SETUP_IOPOLL.

> +static int test_io_uring_passthrough(const char *file, int tc, int write, int sqthread,
> +		   int fixed, int nonvec)
> +{
> +	struct io_uring ring;
> +	int ret, ring_flags = 0;
> +
> +	ring_flags |= IORING_SETUP_SQE128;
> +	ring_flags |= IORING_SETUP_CQE32;
> +	ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
> +
> +	if (sqthread)
> +		ring_flags |= IORING_SETUP_SQPOLL;

Doesn't it also need IORING_SETUP_IOPOLL?

> +
> +	ret = t_create_ring(64, &ring, ring_flags);
> +	if (ret == T_SETUP_SKIP)
> +		return 0;
> +	if (ret != T_SETUP_OK) {
> +		if (ret == -EINVAL) {
> +			no_pt = 1;
> +			return T_SETUP_SKIP;
> +		}
> +		fprintf(stderr, "ring create failed: %d\n", ret);
> +		return 1;
> +	}
> +
> +	ret = __test_io_uring_passthrough_io(file, &ring, tc, write, sqthread, fixed, nonvec);
> +	io_uring_queue_exit(&ring);
> +
> +	return ret;
> +}

-- 
Pavel Begunkov

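The failure case asked for here could look something like the sketch
below (illustrative only; it assumes, as confirmed later in the thread,
that the kernel rejects the lone flag with -EINVAL):

	#include <stdio.h>
	#include <liburing.h>

	static int test_hybrid_without_iopoll(void)
	{
		struct io_uring ring;
		int ret;

		/* must fail: HYBRID_IOPOLL without IORING_SETUP_IOPOLL */
		ret = io_uring_queue_init(8, &ring, IORING_SETUP_HYBRID_IOPOLL);
		if (ret != -EINVAL) {
			fprintf(stderr, "lone HYBRID_IOPOLL: got %d\n", ret);
			if (!ret)
				io_uring_queue_exit(&ring);
			return 1;
		}
		return 0;
	}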

* [PATCH liburing v2] test: add test cases for hybrid iopoll
       [not found] <CGME20241114050337epcas5p174214fb58aedefee4077447fa71b70f0@epcas5p1.samsung.com>
@ 2024-11-14  5:03 ` hexue
  2024-11-14 15:21   ` Jens Axboe
  0 siblings, 1 reply; 8+ messages in thread
From: hexue @ 2024-11-14  5:03 UTC (permalink / raw)
  To: axboe, asml.silence; +Cc: io-uring, linux-kernel, hexue

Add test cases for hybrid iopoll to make sure it works safely. Test cases
include basic read/write tests, run in normal iopoll mode and
passthrough mode respectively.

--
changes since v1:
- remove iopoll-hybridpoll.c
- test hybrid poll with existing iopoll and io_uring_passthrough
- add a misconfiguration check

Signed-off-by: hexue <[email protected]>
---
 man/io_uring_setup.2            | 10 +++++++++-
 src/include/liburing/io_uring.h |  3 +++
 src/setup.c                     |  4 ++++
 test/io_uring_passthrough.c     | 14 +++++++++-----
 test/iopoll.c                   | 22 +++++++++++++---------
 5 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index 2f87783..fa928fa 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
 parameter set to the desired number of polling queues. The polling queues
 will be shared appropriately between the CPUs in the system, if the number
 is less than the number of online CPU threads.
-
+.TP
+.B IORING_SETUP_HYBRID_IOPOLL
+This flag must setup with
+.B IORING_SETUP_IOPOLL
+flag. hybrid poll is a new
+feature baed on iopoll, this could be a suboptimal solution when running
+on a single thread, it offers higher performance than IRQ and lower CPU
+utilization than polling. Similarly, this feature also requires the devices
+to support polling configuration.
 .TP
 .B IORING_SETUP_SQPOLL
 When this flag is specified, a kernel thread is created to perform
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 20bc570..d16364c 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_NO_SQARRAY		(1U << 16)
 
+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL      (1U << 17)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
diff --git a/src/setup.c b/src/setup.c
index 073de50..d1a87aa 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -320,6 +320,10 @@ int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
 			ring->int_flags |= INT_FLAG_APP_MEM;
 	}
 
+	if ((p->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_HYBRID_IOPOLL)) ==
+			IORING_SETUP_HYBRID_IOPOLL)
+		return -EINVAL;
+
 	fd = __sys_io_uring_setup(entries, p);
 	if (fd < 0) {
 		if ((p->flags & IORING_SETUP_NO_MMAP) &&
diff --git a/test/io_uring_passthrough.c b/test/io_uring_passthrough.c
index f18a186..8604c42 100644
--- a/test/io_uring_passthrough.c
+++ b/test/io_uring_passthrough.c
@@ -254,7 +254,7 @@ err:
 }
 
 static int test_io(const char *file, int tc, int read, int sqthread,
-		   int fixed, int nonvec)
+		   int fixed, int nonvec, int hybrid)
 {
 	struct io_uring ring;
 	int ret, ring_flags = 0;
@@ -265,6 +265,9 @@ static int test_io(const char *file, int tc, int read, int sqthread,
 	if (sqthread)
 		ring_flags |= IORING_SETUP_SQPOLL;
 
+	if (hybrid)
+		ring_flags |= IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
+
 	ret = t_create_ring(64, &ring, ring_flags);
 	if (ret == T_SETUP_SKIP)
 		return 0;
@@ -449,18 +452,19 @@ int main(int argc, char *argv[])
 
 	vecs = t_create_buffers(BUFFERS, BS);
 
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < 32; i++) {
 		int read = (i & 1) != 0;
 		int sqthread = (i & 2) != 0;
 		int fixed = (i & 4) != 0;
 		int nonvec = (i & 8) != 0;
+		int hybrid = (i & 16) != 0;
 
-		ret = test_io(fname, i, read, sqthread, fixed, nonvec);
+		ret = test_io(fname, i, read, sqthread, fixed, nonvec, hybrid);
 		if (no_pt)
 			break;
 		if (ret) {
-			fprintf(stderr, "test_io failed %d/%d/%d/%d\n",
-				read, sqthread, fixed, nonvec);
+			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n",
+				read, sqthread, fixed, nonvec, hybrid);
 			goto err;
 		}
 	}
diff --git a/test/iopoll.c b/test/iopoll.c
index 2e0f7ea..0d7bd77 100644
--- a/test/iopoll.c
+++ b/test/iopoll.c
@@ -351,7 +351,7 @@ ok:
 }
 
 static int test_io(const char *file, int write, int sqthread, int fixed,
-		   int buf_select, int defer)
+		   int hybrid, int buf_select, int defer)
 {
 	struct io_uring ring;
 	int ret, ring_flags = IORING_SETUP_IOPOLL;
@@ -363,6 +363,9 @@ static int test_io(const char *file, int write, int sqthread, int fixed,
 		ring_flags |= IORING_SETUP_SINGLE_ISSUER |
 			      IORING_SETUP_DEFER_TASKRUN;
 
+	if (hybrid)
+		ring_flags |= IORING_SETUP_HYBRID_IOPOLL;
+
 	ret = t_create_ring(64, &ring, ring_flags);
 	if (ret == T_SETUP_SKIP)
 		return 0;
@@ -418,22 +421,23 @@ int main(int argc, char *argv[])
 
 	vecs = t_create_buffers(BUFFERS, BS);
 
-	nr = 32;
+	nr = 64;
 	if (no_buf_select)
-		nr = 8;
-	else if (!t_probe_defer_taskrun())
 		nr = 16;
+	else if (!t_probe_defer_taskrun())
+		nr = 32;
 	for (i = 0; i < nr; i++) {
 		int write = (i & 1) != 0;
 		int sqthread = (i & 2) != 0;
 		int fixed = (i & 4) != 0;
-		int buf_select = (i & 8) != 0;
-		int defer = (i & 16) != 0;
+		int hybrid = (i & 8) != 0;
+		int buf_select = (i & 16) != 0;
+		int defer = (i & 32) != 0;
 
-		ret = test_io(fname, write, sqthread, fixed, buf_select, defer);
+		ret = test_io(fname, write, sqthread, fixed, hybrid, buf_select, defer);
 		if (ret) {
-			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n",
-				write, sqthread, fixed, buf_select, defer);
+			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d/%d\n",
+				write, sqthread, fixed, hybrid, buf_select, defer);
 			goto err;
 		}
 		if (no_iopoll)
-- 
2.34.1


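The loop changes above follow the idiom these tests already use: each
boolean configuration axis occupies one bit of the loop counter, so
adding the hybrid axis doubles the iteration count. In general form
(standalone sketch, not taken from the patch):

	#include <stdio.h>

	/* three boolean axes -> iterate all 2^3 combinations */
	static void run_matrix(void)
	{
		int i;

		for (i = 0; i < (1 << 3); i++) {
			int axis_a = (i & 1) != 0;
			int axis_b = (i & 2) != 0;
			int axis_c = (i & 4) != 0;

			printf("case %d: a=%d b=%d c=%d\n",
				i, axis_a, axis_b, axis_c);
		}
	}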

* Re: [PATCH liburing v2] test: add test cases for hybrid iopoll
  2024-11-14  5:03 ` [PATCH liburing v2] test: add test cases for hybrid iopoll hexue
@ 2024-11-14 15:21   ` Jens Axboe
       [not found]     ` <CGME20241115033450epcas5p10bdbbfa584b483d8822535d43da868d2@epcas5p1.samsung.com>
  0 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2024-11-14 15:21 UTC (permalink / raw)
  To: hexue, asml.silence; +Cc: io-uring, linux-kernel

On 11/13/24 10:03 PM, hexue wrote:
> diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
> index 2f87783..fa928fa 100644
> --- a/man/io_uring_setup.2
> +++ b/man/io_uring_setup.2
> @@ -78,7 +78,15 @@ in question. For NVMe devices, the nvme driver must be loaded with the
>  parameter set to the desired number of polling queues. The polling queues
>  will be shared appropriately between the CPUs in the system, if the number
>  is less than the number of online CPU threads.
> -
> +.TP
> +.B IORING_SETUP_HYBRID_IOPOLL
> +This flag must setup with

This flag must be used with

> +.B IORING_SETUP_IOPOLL
> +flag. hybrid poll is a new

Like before, skip new. Think about what happens when someone reads this
in 5 years time. What does new mean? Yes it may be new now, but docs
are supposed to be timeless.

> +feature baed on iopoll, this could be a suboptimal solution when running

based on

> +on a single thread, it offers higher performance than IRQ and lower CPU
> +utilization than polling. Similarly, this feature also requires the devices
> +to support polling configuration.

This doesn't explain how it works. I'd say something like:

Hybrid io polling differs from strict polling in that it will delay a
bit before doing completion side polling, to avoid wasting too much CPU.
Like IOPOLL, it requires that devices support polling.


> diff --git a/src/setup.c b/src/setup.c
> index 073de50..d1a87aa 100644
> --- a/src/setup.c
> +++ b/src/setup.c
> @@ -320,6 +320,10 @@ int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
>  			ring->int_flags |= INT_FLAG_APP_MEM;
>  	}
>  
> +	if ((p->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_HYBRID_IOPOLL)) ==
> +			IORING_SETUP_HYBRID_IOPOLL)
> +		return -EINVAL;
> +

The kernel should already do this, no point duplicating it in liburing.

The test bits look much better now, way simpler. I'll just need to
double check that they handle EINVAL on setup properly, and EOPNOTSUPP
at completion time will turn off further testing of it. Did you run it
on configurations where hybrid io polling will both fail at setup time,
and at runtime (eg the latter where the kernel supports it, but the
device/fs does not)?

-- 
Jens Axboe


* Re: Re: [PATCH liburing] test: add test cases for hybrid iopoll
       [not found]     ` <CGME20241115033450epcas5p10bdbbfa584b483d8822535d43da868d2@epcas5p1.samsung.com>
@ 2024-11-15  3:34       ` hexue
  2024-11-15 15:40         ` Jens Axboe
  0 siblings, 1 reply; 8+ messages in thread
From: hexue @ 2024-11-15  3:34 UTC (permalink / raw)
  To: axboe; +Cc: asml.silence, io-uring, linux-kernel

>On 11/13/24 10:03 PM, hexue wrote:
>> diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
>> index 2f87783..fa928fa 100644
...
>
>This flag must be used with
>
>> +.B IORING_SETUP_IOPOLL
>> +flag. hybrid poll is a new
>
>Like before, skip new. Think about what happens when someone reads this
>in 5 years time. What does new mean? Yes it may be new now, but docs
>are supposed to be timeless.
>
>> +feature baed on iopoll, this could be a suboptimal solution when running
>
>based on
>
>> +on a single thread, it offers higher performance than IRQ and lower CPU
>> +utilization than polling. Similarly, this feature also requires the devices
>> +to support polling configuration.
>
>This doesn't explain how it works. I'd say something like:
>
>Hybrid io polling differs from strict polling in that it will delay a
>bit before doing completion side polling, to avoid wasting too much CPU.
>Like IOPOLL, it requires that devices support polling.

Thanks, will change the description.

...

>> +		return -EINVAL;
>> +

>The kernel should already do this, no point duplicating it in liburing.
>
>The test bits look much better now, way simpler. I'll just need to
>double check that they handle EINVAL on setup properly, and EOPNOTSUPP
>at completion time will turn off further testing of it. Did you run it
>on configurations where hybrid io polling will both fail at setup time,
>and at runtime (eg the latter where the kernel supports it, but the
>device/fs does not)?

Yes, I have run both of these error configurations. The cases were:
hybrid poll without IORING_SETUP_IOPOLL, and a device with an incorrect
queue configuration; EINVAL and EOPNOTSUPP were both identified.

--
hexue


* Re: [PATCH liburing] test: add test cases for hybrid iopoll
  2024-11-15  3:34       ` Re: [PATCH liburing] " hexue
@ 2024-11-15 15:40         ` Jens Axboe
  0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2024-11-15 15:40 UTC (permalink / raw)
  To: hexue; +Cc: asml.silence, io-uring, linux-kernel

On 11/14/24 8:34 PM, hexue wrote:
>> The kernel should already do this, no point duplicating it in liburing.
>>
>> The test bits look much better now, way simpler. I'll just need to
>> double check that they handle EINVAL on setup properly, and EOPNOTSUPP
>> at completion time will turn off further testing of it. Did you run it
>> on configurations where hybrid io polling will both fail at setup time,
>> and at runtime (eg the latter where the kernel supports it, but the
>> device/fs does not)?
> 
> Yes, I have run both of these error configurations. The running cases are: 
> hybrid poll without IORING_SETUP_IOPOLL and device with incorrect queue
> configuration, EINVAL and EOPNOTSUPP are both identified.

I figured that was the case, as the existing test case should already
cover both of those cases. I'll get this applied once you send the
updated version.

-- 
Jens Axboe

