public inbox for [email protected]
* [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests
  2022-05-07 14:06 [PATCH v3 0/4] fast poll multishot mode Hao Xu
@ 2022-05-07 14:06 ` Hao Xu
  0 siblings, 0 replies; 10+ messages in thread
From: Hao Xu @ 2022-05-07 14:06 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov, linux-kernel

From: Hao Xu <[email protected]>

Add a flag to indicate multishot mode for fast poll. Currently only
accept uses it, but more operations may leverage it in the future.
Also add a mask, IO_APOLL_MULTI_POLLED, which stands for
REQ_F_APOLL_MULTISHOT | REQ_F_POLLED, to make the code shorter and
cleaner.
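
As used later in this series (see io_accept() in patch 4/4), checking
that both bits are set distinguishes a multishot request that fast poll
has already armed:

	/* sketch, condensed from io_accept() in patch 4/4 */
	if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED)
		ret = 0;	/* fast poll will rearm and retry the accept */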

Signed-off-by: Hao Xu <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b6d491c9a25f..c2ee184ac693 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -116,6 +116,8 @@
 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
 				 IO_REQ_CLEAN_FLAGS)
 
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
 struct io_uring {
@@ -810,6 +812,7 @@ enum {
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_PARTIAL_IO_BIT,
+	REQ_F_APOLL_MULTISHOT_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -874,6 +877,8 @@ enum {
 	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 	/* request has already done partial IO */
 	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
+	/* fast poll multishot mode */
+	REQ_F_APOLL_MULTISHOT	= BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
-- 
2.36.0



* [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests
       [not found] <[email protected]>
@ 2022-05-07 17:15 ` Hao Xu
  0 siblings, 0 replies; 10+ messages in thread
From: Hao Xu @ 2022-05-07 17:15 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

Add a flag to indicate multishot mode for fast poll. Currently only
accept uses it, but more operations may leverage it in the future.
Also add a mask, IO_APOLL_MULTI_POLLED, which stands for
REQ_F_APOLL_MULTISHOT | REQ_F_POLLED, to make the code shorter and
cleaner.

Signed-off-by: Hao Xu <[email protected]>
---
 fs/io_uring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b6d491c9a25f..c2ee184ac693 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -116,6 +116,8 @@
 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
 				 IO_REQ_CLEAN_FLAGS)
 
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
 struct io_uring {
@@ -810,6 +812,7 @@ enum {
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_PARTIAL_IO_BIT,
+	REQ_F_APOLL_MULTISHOT_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -874,6 +877,8 @@ enum {
 	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 	/* request has already done partial IO */
 	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
+	/* fast poll multishot mode */
+	REQ_F_APOLL_MULTISHOT	= BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
-- 
2.25.1



* [PATCH 1/4] io_uring: add IORING_ACCEPT_MULTISHOT for accept
       [not found] <[email protected]>
@ 2022-05-08 15:32 ` Hao Xu
  2022-05-08 15:32 ` [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests Hao Xu
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Hao Xu @ 2022-05-08 15:32 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

Add an accept flag, IORING_ACCEPT_MULTISHOT, for accept, to request
multishot mode.
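
As a rough sketch of the userspace side (illustrative only: "ring" and
"listen_fd" are assumed to be an initialized io_uring instance and a
listening socket, with liburing used for sqe setup):

	#include <errno.h>
	#include <liburing.h>

	/* Arm a multishot accept; the flag is carried in sqe->ioprio. */
	static int arm_multishot_accept(struct io_uring *ring, int listen_fd)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		if (!sqe)
			return -EBUSY;
		io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
		sqe->ioprio |= IORING_ACCEPT_MULTISHOT;
		return io_uring_submit(ring);
	}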

Signed-off-by: Hao Xu <[email protected]>
---
 include/uapi/linux/io_uring.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 06621a278cb6..7c3d70d12428 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -223,6 +223,11 @@ enum {
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
 
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
-- 
2.25.1



* [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests
       [not found] <[email protected]>
  2022-05-08 15:32 ` [PATCH 1/4] io_uring: add IORING_ACCEPT_MULTISHOT for accept Hao Xu
@ 2022-05-08 15:32 ` Hao Xu
  2022-05-08 15:32 ` [PATCH 3/4] io_uring: let fast poll support multishot Hao Xu
  2022-05-08 15:32 ` [PATCH 4/4] io_uring: implement multishot mode for accept Hao Xu
  3 siblings, 0 replies; 10+ messages in thread
From: Hao Xu @ 2022-05-08 15:32 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

Add a flag to indicate multishot mode for fast poll. Currently only
accept uses it, but more operations may leverage it in the future.
Also add a mask, IO_APOLL_MULTI_POLLED, which stands for
REQ_F_APOLL_MULTISHOT | REQ_F_POLLED, to make the code shorter and
cleaner.

Signed-off-by: Hao Xu <[email protected]>
---
 fs/io_uring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b6d491c9a25f..c2ee184ac693 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -116,6 +116,8 @@
 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
 				 IO_REQ_CLEAN_FLAGS)
 
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
 struct io_uring {
@@ -810,6 +812,7 @@ enum {
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_PARTIAL_IO_BIT,
+	REQ_F_APOLL_MULTISHOT_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -874,6 +877,8 @@ enum {
 	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 	/* request has already done partial IO */
 	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
+	/* fast poll multishot mode */
+	REQ_F_APOLL_MULTISHOT	= BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
-- 
2.25.1



* [PATCH 3/4] io_uring: let fast poll support multishot
       [not found] <[email protected]>
  2022-05-08 15:32 ` [PATCH 1/4] io_uring: add IORING_ACCEPT_MULTISHOT for accept Hao Xu
  2022-05-08 15:32 ` [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests Hao Xu
@ 2022-05-08 15:32 ` Hao Xu
  2022-05-10  3:20   ` Jens Axboe
  2022-05-08 15:32 ` [PATCH 4/4] io_uring: implement multishot mode for accept Hao Xu
  3 siblings, 1 reply; 10+ messages in thread
From: Hao Xu @ 2022-05-08 15:32 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

For operations like accept, multishot is a useful feature, since we
can reduce the number of accept sqes. Let's integrate it into fast
poll; it may be useful for other operations in the future.
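
The crux of the integration is the new branch at the end of the
io_poll_check_events() loop: for a multishot request, the opcode's
issue handler is invoked directly from poll task_work, and its return
value decides whether the poll stays armed. In outline (condensed from
the hunk below):

	io_tw_lock(req->ctx, locked);
	if (unlikely(req->task->flags & PF_EXITING))
		return -EFAULT;
	ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
	if (ret)
		return ret;	/* error terminates the multishot */
	/* ret == 0: the op posted its own CQE(s), loop and stay armed */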

Signed-off-by: Hao Xu <[email protected]>
---
 fs/io_uring.c | 47 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c2ee184ac693..e0d12af04cd1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5955,6 +5955,7 @@ static void io_poll_remove_entries(struct io_kiocb *req)
 	rcu_read_unlock();
 }
 
+static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags);
 /*
  * All poll tw should go through this. Checks for poll events, manages
  * references, does rewait, etc.
@@ -5963,10 +5964,10 @@ static void io_poll_remove_entries(struct io_kiocb *req)
  * either spurious wakeup or multishot CQE is served. 0 when it's done with
  * the request, then the mask is stored in req->cqe.res.
  */
-static int io_poll_check_events(struct io_kiocb *req, bool locked)
+static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	int v;
+	int v, ret;
 
 	/* req->task == current here, checking PF_EXITING is safe */
 	if (unlikely(req->task->flags & PF_EXITING))
@@ -5990,23 +5991,37 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked)
 			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
 		}
 
-		/* multishot, just fill an CQE and proceed */
-		if (req->cqe.res && !(req->apoll_events & EPOLLONESHOT)) {
-			__poll_t mask = mangle_poll(req->cqe.res & req->apoll_events);
+		if ((unlikely(!req->cqe.res)))
+			continue;
+		if (req->apoll_events & EPOLLONESHOT)
+			return 0;
+
+		/* multishot, just fill a CQE and proceed */
+		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+			__poll_t mask = mangle_poll(req->cqe.res &
+						    req->apoll_events);
 			bool filled;
 
 			spin_lock(&ctx->completion_lock);
-			filled = io_fill_cqe_aux(ctx, req->cqe.user_data, mask,
-						 IORING_CQE_F_MORE);
+			filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
+						 mask, IORING_CQE_F_MORE);
 			io_commit_cqring(ctx);
 			spin_unlock(&ctx->completion_lock);
-			if (unlikely(!filled))
-				return -ECANCELED;
-			io_cqring_ev_posted(ctx);
-		} else if (req->cqe.res) {
-			return 0;
+			if (filled) {
+				io_cqring_ev_posted(ctx);
+				continue;
+			}
+			return -ECANCELED;
 		}
 
+		io_tw_lock(req->ctx, locked);
+		if (unlikely(req->task->flags & PF_EXITING))
+			return -EFAULT;
+		ret = io_issue_sqe(req,
+				   IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+		if (ret)
+			return ret;
+
 		/*
 		 * Release all references, retry if someone tried to restart
 		 * task_work while we were executing it.
@@ -6021,7 +6036,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
 
-	ret = io_poll_check_events(req, *locked);
+	ret = io_poll_check_events(req, locked);
 	if (ret > 0)
 		return;
 
@@ -6046,7 +6061,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
 
-	ret = io_poll_check_events(req, *locked);
+	ret = io_poll_check_events(req, locked);
 	if (ret > 0)
 		return;
 
@@ -6286,7 +6301,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 	struct io_ring_ctx *ctx = req->ctx;
 	struct async_poll *apoll;
 	struct io_poll_table ipt;
-	__poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
+	__poll_t mask = POLLERR | POLLPRI;
 	int ret;
 
 	if (!def->pollin && !def->pollout)
@@ -6295,6 +6310,8 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 		return IO_APOLL_ABORTED;
 	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
 		return IO_APOLL_ABORTED;
+	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
+		mask |= EPOLLONESHOT;
 
 	if (def->pollin) {
 		mask |= POLLIN | POLLRDNORM;
-- 
2.25.1



* [PATCH 4/4] io_uring: implement multishot mode for accept
       [not found] <[email protected]>
                   ` (2 preceding siblings ...)
  2022-05-08 15:32 ` [PATCH 3/4] io_uring: let fast poll support multishot Hao Xu
@ 2022-05-08 15:32 ` Hao Xu
  2022-05-10  3:20   ` Jens Axboe
  3 siblings, 1 reply; 10+ messages in thread
From: Hao Xu @ 2022-05-08 15:32 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

Refactor io_accept() to support multishot mode.

theoretical analysis:
  1) when connections come in fast
    - singleshot:
              add accept sqe(userpsace) --> accept inline
                              ^                 |
                              |-----------------|
    - multishot:
             add accept sqe(userspace) --> accept inline
                                              ^     |
                                              |--*--|

    we do accept repeatedly at the * point until we get EAGAIN

  2) when connections come in at low pressure
    similar to 1), we save a lot of userspace-kernel context switches
    and useless vfs_poll() calls

tests:
Did some tests, which went like this:

  server    client(multiple)
  accept    connect
  read      write
  write     read
  close     close

Basically, spin up a number of clients (on the same machine as the
server) to connect to the server and write some data to it; the server
writes the data back to the client after receiving it, then closes the
connection once the write returns. The client then reads the data and
closes the connection. Here I test 10000 clients connecting to one
server, with a data size of 128 bytes. Each client runs in its own
goroutine, so they all reach the server within a short time.
Tested 20 times before/after this patchset; time spent (unit: cycles,
i.e. the return value of clock()):
before:
  (1930136+1940725+1907981+1947601+1923812+1928226+1911087+1905897+1941075
  +1934374+1906614+1912504+1949110+1908790+1909951+1941672+1969525+1934984
  +1934226+1914385)/20.0 = 1927633.75
after:
  (1858905+1917104+1895455+1963963+1892706+1889208+1874175+1904753+1874112
  +1874985+1882706+1884642+1864694+1906508+1916150+1924250+1869060+1889506
  +1871324+1940803)/20.0 = 1894750.45

(1927633.75 - 1894750.45) / 1927633.75 = 1.71%
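
For completeness, a hedged sketch of how userspace consumes multishot
accept completions: each accepted fd arrives in its own CQE, flagged
IORING_CQE_F_MORE for as long as the same accept sqe stays armed
(handle_client() is an assumed application callback, and liburing is
assumed for the CQE loop):

	#include <stdbool.h>
	#include <liburing.h>

	extern void handle_client(int fd);	/* assumed app callback */

	static void reap_accepts(struct io_uring *ring)
	{
		struct io_uring_cqe *cqe;

		while (io_uring_wait_cqe(ring, &cqe) == 0) {
			int fd = cqe->res;
			bool more = cqe->flags & IORING_CQE_F_MORE;

			io_uring_cqe_seen(ring, cqe);
			if (fd >= 0)
				handle_client(fd);
			if (!more)
				break;	/* multishot terminated; rearm if needed */
		}
	}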

Signed-off-by: Hao Xu <[email protected]>
---
 fs/io_uring.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e0d12af04cd1..f21172913336 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1146,6 +1146,7 @@ static const struct io_op_def io_op_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
 		.poll_exclusive		= 1,
+		.ioprio			= 1,	/* used for flags */
 	},
 	[IORING_OP_ASYNC_CANCEL] = {
 		.audit_skip		= 1,
@@ -5706,6 +5707,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_accept *accept = &req->accept;
+	unsigned flags;
 
 	if (sqe->len || sqe->buf_index)
 		return -EINVAL;
@@ -5714,19 +5716,26 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 	accept->flags = READ_ONCE(sqe->accept_flags);
 	accept->nofile = rlimit(RLIMIT_NOFILE);
+	flags = READ_ONCE(sqe->ioprio);
+	if (flags & ~IORING_ACCEPT_MULTISHOT)
+		return -EINVAL;
 
 	accept->file_slot = READ_ONCE(sqe->file_index);
-	if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
+	if (accept->file_slot && ((accept->flags & SOCK_CLOEXEC) ||
+	   flags & IORING_ACCEPT_MULTISHOT))
 		return -EINVAL;
 	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
 		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+	if (flags & IORING_ACCEPT_MULTISHOT)
+		req->flags |= REQ_F_APOLL_MULTISHOT;
 	return 0;
 }
 
 static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 {
+	struct io_ring_ctx *ctx = req->ctx;
 	struct io_accept *accept = &req->accept;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -5734,6 +5743,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 	struct file *file;
 	int ret, fd;
 
+retry:
 	if (!fixed) {
 		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
 		if (unlikely(fd < 0))
@@ -5745,8 +5755,12 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 		if (!fixed)
 			put_unused_fd(fd);
 		ret = PTR_ERR(file);
-		if (ret == -EAGAIN && force_nonblock)
-			return -EAGAIN;
+		if (ret == -EAGAIN && force_nonblock) {
+			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
+			    IO_APOLL_MULTI_POLLED)
+				ret = 0;
+			return ret;
+		}
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 		req_set_fail(req);
@@ -5757,8 +5771,26 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 		ret = io_install_fixed_file(req, file, issue_flags,
 					    accept->file_slot - 1);
 	}
-	__io_req_complete(req, issue_flags, ret, 0);
-	return 0;
+
+	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+		__io_req_complete(req, issue_flags, ret, 0);
+		return 0;
+	}
+	if (ret >= 0) {
+		bool filled;
+
+		spin_lock(&ctx->completion_lock);
+		filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+					 IORING_CQE_F_MORE);
+		io_commit_cqring(ctx);
+		spin_unlock(&ctx->completion_lock);
+		if (!filled)
+			return -ECANCELED;
+		io_cqring_ev_posted(ctx);
+		goto retry;
+	}
+
+	return ret;
 }
 
 static int io_connect_prep_async(struct io_kiocb *req)
-- 
2.25.1



* [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests
       [not found] <[email protected]>
@ 2022-05-08 15:37 ` Hao Xu
  0 siblings, 0 replies; 10+ messages in thread
From: Hao Xu @ 2022-05-08 15:37 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

Add a flag to indicate multishot mode for fast poll. Currently only
accept uses it, but more operations may leverage it in the future.
Also add a mask, IO_APOLL_MULTI_POLLED, which stands for
REQ_F_APOLL_MULTISHOT | REQ_F_POLLED, to make the code shorter and
cleaner.

Signed-off-by: Hao Xu <[email protected]>
---
 fs/io_uring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b6d491c9a25f..c2ee184ac693 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -116,6 +116,8 @@
 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
 				 IO_REQ_CLEAN_FLAGS)
 
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
 struct io_uring {
@@ -810,6 +812,7 @@ enum {
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_PARTIAL_IO_BIT,
+	REQ_F_APOLL_MULTISHOT_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -874,6 +877,8 @@ enum {
 	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 	/* request has already done partial IO */
 	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
+	/* fast poll multishot mode */
+	REQ_F_APOLL_MULTISHOT	= BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
-- 
2.25.1



* Re: [PATCH 4/4] io_uring: implement multishot mode for accept
  2022-05-08 15:32 ` [PATCH 4/4] io_uring: implement multishot mode for accept Hao Xu
@ 2022-05-10  3:20   ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2022-05-10  3:20 UTC
  To: Hao Xu, io-uring; +Cc: Pavel Begunkov

On 5/8/22 9:32 AM, Hao Xu wrote:
> From: Hao Xu <[email protected]>
> 
> Refactor io_accept() to support multishot mode.
> 
> theoretical analysis:
>   1) when connections come in fast
>     - singleshot:
>               add accept sqe(userpsace) --> accept inline

userspace here too, like in the cover letter.

> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index e0d12af04cd1..f21172913336 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1146,6 +1146,7 @@ static const struct io_op_def io_op_defs[] = {
>  		.unbound_nonreg_file	= 1,
>  		.pollin			= 1,
>  		.poll_exclusive		= 1,
> +		.ioprio			= 1,	/* used for flags */
>  	},
>  	[IORING_OP_ASYNC_CANCEL] = {
>  		.audit_skip		= 1,
> @@ -5706,6 +5707,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
>  static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  {
>  	struct io_accept *accept = &req->accept;
> +	unsigned flags;
>  
>  	if (sqe->len || sqe->buf_index)
>  		return -EINVAL;
> @@ -5714,19 +5716,26 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>  	accept->flags = READ_ONCE(sqe->accept_flags);
>  	accept->nofile = rlimit(RLIMIT_NOFILE);
> +	flags = READ_ONCE(sqe->ioprio);
> +	if (flags & ~IORING_ACCEPT_MULTISHOT)
> +		return -EINVAL;
>  
>  	accept->file_slot = READ_ONCE(sqe->file_index);
> -	if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
> +	if (accept->file_slot && ((accept->flags & SOCK_CLOEXEC) ||
> +	   flags & IORING_ACCEPT_MULTISHOT))
>  		return -EINVAL;

So something ala:

	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}

when rebased as mentioned in the cover reply.



>  static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
>  {
> +	struct io_ring_ctx *ctx = req->ctx;
>  	struct io_accept *accept = &req->accept;
>  	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
>  	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
> @@ -5734,6 +5743,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
>  	struct file *file;
>  	int ret, fd;
>  
> +retry:
>  	if (!fixed) {
>  		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
>  		if (unlikely(fd < 0))
> @@ -5745,8 +5755,12 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
>  		if (!fixed)
>  			put_unused_fd(fd);
>  		ret = PTR_ERR(file);
> -		if (ret == -EAGAIN && force_nonblock)
> -			return -EAGAIN;
> +		if (ret == -EAGAIN && force_nonblock) {
> +			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
> +			    IO_APOLL_MULTI_POLLED)
> +				ret = 0;
> +			return ret;
> +		}

Could probably do with a comment here for this check, it's not
immediately obvious unless you've already been deep in this part.

> @@ -5757,8 +5771,26 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
>  		ret = io_install_fixed_file(req, file, issue_flags,
>  					    accept->file_slot - 1);
>  	}
> -	__io_req_complete(req, issue_flags, ret, 0);
> -	return 0;
> +
> +	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
> +		__io_req_complete(req, issue_flags, ret, 0);
> +		return 0;
> +	}
> +	if (ret >= 0) {
> +		bool filled;
> +
> +		spin_lock(&ctx->completion_lock);
> +		filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
> +					 IORING_CQE_F_MORE);
> +		io_commit_cqring(ctx);
> +		spin_unlock(&ctx->completion_lock);
> +		if (!filled)
> +			return -ECANCELED;
> +		io_cqring_ev_posted(ctx);
> +		goto retry;
> +	}
> +
> +	return ret;
>  }

As mentioned, I still think this should be:

		...
		if (filled) {
			io_cqring_ev_posted(ctx);
			goto retry;
		}
		ret = -ECANCELED;
	}

	return ret;

-- 
Jens Axboe



* Re: [PATCH 3/4] io_uring: let fast poll support multishot
  2022-05-08 15:32 ` [PATCH 3/4] io_uring: let fast poll support multishot Hao Xu
@ 2022-05-10  3:20   ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2022-05-10  3:20 UTC
  To: Hao Xu, io-uring; +Cc: Pavel Begunkov

On 5/8/22 9:32 AM, Hao Xu wrote:
> From: Hao Xu <[email protected]>
> 
> For operations like accept, multishot is a useful feature, since we
> can reduce the number of accept sqes. Let's integrate it into fast
> poll; it may be useful for other operations in the future.

This, and 1-2 look good to me now.

-- 
Jens Axboe



* [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests
  2022-05-14 14:20 [PATCH v6 0/4] fast poll multishot mode Hao Xu
@ 2022-05-14 14:20 ` Hao Xu
  0 siblings, 0 replies; 10+ messages in thread
From: Hao Xu @ 2022-05-14 14:20 UTC
  To: io-uring; +Cc: Jens Axboe, Pavel Begunkov

From: Hao Xu <[email protected]>

Add a flag to indicate multishot mode for fast poll. Currently only
accept uses it, but more operations may leverage it in the future.
Also add a mask, IO_APOLL_MULTI_POLLED, which stands for
REQ_F_APOLL_MULTISHOT | REQ_F_POLLED, to make the code shorter and
cleaner.

Signed-off-by: Hao Xu <[email protected]>
---
 fs/io_uring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c4bd0faf0b27..e1e550de5956 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -116,6 +116,8 @@
 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
 				 IO_REQ_CLEAN_FLAGS)
 
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
 struct io_uring {
@@ -812,6 +814,7 @@ enum {
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_PARTIAL_IO_BIT,
+	REQ_F_APOLL_MULTISHOT_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -876,6 +879,8 @@ enum {
 	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 	/* request has already done partial IO */
 	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
+	/* fast poll multishot mode */
+	REQ_F_APOLL_MULTISHOT	= BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
-- 
2.36.0



end of thread

Thread overview: 10+ messages
     [not found] <[email protected]>
2022-05-08 15:32 ` [PATCH 1/4] io_uring: add IORING_ACCEPT_MULTISHOT for accept Hao Xu
2022-05-08 15:32 ` [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests Hao Xu
2022-05-08 15:32 ` [PATCH 3/4] io_uring: let fast poll support multishot Hao Xu
2022-05-10  3:20   ` Jens Axboe
2022-05-08 15:32 ` [PATCH 4/4] io_uring: implement multishot mode for accept Hao Xu
2022-05-10  3:20   ` Jens Axboe
2022-05-14 14:20 [PATCH v6 0/4] fast poll multishot mode Hao Xu
2022-05-14 14:20 ` [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests Hao Xu
     [not found] <[email protected]>
2022-05-08 15:37 ` Hao Xu
     [not found] <[email protected]>
2022-05-07 17:15 ` Hao Xu
  -- strict thread matches above, loose matches on Subject: below --
2022-05-07 14:06 [PATCH v3 0/4] fast poll multishot mode Hao Xu
2022-05-07 14:06 ` [PATCH 2/4] io_uring: add REQ_F_APOLL_MULTISHOT for requests Hao Xu
