From: Hao Xu <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
[email protected]
Subject: [PATCH 5/5] io_uring: implement multishot mode for accept
Date: Fri, 6 May 2022 15:01:02 +0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
From: Hao Xu <[email protected]>
Refactor io_accept() to support multishot mode.
theoretical analysis:
1) when connections come in fast
- singleshot:
add accept sqe(userspace) --> accept inline
^ |
|-----------------|
- multishot:
add accept sqe(userspace) --> accept inline
^ |
|--*--|
we accept repeatedly at the * point until we get EAGAIN
2) when connections come in at a low pressure
Similar to 1): we avoid a lot of userspace-kernel context
switches and useless vfs_poll() calls.
tests:
Did some tests, which go as follows:
server client(multiple)
accept connect
read write
write read
close close
Basically, start a number of clients (on the same machine as the server)
that connect to the server and then write some data to it. The server
writes that data back to the client after receiving it, and then closes
the connection once the write returns. The client then reads the data
and closes the connection. Here I tested 10000 clients connecting to one
server, with a data size of 128 bytes. Each client has its own goroutine,
so they all reach the server within a short time.
Ran the test 20 times before/after this patchset; time spent (unit: cycles,
i.e. the return value of clock()):
before:
(1930136+1940725+1907981+1947601+1923812+1928226+1911087+1905897+1941075
+1934374+1906614+1912504+1949110+1908790+1909951+1941672+1969525+1934984
+1934226+1914385)/20.0 = 1927633.75
after:
(1858905+1917104+1895455+1963963+1892706+1889208+1874175+1904753+1874112
+1874985+1882706+1884642+1864694+1906508+1916150+1924250+1869060+1889506
+1871324+1940803)/20.0 = 1894750.45
(1927633.75 - 1894750.45) / 1927633.75 ≈ 1.71%
Signed-off-by: Hao Xu <[email protected]>
---
fs/io_uring.c | 54 +++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 48 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0a83ecc457d1..9febe7774dc3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1254,6 +1254,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx);
static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
+static void io_poll_remove_entries(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
@@ -5690,24 +5691,29 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = &req->accept;
+ bool multishot;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index)
+ if (sqe->len || sqe->buf_index)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
+ multishot = !!(READ_ONCE(sqe->ioprio) & IORING_ACCEPT_MULTISHOT);
accept->file_slot = READ_ONCE(sqe->file_index);
- if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
+ if (accept->file_slot && ((accept->flags & SOCK_CLOEXEC) || multishot))
return -EINVAL;
if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+ if (multishot)
+ req->flags |= REQ_F_APOLL_MULTISHOT;
+
return 0;
}
@@ -5730,6 +5736,7 @@ static inline void io_poll_clean(struct io_kiocb *req)
static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
+ struct io_ring_ctx *ctx = req->ctx;
struct io_accept *accept = &req->accept;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -5737,10 +5744,13 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
struct file *file;
int ret, fd;
+retry:
if (!fixed) {
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
- if (unlikely(fd < 0))
+ if (unlikely(fd < 0)) {
+ io_poll_clean(req);
return fd;
+ }
}
file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
accept->flags);
@@ -5748,8 +5758,12 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (!fixed)
put_unused_fd(fd);
ret = PTR_ERR(file);
- if (ret == -EAGAIN && force_nonblock)
- return -EAGAIN;
+ if (ret == -EAGAIN && force_nonblock) {
+ if ((req->flags & REQ_F_APOLL_MULTI_POLLED) ==
+ REQ_F_APOLL_MULTI_POLLED)
+ ret = 0;
+ return ret;
+ }
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
@@ -5760,7 +5774,35 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
ret = io_install_fixed_file(req, file, issue_flags,
accept->file_slot - 1);
}
- __io_req_complete(req, issue_flags, ret, 0);
+
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
+ if (ret >= 0) {
+ bool filled;
+
+ spin_lock(&ctx->completion_lock);
+ filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+ IORING_CQE_F_MORE);
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ if (unlikely(!filled)) {
+ io_poll_clean(req);
+ return -ECANCELED;
+ }
+ io_cqring_ev_posted(ctx);
+ goto retry;
+ } else {
+ /*
+ * the apoll multishot req should handle poll
+ * cancellation by itself since the upper layer
+ * who called io_queue_sqe() cannot get errors
+ * happened here.
+ */
+ io_poll_clean(req);
+ return ret;
+ }
+ } else {
+ __io_req_complete(req, issue_flags, ret, 0);
+ }
return 0;
}
--
2.36.0
next prev parent reply other threads:[~2022-05-06 7:01 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-06 7:00 [PATCH v2 0/5] fast poll multishot mode Hao Xu
2022-05-06 7:00 ` [PATCH 1/5] io_uring: add IORING_ACCEPT_MULTISHOT for accept Hao Xu
2022-05-06 14:32 ` Jens Axboe
2022-05-07 4:05 ` Hao Xu
2022-05-06 7:00 ` [PATCH 2/5] io_uring: add REQ_F_APOLL_MULTISHOT for requests Hao Xu
2022-05-06 7:01 ` [PATCH 3/5] io_uring: let fast poll support multishot Hao Xu
2022-05-06 17:19 ` Pavel Begunkov
2022-05-06 22:02 ` Jens Axboe
2022-05-07 6:32 ` Hao Xu
2022-05-07 9:26 ` Pavel Begunkov
2022-05-07 7:08 ` Hao Xu
2022-05-07 9:47 ` Pavel Begunkov
2022-05-07 11:06 ` Hao Xu
2022-05-06 18:02 ` kernel test robot
2022-05-06 7:01 ` [PATCH 4/5] io_uring: add a helper for poll clean Hao Xu
2022-05-06 11:04 ` kernel test robot
2022-05-06 12:47 ` kernel test robot
2022-05-06 14:36 ` Jens Axboe
2022-05-07 6:37 ` Hao Xu
2022-05-06 16:22 ` Pavel Begunkov
2022-05-07 6:43 ` Hao Xu
2022-05-07 9:29 ` Pavel Begunkov
2022-05-06 7:01 ` Hao Xu [this message]
2022-05-06 14:42 ` [PATCH 5/5] io_uring: implement multishot mode for accept Jens Axboe
2022-05-07 9:13 ` Hao Xu
2022-05-06 20:50 ` Jens Axboe
2022-05-06 21:29 ` Jens Axboe
2022-05-06 7:36 ` [PATCH v2 0/5] fast poll multishot mode Hao Xu
2022-05-06 14:18 ` Jens Axboe
2022-05-06 16:01 ` Pavel Begunkov
2022-05-06 16:03 ` Jens Axboe
2022-05-06 22:23 ` Jens Axboe
2022-05-06 23:26 ` Jens Axboe
2022-05-07 2:33 ` Jens Axboe
2022-05-07 3:08 ` Jens Axboe
2022-05-07 16:01 ` Hao Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox