* [PATCH v2 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
From: Xiaoguang Wang @ 2020-05-27 16:49 UTC
To: io-uring; +Cc: axboe, asml.silence, joseph.qi, Xiaoguang Wang
If requests can be submitted and completed inline, we don't need to
initialize the whole io_wq_work in io_init_req(), which is an expensive
operation. Add a new flag, 'REQ_F_WORK_INITIALIZED', to track whether
io_wq_work has been initialized, so it is only set up on the paths that
actually hand the request off to io-wq.
I used /dev/nullb0 to evaluate the performance improvement on my
physical machine:
modprobe null_blk nr_devices=1 completion_nsec=0
sudo taskset -c 60 fio -name=fiotest -filename=/dev/nullb0 -iodepth=128
-thread -rw=read -ioengine=io_uring -direct=1 -bs=4k -size=100G -numjobs=1
-time_based -runtime=120
Before this patch:
Run status group 0 (all jobs):
READ: bw=724MiB/s (759MB/s), 724MiB/s-724MiB/s (759MB/s-759MB/s),
io=84.8GiB (91.1GB), run=120001-120001msec
With this patch:
Run status group 0 (all jobs):
READ: bw=761MiB/s (798MB/s), 761MiB/s-761MiB/s (798MB/s-798MB/s),
io=89.2GiB (95.8GB), run=120001-120001msec
About 5% improvement.
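
To make the fast path concrete, here is a minimal userspace sketch (not
part of the patch; it assumes liburing is installed and the /dev/nullb0
device created by the modprobe above): a single 4k O_DIRECT read, which
null_blk with completion_nsec=0 is expected to complete without ever
punting to io-wq, so with this patch io_wq_work is never initialized
for it:

#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	void *buf;
	int fd, ret;

	fd = open("/dev/nullb0", O_RDONLY | O_DIRECT);
	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return 1;

	/* one 4k read; expected to complete without io-wq involvement */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, 4096, 0);
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		printf("read res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	close(fd);
	free(buf);
	return 0;
}

The fio run above drives the same style of request at iodepth=128.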
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/io-wq.h | 5 ----
fs/io_uring.c | 78 ++++++++++++++++++++++++++++++++++++++++-----------
2 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 5ba12de7572f..3d85d365d764 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -94,11 +94,6 @@ struct io_wq_work {
pid_t task_pid;
};
-#define INIT_IO_WORK(work, _func) \
- do { \
- *(work) = (struct io_wq_work){ .func = _func }; \
- } while (0) \
-
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
{
if (!work->list.next)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2af87f73848e..7ba8590a45a6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -535,6 +535,7 @@ enum {
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
+ REQ_F_WORK_INITIALIZED_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -590,6 +591,8 @@ enum {
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
/* doesn't need file table for this request */
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
+ /* io_wq_work is initialized */
+ REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
};
struct async_poll {
@@ -635,6 +638,7 @@ struct io_kiocb {
unsigned int flags;
refcount_t refs;
struct task_struct *task;
+ const struct cred *creds;
unsigned long fsize;
u64 user_data;
u32 result;
@@ -882,6 +886,12 @@ static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops;
+static inline void init_io_work(struct io_kiocb *req,
+ void (*func)(struct io_wq_work **))
+{
+ req->work = (struct io_wq_work){ .func = func };
+ req->flags |= REQ_F_WORK_INITIALIZED;
+}
struct sock *io_uring_get_socket(struct file *file)
{
#if defined(CONFIG_UNIX)
@@ -1035,8 +1045,15 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
mmgrab(current->mm);
req->work.mm = current->mm;
}
- if (!req->work.creds)
- req->work.creds = get_current_cred();
+
+ if (!req->work.creds) {
+ if (!req->creds)
+ req->work.creds = get_current_cred();
+ else {
+ req->work.creds = req->creds;
+ req->creds = NULL;
+ }
+ }
if (!req->work.fs && def->needs_fs) {
spin_lock(&current->fs->lock);
if (!current->fs->in_exec) {
@@ -1053,6 +1070,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
static inline void io_req_work_drop_env(struct io_kiocb *req)
{
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ return;
+
if (req->work.mm) {
mmdrop(req->work.mm);
req->work.mm = NULL;
@@ -2923,7 +2943,10 @@ static int io_fsync(struct io_kiocb *req, bool force_nonblock)
{
/* fsync always requires a blocking context */
if (force_nonblock) {
- req->work.func = io_fsync_finish;
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_fsync_finish);
+ else
+ req->work.func = io_fsync_finish;
return -EAGAIN;
}
__io_fsync(req);
@@ -2971,7 +2994,10 @@ static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
{
/* fallocate always requiring blocking context */
if (force_nonblock) {
- req->work.func = io_fallocate_finish;
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_fallocate_finish);
+ else
+ req->work.func = io_fallocate_finish;
return -EAGAIN;
}
@@ -3500,7 +3526,10 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
/* submission ref will be dropped, take it for async */
refcount_inc(&req->refs);
- req->work.func = io_close_finish;
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_close_finish);
+ else
+ req->work.func = io_close_finish;
/*
* Do manual async queue here to avoid grabbing files - we don't
* need the files, and it'll cause io_close_finish() to close
@@ -3563,7 +3592,10 @@ static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
{
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
- req->work.func = io_sync_file_range_finish;
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_sync_file_range_finish);
+ else
+ req->work.func = io_sync_file_range_finish;
return -EAGAIN;
}
@@ -4032,7 +4064,10 @@ static int io_accept(struct io_kiocb *req, bool force_nonblock)
ret = __io_accept(req, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
- req->work.func = io_accept_finish;
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_accept_finish);
+ else
+ req->work.func = io_accept_finish;
return -EAGAIN;
}
return 0;
@@ -5032,6 +5067,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (!sqe)
return 0;
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_wq_submit_work);
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@@ -5667,19 +5705,24 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_kiocb *linked_timeout;
struct io_kiocb *nxt;
- const struct cred *old_creds = NULL;
+ const struct cred *creds, *old_creds = NULL;
int ret;
again:
linked_timeout = io_prep_linked_timeout(req);
- if (req->work.creds && req->work.creds != current_cred()) {
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ creds = req->work.creds;
+ else
+ creds = req->creds;
+
+ if (creds && creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
- if (old_creds == req->work.creds)
+ if (old_creds == creds)
old_creds = NULL; /* restored original creds */
else
- old_creds = override_creds(req->work.creds);
+ old_creds = override_creds(creds);
}
ret = io_issue_sqe(req, sqe, true);
@@ -5696,6 +5739,9 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
goto exit;
}
punt:
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ init_io_work(req, io_wq_submit_work);
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (ret)
@@ -5948,7 +5994,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
refcount_set(&req->refs, 2);
req->task = NULL;
req->result = 0;
- INIT_IO_WORK(&req->work, io_wq_submit_work);
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
@@ -5970,11 +6015,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality);
if (id) {
- req->work.creds = idr_find(&ctx->personality_idr, id);
- if (unlikely(!req->work.creds))
+ req->creds = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!req->creds))
return -EINVAL;
- get_cred(req->work.creds);
- }
+ get_cred(req->creds);
+ } else
+ req->creds = NULL;
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags |= sqe_flags;
--
2.17.2
* [PATCH v2 2/2] io_uring: avoid unnecessary io_wq_work copy for fast poll feature
From: Xiaoguang Wang @ 2020-05-27 16:49 UTC
To: io-uring; +Cc: axboe, asml.silence, joseph.qi, Xiaoguang Wang
Basically, the IORING_OP_POLL_ADD command and the async armed poll
handlers for regular commands don't touch io_wq_work, so there is no
need to always copy io_wq_work back and forth. Add a new flag,
'REQ_F_WORK_NEED_RESTORE', to control whether the io_wq_work copy and
restore are needed.
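
For context (not part of the patch), here is a minimal userspace sketch,
assuming liburing is available, of a bare poll request; per the
description above such a request never touches io_wq_work, so with this
change it no longer pays for the copy/restore at all:

#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[2];

	if (pipe(fds) || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* poll for readability; this request is handled via the poll path */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, fds[0], POLLIN);
	io_uring_submit(&ring);

	/* make the pipe readable so the poll completes */
	if (write(fds[1], "x", 1) != 1)
		return 1;

	if (!io_uring_wait_cqe(&ring, &cqe)) {
		printf("poll res=0x%x\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}

The same reasoning applies to, for example, a recv on a not-yet-readable
socket that gets armed through the fast-poll path instead of being
punted to io-wq.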
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/io_uring.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7ba8590a45a6..a704fc93a81d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -536,6 +536,7 @@ enum {
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_WORK_INITIALIZED_BIT,
+ REQ_F_WORK_NEED_RESTORE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -593,6 +594,8 @@ enum {
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
+ /* need restore io_wq_work */
+ REQ_F_WORK_NEED_RESTORE = BIT(REQ_F_WORK_NEED_RESTORE_BIT),
};
struct async_poll {
@@ -4411,7 +4414,8 @@ static void io_async_task_func(struct callback_head *cb)
spin_unlock_irq(&ctx->completion_lock);
/* restore ->work in case we need to retry again */
- memcpy(&req->work, &apoll->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_NEED_RESTORE)
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll);
if (!canceled) {
@@ -4508,7 +4512,10 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
return false;
req->flags |= REQ_F_POLLED;
- memcpy(&apoll->work, &req->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_INITIALIZED) {
+ req->flags |= REQ_F_WORK_NEED_RESTORE;
+ memcpy(&apoll->work, &req->work, sizeof(req->work));
+ }
had_io = req->io != NULL;
get_task_struct(current);
@@ -4533,7 +4540,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
if (!had_io)
io_poll_remove_double(req);
spin_unlock_irq(&ctx->completion_lock);
- memcpy(&req->work, &apoll->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_NEED_RESTORE)
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll);
return false;
}
@@ -4578,7 +4586,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
* io_req_work_drop_env below when dropping the
* final reference.
*/
- memcpy(&req->work, &apoll->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_NEED_RESTORE)
+ memcpy(&req->work, &apoll->work,
+ sizeof(req->work));
kfree(apoll);
}
}
--
2.17.2