* [PATCH v2 0/9] scrap 24 bytes from io_kiocb
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
Make io_kiocb slimmer by 24 bytes, mainly by revising list usage. The
drawback is an extra kmalloc in the draining path, but that's a slow
path, so meh. It also frees some space for the deferred completion path
should it be needed in the future, but the main idea here is to
eventually shrink io_kiocb to 3 cachelines.
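For a rough feel of where the 24 bytes come from, here is a stand-alone sketch with stand-in structs (not the real io_kiocb; actual padding and compaction differ): dropping one list_head (patch 7) and relocating the u32 sequence (patch 8) and u32 cflags (patch 9) accounts for 16 + 4 + 4 bytes on 64-bit.

#include <stdio.h>

/* stand-ins only; sizes as on 64-bit */
struct list_head { struct list_head *next, *prev; };

struct req_before {
	struct list_head list;		/* removed in patch 7 */
	struct list_head inflight_entry;
	unsigned int sequence;		/* moved to io_defer_entry in patch 8 */
	unsigned int cflags;		/* moved into completion data in patch 9 */
};

struct req_after {
	struct list_head inflight_entry;
};

int main(void)
{
	printf("saved: %zu bytes\n",
	       sizeof(struct req_before) - sizeof(struct req_after));
	return 0;
}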
This depends on "rw iovec copy cleanup" series
v2: [1] invalid kfree() at the end of io_{write,read}()
handled by the "rw iovec copy cleanup" series
[1] rename io_cleanup_req() -> __io_clean_op()
[3] rename iopoll_list -> inflight_entry (Jens)
[8] correct sequence types
[8] bring back the sequence-based fast check in defer
Pavel Begunkov (9):
io_uring: share completion list w/ per-op space
io_uring: rename ctx->poll into ctx->iopoll
io_uring: use inflight_entry list for iopoll'ing
io_uring: use completion list for CQ overflow
io_uring: add req->timeout.list
io_uring: remove init for unused list
io_uring: use non-intrusive list for defer
io_uring: remove sequence from io_kiocb
io_uring: place cflags into completion data
fs/io_uring.c | 198 +++++++++++++++++++++++++++++++-------------------
1 file changed, 122 insertions(+), 76 deletions(-)
--
2.24.0
* [PATCH v2 1/9] io_uring: share completion list w/ per-op space
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
Calling io_req_complete(req) means that the request is done, and there
is nothing left but to clean it up. That also means that per-op data
should not be used after that point, so we're free to reuse it on the
completion path, e.g. to store overflow_list as done in this patch.
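A minimal stand-alone sketch of the layout idea (the compl/io_completion names follow the patch, everything else is a stand-in): completion-path data lives in the same union as the per-op data, so it costs no extra space, under the rule that io_clean_op() has run before compl is touched.

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* stand-ins for a couple of per-op payloads */
struct io_rw_sketch      { void *file; unsigned long long addr, len; };
struct io_timeout_sketch { void *file; int flags; unsigned int off, seq; };

/* completion-path data, valid only once per-op data is cleaned up */
struct io_completion_sketch {
	void *file;
	struct list_head list;
};

struct io_kiocb_sketch {
	union {
		struct io_rw_sketch		rw;
		struct io_timeout_sketch	timeout;
		/* use only after cleaning per-op data, see io_clean_op() */
		struct io_completion_sketch	compl;
	};
	/* ... the rest of the request ... */
};

int main(void)
{
	/* compl overlays the per-op members, so the union doesn't grow */
	printf("union: %zu bytes, io_completion: %zu bytes\n",
	       sizeof(struct io_kiocb_sketch),
	       sizeof(struct io_completion_sketch));
	return 0;
}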
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0b9c0333d8c0..b60307a69599 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -487,6 +487,11 @@ struct io_statx {
struct statx __user *buffer;
};
+struct io_completion {
+ struct file *file;
+ struct list_head list;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -621,6 +626,8 @@ struct io_kiocb {
struct io_splice splice;
struct io_provide_buf pbuf;
struct io_statx statx;
+ /* use only after cleaning per-op data, see io_clean_op() */
+ struct io_completion compl;
};
struct io_async_ctx *io;
@@ -895,7 +902,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req);
static void io_complete_rw_common(struct kiocb *kiocb, long res,
struct io_comp_state *cs);
-static void io_cleanup_req(struct io_kiocb *req);
+static void __io_clean_op(struct io_kiocb *req);
static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
int fd, struct file **out_file, bool fixed);
static void __io_queue_sqe(struct io_kiocb *req,
@@ -935,6 +942,12 @@ static void io_get_req_task(struct io_kiocb *req)
req->flags |= REQ_F_TASK_PINNED;
}
+static inline void io_clean_op(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ __io_clean_op(req);
+}
+
/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
static void __io_put_req_task(struct io_kiocb *req)
{
@@ -1412,8 +1425,8 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
while (!list_empty(&cs->list)) {
struct io_kiocb *req;
- req = list_first_entry(&cs->list, struct io_kiocb, list);
- list_del(&req->list);
+ req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
+ list_del(&req->compl.list);
__io_cqring_fill_event(req, req->result, req->cflags);
if (!(req->flags & REQ_F_LINK_HEAD)) {
req->flags |= REQ_F_COMP_LOCKED;
@@ -1438,9 +1451,10 @@ static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
io_cqring_add_event(req, res, cflags);
io_put_req(req);
} else {
+ io_clean_op(req);
req->result = res;
req->cflags = cflags;
- list_add_tail(&req->list, &cs->list);
+ list_add_tail(&req->compl.list, &cs->list);
if (++cs->nr >= 32)
io_submit_flush_completions(cs);
}
@@ -1514,8 +1528,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file,
static void io_dismantle_req(struct io_kiocb *req)
{
- if (req->flags & REQ_F_NEED_CLEANUP)
- io_cleanup_req(req);
+ io_clean_op(req);
if (req->io)
kfree(req->io);
@@ -5380,7 +5393,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EIOCBQUEUED;
}
-static void io_cleanup_req(struct io_kiocb *req)
+static void __io_clean_op(struct io_kiocb *req)
{
struct io_async_ctx *io = req->io;
--
2.24.0
* [PATCH v2 2/9] io_uring: rename ctx->poll into ctx->iopoll
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
io_uring supports both polling and I/O polling. Rename ctx->poll_list
into ctx->iopoll_list to make it clear that it's used only in the
I/O-poll case.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b60307a69599..03bb8a69d09c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -320,12 +320,12 @@ struct io_ring_ctx {
spinlock_t completion_lock;
/*
- * ->poll_list is protected by the ctx->uring_lock for
+ * ->iopoll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
* For SQPOLL, only the single threaded io_sq_thread() will
* manipulate the list, hence no extra locking is needed there.
*/
- struct list_head poll_list;
+ struct list_head iopoll_list;
struct hlist_head *cancel_hash;
unsigned cancel_hash_bits;
bool poll_multi_file;
@@ -1063,7 +1063,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
- INIT_LIST_HEAD(&ctx->poll_list);
+ INIT_LIST_HEAD(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
init_waitqueue_head(&ctx->inflight_wait);
@@ -2008,7 +2008,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
spin = !ctx->poll_multi_file && *nr_events < min;
ret = 0;
- list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
+ list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, list) {
struct kiocb *kiocb = &req->rw.kiocb;
/*
@@ -2050,7 +2050,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
long min)
{
- while (!list_empty(&ctx->poll_list) && !need_resched()) {
+ while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
int ret;
ret = io_do_iopoll(ctx, nr_events, min);
@@ -2073,7 +2073,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
return;
mutex_lock(&ctx->uring_lock);
- while (!list_empty(&ctx->poll_list)) {
+ while (!list_empty(&ctx->iopoll_list)) {
unsigned int nr_events = 0;
io_do_iopoll(ctx, &nr_events, 0);
@@ -2290,12 +2290,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* how we do polling eventually, not spinning if we're on potentially
* different devices.
*/
- if (list_empty(&ctx->poll_list)) {
+ if (list_empty(&ctx->iopoll_list)) {
ctx->poll_multi_file = false;
} else if (!ctx->poll_multi_file) {
struct io_kiocb *list_req;
- list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
+ list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
list);
if (list_req->file != req->file)
ctx->poll_multi_file = true;
@@ -2306,9 +2306,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* it to the front so we find it first.
*/
if (READ_ONCE(req->iopoll_completed))
- list_add(&req->list, &ctx->poll_list);
+ list_add(&req->list, &ctx->iopoll_list);
else
- list_add_tail(&req->list, &ctx->poll_list);
+ list_add_tail(&req->list, &ctx->iopoll_list);
if ((ctx->flags & IORING_SETUP_SQPOLL) &&
wq_has_sleeper(&ctx->sqo_wait))
@@ -6306,11 +6306,11 @@ static int io_sq_thread(void *data)
while (!kthread_should_park()) {
unsigned int to_submit;
- if (!list_empty(&ctx->poll_list)) {
+ if (!list_empty(&ctx->iopoll_list)) {
unsigned nr_events = 0;
mutex_lock(&ctx->uring_lock);
- if (!list_empty(&ctx->poll_list) && !need_resched())
+ if (!list_empty(&ctx->iopoll_list) && !need_resched())
io_do_iopoll(ctx, &nr_events, 0);
else
timeout = jiffies + ctx->sq_thread_idle;
@@ -6339,7 +6339,7 @@ static int io_sq_thread(void *data)
* more IO, we should wait for the application to
* reap events and wake us up.
*/
- if (!list_empty(&ctx->poll_list) || need_resched() ||
+ if (!list_empty(&ctx->iopoll_list) || need_resched() ||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
!percpu_ref_is_dying(&ctx->refs))) {
io_run_task_work();
@@ -6352,13 +6352,13 @@ static int io_sq_thread(void *data)
/*
* While doing polled IO, before going to sleep, we need
- * to check if there are new reqs added to poll_list, it
- * is because reqs may have been punted to io worker and
- * will be added to poll_list later, hence check the
- * poll_list again.
+ * to check if there are new reqs added to iopoll_list,
+ * it is because reqs may have been punted to io worker
+ * and will be added to iopoll_list later, hence check
+ * the iopoll_list again.
*/
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
- !list_empty_careful(&ctx->poll_list)) {
+ !list_empty_careful(&ctx->iopoll_list)) {
finish_wait(&ctx->sqo_wait, &wait);
continue;
}
--
2.24.0
* [PATCH v2 3/9] io_uring: use inflight_entry list for iopoll'ing
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
req->inflight_entry is used to track requests that grabbed files_struct.
Let's share it with the iopoll list, because the only iopoll'ed ops are
reads and writes, which don't need a file table.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 03bb8a69d09c..1ad6d47a6223 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -650,6 +650,10 @@ struct io_kiocb {
struct list_head link_list;
+ /*
+ * 1. used with ctx->iopoll_list with reads/writes
+ * 2. to track reqs with ->files (see io_op_def::file_table)
+ */
struct list_head inflight_entry;
struct percpu_ref *fixed_file_refs;
@@ -1942,8 +1946,8 @@ static void io_iopoll_queue(struct list_head *again)
struct io_kiocb *req;
do {
- req = list_first_entry(again, struct io_kiocb, list);
- list_del(&req->list);
+ req = list_first_entry(again, struct io_kiocb, inflight_entry);
+ list_del(&req->inflight_entry);
if (!io_rw_reissue(req, -EAGAIN))
io_complete_rw_common(&req->rw.kiocb, -EAGAIN, NULL);
} while (!list_empty(again));
@@ -1966,13 +1970,13 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
while (!list_empty(done)) {
int cflags = 0;
- req = list_first_entry(done, struct io_kiocb, list);
+ req = list_first_entry(done, struct io_kiocb, inflight_entry);
if (READ_ONCE(req->result) == -EAGAIN) {
req->iopoll_completed = 0;
- list_move_tail(&req->list, &again);
+ list_move_tail(&req->inflight_entry, &again);
continue;
}
- list_del(&req->list);
+ list_del(&req->inflight_entry);
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_kbuf(req);
@@ -2008,7 +2012,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
spin = !ctx->poll_multi_file && *nr_events < min;
ret = 0;
- list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, list) {
+ list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
struct kiocb *kiocb = &req->rw.kiocb;
/*
@@ -2017,7 +2021,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
* and complete those lists first, if we have entries there.
*/
if (READ_ONCE(req->iopoll_completed)) {
- list_move_tail(&req->list, &done);
+ list_move_tail(&req->inflight_entry, &done);
continue;
}
if (!list_empty(&done))
@@ -2029,7 +2033,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
/* iopoll may have completed current req */
if (READ_ONCE(req->iopoll_completed))
- list_move_tail(&req->list, &done);
+ list_move_tail(&req->inflight_entry, &done);
if (ret && spin)
spin = false;
@@ -2296,7 +2300,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
struct io_kiocb *list_req;
list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
- list);
+ inflight_entry);
if (list_req->file != req->file)
ctx->poll_multi_file = true;
}
@@ -2306,9 +2310,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* it to the front so we find it first.
*/
if (READ_ONCE(req->iopoll_completed))
- list_add(&req->list, &ctx->iopoll_list);
+ list_add(&req->inflight_entry, &ctx->iopoll_list);
else
- list_add_tail(&req->list, &ctx->iopoll_list);
+ list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
if ((ctx->flags & IORING_SETUP_SQPOLL) &&
wq_has_sleeper(&ctx->sqo_wait))
--
2.24.0
* [PATCH v2 4/9] io_uring: use completion list for CQ overflow
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
As with the completion path, also use compl.list for overflowed
requests. If cleaned up properly, nobody needs per-op data there
anymore.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1ad6d47a6223..584ff83cf0a4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1338,8 +1338,8 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
break;
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
- list);
- list_move(&req->list, &list);
+ compl.list);
+ list_move(&req->compl.list, &list);
req->flags &= ~REQ_F_OVERFLOW;
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
@@ -1361,8 +1361,8 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
io_cqring_ev_posted(ctx);
while (!list_empty(&list)) {
- req = list_first_entry(&list, struct io_kiocb, list);
- list_del(&req->list);
+ req = list_first_entry(&list, struct io_kiocb, compl.list);
+ list_del(&req->compl.list);
io_put_req(req);
}
@@ -1395,11 +1395,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
set_bit(0, &ctx->cq_check_overflow);
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
}
+ io_clean_op(req);
req->flags |= REQ_F_OVERFLOW;
- refcount_inc(&req->refs);
req->result = res;
req->cflags = cflags;
- list_add_tail(&req->list, &ctx->cq_overflow_list);
+ refcount_inc(&req->refs);
+ list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
}
}
@@ -7812,7 +7813,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
if (cancel_req->flags & REQ_F_OVERFLOW) {
spin_lock_irq(&ctx->completion_lock);
- list_del(&cancel_req->list);
+ list_del(&cancel_req->compl.list);
cancel_req->flags &= ~REQ_F_OVERFLOW;
if (list_empty(&ctx->cq_overflow_list)) {
clear_bit(0, &ctx->sq_check_overflow);
--
2.24.0
* [PATCH v2 5/9] io_uring: add req->timeout.list
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
Instead of using the shared req->list, hang timeouts off their own list
entry. struct io_timeout has enough extra space for it, but if that ever
becomes a problem, ->inflight_entry can be reused for it.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 584ff83cf0a4..b9bd2002db98 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -396,6 +396,7 @@ struct io_timeout {
int flags;
u32 off;
u32 target_seq;
+ struct list_head list;
};
struct io_rw {
@@ -1212,7 +1213,7 @@ static void io_kill_timeout(struct io_kiocb *req)
ret = hrtimer_try_to_cancel(&req->io->timeout.timer);
if (ret != -1) {
atomic_inc(&req->ctx->cq_timeouts);
- list_del_init(&req->list);
+ list_del_init(&req->timeout.list);
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, 0);
io_put_req(req);
@@ -1224,7 +1225,7 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
struct io_kiocb *req, *tmp;
spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list)
io_kill_timeout(req);
spin_unlock_irq(&ctx->completion_lock);
}
@@ -1247,7 +1248,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
{
while (!list_empty(&ctx->timeout_list)) {
struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
- struct io_kiocb, list);
+ struct io_kiocb, timeout.list);
if (io_is_timeout_noseq(req))
break;
@@ -1255,7 +1256,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
- atomic_read(&ctx->cq_timeouts))
break;
- list_del_init(&req->list);
+ list_del_init(&req->timeout.list);
io_kill_timeout(req);
}
}
@@ -4980,8 +4981,8 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
* We could be racing with timeout deletion. If the list is empty,
* then timeout lookup already found it and will be handling it.
*/
- if (!list_empty(&req->list))
- list_del_init(&req->list);
+ if (!list_empty(&req->timeout.list))
+ list_del_init(&req->timeout.list);
io_cqring_fill_event(req, -ETIME);
io_commit_cqring(ctx);
@@ -4998,9 +4999,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
struct io_kiocb *req;
int ret = -ENOENT;
- list_for_each_entry(req, &ctx->timeout_list, list) {
+ list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
if (user_data == req->user_data) {
- list_del_init(&req->list);
+ list_del_init(&req->timeout.list);
ret = 0;
break;
}
@@ -5120,7 +5121,8 @@ static int io_timeout(struct io_kiocb *req)
* the one we need first.
*/
list_for_each_prev(entry, &ctx->timeout_list) {
- struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+ struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
+ timeout.list);
if (io_is_timeout_noseq(nxt))
continue;
@@ -5129,7 +5131,7 @@ static int io_timeout(struct io_kiocb *req)
break;
}
add:
- list_add(&req->list, entry);
+ list_add(&req->timeout.list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
spin_unlock_irq(&ctx->completion_lock);
--
2.24.0
* [PATCH v2 6/9] io_uring: remove init for unused list
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
poll*() doesn't use req->list, so don't init it.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b9bd2002db98..b789edbe4339 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4947,7 +4947,6 @@ static int io_poll_add(struct io_kiocb *req)
__poll_t mask;
INIT_HLIST_NODE(&req->hash_node);
- INIT_LIST_HEAD(&req->list);
ipt.pt._qproc = io_poll_queue_proc;
mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
--
2.24.0
* [PATCH v2 7/9] io_uring: use non-intrusive list for defer
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
The only remaining user of req->list is DRAIN, hence instead of keeping
a separate per-request list for it, do it with old-fashioned
non-intrusive list nodes allocated on demand. That's a really slow path,
so that's OK.
This removes req->list and so sheds 16 bytes from io_kiocb.
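A rough stand-alone sketch of the allocate-before-locking pattern io_req_defer() uses in this patch (stand-in types, a plain pointer chain instead of list_head, and the completion_lock elided): the wrapper node is allocated up front with GFP_KERNEL, and simply freed again if the re-check under the lock shows deferring is no longer needed.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct request { bool need_defer; };	/* stand-in for io_kiocb */

struct defer_entry {			/* mirrors io_defer_entry */
	struct defer_entry *next;
	struct request *req;
};

static struct defer_entry *defer_list;	/* stand-in for ctx->defer_list */

static int req_defer(struct request *req)
{
	struct defer_entry *de;

	if (!req->need_defer && !defer_list)
		return 0;		/* fast path, nothing allocated */

	de = malloc(sizeof(*de));	/* kmalloc(..., GFP_KERNEL) in the patch */
	if (!de)
		return -ENOMEM;

	/* the real code takes ctx->completion_lock here, then re-checks */
	if (!req->need_defer && !defer_list) {
		free(de);		/* defer turned out to be unnecessary */
		return 0;
	}
	de->req = req;
	de->next = defer_list;		/* list_add_tail() in the patch */
	defer_list = de;
	return 1;			/* the patch returns -EIOCBQUEUED here */
}

int main(void)
{
	struct request a = { .need_defer = false };
	struct request b = { .need_defer = true };

	printf("a: %d\n", req_defer(&a));	/* 0: issue inline */
	printf("b: %d\n", req_defer(&b));	/* 1: parked on the defer list */
	return 0;
}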
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b789edbe4339..672eb57565dc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -640,7 +640,6 @@ struct io_kiocb {
u16 buf_index;
struct io_ring_ctx *ctx;
- struct list_head list;
unsigned int flags;
refcount_t refs;
struct task_struct *task;
@@ -675,6 +674,11 @@ struct io_kiocb {
struct callback_head task_work;
};
+struct io_defer_entry {
+ struct list_head list;
+ struct io_kiocb *req;
+};
+
#define IO_IOPOLL_BATCH 8
struct io_comp_state {
@@ -1233,14 +1237,15 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
do {
- struct io_kiocb *req = list_first_entry(&ctx->defer_list,
- struct io_kiocb, list);
+ struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
+ struct io_defer_entry, list);
- if (req_need_defer(req))
+ if (req_need_defer(de->req))
break;
- list_del_init(&req->list);
+ list_del_init(&de->list);
/* punt-init is done before queueing for defer */
- __io_queue_async_work(req);
+ __io_queue_async_work(de->req);
+ kfree(de);
} while (!list_empty(&ctx->defer_list));
}
@@ -5372,6 +5377,7 @@ static int io_req_defer_prep(struct io_kiocb *req,
static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
+ struct io_defer_entry *de;
int ret;
/* Still need defer if there is pending req in defer list. */
@@ -5386,15 +5392,20 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
io_prep_async_link(req);
+ de = kmalloc(sizeof(*de), GFP_KERNEL);
+ if (!de)
+ return -ENOMEM;
spin_lock_irq(&ctx->completion_lock);
if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
spin_unlock_irq(&ctx->completion_lock);
+ kfree(de);
return 0;
}
trace_io_uring_defer(ctx, req, req->user_data);
- list_add_tail(&req->list, &ctx->defer_list);
+ de->req = req;
+ list_add_tail(&de->list, &ctx->defer_list);
spin_unlock_irq(&ctx->completion_lock);
return -EIOCBQUEUED;
}
--
2.24.0
* [PATCH v2 8/9] io_uring: remove sequence from io_kiocb
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
req->sequence is used only for deferred (i.e. DRAIN) requests, but is
initialised for every request. Remove req->sequence from io_kiocb
together with its initialisation in io_init_req().
Replace it with a new field in struct io_defer_entry, which is
calculated only when needed in io_req_defer(), a slow path.
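As a worked example of the arithmetic, with made-up numbers: if 10 SQEs have been consumed so far (cached_sq_head == 10, cached_sq_dropped == 0) and the DRAIN request heads a link of three requests, io_get_sequence() returns 10 - 3 = 7, i.e. seven requests were submitted ahead of the link. req_need_defer() then keeps the request parked until cached_cq_tail + cached_cq_overflow reaches 7, at which point every prior submission has either posted a CQE or overflowed, and the drained request can be issued.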
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 44 ++++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 672eb57565dc..e70129fac6db 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -638,6 +638,7 @@ struct io_kiocb {
u8 iopoll_completed;
u16 buf_index;
+ u32 result;
struct io_ring_ctx *ctx;
unsigned int flags;
@@ -645,8 +646,6 @@ struct io_kiocb {
struct task_struct *task;
unsigned long fsize;
u64 user_data;
- u32 result;
- u32 sequence;
struct list_head link_list;
@@ -677,6 +676,7 @@ struct io_kiocb {
struct io_defer_entry {
struct list_head list;
struct io_kiocb *req;
+ u32 seq;
};
#define IO_IOPOLL_BATCH 8
@@ -1089,13 +1089,13 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
return NULL;
}
-static inline bool req_need_defer(struct io_kiocb *req)
+static bool req_need_defer(struct io_kiocb *req, u32 seq)
{
if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
struct io_ring_ctx *ctx = req->ctx;
- return req->sequence != ctx->cached_cq_tail
- + atomic_read(&ctx->cached_cq_overflow);
+ return seq != ctx->cached_cq_tail
+ + atomic_read(&ctx->cached_cq_overflow);
}
return false;
@@ -1240,7 +1240,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
- if (req_need_defer(de->req))
+ if (req_need_defer(de->req, de->seq))
break;
list_del_init(&de->list);
/* punt-init is done before queueing for defer */
@@ -5374,14 +5374,35 @@ static int io_req_defer_prep(struct io_kiocb *req,
return ret;
}
+static u32 io_get_sequence(struct io_kiocb *req)
+{
+ struct io_kiocb *pos;
+ struct io_ring_ctx *ctx = req->ctx;
+ u32 total_submitted, nr_reqs = 1;
+
+ if (req->flags & REQ_F_LINK_HEAD)
+ list_for_each_entry(pos, &req->link_list, link_list)
+ nr_reqs++;
+
+ total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
+ return total_submitted - nr_reqs;
+}
+
static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_defer_entry *de;
int ret;
+ u32 seq;
/* Still need defer if there is pending req in defer list. */
- if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list))
+ if (likely(list_empty_careful(&ctx->defer_list) &&
+ !(req->flags & REQ_F_IO_DRAIN)))
+ return 0;
+
+ seq = io_get_sequence(req);
+ /* Still a chance to pass the sequence check */
+ if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
return 0;
if (!req->io) {
@@ -5397,7 +5418,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -ENOMEM;
spin_lock_irq(&ctx->completion_lock);
- if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
+ if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
spin_unlock_irq(&ctx->completion_lock);
kfree(de);
return 0;
@@ -5405,6 +5426,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
trace_io_uring_defer(ctx, req, req->user_data);
de->req = req;
+ de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
spin_unlock_irq(&ctx->completion_lock);
return -EIOCBQUEUED;
@@ -6181,12 +6203,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
unsigned int sqe_flags;
int id;
- /*
- * All io need record the previous position, if LINK vs DARIN,
- * it can be used to mark the position of the first IO in the
- * link list.
- */
- req->sequence = ctx->cached_sq_head - ctx->cached_sq_dropped;
req->opcode = READ_ONCE(sqe->opcode);
req->user_data = READ_ONCE(sqe->user_data);
req->io = NULL;
--
2.24.0
* [PATCH v2 9/9] io_uring: place cflags into completion data
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
req->cflags is used only in the deferred-completion path, so store it in
the completion data instead. Together with the 4 bytes from the
->sequence patch and compacting io_kiocb, this frees 8 bytes.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e70129fac6db..7038c4f08805 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -491,6 +491,7 @@ struct io_statx {
struct io_completion {
struct file *file;
struct list_head list;
+ int cflags;
};
struct io_async_connect {
@@ -632,7 +633,6 @@ struct io_kiocb {
};
struct io_async_ctx *io;
- int cflags;
u8 opcode;
/* polled IO has completed */
u8 iopoll_completed;
@@ -1350,7 +1350,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, req->result);
- WRITE_ONCE(cqe->flags, req->cflags);
+ WRITE_ONCE(cqe->flags, req->compl.cflags);
} else {
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1404,7 +1404,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
io_clean_op(req);
req->flags |= REQ_F_OVERFLOW;
req->result = res;
- req->cflags = cflags;
+ req->compl.cflags = cflags;
refcount_inc(&req->refs);
list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
}
@@ -1438,7 +1438,7 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
list_del(&req->compl.list);
- __io_cqring_fill_event(req, req->result, req->cflags);
+ __io_cqring_fill_event(req, req->result, req->compl.cflags);
if (!(req->flags & REQ_F_LINK_HEAD)) {
req->flags |= REQ_F_COMP_LOCKED;
io_put_req(req);
@@ -1464,7 +1464,7 @@ static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
} else {
io_clean_op(req);
req->result = res;
- req->cflags = cflags;
+ req->compl.cflags = cflags;
list_add_tail(&req->compl.list, &cs->list);
if (++cs->nr >= 32)
io_submit_flush_completions(cs);
--
2.24.0
* Re: [PATCH v2 0/9] scrap 24 bytes from io_kiocb
From: Jens Axboe @ 2020-07-13 21:10 UTC
To: Pavel Begunkov, io-uring
On 7/13/20 2:37 PM, Pavel Begunkov wrote:
> Make io_kiocb slimmer by 24 bytes, mainly by revising list usage. The
> drawback is an extra kmalloc in the draining path, but that's a slow
> path, so meh. It also frees some space for the deferred completion path
> should it be needed in the future, but the main idea here is to
> eventually shrink io_kiocb to 3 cachelines.
>
> This depends on "rw iovec copy cleanup" series
This looks really good to me, and also tests out good - both in terms
of functionality and KASAN, but also increases performance slightly
as expected. Applied, thanks!
--
Jens Axboe