* [PATCH v2 1/9] io_uring: share completion list w/ per-op space
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
Calling io_req_complete(req) means that the request is done, and there is
nothing left but to clean it up. That also means that per-op data should
not be used after that point, so we're free to reuse the space in the
completion path, e.g. to store overflow_list as done in this patch.
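
For illustration, a minimal userspace sketch of the sharing idea
(hypothetical, simplified types, not the kernel definitions): per-op data
and completion bookkeeping occupy the same bytes, which is safe only
because the completion fields are written after the per-op data is dead,
i.e. after the cleanup step below has run.

#include <stdio.h>

struct op_data { int fd; long offset; };	/* live while the op runs */
struct completion_node { struct completion_node *next; };

struct request {
	union {
		struct op_data op;		/* per-op data, e.g. io_statx */
		struct completion_node compl;	/* reused once the op is done */
	};
	int result;
};

int main(void)
{
	struct request req = { .op = { .fd = 3, .offset = 64 } };

	/* ... the request executes and its per-op data is consumed ... */

	/* after cleanup, the same bytes hold the completion-list linkage */
	req.compl.next = NULL;
	req.result = 0;
	printf("done, result=%d\n", req.result);
	return 0;
}

The pay-off is purely spatial: the completion linkage costs io_kiocb no
extra bytes, because it lives in a union that already has to be big enough
for the largest per-op struct.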
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0b9c0333d8c0..b60307a69599 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -487,6 +487,11 @@ struct io_statx {
struct statx __user *buffer;
};
+struct io_completion {
+ struct file *file;
+ struct list_head list;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -621,6 +626,8 @@ struct io_kiocb {
struct io_splice splice;
struct io_provide_buf pbuf;
struct io_statx statx;
+ /* use only after cleaning per-op data, see io_clean_op() */
+ struct io_completion compl;
};
struct io_async_ctx *io;
@@ -895,7 +902,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req);
static void io_complete_rw_common(struct kiocb *kiocb, long res,
struct io_comp_state *cs);
-static void io_cleanup_req(struct io_kiocb *req);
+static void __io_clean_op(struct io_kiocb *req);
static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
int fd, struct file **out_file, bool fixed);
static void __io_queue_sqe(struct io_kiocb *req,
@@ -935,6 +942,12 @@ static void io_get_req_task(struct io_kiocb *req)
req->flags |= REQ_F_TASK_PINNED;
}
+static inline void io_clean_op(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ __io_clean_op(req);
+}
+
/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
static void __io_put_req_task(struct io_kiocb *req)
{
@@ -1412,8 +1425,8 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
while (!list_empty(&cs->list)) {
struct io_kiocb *req;
- req = list_first_entry(&cs->list, struct io_kiocb, list);
- list_del(&req->list);
+ req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
+ list_del(&req->compl.list);
__io_cqring_fill_event(req, req->result, req->cflags);
if (!(req->flags & REQ_F_LINK_HEAD)) {
req->flags |= REQ_F_COMP_LOCKED;
@@ -1438,9 +1451,10 @@ static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
io_cqring_add_event(req, res, cflags);
io_put_req(req);
} else {
+ io_clean_op(req);
req->result = res;
req->cflags = cflags;
- list_add_tail(&req->list, &cs->list);
+ list_add_tail(&req->compl.list, &cs->list);
if (++cs->nr >= 32)
io_submit_flush_completions(cs);
}
@@ -1514,8 +1528,7 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file,
static void io_dismantle_req(struct io_kiocb *req)
{
- if (req->flags & REQ_F_NEED_CLEANUP)
- io_cleanup_req(req);
+ io_clean_op(req);
if (req->io)
kfree(req->io);
@@ -5380,7 +5393,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EIOCBQUEUED;
}
-static void io_cleanup_req(struct io_kiocb *req)
+static void __io_clean_op(struct io_kiocb *req)
{
struct io_async_ctx *io = req->io;
--
2.24.0
* [PATCH v2 2/9] io_uring: rename ctx->poll into ctx->iopoll
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
io_uring supports both poll and I/O poll (IOPOLL) requests. Rename
ctx->poll_list to clearly show that it's used only in the I/O poll case.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b60307a69599..03bb8a69d09c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -320,12 +320,12 @@ struct io_ring_ctx {
spinlock_t completion_lock;
/*
- * ->poll_list is protected by the ctx->uring_lock for
+ * ->iopoll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
* For SQPOLL, only the single threaded io_sq_thread() will
* manipulate the list, hence no extra locking is needed there.
*/
- struct list_head poll_list;
+ struct list_head iopoll_list;
struct hlist_head *cancel_hash;
unsigned cancel_hash_bits;
bool poll_multi_file;
@@ -1063,7 +1063,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
- INIT_LIST_HEAD(&ctx->poll_list);
+ INIT_LIST_HEAD(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
init_waitqueue_head(&ctx->inflight_wait);
@@ -2008,7 +2008,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
spin = !ctx->poll_multi_file && *nr_events < min;
ret = 0;
- list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
+ list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, list) {
struct kiocb *kiocb = &req->rw.kiocb;
/*
@@ -2050,7 +2050,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
long min)
{
- while (!list_empty(&ctx->poll_list) && !need_resched()) {
+ while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
int ret;
ret = io_do_iopoll(ctx, nr_events, min);
@@ -2073,7 +2073,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
return;
mutex_lock(&ctx->uring_lock);
- while (!list_empty(&ctx->poll_list)) {
+ while (!list_empty(&ctx->iopoll_list)) {
unsigned int nr_events = 0;
io_do_iopoll(ctx, &nr_events, 0);
@@ -2290,12 +2290,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* how we do polling eventually, not spinning if we're on potentially
* different devices.
*/
- if (list_empty(&ctx->poll_list)) {
+ if (list_empty(&ctx->iopoll_list)) {
ctx->poll_multi_file = false;
} else if (!ctx->poll_multi_file) {
struct io_kiocb *list_req;
- list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
+ list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
list);
if (list_req->file != req->file)
ctx->poll_multi_file = true;
@@ -2306,9 +2306,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* it to the front so we find it first.
*/
if (READ_ONCE(req->iopoll_completed))
- list_add(&req->list, &ctx->poll_list);
+ list_add(&req->list, &ctx->iopoll_list);
else
- list_add_tail(&req->list, &ctx->poll_list);
+ list_add_tail(&req->list, &ctx->iopoll_list);
if ((ctx->flags & IORING_SETUP_SQPOLL) &&
wq_has_sleeper(&ctx->sqo_wait))
@@ -6306,11 +6306,11 @@ static int io_sq_thread(void *data)
while (!kthread_should_park()) {
unsigned int to_submit;
- if (!list_empty(&ctx->poll_list)) {
+ if (!list_empty(&ctx->iopoll_list)) {
unsigned nr_events = 0;
mutex_lock(&ctx->uring_lock);
- if (!list_empty(&ctx->poll_list) && !need_resched())
+ if (!list_empty(&ctx->iopoll_list) && !need_resched())
io_do_iopoll(ctx, &nr_events, 0);
else
timeout = jiffies + ctx->sq_thread_idle;
@@ -6339,7 +6339,7 @@ static int io_sq_thread(void *data)
* more IO, we should wait for the application to
* reap events and wake us up.
*/
- if (!list_empty(&ctx->poll_list) || need_resched() ||
+ if (!list_empty(&ctx->iopoll_list) || need_resched() ||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
!percpu_ref_is_dying(&ctx->refs))) {
io_run_task_work();
@@ -6352,13 +6352,13 @@ static int io_sq_thread(void *data)
/*
* While doing polled IO, before going to sleep, we need
- * to check if there are new reqs added to poll_list, it
- * is because reqs may have been punted to io worker and
- * will be added to poll_list later, hence check the
- * poll_list again.
+ * to check if there are new reqs added to iopoll_list,
+ * it is because reqs may have been punted to io worker
+ * and will be added to iopoll_list later, hence check
+ * the iopoll_list again.
*/
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
- !list_empty_careful(&ctx->poll_list)) {
+ !list_empty_careful(&ctx->iopoll_list)) {
finish_wait(&ctx->sqo_wait, &wait);
continue;
}
--
2.24.0
* [PATCH v2 3/9] io_uring: use inflight_entry list for iopoll'ing
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
req->inflight_entry is used to track requests that grabbed files_struct.
Let's share it with the iopoll list, because the only iopoll'ed ops are
reads and writes, which don't need a file table.
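
A rough userspace analogue of why one embedded entry is enough (a
simplified stand-in for the kernel's list_head, with illustrative names):
a given request is either a read/write sitting on the iopoll list or an
op tracked via ->files, never both, so the two uses can share the node.

#include <stdio.h>

struct node { struct node *prev, *next; };

static void list_init(struct node *h) { h->prev = h->next = h; }

static void list_add_tail(struct node *n, struct node *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

struct request {
	struct node entry;	/* files-tracking list OR iopoll list */
	int id;
};

int main(void)
{
	struct node files_list, iopoll_list;
	struct request rw = { .id = 1 };	/* a read: no file table */

	list_init(&files_list);
	list_init(&iopoll_list);

	/* reads/writes never go on files_list, so entry is free for iopoll */
	list_add_tail(&rw.entry, &iopoll_list);
	printf("req %d queued for iopoll\n", rw.id);
	return 0;
}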
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 03bb8a69d09c..1ad6d47a6223 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -650,6 +650,10 @@ struct io_kiocb {
struct list_head link_list;
+ /*
+ * 1. used with ctx->iopoll_list with reads/writes
+ * 2. to track reqs with ->files (see io_op_def::file_table)
+ */
struct list_head inflight_entry;
struct percpu_ref *fixed_file_refs;
@@ -1942,8 +1946,8 @@ static void io_iopoll_queue(struct list_head *again)
struct io_kiocb *req;
do {
- req = list_first_entry(again, struct io_kiocb, list);
- list_del(&req->list);
+ req = list_first_entry(again, struct io_kiocb, inflight_entry);
+ list_del(&req->inflight_entry);
if (!io_rw_reissue(req, -EAGAIN))
io_complete_rw_common(&req->rw.kiocb, -EAGAIN, NULL);
} while (!list_empty(again));
@@ -1966,13 +1970,13 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
while (!list_empty(done)) {
int cflags = 0;
- req = list_first_entry(done, struct io_kiocb, list);
+ req = list_first_entry(done, struct io_kiocb, inflight_entry);
if (READ_ONCE(req->result) == -EAGAIN) {
req->iopoll_completed = 0;
- list_move_tail(&req->list, &again);
+ list_move_tail(&req->inflight_entry, &again);
continue;
}
- list_del(&req->list);
+ list_del(&req->inflight_entry);
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_kbuf(req);
@@ -2008,7 +2012,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
spin = !ctx->poll_multi_file && *nr_events < min;
ret = 0;
- list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, list) {
+ list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
struct kiocb *kiocb = &req->rw.kiocb;
/*
@@ -2017,7 +2021,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
* and complete those lists first, if we have entries there.
*/
if (READ_ONCE(req->iopoll_completed)) {
- list_move_tail(&req->list, &done);
+ list_move_tail(&req->inflight_entry, &done);
continue;
}
if (!list_empty(&done))
@@ -2029,7 +2033,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
/* iopoll may have completed current req */
if (READ_ONCE(req->iopoll_completed))
- list_move_tail(&req->list, &done);
+ list_move_tail(&req->inflight_entry, &done);
if (ret && spin)
spin = false;
@@ -2296,7 +2300,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
struct io_kiocb *list_req;
list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
- list);
+ inflight_entry);
if (list_req->file != req->file)
ctx->poll_multi_file = true;
}
@@ -2306,9 +2310,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* it to the front so we find it first.
*/
if (READ_ONCE(req->iopoll_completed))
- list_add(&req->list, &ctx->iopoll_list);
+ list_add(&req->inflight_entry, &ctx->iopoll_list);
else
- list_add_tail(&req->list, &ctx->iopoll_list);
+ list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
if ((ctx->flags & IORING_SETUP_SQPOLL) &&
wq_has_sleeper(&ctx->sqo_wait))
--
2.24.0
* [PATCH v2 4/9] io_uring: use completion list for CQ overflow
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
As in the completion path, use compl.list for overflowed requests as
well. Once a request is cleaned up properly, nothing needs its per-op
data anymore.
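
The "cleaned up properly" part is what the io_clean_op() helper from
patch 1 guarantees; a hedged sketch of that flag-guarded idiom follows
(hypothetical names, same shape as io_clean_op()/REQ_F_NEED_CLEANUP):

#include <stdio.h>

#define NEED_CLEANUP	0x1

struct request { unsigned flags; };

static void do_clean_op(struct request *req)
{
	/* free per-op resources: buffers, iovecs, open files, ... */
	req->flags &= ~NEED_CLEANUP;
}

/* cheap inline guard: the common no-cleanup case costs a single test */
static inline void clean_op(struct request *req)
{
	if (req->flags & NEED_CLEANUP)
		do_clean_op(req);
}

int main(void)
{
	struct request req = { .flags = NEED_CLEANUP };

	clean_op(&req);		/* per-op space may be reused after this */
	printf("flags=%#x\n", req.flags);
	return 0;
}

Only after this guard runs may the overflow path store into compl.list,
which is why the hunk below adds an io_clean_op() call before the stores.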
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1ad6d47a6223..584ff83cf0a4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1338,8 +1338,8 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
break;
req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
- list);
- list_move(&req->list, &list);
+ compl.list);
+ list_move(&req->compl.list, &list);
req->flags &= ~REQ_F_OVERFLOW;
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
@@ -1361,8 +1361,8 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
io_cqring_ev_posted(ctx);
while (!list_empty(&list)) {
- req = list_first_entry(&list, struct io_kiocb, list);
- list_del(&req->list);
+ req = list_first_entry(&list, struct io_kiocb, compl.list);
+ list_del(&req->compl.list);
io_put_req(req);
}
@@ -1395,11 +1395,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
set_bit(0, &ctx->cq_check_overflow);
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
}
+ io_clean_op(req);
req->flags |= REQ_F_OVERFLOW;
- refcount_inc(&req->refs);
req->result = res;
req->cflags = cflags;
- list_add_tail(&req->list, &ctx->cq_overflow_list);
+ refcount_inc(&req->refs);
+ list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
}
}
@@ -7812,7 +7813,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
if (cancel_req->flags & REQ_F_OVERFLOW) {
spin_lock_irq(&ctx->completion_lock);
- list_del(&cancel_req->list);
+ list_del(&cancel_req->compl.list);
cancel_req->flags &= ~REQ_F_OVERFLOW;
if (list_empty(&ctx->cq_overflow_list)) {
clear_bit(0, &ctx->sq_check_overflow);
--
2.24.0
* [PATCH v2 5/9] io_uring: add req->timeout.list
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
Instead of using the shared req->list, hang timeouts on their own list
entry. struct io_timeout has enough spare space for it, but if that ever
becomes a problem, ->inflight_entry can be reused instead.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 584ff83cf0a4..b9bd2002db98 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -396,6 +396,7 @@ struct io_timeout {
int flags;
u32 off;
u32 target_seq;
+ struct list_head list;
};
struct io_rw {
@@ -1212,7 +1213,7 @@ static void io_kill_timeout(struct io_kiocb *req)
ret = hrtimer_try_to_cancel(&req->io->timeout.timer);
if (ret != -1) {
atomic_inc(&req->ctx->cq_timeouts);
- list_del_init(&req->list);
+ list_del_init(&req->timeout.list);
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, 0);
io_put_req(req);
@@ -1224,7 +1225,7 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
struct io_kiocb *req, *tmp;
spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list)
io_kill_timeout(req);
spin_unlock_irq(&ctx->completion_lock);
}
@@ -1247,7 +1248,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
{
while (!list_empty(&ctx->timeout_list)) {
struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
- struct io_kiocb, list);
+ struct io_kiocb, timeout.list);
if (io_is_timeout_noseq(req))
break;
@@ -1255,7 +1256,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
- atomic_read(&ctx->cq_timeouts))
break;
- list_del_init(&req->list);
+ list_del_init(&req->timeout.list);
io_kill_timeout(req);
}
}
@@ -4980,8 +4981,8 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
* We could be racing with timeout deletion. If the list is empty,
* then timeout lookup already found it and will be handling it.
*/
- if (!list_empty(&req->list))
- list_del_init(&req->list);
+ if (!list_empty(&req->timeout.list))
+ list_del_init(&req->timeout.list);
io_cqring_fill_event(req, -ETIME);
io_commit_cqring(ctx);
@@ -4998,9 +4999,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
struct io_kiocb *req;
int ret = -ENOENT;
- list_for_each_entry(req, &ctx->timeout_list, list) {
+ list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
if (user_data == req->user_data) {
- list_del_init(&req->list);
+ list_del_init(&req->timeout.list);
ret = 0;
break;
}
@@ -5120,7 +5121,8 @@ static int io_timeout(struct io_kiocb *req)
* the one we need first.
*/
list_for_each_prev(entry, &ctx->timeout_list) {
- struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+ struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
+ timeout.list);
if (io_is_timeout_noseq(nxt))
continue;
@@ -5129,7 +5131,7 @@ static int io_timeout(struct io_kiocb *req)
break;
}
add:
- list_add(&req->list, entry);
+ list_add(&req->timeout.list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
spin_unlock_irq(&ctx->completion_lock);
--
2.24.0
* [PATCH v2 6/9] io_uring: remove init for unused list
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
poll*() doesn't use req->list, so don't initialise it.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b9bd2002db98..b789edbe4339 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4947,7 +4947,6 @@ static int io_poll_add(struct io_kiocb *req)
__poll_t mask;
INIT_HLIST_NODE(&req->hash_node);
- INIT_LIST_HEAD(&req->list);
ipt.pt._qproc = io_poll_queue_proc;
mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
--
2.24.0
* [PATCH v2 7/9] io_uring: use non-intrusive list for defer
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
The only remaining user of req->list is DRAIN; instead of keeping a
separate per-request list for it, use old-fashioned non-intrusive list
entries allocated on demand. That's a really slow path, so the extra
allocation is fine.
This removes req->list and so sheds 16 bytes from io_kiocb.
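
A simplified userspace analogue of the intrusive-to-non-intrusive switch
(singly-linked and LIFO for brevity, illustrative names): the request
loses its embedded linkage, and a small wrapper node is allocated only
for the rare drained requests.

#include <stdio.h>
#include <stdlib.h>

struct request { int id; };	/* no embedded list linkage anymore */

struct defer_entry {		/* allocated on demand, slow path only */
	struct defer_entry *next;
	struct request *req;
};

static struct defer_entry *defer_head;

static int defer_request(struct request *req)
{
	struct defer_entry *de = malloc(sizeof(*de));

	if (!de)
		return -1;	/* the kernel path returns -ENOMEM */
	de->req = req;
	de->next = defer_head;
	defer_head = de;
	return 0;
}

static void flush_deferred(void)
{
	while (defer_head) {
		struct defer_entry *de = defer_head;

		defer_head = de->next;
		printf("running deferred req %d\n", de->req->id);
		free(de);	/* wrapper dies, the request lives on */
	}
}

int main(void)
{
	struct request r = { .id = 42 };

	if (!defer_request(&r))
		flush_deferred();
	return 0;
}

Every non-drained request gets the 16 bytes back for free; only DRAIN
pays, with one extra allocation on a path that is slow anyway.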
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b789edbe4339..672eb57565dc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -640,7 +640,6 @@ struct io_kiocb {
u16 buf_index;
struct io_ring_ctx *ctx;
- struct list_head list;
unsigned int flags;
refcount_t refs;
struct task_struct *task;
@@ -675,6 +674,11 @@ struct io_kiocb {
struct callback_head task_work;
};
+struct io_defer_entry {
+ struct list_head list;
+ struct io_kiocb *req;
+};
+
#define IO_IOPOLL_BATCH 8
struct io_comp_state {
@@ -1233,14 +1237,15 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
do {
- struct io_kiocb *req = list_first_entry(&ctx->defer_list,
- struct io_kiocb, list);
+ struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
+ struct io_defer_entry, list);
- if (req_need_defer(req))
+ if (req_need_defer(de->req))
break;
- list_del_init(&req->list);
+ list_del_init(&de->list);
/* punt-init is done before queueing for defer */
- __io_queue_async_work(req);
+ __io_queue_async_work(de->req);
+ kfree(de);
} while (!list_empty(&ctx->defer_list));
}
@@ -5372,6 +5377,7 @@ static int io_req_defer_prep(struct io_kiocb *req,
static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
+ struct io_defer_entry *de;
int ret;
/* Still need defer if there is pending req in defer list. */
@@ -5386,15 +5392,20 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
io_prep_async_link(req);
+ de = kmalloc(sizeof(*de), GFP_KERNEL);
+ if (!de)
+ return -ENOMEM;
spin_lock_irq(&ctx->completion_lock);
if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
spin_unlock_irq(&ctx->completion_lock);
+ kfree(de);
return 0;
}
trace_io_uring_defer(ctx, req, req->user_data);
- list_add_tail(&req->list, &ctx->defer_list);
+ de->req = req;
+ list_add_tail(&de->list, &ctx->defer_list);
spin_unlock_irq(&ctx->completion_lock);
return -EIOCBQUEUED;
}
--
2.24.0
* [PATCH v2 8/9] io_uring: remove sequence from io_kiocb
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
req->sequence is used only for deferred (i.e. DRAIN) requests, but is
initialised for every request. Remove req->sequence from io_kiocb
together with its initialisation in io_init_req().
Replace it with a new field in struct io_defer_entry that is calculated
only when needed, in io_req_defer(), which is a slow path.
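
A worked example of the on-demand computation (illustrative numbers; the
shape matches io_get_sequence() in the diff below): the sequence of a
link's head is recovered from the SQ counters by subtracting the link's
own requests.

#include <stdio.h>

int main(void)
{
	unsigned cached_sq_head = 107;	/* SQEs consumed so far */
	unsigned cached_sq_dropped = 2;	/* invalid SQEs skipped */
	unsigned nr_reqs = 3;		/* head + 2 linked requests */

	unsigned total_submitted = cached_sq_head - cached_sq_dropped;
	unsigned seq = total_submitted - nr_reqs;	/* 105 - 3 = 102 */

	printf("defer sequence = %u\n", seq);
	return 0;
}

Since this runs only when a drain is actually pending, every other
request skips the calculation entirely.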
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 44 ++++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 672eb57565dc..e70129fac6db 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -638,6 +638,7 @@ struct io_kiocb {
u8 iopoll_completed;
u16 buf_index;
+ u32 result;
struct io_ring_ctx *ctx;
unsigned int flags;
@@ -645,8 +646,6 @@ struct io_kiocb {
struct task_struct *task;
unsigned long fsize;
u64 user_data;
- u32 result;
- u32 sequence;
struct list_head link_list;
@@ -677,6 +676,7 @@ struct io_kiocb {
struct io_defer_entry {
struct list_head list;
struct io_kiocb *req;
+ u32 seq;
};
#define IO_IOPOLL_BATCH 8
@@ -1089,13 +1089,13 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
return NULL;
}
-static inline bool req_need_defer(struct io_kiocb *req)
+static bool req_need_defer(struct io_kiocb *req, u32 seq)
{
if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
struct io_ring_ctx *ctx = req->ctx;
- return req->sequence != ctx->cached_cq_tail
- + atomic_read(&ctx->cached_cq_overflow);
+ return seq != ctx->cached_cq_tail
+ + atomic_read(&ctx->cached_cq_overflow);
}
return false;
@@ -1240,7 +1240,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
- if (req_need_defer(de->req))
+ if (req_need_defer(de->req, de->seq))
break;
list_del_init(&de->list);
/* punt-init is done before queueing for defer */
@@ -5374,14 +5374,35 @@ static int io_req_defer_prep(struct io_kiocb *req,
return ret;
}
+static u32 io_get_sequence(struct io_kiocb *req)
+{
+ struct io_kiocb *pos;
+ struct io_ring_ctx *ctx = req->ctx;
+ u32 total_submitted, nr_reqs = 1;
+
+ if (req->flags & REQ_F_LINK_HEAD)
+ list_for_each_entry(pos, &req->link_list, link_list)
+ nr_reqs++;
+
+ total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
+ return total_submitted - nr_reqs;
+}
+
static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_defer_entry *de;
int ret;
+ u32 seq;
/* Still need defer if there is pending req in defer list. */
- if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list))
+ if (likely(list_empty_careful(&ctx->defer_list) &&
+ !(req->flags & REQ_F_IO_DRAIN)))
+ return 0;
+
+ seq = io_get_sequence(req);
+ /* Still a chance to pass the sequence check */
+ if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
return 0;
if (!req->io) {
@@ -5397,7 +5418,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -ENOMEM;
spin_lock_irq(&ctx->completion_lock);
- if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
+ if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
spin_unlock_irq(&ctx->completion_lock);
kfree(de);
return 0;
@@ -5405,6 +5426,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
trace_io_uring_defer(ctx, req, req->user_data);
de->req = req;
+ de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
spin_unlock_irq(&ctx->completion_lock);
return -EIOCBQUEUED;
@@ -6181,12 +6203,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
unsigned int sqe_flags;
int id;
- /*
- * All io need record the previous position, if LINK vs DARIN,
- * it can be used to mark the position of the first IO in the
- * link list.
- */
- req->sequence = ctx->cached_sq_head - ctx->cached_sq_dropped;
req->opcode = READ_ONCE(sqe->opcode);
req->user_data = READ_ONCE(sqe->user_data);
req->io = NULL;
--
2.24.0
* [PATCH v2 9/9] io_uring: place cflags into completion data
From: Pavel Begunkov @ 2020-07-13 20:37 UTC
To: Jens Axboe, io-uring
req->cflags is used only in the deferred-completion path, so store it in
the completion data instead. Together with the 4 bytes from the
->sequence patch and compacting io_kiocb, this frees 8 bytes.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e70129fac6db..7038c4f08805 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -491,6 +491,7 @@ struct io_statx {
struct io_completion {
struct file *file;
struct list_head list;
+ int cflags;
};
struct io_async_connect {
@@ -632,7 +633,6 @@ struct io_kiocb {
};
struct io_async_ctx *io;
- int cflags;
u8 opcode;
/* polled IO has completed */
u8 iopoll_completed;
@@ -1350,7 +1350,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
if (cqe) {
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, req->result);
- WRITE_ONCE(cqe->flags, req->cflags);
+ WRITE_ONCE(cqe->flags, req->compl.cflags);
} else {
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1404,7 +1404,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
io_clean_op(req);
req->flags |= REQ_F_OVERFLOW;
req->result = res;
- req->cflags = cflags;
+ req->compl.cflags = cflags;
refcount_inc(&req->refs);
list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
}
@@ -1438,7 +1438,7 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
list_del(&req->compl.list);
- __io_cqring_fill_event(req, req->result, req->cflags);
+ __io_cqring_fill_event(req, req->result, req->compl.cflags);
if (!(req->flags & REQ_F_LINK_HEAD)) {
req->flags |= REQ_F_COMP_LOCKED;
io_put_req(req);
@@ -1464,7 +1464,7 @@ static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
} else {
io_clean_op(req);
req->result = res;
- req->cflags = cflags;
+ req->compl.cflags = cflags;
list_add_tail(&req->compl.list, &cs->list);
if (++cs->nr >= 32)
io_submit_flush_completions(cs);
--
2.24.0
* Re: [PATCH v2 0/9] scrap 24 bytes from io_kiocb
From: Jens Axboe @ 2020-07-13 21:10 UTC
To: Pavel Begunkov, io-uring
On 7/13/20 2:37 PM, Pavel Begunkov wrote:
> Make io_kiocb slimmer by 24 bytes mainly by revising lists usage. The
> drawback is adding extra kmalloc in draining path, but that's a slow
> path, so meh. It also frees some space for the deferred completion path
> if would be needed in the future, but the main idea here is to shrink it
> to 3 cachelines in the end.
>
> This depends on "rw iovec copy cleanup" series
This looks really good to me, and also tests out good - both in terms
of functionality and KASAN, but also increases performance slightly
as expected. Applied, thanks!
--
Jens Axboe