* [PATCH 01/14] io_uring: optimise io_rw_reissue()
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
The hot path is IO completing on the first try. Reshuffle io_rw_reissue()
so that the completion result is checked first and the function bails out
early when no reissue is needed.
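For reference, the reordered checks end up as below; this is a sketch
assembled from the diff that follows, with the tail of the function elided:

	static bool io_rw_reissue(struct io_kiocb *req, long res)
	{
	#ifdef CONFIG_BLOCK
		umode_t mode;
		int ret;

		/* hot path: a plain completion, nothing to reissue */
		if (res != -EAGAIN && res != -EOPNOTSUPP)
			return false;

		mode = file_inode(req->file)->i_mode;
		if ((!S_ISBLK(mode) && !S_ISREG(mode)) || io_wq_current_is_worker())
			return false;
		...
	#endif
	}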
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4bcabc51fc5a..7f4bc5092b5c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2713,12 +2713,13 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
static bool io_rw_reissue(struct io_kiocb *req, long res)
{
#ifdef CONFIG_BLOCK
- umode_t mode = file_inode(req->file)->i_mode;
+ umode_t mode;
int ret;
- if (!S_ISBLK(mode) && !S_ISREG(mode))
+ if (res != -EAGAIN && res != -EOPNOTSUPP)
return false;
- if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
+ mode = file_inode(req->file)->i_mode;
+ if ((!S_ISBLK(mode) && !S_ISREG(mode)) || io_wq_current_is_worker())
return false;
lockdep_assert_held(&req->ctx->uring_lock);
--
2.24.0
* [PATCH 02/14] io_uring: refactor io_resubmit_prep()
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
It's awkward to pass a return value into a function just for it to hand it
back. Check it at the call site instead and clean up io_resubmit_prep() a
bit.
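After the change the function reduces to roughly the following shape (a
sketch assembled from the diff below, with declarations and the opcode
cases elided):

	static bool io_resubmit_prep(struct io_kiocb *req)
	{
		...
		/* already prepared */
		if (req->async_data)
			return true;

		switch (req->opcode) {
		...
		default:
			return false;
		}

		ret = io_import_iovec(rw, req, &iovec, &iter, false);
		if (ret < 0)
			return false;
		ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
		if (!ret)
			return true;
		kfree(iovec);
		return false;
	}

The caller in io_rw_reissue() checks the acquire result itself:

	ret = io_sq_thread_acquire_mm_files(req->ctx, req);
	if (!ret && io_resubmit_prep(req)) {
		...
	}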
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 33 +++++++++++++--------------------
1 file changed, 13 insertions(+), 20 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7f4bc5092b5c..eb8bee704374 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2664,17 +2664,16 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res,
}
#ifdef CONFIG_BLOCK
-static bool io_resubmit_prep(struct io_kiocb *req, int error)
+static bool io_resubmit_prep(struct io_kiocb *req)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
ssize_t ret = -ECANCELED;
struct iov_iter iter;
int rw;
- if (error) {
- ret = error;
- goto end_req;
- }
+ /* already prepared */
+ if (req->async_data)
+ return true;
switch (req->opcode) {
case IORING_OP_READV:
@@ -2690,22 +2689,16 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
default:
printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n",
req->opcode);
- goto end_req;
+ return false;
}
- if (!req->async_data) {
- ret = io_import_iovec(rw, req, &iovec, &iter, false);
- if (ret < 0)
- goto end_req;
- ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
- if (!ret)
- return true;
- kfree(iovec);
- } else {
+ ret = io_import_iovec(rw, req, &iovec, &iter, false);
+ if (ret < 0)
+ return false;
+ ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
+ if (!ret)
return true;
- }
-end_req:
- req_set_fail_links(req);
+ kfree(iovec);
return false;
}
#endif
@@ -2726,12 +2719,12 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
ret = io_sq_thread_acquire_mm_files(req->ctx, req);
- if (io_resubmit_prep(req, ret)) {
+ if (!ret && io_resubmit_prep(req)) {
refcount_inc(&req->refs);
io_queue_async_work(req);
return true;
}
-
+ req_set_fail_links(req);
#endif
return false;
}
--
2.24.0
* [PATCH 03/14] io_uring: cleanup personalities under uring_lock
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
personality_idr is usually synchronised by uring_lock; the exception is
removing personalities in io_ring_ctx_wait_and_kill(). That is legitimate,
as refs are killed by that point, but it is still more resilient to do it
under the lock.
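With this patch the idr walk sits inside the locked section of
io_ring_ctx_wait_and_kill(), roughly (a sketch based on the diff below):

	mutex_lock(&ctx->uring_lock);
	ctx->cq_overflow_flushed = 1;
	if (ctx->rings)
		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
	idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
	mutex_unlock(&ctx->uring_lock);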
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index eb8bee704374..88786b649ade 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8749,6 +8749,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
ctx->cq_overflow_flushed = 1;
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true, NULL, NULL);
+ idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
mutex_unlock(&ctx->uring_lock);
io_kill_timeouts(ctx, NULL, NULL);
@@ -8759,7 +8760,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
/* if we failed setting up the ctx, we might not have any rings */
io_iopoll_try_reap_events(ctx);
- idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
/*
* Do this upfront, so we won't have a grace period where the ring
--
2.24.0
* [PATCH 04/14] io_uring: inline io_async_submit()
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
The name is confusing and it's used only in one place.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 88786b649ade..36be2b2e0570 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1257,11 +1257,6 @@ static inline void io_req_init_async(struct io_kiocb *req)
refcount_inc(&req->work.identity->count);
}
-static inline bool io_async_submit(struct io_ring_ctx *ctx)
-{
- return ctx->flags & IORING_SETUP_SQPOLL;
-}
-
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -6923,7 +6918,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
}
trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
- true, io_async_submit(ctx));
+ true, ctx->flags & IORING_SETUP_SQPOLL);
err = io_submit_sqe(req, sqe, &link, &state.comp);
if (err)
goto fail_req;
--
2.24.0
* [PATCH 05/14] io_uring: inline __io_commit_cqring()
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
Inline it into its only user; that's cleaner.
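The result is a single function (taken from the diff that follows):

	static void io_commit_cqring(struct io_ring_ctx *ctx)
	{
		io_flush_timeouts(ctx);

		/* order cqe stores with ring update */
		smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);

		if (unlikely(!list_empty(&ctx->defer_list)))
			__io_queue_deferred(ctx);
	}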
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 36be2b2e0570..5dfda399eb80 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1341,14 +1341,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false;
}
-static void __io_commit_cqring(struct io_ring_ctx *ctx)
-{
- struct io_rings *rings = ctx->rings;
-
- /* order cqe stores with ring update */
- smp_store_release(&rings->cq.tail, ctx->cached_cq_tail);
-}
-
static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
{
if (req->work.identity == &tctx->__identity)
@@ -1672,7 +1664,9 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
static void io_commit_cqring(struct io_ring_ctx *ctx)
{
io_flush_timeouts(ctx);
- __io_commit_cqring(ctx);
+
+ /* order cqe stores with ring update */
+ smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
if (unlikely(!list_empty(&ctx->defer_list)))
__io_queue_deferred(ctx);
--
2.24.0
* [PATCH 06/14] io_uring: further deduplicate #CQ events calc
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
Apparently, there is one more place that hand-codes the calculation of the
number of CQ events in the ring. Use the __io_cqring_events() helper in
io_get_cqring() as well. Naturally, the generated assembly stays identical.
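The helper is simply moved up and reused (from the diff below):

	static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
	{
		return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
	}

so the open-coded comparison in io_get_cqring() becomes:

	if (__io_cqring_events(ctx) == rings->cq_ring_entries)
		return NULL;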
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5dfda399eb80..b05d0b94e334 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1679,21 +1679,25 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
}
+static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
+{
+ return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
+}
+
static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
{
struct io_rings *rings = ctx->rings;
unsigned tail;
- tail = ctx->cached_cq_tail;
/*
* writes to the cq entry need to come after reading head; the
* control dependency is enough as we're using WRITE_ONCE to
* fill the cq entry
*/
- if (tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries)
+ if (__io_cqring_events(ctx) == rings->cq_ring_entries)
return NULL;
- ctx->cached_cq_tail++;
+ tail = ctx->cached_cq_tail++;
return &rings->cqes[tail & ctx->cq_mask];
}
@@ -1708,11 +1712,6 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
return io_wq_current_is_worker();
}
-static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx)
-{
- return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
-}
-
static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
/* see waitqueue_active() comment */
--
2.24.0
* [PATCH 07/14] io_uring: simplify io_alloc_req()
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
Get rid of a label in io_alloc_req(); it's cleaner to return directly.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b05d0b94e334..d1ced93c1ea3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1963,7 +1963,7 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx,
if (unlikely(ret <= 0)) {
state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
if (!state->reqs[0])
- goto fallback;
+ return io_get_fallback_req(ctx);
ret = 1;
}
state->free_reqs = ret;
@@ -1971,8 +1971,6 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx,
state->free_reqs--;
return state->reqs[state->free_reqs];
-fallback:
- return io_get_fallback_req(ctx);
}
static inline void io_put_file(struct io_kiocb *req, struct file *file,
--
2.24.0
* [PATCH 08/14] io_uring: remove __io_state_file_put
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
The check in io_state_file_put() is optimised pretty well when called
from __io_file_get(). Don't pollute the code with all these variants.
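The two helpers collapse into one (from the diff below):

	static inline void io_state_file_put(struct io_submit_state *state)
	{
		if (state->file_refs) {
			fput_many(state->file, state->file_refs);
			state->file_refs = 0;
		}
	}

__io_file_get() now calls io_state_file_put() directly; since it has just
tested state->file_refs itself, the compiler can drop the redundant check.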
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d1ced93c1ea3..93c14bc970d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2790,16 +2790,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
wake_up(&ctx->sq_data->wait);
}
-static inline void __io_state_file_put(struct io_submit_state *state)
-{
- fput_many(state->file, state->file_refs);
- state->file_refs = 0;
-}
-
static inline void io_state_file_put(struct io_submit_state *state)
{
- if (state->file_refs)
- __io_state_file_put(state);
+ if (state->file_refs) {
+ fput_many(state->file, state->file_refs);
+ state->file_refs = 0;
+ }
}
/*
@@ -2817,7 +2813,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
state->file_refs--;
return state->file;
}
- __io_state_file_put(state);
+ io_state_file_put(state);
}
state->file = fget_many(fd, state->ios_left);
if (unlikely(!state->file))
--
2.24.0
* [PATCH 09/14] io_uring: deduplicate failing task_work_add
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
When io_req_task_work_add() fails, the request is cancelled by enqueueing
it via the task_works of io-wq. Extract a helper function for that.
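The extracted helper, as it appears in the diff below:

	static void io_req_task_work_add_fallback(struct io_kiocb *req,
						  void (*cb)(struct callback_head *))
	{
		struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);

		init_task_work(&req->task_work, cb);
		task_work_add(tsk, &req->task_work, TWA_NONE);
		wake_up_process(tsk);
	}

Each of the four duplicated sites then shrinks to a single call, e.g.:

	if (unlikely(ret))
		io_req_task_work_add_fallback(req, io_req_task_cancel);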
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 46 +++++++++++++++++-----------------------------
1 file changed, 17 insertions(+), 29 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 93c14bc970d3..c895e42201c8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2146,6 +2146,16 @@ static int io_req_task_work_add(struct io_kiocb *req)
return ret;
}
+static void io_req_task_work_add_fallback(struct io_kiocb *req,
+ void (*cb)(struct callback_head *))
+{
+ struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
+
+ init_task_work(&req->task_work, cb);
+ task_work_add(tsk, &req->task_work, TWA_NONE);
+ wake_up_process(tsk);
+}
+
static void __io_req_task_cancel(struct io_kiocb *req, int error)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -2200,14 +2210,8 @@ static void io_req_task_queue(struct io_kiocb *req)
percpu_ref_get(&req->ctx->refs);
ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- init_task_work(&req->task_work, io_req_task_cancel);
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
- }
+ if (unlikely(ret))
+ io_req_task_work_add_fallback(req, io_req_task_cancel);
}
static inline void io_queue_next(struct io_kiocb *req)
@@ -2325,13 +2329,8 @@ static void io_free_req_deferred(struct io_kiocb *req)
init_task_work(&req->task_work, io_put_req_deferred_cb);
ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
- }
+ if (unlikely(ret))
+ io_req_task_work_add_fallback(req, io_put_req_deferred_cb);
}
static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
@@ -3400,15 +3399,8 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
/* submit ref gets dropped, acquire a new one */
refcount_inc(&req->refs);
ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- struct task_struct *tsk;
-
- /* queue just for cancelation */
- init_task_work(&req->task_work, io_req_task_cancel);
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
- }
+ if (unlikely(ret))
+ io_req_task_work_add_fallback(req, io_req_task_cancel);
return 1;
}
@@ -5119,12 +5111,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
*/
ret = io_req_task_work_add(req);
if (unlikely(ret)) {
- struct task_struct *tsk;
-
WRITE_ONCE(poll->canceled, true);
- tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
+ io_req_task_work_add_fallback(req, func);
}
return 1;
}
--
2.24.0
* [PATCH 10/14] io_uring: don't block resource recycle by overflows
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
We don't want resource recycling (e.g. pre-registered files being fput) to
be delayed by overflowed requests. Drop fixed_file_refs before putting a
request onto the overflow list.
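The hunk below adds the early put in __io_cqring_fill_event(), right before
the request is queued on cq_overflow_list:

	/* Don't block resource recycling by overflowed requests */
	if (req->fixed_file_refs) {
		percpu_ref_put(req->fixed_file_refs);
		req->fixed_file_refs = NULL;
	}

	refcount_inc(&req->refs);
	list_add_tail(&req->compl.list, &ctx->cq_overflow_list);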
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c895e42201c8..9720e38b5b97 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1851,6 +1851,13 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
io_clean_op(req);
req->result = res;
req->compl.cflags = cflags;
+
+ /* Don't block resource recycling by overflowed requests */
+ if (req->fixed_file_refs) {
+ percpu_ref_put(req->fixed_file_refs);
+ req->fixed_file_refs = NULL;
+ }
+
refcount_inc(&req->refs);
list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
}
--
2.24.0
* Re: [PATCH 10/14] io_uring: don't block resource recycle by overflows
From: Jens Axboe @ 2021-01-19 22:59 UTC
To: Pavel Begunkov, io-uring
On 1/19/21 6:32 AM, Pavel Begunkov wrote:
> We don't want resource recycling (e.g. pre-registered files being fput) to
> be delayed by overflowed requests. Drop fixed_file_refs before putting a
> request onto the overflow list.
I am applying this on top of the first 9 of the Bijan series, and hence
this one is broken now. I've applied the rest; please resend if necessary.
--
Jens Axboe
* Re: [PATCH 10/14] io_uring: don't block resource recycle by overflows
From: Pavel Begunkov @ 2021-01-19 23:08 UTC
To: Jens Axboe, io-uring
On 19/01/2021 22:59, Jens Axboe wrote:
> On 1/19/21 6:32 AM, Pavel Begunkov wrote:
>> We don't want resource recycling (e.g. pre-registered files being fput) to
>> be delayed by overflowed requests. Drop fixed_file_refs before putting a
>> request onto the overflow list.
>
> I am applying this on top of the first 9 of the Bijan series, and hence
> this one is broken now. I've applied the rest; please resend if
> necessary.
Ah, indeed, forgot that it would be. Thanks!
--
Pavel Begunkov
* [PATCH 11/14] io_uring: add a helper timeout mode calculation
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
Deduplicate the translation of timeout flags into an hrtimer_mode.
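The helper, as added by the diff below:

	static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
	{
		return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
						    : HRTIMER_MODE_REL;
	}

Both io_timeout_remove() and io_timeout_prep() then use it instead of
open-coding the IORING_TIMEOUT_ABS check.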
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9720e38b5b97..a004102fbbde 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5744,6 +5744,12 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
return 0;
}
+static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
+{
+ return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
+ : HRTIMER_MODE_REL;
+}
+
/*
* Remove or update an existing timeout command
*/
@@ -5754,14 +5760,11 @@ static int io_timeout_remove(struct io_kiocb *req)
int ret;
spin_lock_irq(&ctx->completion_lock);
- if (req->timeout_rem.flags & IORING_TIMEOUT_UPDATE) {
- enum hrtimer_mode mode = (tr->flags & IORING_TIMEOUT_ABS)
- ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
-
- ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
- } else {
+ if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE))
ret = io_timeout_cancel(ctx, tr->addr);
- }
+ else
+ ret = io_timeout_update(ctx, tr->addr, &tr->ts,
+ io_translate_timeout_mode(tr->flags));
io_cqring_fill_event(req, ret);
io_commit_cqring(ctx);
@@ -5801,11 +5804,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
return -EFAULT;
- if (flags & IORING_TIMEOUT_ABS)
- data->mode = HRTIMER_MODE_ABS;
- else
- data->mode = HRTIMER_MODE_REL;
-
+ data->mode = io_translate_timeout_mode(flags);
hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
return 0;
}
--
2.24.0
* [PATCH 12/14] io_uring: help inlining of io_req_complete()
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
__io_req_complete() inlining is a bit weird: some compilers don't optimise
out the non-NULL branch of it even when it is called as io_req_complete().
Help it a bit by extracting state and stateless helpers out of
__io_req_complete().
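The dispatcher itself then becomes trivial to inline (from the diff below):

	static inline void __io_req_complete(struct io_kiocb *req, long res,
					     unsigned cflags, struct io_comp_state *cs)
	{
		if (!cs)
			io_req_complete_nostate(req, res, cflags);
		else
			io_req_complete_state(req, res, cflags, cs);
	}

With io_req_complete() also inline and passing a constant NULL for cs, the
compiler has an easy time folding the branch down to the nostate path.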
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a004102fbbde..b0f54f4495c7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1868,7 +1868,8 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
__io_cqring_fill_event(req, res, 0);
}
-static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
+static void io_req_complete_nostate(struct io_kiocb *req, long res,
+ unsigned int cflags)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
@@ -1879,6 +1880,7 @@ static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
+ io_put_req(req);
}
static void io_submit_flush_completions(struct io_comp_state *cs)
@@ -1914,23 +1916,27 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
cs->nr = 0;
}
-static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
- struct io_comp_state *cs)
+static void io_req_complete_state(struct io_kiocb *req, long res,
+ unsigned int cflags, struct io_comp_state *cs)
{
- if (!cs) {
- io_cqring_add_event(req, res, cflags);
- io_put_req(req);
- } else {
- io_clean_op(req);
- req->result = res;
- req->compl.cflags = cflags;
- list_add_tail(&req->compl.list, &cs->list);
- if (++cs->nr >= 32)
- io_submit_flush_completions(cs);
- }
+ io_clean_op(req);
+ req->result = res;
+ req->compl.cflags = cflags;
+ list_add_tail(&req->compl.list, &cs->list);
+ if (++cs->nr >= 32)
+ io_submit_flush_completions(cs);
+}
+
+static inline void __io_req_complete(struct io_kiocb *req, long res,
+ unsigned cflags, struct io_comp_state *cs)
+{
+ if (!cs)
+ io_req_complete_nostate(req, res, cflags);
+ else
+ io_req_complete_state(req, res, cflags, cs);
}
-static void io_req_complete(struct io_kiocb *req, long res)
+static inline void io_req_complete(struct io_kiocb *req, long res)
{
__io_req_complete(req, res, 0, NULL);
}
--
2.24.0
* [PATCH 13/14] io_uring: don't flush CQEs deep down the stack
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
io_submit_flush_completions() is called down the stack in the _state
version of io_req_complete(). That's OK because it is only called directly
by io_uring opcode handler functions. Move it up to __io_queue_sqe() as
preparation.
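__io_queue_sqe() now drops the submission reference and does the flushing
in one place, roughly as below (assembled from the diff that follows):

	/* drop submission reference */
	if (cs) {
		io_put_req(req);
		if (cs->nr >= 32)
			io_submit_flush_completions(cs);
		req = NULL;
	} else {
		req = io_put_req_find_next(req);
	}

while io_req_complete_state() only bumps cs->nr and no longer flushes.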
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b0f54f4495c7..1e46d471aa76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1923,8 +1923,7 @@ static void io_req_complete_state(struct io_kiocb *req, long res,
req->result = res;
req->compl.cflags = cflags;
list_add_tail(&req->compl.list, &cs->list);
- if (++cs->nr >= 32)
- io_submit_flush_completions(cs);
+ cs->nr++;
}
static inline void __io_req_complete(struct io_kiocb *req, long res,
@@ -6538,7 +6537,15 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
io_queue_linked_timeout(linked_timeout);
} else if (likely(!ret)) {
/* drop submission reference */
- req = io_put_req_find_next(req);
+ if (cs) {
+ io_put_req(req);
+ if (cs->nr >= 32)
+ io_submit_flush_completions(cs);
+ req = NULL;
+ } else {
+ req = io_put_req_find_next(req);
+ }
+
if (linked_timeout)
io_queue_linked_timeout(linked_timeout);
--
2.24.0
* [PATCH 14/14] io_uring: save atomic dec for inline executed reqs
From: Pavel Begunkov @ 2021-01-19 13:32 UTC
To: Jens Axboe, io-uring
When a request is completed with comp_state, its completion reference put
is deferred to io_submit_flush_completions(), but the submission reference
is put not far from there, so put both together to save one atomic dec per
request. That targets requests that complete inline, e.g. buffered rw,
send/recv.

Proper benchmarking hasn't been conducted, but for nops (batch=32) it was
around 7901 vs 8117 KIOPS (~2.7%), or ~4% per perf profiling.
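For reference, the flush path now drops both references at once (assembled
from the diff below):

	if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
			  |REQ_F_WORK_INITIALIZED)) {
		spin_unlock_irq(&ctx->completion_lock);
		io_double_put_req(req);
		spin_lock_irq(&ctx->completion_lock);
	} else {
		io_double_put_req(req);
	}

while io_req_complete_state() no longer queues the request itself and only
marks it with the new REQ_F_COMPLETE_INLINE flag; __io_queue_sqe() then adds
it to the completion list while still holding the submission reference.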
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1e46d471aa76..fb4e2a97e4f3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -618,6 +618,7 @@ enum {
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_WORK_INITIALIZED_BIT,
REQ_F_LTIMEOUT_ACTIVE_BIT,
+ REQ_F_COMPLETE_INLINE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -661,6 +662,8 @@ enum {
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
/* linked timeout is active, i.e. prepared by link's head */
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
+ /* completion is deferred through io_comp_state */
+ REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
};
struct async_poll {
@@ -1899,14 +1902,15 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
* io_free_req() doesn't care about completion_lock unless one
* of these flags is set. REQ_F_WORK_INITIALIZED is in the list
* because of a potential deadlock with req->work.fs->lock
+ * We defer both, completion and submission refs.
*/
if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
|REQ_F_WORK_INITIALIZED)) {
spin_unlock_irq(&ctx->completion_lock);
- io_put_req(req);
+ io_double_put_req(req);
spin_lock_irq(&ctx->completion_lock);
} else {
- io_put_req(req);
+ io_double_put_req(req);
}
}
io_commit_cqring(ctx);
@@ -1922,8 +1926,7 @@ static void io_req_complete_state(struct io_kiocb *req, long res,
io_clean_op(req);
req->result = res;
req->compl.cflags = cflags;
- list_add_tail(&req->compl.list, &cs->list);
- cs->nr++;
+ req->flags |= REQ_F_COMPLETE_INLINE;
}
static inline void __io_req_complete(struct io_kiocb *req, long res,
@@ -6537,9 +6540,9 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
io_queue_linked_timeout(linked_timeout);
} else if (likely(!ret)) {
/* drop submission reference */
- if (cs) {
- io_put_req(req);
- if (cs->nr >= 32)
+ if (req->flags & REQ_F_COMPLETE_INLINE) {
+ list_add_tail(&req->compl.list, &cs->list);
+ if (++cs->nr >= 32)
io_submit_flush_completions(cs);
req = NULL;
} else {
--
2.24.0
* Re: [PATCH for-next 00/14] mostly cleanups for 5.12
From: Jens Axboe @ 2021-01-19 23:01 UTC
To: Pavel Begunkov, io-uring
On 1/19/21 6:32 AM, Pavel Begunkov wrote:
> Easy patches that should not conflict with other stuff, actually based
> on 5.11 because it has more essential changes, but I'll rebase if it
> doesn't apply after the rc4 and 5.12 rebase/merge.
>
> 1-11 are mostly easy cleanups, and 12-14 are optimisations that may end
> up being preps.
Applied except 10/14 as mentioned, thanks!
--
Jens Axboe