* [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups
@ 2021-02-04 13:51 Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 01/13] io_uring: deduplicate core cancellations sequence Pavel Begunkov
` (13 more replies)
0 siblings, 14 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:51 UTC (permalink / raw)
To: Jens Axboe, io-uring
A bunch of random cleanups, a noticeable part of which (4-9/13)
refactors io_read(), which is currently not in the best shape and is
hard to understand.
7/13 may actually fix a problem.
10/13 should make NOWAIT and NONBLOCK work right.
v2: add 9-13/13
Pavel Begunkov (13):
io_uring: deduplicate core cancellations sequence
io_uring: refactor scheduling in io_cqring_wait
io_uring: refactor io_cqring_wait
io_uring: refactor io_read for unsupported nowait
io_uring: further simplify do_read error parsing
io_uring: let io_setup_async_rw take care of iovec
io_uring: don't forget to adjust io_size
io_uring: inline io_read()'s iovec freeing
io_uring: highlight read-retry loop
io_uring: treat NONBLOCK and RWF_NOWAIT similarly
io_uring: io_import_iovec return type cleanup
io_uring: deduplicate file table slot calculation
io_uring/io-wq: return 2-step work swap scheme
fs/io-wq.c | 16 +--
fs/io-wq.h | 4 +-
fs/io_uring.c | 366 ++++++++++++++++++++++----------------------------
3 files changed, 166 insertions(+), 220 deletions(-)
--
2.24.0
* [PATCH v2 01/13] io_uring: deduplicate core cancellations sequence
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
@ 2021-02-04 13:51 ` Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 02/13] io_uring: refactor scheduling in io_cqring_wait Pavel Begunkov
` (12 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:51 UTC (permalink / raw)
To: Jens Axboe, io-uring
Files and task cancellations go over the same steps trying to cancel
requests in io-wq, poll, etc. Deduplicate them with a helper.
Note: the new io_uring_try_cancel_requests() is the former
__io_uring_cancel_task_requests() with files passed as an argument and
with overflowed requests flushed.
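For illustration only, a compile-and-run userspace sketch of the dedup
pattern; every name in it is made up for the example and is not the
io_uring API:

#include <stdbool.h>
#include <stddef.h>

struct files { int dummy; };

/* stand-ins for the individual cancellation steps; each reports
 * whether it actually cancelled anything this round */
static bool cancel_io_wq(struct files *f)    { (void)f; return false; }
static bool cancel_poll(struct files *f)     { (void)f; return false; }
static bool cancel_timeouts(struct files *f) { (void)f; return false; }

/* one loop replaces two near-identical ones: files == NULL means
 * "cancel everything for the task", non-NULL restricts the scope */
static void try_cancel_requests(struct files *files)
{
        bool did_work;

        do {
                did_work  = cancel_io_wq(files);
                did_work |= cancel_poll(files);
                did_work |= cancel_timeouts(files);
        } while (did_work);
}

int main(void)
{
        try_cancel_requests(NULL);
        return 0;
}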
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 85 ++++++++++++++++++++++++---------------------------
1 file changed, 40 insertions(+), 45 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 24ad36d71289..a750c504366d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1003,9 +1003,9 @@ enum io_mem_account {
ACCT_PINNED,
};
-static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
- struct task_struct *task);
-
+static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+ struct task_struct *task,
+ struct files_struct *files);
static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node);
static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
struct io_ring_ctx *ctx);
@@ -8817,7 +8817,7 @@ static void io_ring_exit_work(struct work_struct *work)
* as nobody else will be looking for them.
*/
do {
- __io_uring_cancel_task_requests(ctx, NULL);
+ io_uring_try_cancel_requests(ctx, NULL, NULL);
} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
io_ring_ctx_free(ctx);
}
@@ -8931,6 +8931,40 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
}
}
+static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+ struct task_struct *task,
+ struct files_struct *files)
+{
+ struct io_task_cancel cancel = { .task = task, .files = files, };
+
+ while (1) {
+ enum io_wq_cancel cret;
+ bool ret = false;
+
+ if (ctx->io_wq) {
+ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
+ &cancel, true);
+ ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+ }
+
+ /* SQPOLL thread does its own polling */
+ if (!(ctx->flags & IORING_SETUP_SQPOLL) && !files) {
+ while (!list_empty_careful(&ctx->iopoll_list)) {
+ io_iopoll_try_reap_events(ctx);
+ ret = true;
+ }
+ }
+
+ ret |= io_poll_remove_all(ctx, task, files);
+ ret |= io_kill_timeouts(ctx, task, files);
+ ret |= io_run_task_work();
+ io_cqring_overflow_flush(ctx, true, task, files);
+ if (!ret)
+ break;
+ cond_resched();
+ }
+}
+
static int io_uring_count_inflight(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files)
@@ -8950,7 +8984,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
while (!list_empty_careful(&ctx->inflight_list)) {
- struct io_task_cancel cancel = { .task = task, .files = files };
DEFINE_WAIT(wait);
int inflight;
@@ -8958,13 +8991,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
if (!inflight)
break;
- io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
- io_poll_remove_all(ctx, task, files);
- io_kill_timeouts(ctx, task, files);
- io_cqring_overflow_flush(ctx, true, task, files);
- /* cancellations _may_ trigger task work */
- io_run_task_work();
-
+ io_uring_try_cancel_requests(ctx, task, files);
prepare_to_wait(&task->io_uring->wait, &wait,
TASK_UNINTERRUPTIBLE);
if (inflight == io_uring_count_inflight(ctx, task, files))
@@ -8973,37 +9000,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
}
}
-static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
- struct task_struct *task)
-{
- while (1) {
- struct io_task_cancel cancel = { .task = task, .files = NULL, };
- enum io_wq_cancel cret;
- bool ret = false;
-
- if (ctx->io_wq) {
- cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
- &cancel, true);
- ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
- }
-
- /* SQPOLL thread does its own polling */
- if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
- while (!list_empty_careful(&ctx->iopoll_list)) {
- io_iopoll_try_reap_events(ctx);
- ret = true;
- }
- }
-
- ret |= io_poll_remove_all(ctx, task, NULL);
- ret |= io_kill_timeouts(ctx, task, NULL);
- ret |= io_run_task_work();
- if (!ret)
- break;
- cond_resched();
- }
-}
-
static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
{
mutex_lock(&ctx->uring_lock);
@@ -9033,11 +9029,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
}
io_cancel_defer_files(ctx, task, files);
- io_cqring_overflow_flush(ctx, true, task, files);
io_uring_cancel_files(ctx, task, files);
if (!files)
- __io_uring_cancel_task_requests(ctx, task);
+ io_uring_try_cancel_requests(ctx, task, NULL);
if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
atomic_dec(&task->io_uring->in_idle);
--
2.24.0
* [PATCH v2 02/13] io_uring: refactor scheduling in io_cqring_wait
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 01/13] io_uring: deduplicate core cancellations sequence Pavel Begunkov
@ 2021-02-04 13:51 ` Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 03/13] io_uring: refactor io_cqring_wait Pavel Begunkov
` (11 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:51 UTC (permalink / raw)
To: Jens Axboe, io-uring
schedule_timeout() with timeout=MAX_SCHEDULE_TIMEOUT is guaranteed to
behave just like schedule(), so instead of hand-coding the choice based
on the arguments, always use the timeout version and simplify the code.
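As a rough userspace model of the same idea (plain C with invented
helpers, not kernel code): once "no timeout" is expressed as a sentinel
maximum, the timed and untimed paths collapse into one, and only the
expired-timeout handling remains:

#include <limits.h>
#include <stdio.h>

#define MAX_SCHEDULE_TIMEOUT LONG_MAX   /* sentinel: effectively "wait forever" */

/* stand-in for schedule_timeout(): pretend the full timeout always
 * elapses, unless it is the sentinel, in which case we "wake up" early */
static long fake_schedule_timeout(long timeout)
{
        return timeout == MAX_SCHEDULE_TIMEOUT ? timeout : 0;
}

static int wait_for_events(const long *user_timeout)
{
        /* default to the sentinel instead of branching on user_timeout later */
        long timeout = MAX_SCHEDULE_TIMEOUT;

        if (user_timeout)
                timeout = *user_timeout;

        timeout = fake_schedule_timeout(timeout);
        if (timeout == 0)
                return -1;      /* timed out, akin to -ETIME */
        return 0;
}

int main(void)
{
        long t = 100;

        printf("untimed: %d, timed: %d\n",
               wait_for_events(NULL), wait_for_events(&t));
        return 0;
}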
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a750c504366d..5b735635b8f0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7213,9 +7213,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
.to_wait = min_events,
};
struct io_rings *rings = ctx->rings;
- struct timespec64 ts;
- signed long timeout = 0;
- int ret = 0;
+ signed long timeout = MAX_SCHEDULE_TIMEOUT;
+ int ret;
do {
io_cqring_overflow_flush(ctx, false, NULL, NULL);
@@ -7239,6 +7238,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
}
if (uts) {
+ struct timespec64 ts;
+
if (get_timespec64(&ts, uts))
return -EFAULT;
timeout = timespec64_to_jiffies(&ts);
@@ -7264,14 +7265,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
finish_wait(&ctx->wait, &iowq.wq);
continue;
}
- if (uts) {
- timeout = schedule_timeout(timeout);
- if (timeout == 0) {
- ret = -ETIME;
- break;
- }
- } else {
- schedule();
+ timeout = schedule_timeout(timeout);
+ if (timeout == 0) {
+ ret = -ETIME;
+ break;
}
} while (1);
finish_wait(&ctx->wait, &iowq.wq);
--
2.24.0
* [PATCH v2 03/13] io_uring: refactor io_cqring_wait
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 01/13] io_uring: deduplicate core cancellations sequence Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 02/13] io_uring: refactor scheduling in io_cqring_wait Pavel Begunkov
@ 2021-02-04 13:51 ` Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 04/13] io_uring: refactor io_read for unsupported nowait Pavel Begunkov
` (10 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:51 UTC (permalink / raw)
To: Jens Axboe, io-uring
It's easy to make a mistake in io_cqring_wait() because for every
break/continue clause we need to make sure prepare_to_wait()/finish_wait()
are used correctly. Extract all of that into a new helper,
io_cqring_wait_schedule(), transforming the loop into a simple series
of calls: prepare(); check_and_schedule(); finish();
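A minimal sketch of the resulting shape, with illustrative names rather
than the kernel API: the helper owns the retry decision, so no
break/continue path can skip the finish step:

#include <stdio.h>

/* stand-ins for prepare_to_wait()/finish_wait() */
static void prepare(void) { }
static void finish(void)  { }

/* returns >0 to retry, 0 on success, <0 on error -- the only place
 * that decides whether we go around the loop again */
static int check_and_schedule(int *budget)
{
        if (*budget <= 0)
                return 0;       /* "woken up", nothing more to wait for */
        (*budget)--;
        return 1;               /* caller should retry */
}

static int wait_loop(int budget)
{
        int ret;

        do {
                prepare();
                ret = check_and_schedule(&budget);
                finish();       /* always paired with prepare(), no matter what */
        } while (ret > 0);

        return ret;
}

int main(void)
{
        printf("wait_loop -> %d\n", wait_loop(3));
        return 0;
}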
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 43 ++++++++++++++++++++++---------------------
1 file changed, 22 insertions(+), 21 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5b735635b8f0..dcb9e937daa3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7195,6 +7195,25 @@ static int io_run_task_work_sig(void)
return -EINTR;
}
+/* when returns >0, the caller should retry */
+static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq,
+ signed long *timeout)
+{
+ int ret;
+
+ /* make sure we run task_work before checking for signals */
+ ret = io_run_task_work_sig();
+ if (ret || io_should_wake(iowq))
+ return ret;
+ /* let the caller flush overflows, retry */
+ if (test_bit(0, &ctx->cq_check_overflow))
+ return 1;
+
+ *timeout = schedule_timeout(*timeout);
+ return !*timeout ? -ETIME : 1;
+}
+
/*
* Wait until events become available, if we don't already have some. The
* application must reap them itself, as they reside on the shared cq ring.
@@ -7251,27 +7270,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
io_cqring_overflow_flush(ctx, false, NULL, NULL);
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
- /* make sure we run task_work before checking for signals */
- ret = io_run_task_work_sig();
- if (ret > 0) {
- finish_wait(&ctx->wait, &iowq.wq);
- continue;
- }
- else if (ret < 0)
- break;
- if (io_should_wake(&iowq))
- break;
- if (test_bit(0, &ctx->cq_check_overflow)) {
- finish_wait(&ctx->wait, &iowq.wq);
- continue;
- }
- timeout = schedule_timeout(timeout);
- if (timeout == 0) {
- ret = -ETIME;
- break;
- }
- } while (1);
- finish_wait(&ctx->wait, &iowq.wq);
+ ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+ finish_wait(&ctx->wait, &iowq.wq);
+ } while (ret > 0);
restore_saved_sigmask_unless(ret == -EINTR);
--
2.24.0
* [PATCH v2 04/13] io_uring: refactor io_read for unsupported nowait
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (2 preceding siblings ...)
2021-02-04 13:51 ` [PATCH v2 03/13] io_uring: refactor io_cqring_wait Pavel Begunkov
@ 2021-02-04 13:51 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 05/13] io_uring: further simplify do_read error parsing Pavel Begunkov
` (9 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:51 UTC (permalink / raw)
To: Jens Axboe, io-uring
The !io_file_supports_async() case of io_read() is hard to read: it
jumps somewhere into the middle of the function just to do the async
setup and fail on a similar check. Call io_setup_async_rw() directly
for this case, it's much easier to follow.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index dcb9e937daa3..866e0ea83dbe 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3506,7 +3506,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
ssize_t io_size, ret, ret2;
- bool no_async;
if (rw) {
iter = &rw->iter;
@@ -3527,9 +3526,12 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
kiocb->ki_flags |= IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- no_async = force_nonblock && !io_file_supports_async(req->file, READ);
- if (no_async)
- goto copy_iov;
+ if (force_nonblock && !io_file_supports_async(req->file, READ)) {
+ ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
+ if (!ret)
+ return -EAGAIN;
+ goto out_free;
+ }
ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
if (unlikely(ret))
@@ -3568,8 +3570,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
ret = ret2;
goto out_free;
}
- if (no_async)
- return -EAGAIN;
rw = req->async_data;
/* it's copied and will be cleaned with ->io */
iovec = NULL;
--
2.24.0
* [PATCH v2 05/13] io_uring: further simplify do_read error parsing
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (3 preceding siblings ...)
2021-02-04 13:51 ` [PATCH v2 04/13] io_uring: refactor io_read for unsupported nowait Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 06/13] io_uring: let io_setup_async_rw take care of iovec Pavel Begunkov
` (8 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
First, instead of checking iov_iter_count(iter) for 0 to find out
whether all needed bytes were read, just compare the return code
against io_size. It's more reliable and arguably cleaner.
Also, place the half-read case into an else branch and delete an extra
label.
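Purely to illustrate the new branch structure (a simplified stand-in,
not the real io_read()):

#include <stdio.h>

#define FAKE_EAGAIN (-11)

/* simplified error parsing after a read attempt:
 * would-block      -> set up an async retry
 * error or == size -> done
 * otherwise        -> partial read, keep the remainder for a retry */
static const char *parse_read_result(long ret, long *io_size)
{
        if (ret == FAKE_EAGAIN) {
                return "would block: set up retry";
        } else if (ret <= 0 || ret == *io_size) {
                return "done (full read or error)";
        } else {
                *io_size -= ret;        /* half-read case lives in the else branch */
                return "partial read: retry for the rest";
        }
}

int main(void)
{
        long size = 4096;

        printf("%s\n", parse_read_result(1024, &size));
        printf("remaining %ld\n", size);        /* 3072 */
        return 0;
}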
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 866e0ea83dbe..1d1fa1f77332 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3552,19 +3552,18 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
- goto copy_iov;
- } else if (ret <= 0) {
+ } else if (ret <= 0 || ret == io_size) {
/* make sure -ERESTARTSYS -> -EINTR is done */
goto done;
- }
+ } else {
+ /* we did blocking attempt. no retry. */
+ if (!force_nonblock || (req->file->f_flags & O_NONBLOCK) ||
+ !(req->flags & REQ_F_ISREG))
+ goto done;
- /* read it all, or we did blocking attempt. no retry. */
- if (!iov_iter_count(iter) || !force_nonblock ||
- (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG))
- goto done;
+ io_size -= ret;
+ }
- io_size -= ret;
-copy_iov:
ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
if (ret2) {
ret = ret2;
--
2.24.0
* [PATCH v2 06/13] io_uring: let io_setup_async_rw take care of iovec
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (4 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 05/13] io_uring: further simplify do_read error parsing Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 07/13] io_uring: don't forget to adjust io_size Pavel Begunkov
` (7 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
Now we hand ownership of the iovec over to io_setup_async_rw(), so it
either sets up the request's async context correctly or frees the iovec
itself on error. That makes life a bit easier at the call sites.
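A tiny sketch of the ownership convention, with made-up names (not the
actual io_uring helpers): once the setup function frees the buffer on
failure, callers can simply propagate its return value:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct req { char *saved; };

/* takes ownership of 'buf': on success it is stored in the request,
 * on failure it is freed here so callers never have to clean it up */
static int setup_async(struct req *req, char *buf)
{
        if (!req) {
                free(buf);
                return -1;      /* e.g. -ENOMEM in the real code */
        }
        req->saved = buf;
        return 0;
}

static int caller(struct req *req)
{
        char *buf = strdup("payload");

        if (!buf)
                return -1;
        /* no goto out_free / kfree() here -- just forward the result */
        return setup_async(req, buf) ? -1 : 0;
}

int main(void)
{
        struct req r = { NULL };

        printf("caller -> %d\n", caller(&r));
        free(r.saved);
        return 0;
}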
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 24 +++++++++---------------
1 file changed, 9 insertions(+), 15 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1d1fa1f77332..f8492d62b6a1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2721,11 +2721,7 @@ static bool io_resubmit_prep(struct io_kiocb *req)
ret = io_import_iovec(rw, req, &iovec, &iter, false);
if (ret < 0)
return false;
- ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
- if (!ret)
- return true;
- kfree(iovec);
- return false;
+ return !io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
}
#endif
@@ -3366,8 +3362,10 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
if (!force && !io_op_defs[req->opcode].needs_async_data)
return 0;
if (!req->async_data) {
- if (__io_alloc_async_data(req))
+ if (__io_alloc_async_data(req)) {
+ kfree(iovec);
return -ENOMEM;
+ }
io_req_map_rw(req, iovec, fast_iov, iter);
}
@@ -3528,9 +3526,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
/* If the file doesn't support async, just async punt */
if (force_nonblock && !io_file_supports_async(req->file, READ)) {
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
- if (!ret)
- return -EAGAIN;
- goto out_free;
+ return ret ?: -EAGAIN;
}
ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
@@ -3565,10 +3561,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
}
ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
- if (ret2) {
- ret = ret2;
- goto out_free;
- }
+ if (ret2)
+ return ret2;
+
rw = req->async_data;
/* it's copied and will be cleaned with ->io */
iovec = NULL;
@@ -3703,8 +3698,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
- if (!ret)
- return -EAGAIN;
+ return ret ?: -EAGAIN;
}
out_free:
/* it's reportedly faster than delegating the null check to kfree() */
--
2.24.0
* [PATCH v2 07/13] io_uring: don't forget to adjust io_size
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (5 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 06/13] io_uring: let io_setup_async_rw take care of iovec Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 08/13] io_uring: inline io_read()'s iovec freeing Pavel Begunkov
` (6 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_read() keeps an invariant: how much we're still trying to read is
mirrored in both the iter and the io_size variable, and the latter
drives the decision about whether to do read-retries. However, io_size
is adjusted only after the first read attempt, so if we happen to go
for a third retry within a single call to io_read(), io_size ends up
greater than what's left in the iterator, which may lead to various
side effects up to live-locking.
Adjust io_size on every iteration.
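To see why the adjustment matters, here is a small userspace simulation
of the retry loop (the numbers and the fake read helper are invented
for the example): decrementing the remaining size on every iteration
keeps it in sync with what the iterator would report:

#include <stdio.h>

/* pretend each attempt reads half of what is left, at least 1 byte */
static long fake_read(long remaining)
{
        return remaining > 1 ? remaining / 2 : remaining;
}

int main(void)
{
        long io_size = 4096;    /* bytes we still want, mirrors the iterator */
        long total = 0;
        int attempts = 0;
        long ret = fake_read(io_size);

        while (ret > 0 && ret < io_size && attempts < 32) {
                io_size -= ret;         /* the fix: adjust on every iteration */
                total += ret;
                ret = fake_read(io_size);
                attempts++;
        }
        total += ret;

        printf("read %ld bytes in %d retries\n", total, attempts);
        return 0;
}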
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f8492d62b6a1..25fffff27c76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3548,16 +3548,11 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
- } else if (ret <= 0 || ret == io_size) {
- /* make sure -ERESTARTSYS -> -EINTR is done */
+ } else if (ret <= 0 || ret == io_size || !force_nonblock ||
+ (req->file->f_flags & O_NONBLOCK) ||
+ !(req->flags & REQ_F_ISREG)) {
+ /* read all, failed, already did sync or don't want to retry */
goto done;
- } else {
- /* we did blocking attempt. no retry. */
- if (!force_nonblock || (req->file->f_flags & O_NONBLOCK) ||
- !(req->flags & REQ_F_ISREG))
- goto done;
-
- io_size -= ret;
}
ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
@@ -3570,6 +3565,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
/* now use our persistent iterator, if we aren't already */
iter = &rw->iter;
retry:
+ io_size -= ret;
rw->bytes_done += ret;
/* if we can retry, do so with the callbacks armed */
if (!io_rw_should_retry(req)) {
--
2.24.0
* [PATCH v2 08/13] io_uring: inline io_read()'s iovec freeing
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (6 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 07/13] io_uring: don't forget to adjust io_size Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 09/13] io_uring: highlight read-retry loop Pavel Begunkov
` (5 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_read() doesn't have the simplest control flow, with a lot of jumps,
and it's hard to read. One of those jumps targets the out_free: label,
which frees the iovec. However, from the middle of io_read() onwards
iovec is NULL'ed, so kfree(iovec) is a no-op there; that leaves us with
only two places where we can inline the freeing and further clean up
the code.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 31 +++++++++++++------------------
1 file changed, 13 insertions(+), 18 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 25fffff27c76..35ad889afaec 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3530,14 +3530,18 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
}
ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
- if (unlikely(ret))
- goto out_free;
+ if (unlikely(ret)) {
+ kfree(iovec);
+ return ret;
+ }
ret = io_iter_do_read(req, iter);
if (ret == -EIOCBQUEUED) {
- ret = 0;
- goto out_free;
+ /* it's faster to check here then delegate to kfree */
+ if (iovec)
+ kfree(iovec);
+ return 0;
} else if (ret == -EAGAIN) {
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -3560,8 +3564,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
return ret2;
rw = req->async_data;
- /* it's copied and will be cleaned with ->io */
- iovec = NULL;
/* now use our persistent iterator, if we aren't already */
iter = &rw->iter;
retry:
@@ -3580,21 +3582,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
* do, then just retry at the new offset.
*/
ret = io_iter_do_read(req, iter);
- if (ret == -EIOCBQUEUED) {
- ret = 0;
- goto out_free;
- } else if (ret > 0 && ret < io_size) {
- /* we got some bytes, but not all. retry. */
+ if (ret == -EIOCBQUEUED)
+ return 0;
+ /* we got some bytes, but not all. retry. */
+ if (ret > 0 && ret < io_size)
goto retry;
- }
done:
kiocb_done(kiocb, ret, cs);
- ret = 0;
-out_free:
- /* it's reportedly faster than delegating the null check to kfree() */
- if (iovec)
- kfree(iovec);
- return ret;
+ return 0;
}
static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
--
2.24.0
* [PATCH v2 09/13] io_uring: highlight read-retry loop
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (7 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 08/13] io_uring: inline io_read()'s iovec freeing Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 10/13] io_uring: treat NONBLOCK and RWF_NOWAIT similarly Pavel Begunkov
` (4 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
We already have an implicit do-while for read-retries, but built with a
goto at the end. Convert it into an actual do-while loop; that
highlights the retry logic, makes it a bit easier to understand and is
cleaner in general.
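In miniature, the transformation looks like this (an illustrative
stand-in, not the real function): a backwards goto becomes a do-while
with the retry condition spelled out at the bottom:

#include <stdio.h>

static int attempts_left = 3;

/* stand-in for io_iter_do_read(): succeeds only on the final attempt */
static int try_once(void)
{
        return --attempts_left > 0 ? -1 : 0;
}

int main(void)
{
        int ret;

        /* was: retry: ret = try_once(); if (ret) goto retry; */
        do {
                ret = try_once();
        } while (ret != 0);

        printf("succeeded, ret = %d\n", ret);
        return 0;
}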
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 35ad889afaec..bbf8ea8370d6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3566,27 +3566,27 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
rw = req->async_data;
/* now use our persistent iterator, if we aren't already */
iter = &rw->iter;
-retry:
- io_size -= ret;
- rw->bytes_done += ret;
- /* if we can retry, do so with the callbacks armed */
- if (!io_rw_should_retry(req)) {
- kiocb->ki_flags &= ~IOCB_WAITQ;
- return -EAGAIN;
- }
- /*
- * Now retry read with the IOCB_WAITQ parts set in the iocb. If we
- * get -EIOCBQUEUED, then we'll get a notification when the desired
- * page gets unlocked. We can also get a partial read here, and if we
- * do, then just retry at the new offset.
- */
- ret = io_iter_do_read(req, iter);
- if (ret == -EIOCBQUEUED)
- return 0;
- /* we got some bytes, but not all. retry. */
- if (ret > 0 && ret < io_size)
- goto retry;
+ do {
+ io_size -= ret;
+ rw->bytes_done += ret;
+ /* if we can retry, do so with the callbacks armed */
+ if (!io_rw_should_retry(req)) {
+ kiocb->ki_flags &= ~IOCB_WAITQ;
+ return -EAGAIN;
+ }
+
+ /*
+ * Now retry read with the IOCB_WAITQ parts set in the iocb. If
+ * we get -EIOCBQUEUED, then we'll get a notification when the
+ * desired page gets unlocked. We can also get a partial read
+ * here, and if we do, then just retry at the new offset.
+ */
+ ret = io_iter_do_read(req, iter);
+ if (ret == -EIOCBQUEUED)
+ return 0;
+ /* we got some bytes, but not all. retry. */
+ } while (ret > 0 && ret < io_size);
done:
kiocb_done(kiocb, ret, cs);
return 0;
--
2.24.0
* [PATCH v2 10/13] io_uring: treat NONBLOCK and RWF_NOWAIT similarly
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (8 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 09/13] io_uring: highlight read-retry loop Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 11/13] io_uring: io_import_iovec return type cleanup Pavel Begunkov
` (3 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
Make the decision about whether we need to retry a read/write the same
for O_NONBLOCK and RWF_NOWAIT: set REQ_F_NOWAIT when either is
specified and use that flag for all relevant checks. Also fix
resubmitting NOWAIT requests via io_rw_reissue().
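A rough sketch of the flag-unification pattern, with names invented for
the example: derive a single nowait flag once at prep time from both
sources, then test only that flag everywhere else:

#include <stdbool.h>
#include <stdio.h>

#define F_NONBLOCK      (1u << 0)       /* stand-in for O_NONBLOCK on the file */
#define F_RWF_NOWAIT    (1u << 1)       /* stand-in for RWF_NOWAIT on the request */
#define REQ_NOWAIT      (1u << 2)       /* derived, per-request flag */

struct req { unsigned int flags; };

static void prep(struct req *req, unsigned int file_flags, unsigned int rw_flags)
{
        req->flags = 0;
        /* decide once: either source means "don't block, don't retry" */
        if ((file_flags & F_NONBLOCK) || (rw_flags & F_RWF_NOWAIT))
                req->flags |= REQ_NOWAIT;
}

static bool may_retry(const struct req *req)
{
        /* every later check looks only at the derived flag */
        return !(req->flags & REQ_NOWAIT);
}

int main(void)
{
        struct req a, b;

        prep(&a, F_NONBLOCK, 0);
        prep(&b, 0, 0);
        printf("a may retry: %d, b may retry: %d\n", may_retry(&a), may_retry(&b));
        return 0;
}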
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 30 ++++++++++++++++--------------
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bbf8ea8370d6..ce2ea3f55f65 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2734,7 +2734,9 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
if (res != -EAGAIN && res != -EOPNOTSUPP)
return false;
mode = file_inode(req->file)->i_mode;
- if ((!S_ISBLK(mode) && !S_ISREG(mode)) || io_wq_current_is_worker())
+ if (!S_ISBLK(mode) && !S_ISREG(mode))
+ return false;
+ if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
return false;
lockdep_assert_held(&req->ctx->uring_lock);
@@ -2907,16 +2909,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw.kiocb;
+ struct file *file = req->file;
unsigned ioprio;
int ret;
- if (S_ISREG(file_inode(req->file)->i_mode))
+ if (S_ISREG(file_inode(file)->i_mode))
req->flags |= REQ_F_ISREG;
kiocb->ki_pos = READ_ONCE(sqe->off);
- if (kiocb->ki_pos == -1 && !(req->file->f_mode & FMODE_STREAM)) {
+ if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) {
req->flags |= REQ_F_CUR_POS;
- kiocb->ki_pos = req->file->f_pos;
+ kiocb->ki_pos = file->f_pos;
}
kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -2924,6 +2927,10 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ret))
return ret;
+ /* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */
+ if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK))
+ req->flags |= REQ_F_NOWAIT;
+
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
ret = ioprio_check_cap(ioprio);
@@ -2934,10 +2941,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
} else
kiocb->ki_ioprio = get_current_ioprio();
- /* don't allow async punt if RWF_NOWAIT was requested */
- if (kiocb->ki_flags & IOCB_NOWAIT)
- req->flags |= REQ_F_NOWAIT;
-
if (ctx->flags & IORING_SETUP_IOPOLL) {
if (!(kiocb->ki_flags & IOCB_DIRECT) ||
!kiocb->ki_filp->f_op->iopoll)
@@ -3546,15 +3549,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
goto done;
- /* no retry on NONBLOCK marked file */
- if (req->file->f_flags & O_NONBLOCK)
+ /* no retry on NONBLOCK nor RWF_NOWAIT */
+ if (req->flags & REQ_F_NOWAIT)
goto done;
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
- (req->file->f_flags & O_NONBLOCK) ||
- !(req->flags & REQ_F_ISREG)) {
+ (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
@@ -3675,8 +3677,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
*/
if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
ret2 = -EAGAIN;
- /* no retry on NONBLOCK marked file */
- if (ret2 == -EAGAIN && (req->file->f_flags & O_NONBLOCK))
+ /* no retry on NONBLOCK nor RWF_NOWAIT */
+ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
goto done;
if (!force_nonblock || ret2 != -EAGAIN) {
/* IOPOLL retry should happen for io-wq threads */
--
2.24.0
* [PATCH v2 11/13] io_uring: io_import_iovec return type cleanup
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (9 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 10/13] io_uring: treat NONBLOCK and RWF_NOWAIT similarly Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 12/13] io_uring: deduplicate file table slot calculation Pavel Begunkov
` (2 subsequent siblings)
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_import_iovec() doesn't return the IO size anymore, only an error
code. Make that more apparent by returning int instead of ssize_t, and
clean up the leftovers.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 30 +++++++++++-------------------
1 file changed, 11 insertions(+), 19 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ce2ea3f55f65..24cc00ff7155 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1030,9 +1030,8 @@ static struct file *io_file_get(struct io_submit_state *state,
static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs);
static void io_rsrc_put_work(struct work_struct *work);
-static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
- struct iovec **iovec, struct iov_iter *iter,
- bool needs_lock);
+static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec,
+ struct iov_iter *iter, bool needs_lock);
static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
const struct iovec *fast_iov,
struct iov_iter *iter, bool force);
@@ -2693,9 +2692,8 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res,
static bool io_resubmit_prep(struct io_kiocb *req)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- ssize_t ret = -ECANCELED;
+ int rw, ret = -ECANCELED;
struct iov_iter iter;
- int rw;
/* already prepared */
if (req->async_data)
@@ -3004,8 +3002,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
io_rw_done(kiocb, ret);
}
-static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
- struct iov_iter *iter)
+static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
{
struct io_ring_ctx *ctx = req->ctx;
size_t len = req->rw.len;
@@ -3069,7 +3066,7 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
}
}
- return len;
+ return 0;
}
static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
@@ -3210,16 +3207,14 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
return __io_iov_buffer_select(req, iov, needs_lock);
}
-static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
- struct iovec **iovec, struct iov_iter *iter,
- bool needs_lock)
+static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec,
+ struct iov_iter *iter, bool needs_lock)
{
void __user *buf = u64_to_user_ptr(req->rw.addr);
size_t sqe_len = req->rw.len;
+ u8 opcode = req->opcode;
ssize_t ret;
- u8 opcode;
- opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL;
return io_import_fixed(req, rw, iter);
@@ -3244,10 +3239,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
if (req->flags & REQ_F_BUFFER_SELECT) {
ret = io_iov_buffer_select(req, *iovec, needs_lock);
- if (!ret) {
- ret = (*iovec)->iov_len;
- iov_iter_init(iter, rw, *iovec, 1, ret);
- }
+ if (!ret)
+ iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len);
*iovec = NULL;
return ret;
}
@@ -3379,7 +3372,7 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
{
struct io_async_rw *iorw = req->async_data;
struct iovec *iov = iorw->fast_iov;
- ssize_t ret;
+ int ret;
ret = io_import_iovec(rw, req, &iov, &iorw->iter, false);
if (unlikely(ret < 0))
@@ -3518,7 +3511,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
}
io_size = iov_iter_count(iter);
req->result = io_size;
- ret = 0;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
--
2.24.0
* [PATCH v2 12/13] io_uring: deduplicate file table slot calculation
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (10 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 11/13] io_uring: io_import_iovec return type cleanup Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme Pavel Begunkov
2021-02-04 15:07 ` [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Jens Axboe
13 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
Extract a helper, io_fixed_file_slot(), returning a slot in our fixed
files table, so we don't hand-code the lookup three times.
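A self-contained illustration of the shift/mask lookup being factored
out (array sizes and names here are arbitrary, not the io_uring
constants): the helper hides the two-level indexing so callers only
deal with a single slot pointer:

#include <stdio.h>

#define TABLE_SHIFT     3
#define TABLE_MASK      ((1u << TABLE_SHIFT) - 1)
#define NR_TABLES       4

struct file_table {
        const char *files[1u << TABLE_SHIFT];
};

static struct file_table tables[NR_TABLES];

/* single place that knows how a flat index maps onto the 2-level table */
static const char **fixed_file_slot(unsigned int i)
{
        struct file_table *table = &tables[i >> TABLE_SHIFT];

        return &table->files[i & TABLE_MASK];
}

int main(void)
{
        *fixed_file_slot(11) = "some-file";             /* table 1, entry 3 */
        printf("slot 11: %s\n", *fixed_file_slot(11));
        *fixed_file_slot(11) = NULL;                    /* removal is just as simple */
        return 0;
}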
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 41 +++++++++++++++++++----------------------
1 file changed, 19 insertions(+), 22 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 24cc00ff7155..5ee6a9273fca 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7740,6 +7740,15 @@ static void io_rsrc_put_work(struct work_struct *work)
}
}
+static struct file **io_fixed_file_slot(struct fixed_rsrc_data *file_data,
+ unsigned i)
+{
+ struct fixed_rsrc_table *table;
+
+ table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT];
+ return &table->files[i & IORING_FILE_TABLE_MASK];
+}
+
static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
{
struct fixed_rsrc_ref_node *ref_node;
@@ -7808,6 +7817,7 @@ static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node)
kfree(ref_node);
}
+
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
@@ -7840,9 +7850,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto out_free;
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
- struct fixed_rsrc_table *table;
- unsigned index;
-
if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
ret = -EFAULT;
goto out_fput;
@@ -7867,9 +7874,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
fput(file);
goto out_fput;
}
- table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT];
- index = i & IORING_FILE_TABLE_MASK;
- table->files[index] = file;
+ *io_fixed_file_slot(file_data, i) = file;
}
ret = io_sqe_files_scm(ctx);
@@ -7972,7 +7977,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
{
struct fixed_rsrc_data *data = ctx->file_data;
struct fixed_rsrc_ref_node *ref_node;
- struct file *file;
+ struct file *file, **file_slot;
__s32 __user *fds;
int fd, i, err;
__u32 done;
@@ -7990,9 +7995,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
fds = u64_to_user_ptr(up->data);
for (done = 0; done < nr_args; done++) {
- struct fixed_rsrc_table *table;
- unsigned index;
-
err = 0;
if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
err = -EFAULT;
@@ -8002,14 +8004,13 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
continue;
i = array_index_nospec(up->offset + done, ctx->nr_user_files);
- table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
- index = i & IORING_FILE_TABLE_MASK;
- if (table->files[index]) {
- file = table->files[index];
- err = io_queue_file_removal(data, file);
+ file_slot = io_fixed_file_slot(ctx->file_data, i);
+
+ if (*file_slot) {
+ err = io_queue_file_removal(data, *file_slot);
if (err)
break;
- table->files[index] = NULL;
+ *file_slot = NULL;
needs_switch = true;
}
if (fd != -1) {
@@ -8031,13 +8032,12 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- table->files[index] = file;
err = io_sqe_file_register(ctx, file, i);
if (err) {
- table->files[index] = NULL;
fput(file);
break;
}
+ *file_slot = file;
}
}
@@ -9488,11 +9488,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
- struct fixed_rsrc_table *table;
- struct file *f;
+ struct file *f = *io_fixed_file_slot(ctx->file_data, i);
- table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
- f = table->files[i & IORING_FILE_TABLE_MASK];
if (f)
seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
else
--
2.24.0
* [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (11 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 12/13] io_uring: deduplicate file table slot calculation Pavel Begunkov
@ 2021-02-04 13:52 ` Pavel Begunkov
2021-02-04 14:52 ` Jens Axboe
2021-02-04 15:07 ` [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Jens Axboe
13 siblings, 1 reply; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 13:52 UTC (permalink / raw)
To: Jens Axboe, io-uring
Saving one lock/unlock pair for io-wq is not super important, but it
adds some ugliness to the code. More importantly, an atomic dec that
doesn't drop the count to zero doesn't give the right ordering/barriers
on some archs, so io_steal_work() can pretty easily end up subtly and
completely broken.
Go back to the 2-step io-wq work exchange and clean it up.
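Schematically, the 2-step scheme looks like this in a simplified
userspace model (not the io-wq API): do_work() only runs the current
item, while free_work() releases it and hands back the next linked
item, so the worker loop keeps executing dependent links in the same
thread:

#include <stdio.h>

struct work {
        const char *name;
        struct work *link;      /* next dependent work, if any */
};

/* step 1: execute the work itself, return nothing */
static void do_work(struct work *w)
{
        printf("running %s\n", w->name);
}

/* step 2: release the work and return the linked follow-up (or NULL) */
static struct work *free_work(struct work *w)
{
        struct work *next = w->link;

        /* in the real code this is where the request reference is dropped */
        return next;
}

int main(void)
{
        struct work second = { "second", NULL };
        struct work first  = { "first", &second };
        struct work *w = &first;

        while (w) {
                do_work(w);
                w = free_work(w);       /* same thread picks up the link */
        }
        return 0;
}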
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io-wq.c | 16 ++++++----------
fs/io-wq.h | 4 ++--
fs/io_uring.c | 26 ++++----------------------
3 files changed, 12 insertions(+), 34 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 2e2f14f42bf2..63ef195b1acb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -555,23 +555,21 @@ static void io_worker_handle_work(struct io_worker *worker)
/* handle a whole dependent link */
do {
- struct io_wq_work *old_work, *next_hashed, *linked;
+ struct io_wq_work *next_hashed, *linked;
unsigned int hash = io_get_work_hash(work);
next_hashed = wq_next_work(work);
io_impersonate_work(worker, work);
+ wq->do_work(work);
+ io_assign_current_work(worker, NULL);
- old_work = work;
- linked = wq->do_work(work);
-
+ linked = wq->free_work(work);
work = next_hashed;
if (!work && linked && !io_wq_is_hashed(linked)) {
work = linked;
linked = NULL;
}
io_assign_current_work(worker, work);
- wq->free_work(old_work);
-
if (linked)
io_wqe_enqueue(wqe, linked);
@@ -850,11 +848,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
struct io_wq *wq = wqe->wq;
do {
- struct io_wq_work *old_work = work;
-
work->flags |= IO_WQ_WORK_CANCEL;
- work = wq->do_work(work);
- wq->free_work(old_work);
+ wq->do_work(work);
+ work = wq->free_work(work);
} while (work);
}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index e1ffb80a4a1d..e37a0f217cc8 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -106,8 +106,8 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
return container_of(work->list.next, struct io_wq_work, list);
}
-typedef void (free_work_fn)(struct io_wq_work *);
-typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
+typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
+typedef void (io_wq_work_fn)(struct io_wq_work *);
struct io_wq_data {
struct user_struct *user;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ee6a9273fca..b740a39110d6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2379,22 +2379,6 @@ static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
io_free_req_deferred(req);
}
-static struct io_wq_work *io_steal_work(struct io_kiocb *req)
-{
- struct io_kiocb *nxt;
-
- /*
- * A ref is owned by io-wq in which context we're. So, if that's the
- * last one, it's safe to steal next work. False negatives are Ok,
- * it just will be re-punted async in io_put_work()
- */
- if (refcount_read(&req->refs) != 1)
- return NULL;
-
- nxt = io_req_find_next(req);
- return nxt ? &nxt->work : NULL;
-}
-
static void io_double_put_req(struct io_kiocb *req)
{
/* drop both submit and complete references */
@@ -6343,7 +6327,7 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
return 0;
}
-static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
+static void io_wq_submit_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_kiocb *timeout;
@@ -6394,8 +6378,6 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
if (lock_ctx)
mutex_unlock(&lock_ctx->uring_lock);
}
-
- return io_steal_work(req);
}
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
@@ -8067,12 +8049,12 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
return __io_sqe_files_update(ctx, &up, nr_args);
}
-static void io_free_work(struct io_wq_work *work)
+static struct io_wq_work *io_free_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- /* Consider that io_steal_work() relies on this ref */
- io_put_req(req);
+ req = io_put_req_find_next(req);
+ return req ? &req->work : NULL;
}
static int io_init_wq_offload(struct io_ring_ctx *ctx,
--
2.24.0
* Re: [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme
2021-02-04 13:52 ` [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme Pavel Begunkov
@ 2021-02-04 14:52 ` Jens Axboe
2021-02-04 14:56 ` Pavel Begunkov
0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2021-02-04 14:52 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 2/4/21 6:52 AM, Pavel Begunkov wrote:
> Saving one lock/unlock pair for io-wq is not super important, but it
> adds some ugliness to the code. More importantly, an atomic dec that
> doesn't drop the count to zero doesn't give the right ordering/barriers
> on some archs, so io_steal_work() can pretty easily end up subtly and
> completely broken.
>
> Go back to the 2-step io-wq work exchange and clean it up.
IIRC, this wasn't done to skip the lock/unlock exchange, which I agree
doesn't matter, but to ensure that a link would not need another io-wq
punt. And that is a big deal, it's much faster to run it from that
same thread, rather than needing a new async queue and new thread grab
to get there.
Just want to make sure that's on your mind... Maybe it's still fine
as-is, didn't look too closely yet or test it.
--
Jens Axboe
* Re: [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme
2021-02-04 14:52 ` Jens Axboe
@ 2021-02-04 14:56 ` Pavel Begunkov
2021-02-04 15:05 ` Jens Axboe
0 siblings, 1 reply; 18+ messages in thread
From: Pavel Begunkov @ 2021-02-04 14:56 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 04/02/2021 14:52, Jens Axboe wrote:
> On 2/4/21 6:52 AM, Pavel Begunkov wrote:
>> Saving one lock/unlock pair for io-wq is not super important, but it
>> adds some ugliness to the code. More importantly, an atomic dec that
>> doesn't drop the count to zero doesn't give the right ordering/barriers
>> on some archs, so io_steal_work() can pretty easily end up subtly and
>> completely broken.
>>
>> Go back to the 2-step io-wq work exchange and clean it up.
>
> IIRC, this wasn't done to skip the lock/unlock exchange, which I agree
> doesn't matter, but to ensure that a link would not need another io-wq
> punt. And that is a big deal, it's much faster to run it from that
> same thread, rather than needing a new async queue and new thread grab
> to get there.
Right, we're just referring to different patches and points in time.
This one is fine in that regard; it just moves returning the link from
->do_work() to ->free_work().
>
> Just want to make sure that's on your mind... Maybe it's still fine
> as-is, didn't look too closely yet or test it.
>
--
Pavel Begunkov
* Re: [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme
2021-02-04 14:56 ` Pavel Begunkov
@ 2021-02-04 15:05 ` Jens Axboe
0 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2021-02-04 15:05 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 2/4/21 7:56 AM, Pavel Begunkov wrote:
> On 04/02/2021 14:52, Jens Axboe wrote:
>> On 2/4/21 6:52 AM, Pavel Begunkov wrote:
>>> Saving one lock/unlock pair for io-wq is not super important, but it
>>> adds some ugliness to the code. More importantly, an atomic dec that
>>> doesn't drop the count to zero doesn't give the right ordering/barriers
>>> on some archs, so io_steal_work() can pretty easily end up subtly and
>>> completely broken.
>>>
>>> Go back to the 2-step io-wq work exchange and clean it up.
>>
>> IIRC, this wasn't done to skip the lock/unlock exchange, which I agree
>> doesn't matter, but to ensure that a link would not need another io-wq
>> punt. And that is a big deal, it's much faster to run it from that
>> same thread, rather than needing a new async queue and new thread grab
>> to get there.
>
> Right, we just refer to different patches and moments. This one is fine
> in that regard, it just moves returning link from ->do_work() to
> ->free_work().
Got it, looks good then.
--
Jens Axboe
* Re: [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
` (12 preceding siblings ...)
2021-02-04 13:52 ` [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme Pavel Begunkov
@ 2021-02-04 15:07 ` Jens Axboe
13 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2021-02-04 15:07 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 2/4/21 6:51 AM, Pavel Begunkov wrote:
> A bunch of random cleanups, a noticeable part of which (4-9/13)
> refactors io_read(), which is currently not in the best shape and is
> hard to understand.
>
> 7/13 may actually fix a problem.
> 10/13 should make NOWAIT and NONBLOCK work right.
>
> v2: add 9-13/13
>
> Pavel Begunkov (13):
> io_uring: deduplicate core cancellations sequence
> io_uring: refactor scheduling in io_cqring_wait
> io_uring: refactor io_cqring_wait
> io_uring: refactor io_read for unsupported nowait
> io_uring: further simplify do_read error parsing
> io_uring: let io_setup_async_rw take care of iovec
> io_uring: don't forget to adjust io_size
> io_uring: inline io_read()'s iovec freeing
> io_uring: highlight read-retry loop
> io_uring: treat NONBLOCK and RWF_NOWAIT similarly
> io_uring: io_import_iovec return type cleanup
> io_uring: deduplicate file table slot calculation
> io_uring/io-wq: return 2-step work swap scheme
>
> fs/io-wq.c | 16 +--
> fs/io-wq.h | 4 +-
> fs/io_uring.c | 366 ++++++++++++++++++++++----------------------------
> 3 files changed, 166 insertions(+), 220 deletions(-)
Applied, thanks Pavel!
--
Jens Axboe
Thread overview: 18+ messages
2021-02-04 13:51 [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 01/13] io_uring: deduplicate core cancellations sequence Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 02/13] io_uring: refactor scheduling in io_cqring_wait Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 03/13] io_uring: refactor io_cqring_wait Pavel Begunkov
2021-02-04 13:51 ` [PATCH v2 04/13] io_uring: refactor io_read for unsupported nowait Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 05/13] io_uring: further simplify do_read error parsing Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 06/13] io_uring: let io_setup_async_rw take care of iovec Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 07/13] io_uring: don't forget to adjust io_size Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 08/13] io_uring: inline io_read()'s iovec freeing Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 09/13] io_uring: highlight read-retry loop Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 10/13] io_uring: treat NONBLOCK and RWF_NOWAIT similarly Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 11/13] io_uring: io_import_iovec return type cleanup Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 12/13] io_uring: deduplicate file table slot calculation Pavel Begunkov
2021-02-04 13:52 ` [PATCH v2 13/13] io_uring/io-wq: return 2-step work swap scheme Pavel Begunkov
2021-02-04 14:52 ` Jens Axboe
2021-02-04 14:56 ` Pavel Begunkov
2021-02-04 15:05 ` Jens Axboe
2021-02-04 15:07 ` [PATCH v2 5.12 00/13] a second pack of 5.12 cleanups Jens Axboe