* [PATCH 1/6] io_uring: refactor io_arm_poll_handler()
From: Pavel Begunkov @ 2021-06-26 20:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
gcc 11 goes down a weird path and duplicates most of io_arm_poll_handler()
for the READ and WRITE cases. Help it by moving all pollin vs pollout
specific bits under a single if-else, so there is no temptation for this
kind of unfolding.
before vs after:
   text    data     bss     dec     hex filename
  85362   12650       8   98020   17ee4 ./fs/io_uring.o
  85186   12650       8   97844   17e34 ./fs/io_uring.o
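For illustration only, a minimal userspace sketch of the resulting shape
(the names and mask values below are made up; only the structure of the
if-else mirrors the change): grouping the branch-specific bits up front
leaves a single shared tail for the compiler to emit, instead of one copy
per branch.

#include <stdio.h>

enum { DIR_READ, DIR_WRITE };

static int arm_poll(int pollin, int pollout)
{
        unsigned int mask = 0x010;              /* bits set in every case */
        int rw;

        if (!pollin && !pollout)
                return -1;

        if (pollin) {
                rw = DIR_READ;
                mask |= 0x001;                  /* read-side bits */
        } else {
                rw = DIR_WRITE;
                mask |= 0x004;                  /* write-side bits */
        }

        /* shared tail: emitted once, reached from both branches */
        printf("rw=%d mask=%#x\n", rw, mask);
        return 0;
}

int main(void)
{
        return arm_poll(1, 0);
}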
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 37 ++++++++++++++++---------------------
1 file changed, 16 insertions(+), 21 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 23c51786708b..0822e01e2d71 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5342,19 +5342,29 @@ static int io_arm_poll_handler(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
struct async_poll *apoll;
struct io_poll_table ipt;
- __poll_t mask, ret;
+ __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI;
int rw;
if (!req->file || !file_can_poll(req->file))
return IO_APOLL_ABORTED;
if (req->flags & REQ_F_POLLED)
return IO_APOLL_ABORTED;
- if (def->pollin)
+ if (!def->pollin && !def->pollout)
+ return IO_APOLL_ABORTED;
+
+ if (def->pollin) {
rw = READ;
- else if (def->pollout)
+ mask |= POLLIN | POLLRDNORM;
+
+ /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
+ if ((req->opcode == IORING_OP_RECVMSG) &&
+ (req->sr_msg.msg_flags & MSG_ERRQUEUE))
+ mask &= ~POLLIN;
+ } else {
rw = WRITE;
- else
- return IO_APOLL_ABORTED;
+ mask |= POLLOUT | POLLWRNORM;
+ }
+
/* if we can't nonblock try, then no point in arming a poll handler */
if (!io_file_supports_async(req, rw))
return IO_APOLL_ABORTED;
@@ -5363,23 +5373,8 @@ static int io_arm_poll_handler(struct io_kiocb *req)
if (unlikely(!apoll))
return IO_APOLL_ABORTED;
apoll->double_poll = NULL;
-
- req->flags |= REQ_F_POLLED;
req->apoll = apoll;
-
- mask = EPOLLONESHOT;
- if (def->pollin)
- mask |= POLLIN | POLLRDNORM;
- if (def->pollout)
- mask |= POLLOUT | POLLWRNORM;
-
- /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
- if ((req->opcode == IORING_OP_RECVMSG) &&
- (req->sr_msg.msg_flags & MSG_ERRQUEUE))
- mask &= ~POLLIN;
-
- mask |= POLLERR | POLLPRI;
-
+ req->flags |= REQ_F_POLLED;
ipt.pt._qproc = io_async_queue_proc;
ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
--
2.32.0
* [PATCH 2/6] io_uring: mainstream sqpoll task_work running
From: Pavel Begunkov @ 2021-06-26 20:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
task_works are widely used, so place io_run_task_work() directly into
the main path of io_sq_thread(), and remove it from other places where
it's not needed anymore.
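As a rough userspace sketch of the loop shape this ends up with (an assumed
simplification, none of the helpers below are real io_uring code): deferred
work is drained from the main loop, counts as activity, and the thread only
considers sleeping when nothing is queued.

#include <stdbool.h>
#include <stdio.h>

static int pending_work;                /* stands in for current->task_works */

static bool run_deferred_work(void)
{
        if (!pending_work)
                return false;
        printf("ran %d deferred item(s)\n", pending_work);
        pending_work = 0;
        return true;
}

static int submit_new_requests(void)    /* stands in for submitting sqes */
{
        static int budget = 3;
        return budget-- > 0;
}

int main(void)
{
        int idle_rounds = 0;

        pending_work = 2;
        while (idle_rounds < 2) {
                bool busy = submit_new_requests() > 0;

                /* run deferred work in the main path, count it as activity */
                if (run_deferred_work())
                        busy = true;

                if (busy) {
                        idle_rounds = 0;
                        continue;
                }
                /* nothing queued either: this round counts towards sleeping */
                if (!pending_work)
                        idle_rounds++;
        }
        return 0;
}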
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0822e01e2d71..f10cdb92f771 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7064,7 +7064,6 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
cond_resched();
mutex_lock(&sqd->lock);
}
- io_run_task_work();
return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
}
@@ -7093,7 +7092,6 @@ static int io_sq_thread(void *data)
if (io_sqd_handle_event(sqd))
break;
timeout = jiffies + sqd->sq_thread_idle;
- continue;
}
cap_entries = !list_is_singular(&sqd->ctx_list);
@@ -7103,9 +7101,10 @@ static int io_sq_thread(void *data)
if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
sqt_spin = true;
}
+ if (io_run_task_work())
+ sqt_spin = true;
if (sqt_spin || !time_after(jiffies, timeout)) {
- io_run_task_work();
cond_resched();
if (sqt_spin)
timeout = jiffies + sqd->sq_thread_idle;
@@ -7113,7 +7112,7 @@ static int io_sq_thread(void *data)
}
prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
- if (!io_sqd_events_pending(sqd) && !io_run_task_work()) {
+ if (!io_sqd_events_pending(sqd) && !current->task_works) {
bool needs_sched = true;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
--
2.32.0
* [PATCH 3/6] io_uring: remove not needed PF_EXITING check
From: Pavel Begunkov @ 2021-06-26 20:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
Since cancellation got moved before exit_signals(), there is no one left
who can call io_run_task_work() with PF_EXITING set, so remove the check.
Note that __io_req_task_submit() still needs a similar check.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f10cdb92f771..953bdc41d018 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2264,12 +2264,6 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
static inline bool io_run_task_work(void)
{
- /*
- * Not safe to run on exiting task, and the task_work handling will
- * not add work to such a task.
- */
- if (unlikely(current->flags & PF_EXITING))
- return false;
if (current->task_works) {
__set_current_state(TASK_RUNNING);
task_work_run();
@@ -9216,7 +9210,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
ret |= io_cancel_defer_files(ctx, task, cancel_all);
ret |= io_poll_remove_all(ctx, task, cancel_all);
ret |= io_kill_timeouts(ctx, task, cancel_all);
- ret |= io_run_task_work();
+ if (task)
+ ret |= io_run_task_work();
ret |= io_run_ctx_fallback(ctx);
if (!ret)
break;
--
2.32.0
* [PATCH 4/6] io_uring: optimise hot path restricted checks
From: Pavel Begunkov @ 2021-06-26 20:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
Move likely/unlikely from around the io_check_restriction() call to the
ctx->restricted check specifically, because where it is now it doesn't do
what it's supposed to and makes the common path take an extra jump.
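To show where the hint ends up, a small userspace analogue (the kernel's
likely() expands to __builtin_expect(); the struct and flag check below are
invented for the example): the annotation sits on the cheap common-case test
inside the helper, and the caller no longer wraps the whole call in
unlikely().

#include <stdbool.h>
#include <stdio.h>

#define likely(x)       __builtin_expect(!!(x), 1)

struct toy_ctx {
        bool restricted;
        unsigned int allowed_flags;
};

static inline bool check_restriction(const struct toy_ctx *ctx,
                                     unsigned int flags)
{
        if (likely(!ctx->restricted))   /* hint on the common early return */
                return true;
        return (flags & ~ctx->allowed_flags) == 0;      /* rare slow path */
}

int main(void)
{
        struct toy_ctx ctx = { .restricted = false, .allowed_flags = 0 };

        /* plain call: no unlikely() around the helper anymore */
        if (!check_restriction(&ctx, 0x3))
                return 1;
        printf("allowed\n");
        return 0;
}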
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 953bdc41d018..4dd2213f5454 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6702,7 +6702,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
struct io_kiocb *req,
unsigned int sqe_flags)
{
- if (!ctx->restricted)
+ if (likely(!ctx->restricted))
return true;
if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
@@ -6745,7 +6745,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
- if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
+ if (!io_check_restriction(ctx, req, sqe_flags))
return -EACCES;
if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
--
2.32.0
* [PATCH 5/6] io_uring: refactor io_submit_flush_completions
From: Pavel Begunkov @ 2021-06-26 20:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
Don't init req_batch before we actually need it. Also, add a small cleanup
for the req declaration.
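The same two moves in a stand-alone userspace sketch (made-up types, only
the structure matters): the batch is only set up right before the loop that
feeds it, and the per-iteration pointer is declared in the scope where it is
used.

#include <stdio.h>

struct item  { int id; };
struct batch { int count; };

static void batch_init(struct batch *b)                 { b->count = 0; }
static void batch_add(struct batch *b, struct item *it) { (void)it; b->count++; }

int main(void)
{
        struct item items[3] = { {1}, {2}, {3} };
        struct batch rb;
        int i, nr = 3;

        /* first pass does not need the batch at all */
        for (i = 0; i < nr; i++) {
                struct item *it = &items[i];    /* declared where used */

                printf("complete %d\n", it->id);
        }

        /* initialise the batch only now, right before it is used */
        batch_init(&rb);
        for (i = 0; i < nr; i++) {
                struct item *it = &items[i];

                batch_add(&rb, it);
        }
        printf("batched %d items\n", rb.count);
        return 0;
}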
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4dd2213f5454..873cfd4a8761 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2161,22 +2161,22 @@ static void io_submit_flush_completions(struct io_ring_ctx *ctx)
{
struct io_comp_state *cs = &ctx->submit_state.comp;
int i, nr = cs->nr;
- struct io_kiocb *req;
struct req_batch rb;
- io_init_req_batch(&rb);
spin_lock_irq(&ctx->completion_lock);
for (i = 0; i < nr; i++) {
- req = cs->reqs[i];
+ struct io_kiocb *req = cs->reqs[i];
+
__io_cqring_fill_event(ctx, req->user_data, req->result,
req->compl.cflags);
}
io_commit_cqring(ctx);
spin_unlock_irq(&ctx->completion_lock);
-
io_cqring_ev_posted(ctx);
+
+ io_init_req_batch(&rb);
for (i = 0; i < nr; i++) {
- req = cs->reqs[i];
+ struct io_kiocb *req = cs->reqs[i];
/* submission and completion refs */
if (req_ref_sub_and_test(req, 2))
--
2.32.0
* [PATCH 6/6] io_uring: pre-initialise some of req fields
From: Pavel Begunkov @ 2021-06-26 20:40 UTC (permalink / raw)
To: Jens Axboe, io-uring
Most requests are allocated from an internal cache, so it's a waste of time
to fully initialise them every time. Instead, pre-init some of the fields we
can during initial allocation (e.g. kmalloc(), see io_alloc_req()) and keep
them valid on request recycling. There are four of them in this patch:
->ctx always stays the same
->link is NULL on free, it's an invariant
->result does not even need to be initialised, zeroing it is just a precaution
->async_data we now clear in io_dismantle_req() as it's likely to
never be allocated.
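A userspace sketch of the idea (a made-up cache, not io_uring's allocator):
fields that stay stable for a request's whole lifetime are set once when a
fresh batch is allocated, and the free path restores the few invariants that
may have changed, so the hot allocation path touches none of them.

#include <stdio.h>
#include <stdlib.h>

struct req {
        void *owner;            /* stable: set once per allocated batch */
        struct req *link;       /* invariant: NULL whenever the req is free */
        void *async_data;       /* cleared on free if it was ever set */
        int result;
};

#define BATCH 4

static struct req *cache[BATCH];
static int free_reqs;

static void refill_cache(void *owner)
{
        for (int i = 0; i < BATCH; i++) {
                struct req *r = malloc(sizeof(*r));

                /* pre-init once; these stay valid across recycling */
                r->owner = owner;
                r->link = NULL;
                r->async_data = NULL;
                r->result = 0;
                cache[i] = r;
        }
        free_reqs = BATCH;
}

static struct req *alloc_req(void)
{
        return cache[--free_reqs];      /* nothing to initialise here */
}

static void free_req(struct req *r)
{
        free(r->async_data);            /* restore the invariants ...      */
        r->async_data = NULL;
        r->link = NULL;                 /* ... so the next alloc skips them */
        cache[free_reqs++] = r;
}

int main(void)
{
        refill_cache(stdout);
        struct req *r = alloc_req();

        r->async_data = malloc(16);
        free_req(r);
        printf("free reqs: %d\n", free_reqs);
        return 0;
}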
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 873cfd4a8761..6cfbf72340ab 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1740,7 +1740,7 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
if (!state->free_reqs) {
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
- int ret;
+ int ret, i;
if (io_flush_cached_reqs(ctx))
goto got_req;
@@ -1758,6 +1758,20 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
return NULL;
ret = 1;
}
+
+ /*
+ * Don't initialise the fields below on every allocation, but
+ * do that in advance and keep valid on free.
+ */
+ for (i = 0; i < ret; i++) {
+ struct io_kiocb *req = state->reqs[i];
+
+ req->ctx = ctx;
+ req->link = NULL;
+ req->async_data = NULL;
+ /* not necessary, but safer to zero */
+ req->result = 0;
+ }
state->free_reqs = ret;
}
got_req:
@@ -1781,8 +1795,10 @@ static void io_dismantle_req(struct io_kiocb *req)
io_put_file(req->file);
if (req->fixed_rsrc_refs)
percpu_ref_put(req->fixed_rsrc_refs);
- if (req->async_data)
+ if (req->async_data) {
kfree(req->async_data);
+ req->async_data = NULL;
+ }
}
/* must to be called somewhat shortly after putting a request */
@@ -6730,15 +6746,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags = sqe_flags = READ_ONCE(sqe->flags);
req->user_data = READ_ONCE(sqe->user_data);
- req->async_data = NULL;
req->file = NULL;
- req->ctx = ctx;
- req->link = NULL;
req->fixed_rsrc_refs = NULL;
/* one is dropped after submission, the other at completion */
atomic_set(&req->refs, 2);
req->task = current;
- req->result = 0;
/* enforce forwards compatibility on users */
if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
--
2.32.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH for-next 0/6] small for-next optimisations
From: Jens Axboe @ 2021-06-27 22:23 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 6/26/21 2:40 PM, Pavel Begunkov wrote:
> Another pack of small randomly noticed optimisations with no
> particular theme.
>
> Pavel Begunkov (6):
> io_uring: refactor io_arm_poll_handler()
> io_uring: mainstream sqpoll task_work running
> io_uring: remove not needed PF_EXITING check
> io_uring: optimise hot path restricted checks
> io_uring: refactor io_submit_flush_completions
> io_uring: pre-initialise some of req fields
>
> fs/io_uring.c | 91 ++++++++++++++++++++++++++-------------------------
> 1 file changed, 46 insertions(+), 45 deletions(-)
LGTM, applied!
--
Jens Axboe