* [PATCH v3 01/13] io_uring/cmd: move io_uring_try_cancel_uring_cmd()
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-19 1:29 ` Ming Lei
2024-03-18 22:00 ` [PATCH v3 02/13] io_uring/cmd: kill one issue_flags to tw conversion Pavel Begunkov
` (13 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
io_uring_try_cancel_uring_cmd() is a part of the cmd handling so let's
move it closer to all cmd bits into uring_cmd.c
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 39 +--------------------------------------
io_uring/io_uring.h | 7 +++++++
io_uring/uring_cmd.c | 30 ++++++++++++++++++++++++++++++
io_uring/uring_cmd.h | 3 +++
4 files changed, 41 insertions(+), 38 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5d4b448fdc50..6ca7f2a9c296 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -95,6 +95,7 @@
#include "waitid.h"
#include "futex.h"
#include "napi.h"
+#include "uring_cmd.h"
#include "timeout.h"
#include "poll.h"
@@ -173,13 +174,6 @@ static struct ctl_table kernel_io_uring_disabled_table[] = {
};
#endif
-static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
-{
- if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
- ctx->submit_state.cqes_count)
- __io_submit_flush_completions(ctx);
-}
-
static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
{
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
@@ -3237,37 +3231,6 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
return ret;
}
-static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
- struct task_struct *task, bool cancel_all)
-{
- struct hlist_node *tmp;
- struct io_kiocb *req;
- bool ret = false;
-
- lockdep_assert_held(&ctx->uring_lock);
-
- hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
- hash_node) {
- struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
- struct io_uring_cmd);
- struct file *file = req->file;
-
- if (!cancel_all && req->task != task)
- continue;
-
- if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
- /* ->sqe isn't available if no async data */
- if (!req_has_async_data(req))
- cmd->sqe = NULL;
- file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL);
- ret = true;
- }
- }
- io_submit_flush_completions(ctx);
-
- return ret;
-}
-
static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
bool cancel_all)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 6426ee382276..935d8d0747dc 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -154,6 +154,13 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
__io_req_task_work_add(req, 0);
}
+static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
+{
+ if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
+ ctx->submit_state.cqes_count)
+ __io_submit_flush_completions(ctx);
+}
+
#define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link)
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 42f63adfa54a..1551848a9394 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -14,6 +14,36 @@
#include "rsrc.h"
#include "uring_cmd.h"
+bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
+ struct task_struct *task, bool cancel_all)
+{
+ struct hlist_node *tmp;
+ struct io_kiocb *req;
+ bool ret = false;
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
+ hash_node) {
+ struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
+ struct io_uring_cmd);
+ struct file *file = req->file;
+
+ if (!cancel_all && req->task != task)
+ continue;
+
+ if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
+ /* ->sqe isn't available if no async data */
+ if (!req_has_async_data(req))
+ cmd->sqe = NULL;
+ file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL);
+ ret = true;
+ }
+ }
+ io_submit_flush_completions(ctx);
+ return ret;
+}
+
static void io_uring_cmd_del_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 8117684ec3ca..7356bf9aa655 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -3,3 +3,6 @@
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_uring_cmd_prep_async(struct io_kiocb *req);
+
+bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
+ struct task_struct *task, bool cancel_all);
\ No newline at end of file
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 02/13] io_uring/cmd: kill one issue_flags to tw conversion
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 01/13] io_uring/cmd: move io_uring_try_cancel_uring_cmd() Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-19 1:33 ` Ming Lei
2024-03-18 22:00 ` [PATCH v3 03/13] io_uring/cmd: fix tw <-> issue_flags conversion Pavel Begunkov
` (12 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
io_uring cmd converts struct io_tw_state to issue_flags and later back
to io_tw_state, it's awfully ill-fated, not to mention that intermediate
issue_flags state is not correct.
Get rid of the last conversion, drag through tw everything that came
with IO_URING_F_UNLOCKED, and replace io_req_complete_defer() with a
direct call to io_req_complete_defer(), at least for the time being.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/uring_cmd.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 1551848a9394..7c1c58c5837e 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -130,11 +130,11 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
if (req->ctx->flags & IORING_SETUP_IOPOLL) {
/* order with io_iopoll_req_issued() checking ->iopoll_complete */
smp_store_release(&req->iopoll_completed, 1);
+ } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ io_req_complete_defer(req);
} else {
- struct io_tw_state ts = {
- .locked = !(issue_flags & IO_URING_F_UNLOCKED),
- };
- io_req_task_complete(req, &ts);
+ req->io_task_work.func = io_req_task_complete;
+ io_req_task_work_add(req);
}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v3 02/13] io_uring/cmd: kill one issue_flags to tw conversion
2024-03-18 22:00 ` [PATCH v3 02/13] io_uring/cmd: kill one issue_flags to tw conversion Pavel Begunkov
@ 2024-03-19 1:33 ` Ming Lei
0 siblings, 0 replies; 21+ messages in thread
From: Ming Lei @ 2024-03-19 1:33 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: io-uring, linux-block, Jens Axboe, Kanchan Joshi
On Mon, Mar 18, 2024 at 10:00:24PM +0000, Pavel Begunkov wrote:
> io_uring cmd converts struct io_tw_state to issue_flags and later back
> to io_tw_state, it's awfully ill-fated, not to mention that intermediate
> issue_flags state is not correct.
>
> Get rid of the last conversion, drag through tw everything that came
> with IO_URING_F_UNLOCKED, and replace io_req_complete_defer() with a
> direct call to io_req_complete_defer(), at least for the time being.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
Reviewed-by: Ming Lei <[email protected]>
Thanks,
Ming
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v3 03/13] io_uring/cmd: fix tw <-> issue_flags conversion
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 01/13] io_uring/cmd: move io_uring_try_cancel_uring_cmd() Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 02/13] io_uring/cmd: kill one issue_flags to tw conversion Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-19 1:37 ` Ming Lei
2024-03-18 22:00 ` [PATCH v3 04/13] io_uring/cmd: introduce io_uring_cmd_complete Pavel Begunkov
` (11 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
!IO_URING_F_UNLOCKED does not translate to availability of the deferred
completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
pass and look for to use io_req_complete_defer() and other variants.
Luckily, it's not a real problem as two wrongs actually made it right,
at least as far as io_uring_cmd_work() goes.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/uring_cmd.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 7c1c58c5837e..759f919b14a9 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -36,7 +36,8 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
/* ->sqe isn't available if no async data */
if (!req_has_async_data(req))
cmd->sqe = NULL;
- file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL);
+ file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL |
+ IO_URING_F_COMPLETE_DEFER);
ret = true;
}
}
@@ -86,7 +87,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
+ unsigned issue_flags = IO_URING_F_UNLOCKED;
+
+ /* locked task_work executor checks the deffered list completion */
+ if (ts->locked)
+ issue_flags = IO_URING_F_COMPLETE_DEFER;
ioucmd->task_work_cb(ioucmd, issue_flags);
}
@@ -130,7 +135,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
if (req->ctx->flags & IORING_SETUP_IOPOLL) {
/* order with io_iopoll_req_issued() checking ->iopoll_complete */
smp_store_release(&req->iopoll_completed, 1);
- } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
+ if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
+ return;
io_req_complete_defer(req);
} else {
req->io_task_work.func = io_req_task_complete;
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v3 03/13] io_uring/cmd: fix tw <-> issue_flags conversion
2024-03-18 22:00 ` [PATCH v3 03/13] io_uring/cmd: fix tw <-> issue_flags conversion Pavel Begunkov
@ 2024-03-19 1:37 ` Ming Lei
0 siblings, 0 replies; 21+ messages in thread
From: Ming Lei @ 2024-03-19 1:37 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: io-uring, linux-block, Jens Axboe, Kanchan Joshi
On Mon, Mar 18, 2024 at 10:00:25PM +0000, Pavel Begunkov wrote:
> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
> pass and look for to use io_req_complete_defer() and other variants.
>
> Luckily, it's not a real problem as two wrongs actually made it right,
> at least as far as io_uring_cmd_work() goes.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
Reviewed-by: Ming Lei <[email protected]>
Thanks
Ming
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v3 04/13] io_uring/cmd: introduce io_uring_cmd_complete
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (2 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 03/13] io_uring/cmd: fix tw <-> issue_flags conversion Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-19 1:39 ` Ming Lei
2024-03-18 22:00 ` [PATCH v3 05/13] nvme/io_uring: don't hard code IO_URING_F_UNLOCKED Pavel Begunkov
` (10 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
io_uring_cmd_complete() does exactly what io_uring_cmd_done() does, that
is completing the request, but doesn't ask for issue_flags argument. We
have a couple of users hardcoding some random issue_flags values in
drivers, which they absolutely should not do. This function will be used
to get rid of them. Also, add comments warning users that they're only
allowed to pass issue_flags that were given from io_uring.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring/cmd.h | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index e453a997c060..bf94ed4135d8 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -26,12 +26,25 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
+
+/*
+ * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
+ * and the corresponding io_uring request.
+ *
+ * Note: the caller should never hard code @issue_flags and is only allowed
+ * to pass the mask provided by the core io_uring code.
+ */
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
+
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags);
+/*
+ * Note: the caller should never hard code @issue_flags and only use the
+ * mask provided by the core io_uring code.
+ */
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags);
@@ -56,6 +69,21 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
}
#endif
+/*
+ * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
+ * and the corresponding io_uring request. Similar to io_uring_cmd_done() but
+ * doesn't need issue_flags.
+ *
+ * Note, must not be used with cancellable requests.
+ */
+static inline void io_uring_cmd_complete(struct io_uring_cmd *ioucmd,
+ ssize_t ret, ssize_t res2)
+{
+ if (WARN_ON_ONCE(ioucmd->flags & IORING_URING_CMD_CANCELABLE))
+ return;
+ io_uring_cmd_done(ioucmd, ret, res2, IO_URING_F_UNLOCKED);
+}
+
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v3 04/13] io_uring/cmd: introduce io_uring_cmd_complete
2024-03-18 22:00 ` [PATCH v3 04/13] io_uring/cmd: introduce io_uring_cmd_complete Pavel Begunkov
@ 2024-03-19 1:39 ` Ming Lei
0 siblings, 0 replies; 21+ messages in thread
From: Ming Lei @ 2024-03-19 1:39 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: io-uring, linux-block, Jens Axboe, Kanchan Joshi
On Mon, Mar 18, 2024 at 10:00:26PM +0000, Pavel Begunkov wrote:
> io_uring_cmd_complete() does exactly what io_uring_cmd_done() does, that
> is completing the request, but doesn't ask for issue_flags argument. We
> have a couple of users hardcoding some random issue_flags values in
> drivers, which they absolutely should not do. This function will be used
> to get rid of them. Also, add comments warning users that they're only
> allowed to pass issue_flags that were given from io_uring.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
Reviewed-by: Ming Lei <[email protected]>
Thanks
Ming
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v3 05/13] nvme/io_uring: don't hard code IO_URING_F_UNLOCKED
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (3 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 04/13] io_uring/cmd: introduce io_uring_cmd_complete Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-19 1:40 ` Ming Lei
2024-03-18 22:00 ` [PATCH v3 06/13] io_uring/rw: avoid punting to io-wq directly Pavel Begunkov
` (9 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
uring_cmd implementations should not try to guess issue_flags, use a
freshly added helper io_uring_cmd_complete() instead.
Reviewed-by: Kanchan Joshi <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
drivers/nvme/host/ioctl.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 3dfd5ae99ae0..1a7b5af42dbc 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -426,10 +426,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
- if (blk_rq_is_poll(req))
- nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
- else
+ if (blk_rq_is_poll(req)) {
+ if (pdu->bio)
+ blk_rq_unmap_user(pdu->bio);
+ io_uring_cmd_complete(ioucmd, pdu->status, pdu->result);
+ } else {
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+ }
return RQ_END_IO_FREE;
}
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 06/13] io_uring/rw: avoid punting to io-wq directly
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (4 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 05/13] nvme/io_uring: don't hard code IO_URING_F_UNLOCKED Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 07/13] io_uring: force tw ctx locking Pavel Begunkov
` (8 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
kiocb_done() should care to specifically redirecting requests to io-wq.
Remove the hopping to tw to then queue an io-wq, return -EAGAIN and let
the core code io_uring handle offloading.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 8 ++++----
io_uring/io_uring.h | 1 -
io_uring/rw.c | 8 +-------
3 files changed, 5 insertions(+), 12 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6ca7f2a9c296..feff8f530c22 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -492,7 +492,7 @@ static void io_prep_async_link(struct io_kiocb *req)
}
}
-void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use)
+static void io_queue_iowq(struct io_kiocb *req)
{
struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->task->io_uring;
@@ -1499,7 +1499,7 @@ void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
if (unlikely(req->task->flags & PF_EXITING))
io_req_defer_failed(req, -EFAULT);
else if (req->flags & REQ_F_FORCE_ASYNC)
- io_queue_iowq(req, ts);
+ io_queue_iowq(req);
else
io_queue_sqe(req);
}
@@ -2082,7 +2082,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
break;
case IO_APOLL_ABORTED:
io_kbuf_recycle(req, 0);
- io_queue_iowq(req, NULL);
+ io_queue_iowq(req);
break;
case IO_APOLL_OK:
break;
@@ -2129,7 +2129,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
if (unlikely(req->ctx->drain_active))
io_drain_req(req);
else
- io_queue_iowq(req, NULL);
+ io_queue_iowq(req);
}
}
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 935d8d0747dc..0861d49e83de 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -79,7 +79,6 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
bool io_alloc_async_data(struct io_kiocb *req);
void io_req_task_queue(struct io_kiocb *req);
-void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0585ebcc9773..576934dbf833 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -187,12 +187,6 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
return NULL;
}
-static void io_req_task_queue_reissue(struct io_kiocb *req)
-{
- req->io_task_work.func = io_queue_iowq;
- io_req_task_work_add(req);
-}
-
#ifdef CONFIG_BLOCK
static bool io_resubmit_prep(struct io_kiocb *req)
{
@@ -405,7 +399,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
if (req->flags & REQ_F_REISSUE) {
req->flags &= ~REQ_F_REISSUE;
if (io_resubmit_prep(req))
- io_req_task_queue_reissue(req);
+ return -EAGAIN;
else
io_req_task_queue_fail(req, final_ret);
}
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 07/13] io_uring: force tw ctx locking
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (5 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 06/13] io_uring/rw: avoid punting to io-wq directly Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 08/13] io_uring: remove struct io_tw_state::locked Pavel Begunkov
` (7 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
We can run normal task_work without locking the ctx, however we try to
lock anyway and most handlers prefer or require it locked. It might have
been interesting to multi-submitter ring with high contention completing
async read/write requests via task_work, however that will still need to
go through io_req_complete_post() and potentially take the lock for
rsrc node putting or some other case.
In other words, it's hard to care about it, so alawys force the locking.
The case described would also because of various io_uring caches.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index feff8f530c22..66669cc9a675 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1185,8 +1185,9 @@ struct llist_node *io_handle_tw_list(struct llist_node *node,
if (req->ctx != ctx) {
ctx_flush_and_put(ctx, &ts);
ctx = req->ctx;
- /* if not contended, grab and improve batching */
- ts.locked = mutex_trylock(&ctx->uring_lock);
+
+ ts.locked = true;
+ mutex_lock(&ctx->uring_lock);
percpu_ref_get(&ctx->refs);
}
INDIRECT_CALL_2(req->io_task_work.func,
@@ -1447,11 +1448,9 @@ static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
if (io_run_local_work_continue(ctx, ret, min_events))
goto again;
- if (ts->locked) {
- io_submit_flush_completions(ctx);
- if (io_run_local_work_continue(ctx, ret, min_events))
- goto again;
- }
+ io_submit_flush_completions(ctx);
+ if (io_run_local_work_continue(ctx, ret, min_events))
+ goto again;
trace_io_uring_local_work_run(ctx, ret, loops);
return ret;
@@ -1475,14 +1474,12 @@ static inline int io_run_local_work_locked(struct io_ring_ctx *ctx,
static int io_run_local_work(struct io_ring_ctx *ctx, int min_events)
{
- struct io_tw_state ts = {};
+ struct io_tw_state ts = { .locked = true };
int ret;
- ts.locked = mutex_trylock(&ctx->uring_lock);
+ mutex_lock(&ctx->uring_lock);
ret = __io_run_local_work(ctx, &ts, min_events);
- if (ts.locked)
- mutex_unlock(&ctx->uring_lock);
-
+ mutex_unlock(&ctx->uring_lock);
return ret;
}
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 08/13] io_uring: remove struct io_tw_state::locked
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (6 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 07/13] io_uring: force tw ctx locking Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 09/13] io_uring: refactor io_fill_cqe_req_aux Pavel Begunkov
` (6 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
ctx is always locked for task_work now, so get rid of struct
io_tw_state::locked. Note I'm stopping one step before removing
io_tw_state altogether, which is not empty, because it still serves the
purpose of indicating which function is a tw callback and forcing users
not to invoke them carelessly out of a wrong context. The removal can
always be done later.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 2 --
io_uring/io_uring.c | 31 ++++++++-----------------------
io_uring/io_uring.h | 5 +----
io_uring/poll.c | 2 +-
io_uring/rw.c | 6 ++----
io_uring/timeout.c | 8 ++------
io_uring/uring_cmd.c | 8 ++------
io_uring/waitid.c | 2 +-
8 files changed, 17 insertions(+), 47 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e24893625085..5a2afbc93887 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -439,8 +439,6 @@ struct io_ring_ctx {
};
struct io_tw_state {
- /* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */
- bool locked;
};
enum {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 66669cc9a675..9e8afc006fc9 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -246,14 +246,12 @@ static __cold void io_fallback_req_func(struct work_struct *work)
fallback_work.work);
struct llist_node *node = llist_del_all(&ctx->fallback_llist);
struct io_kiocb *req, *tmp;
- struct io_tw_state ts = { .locked = true, };
+ struct io_tw_state ts = {};
percpu_ref_get(&ctx->refs);
mutex_lock(&ctx->uring_lock);
llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
req->io_task_work.func(req, &ts);
- if (WARN_ON_ONCE(!ts.locked))
- return;
io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
percpu_ref_put(&ctx->refs);
@@ -1157,11 +1155,9 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
return;
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
- if (ts->locked) {
- io_submit_flush_completions(ctx);
- mutex_unlock(&ctx->uring_lock);
- ts->locked = false;
- }
+
+ io_submit_flush_completions(ctx);
+ mutex_unlock(&ctx->uring_lock);
percpu_ref_put(&ctx->refs);
}
@@ -1185,8 +1181,6 @@ struct llist_node *io_handle_tw_list(struct llist_node *node,
if (req->ctx != ctx) {
ctx_flush_and_put(ctx, &ts);
ctx = req->ctx;
-
- ts.locked = true;
mutex_lock(&ctx->uring_lock);
percpu_ref_get(&ctx->refs);
}
@@ -1459,22 +1453,16 @@ static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx,
int min_events)
{
- struct io_tw_state ts = { .locked = true, };
- int ret;
+ struct io_tw_state ts = {};
if (llist_empty(&ctx->work_llist))
return 0;
-
- ret = __io_run_local_work(ctx, &ts, min_events);
- /* shouldn't happen! */
- if (WARN_ON_ONCE(!ts.locked))
- mutex_lock(&ctx->uring_lock);
- return ret;
+ return __io_run_local_work(ctx, &ts, min_events);
}
static int io_run_local_work(struct io_ring_ctx *ctx, int min_events)
{
- struct io_tw_state ts = { .locked = true };
+ struct io_tw_state ts = {};
int ret;
mutex_lock(&ctx->uring_lock);
@@ -1702,10 +1690,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
- if (ts->locked)
- io_req_complete_defer(req);
- else
- io_req_complete_post(req, IO_URING_F_UNLOCKED);
+ io_req_complete_defer(req);
}
/*
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 0861d49e83de..7f921daae9c3 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -351,10 +351,7 @@ static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
static inline void io_tw_lock(struct io_ring_ctx *ctx, struct io_tw_state *ts)
{
- if (!ts->locked) {
- mutex_lock(&ctx->uring_lock);
- ts->locked = true;
- }
+ lockdep_assert_held(&ctx->uring_lock);
}
/*
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 6db1dcb2c797..8901dd118e50 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -322,7 +322,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
__poll_t mask = mangle_poll(req->cqe.res &
req->apoll_events);
- if (!io_fill_cqe_req_aux(req, ts->locked, mask,
+ if (!io_fill_cqe_req_aux(req, true, mask,
IORING_CQE_F_MORE)) {
io_req_set_res(req, mask, 0);
return IOU_POLL_REMOVE_POLL_USE_RES;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 576934dbf833..c7f9246ff508 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -305,11 +305,9 @@ void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
io_req_io_end(req);
- if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
- unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
+ if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+ req->cqe.flags |= io_put_kbuf(req, 0);
- req->cqe.flags |= io_put_kbuf(req, issue_flags);
- }
io_req_task_complete(req, ts);
}
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 7fd7dbb211d6..0a48e6acd0b2 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -72,10 +72,7 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
struct io_ring_ctx *ctx = req->ctx;
if (!io_timeout_finish(timeout, data)) {
- bool filled;
- filled = io_fill_cqe_req_aux(req, ts->locked, -ETIME,
- IORING_CQE_F_MORE);
- if (filled) {
+ if (io_fill_cqe_req_aux(req, true, -ETIME, IORING_CQE_F_MORE)) {
/* re-arm timer */
spin_lock_irq(&ctx->timeout_lock);
list_add(&timeout->list, ctx->timeout_list.prev);
@@ -301,7 +298,6 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
{
- unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
struct io_kiocb *prev = timeout->prev;
int ret = -ENOENT;
@@ -313,7 +309,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *t
.data = prev->cqe.user_data,
};
- ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
+ ret = io_try_cancel(req->task->io_uring, &cd, 0);
}
io_req_set_res(req, ret ?: -ETIME, 0);
io_req_task_complete(req, ts);
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 759f919b14a9..4614ce734fee 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -87,13 +87,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- unsigned issue_flags = IO_URING_F_UNLOCKED;
- /* locked task_work executor checks the deffered list completion */
- if (ts->locked)
- issue_flags = IO_URING_F_COMPLETE_DEFER;
-
- ioucmd->task_work_cb(ioucmd, issue_flags);
+ /* task_work executor checks the deffered list completion */
+ ioucmd->task_work_cb(ioucmd, IO_URING_F_COMPLETE_DEFER);
}
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 77d340666cb9..6362ec20abc0 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -118,7 +118,7 @@ static int io_waitid_finish(struct io_kiocb *req, int ret)
static void io_waitid_complete(struct io_kiocb *req, int ret)
{
struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
- struct io_tw_state ts = { .locked = true };
+ struct io_tw_state ts = {};
/* anyone completing better be holding a reference */
WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK));
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 09/13] io_uring: refactor io_fill_cqe_req_aux
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (7 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 08/13] io_uring: remove struct io_tw_state::locked Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 10/13] io_uring: get rid of intermediate aux cqe caches Pavel Begunkov
` (5 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
The restriction on multishot execution context disallowing io-wq is
driven by rules of io_fill_cqe_req_aux(), it should only be called in
the master task context, either from the syscall path or in task_work.
Since task_work now always takes the ctx lock implying
IO_URING_F_COMPLETE_DEFER, we can just assume that the function is
always called with its defer argument set to true.
Kill the argument. Also rename the function for more consistency as
"fill" in CQE related functions was usually meant for raw interfaces
only copying data into the CQ without any locking, waking the user
and other accounting "post" functions take care of.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 16 +++-------------
io_uring/io_uring.h | 2 +-
io_uring/net.c | 6 ++----
io_uring/poll.c | 3 +--
io_uring/rw.c | 4 +---
io_uring/timeout.c | 2 +-
6 files changed, 9 insertions(+), 24 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9e8afc006fc9..9a4cc46582b2 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -907,40 +907,30 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
state->cqes_count = 0;
}
-static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
- bool allow_overflow)
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
bool filled;
io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
- if (!filled && allow_overflow)
+ if (!filled)
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
io_cq_unlock_post(ctx);
return filled;
}
-bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
-{
- return __io_post_aux_cqe(ctx, user_data, res, cflags, true);
-}
-
/*
* A helper for multishot requests posting additional CQEs.
* Should only be used from a task_work including IO_URING_F_MULTISHOT.
*/
-bool io_fill_cqe_req_aux(struct io_kiocb *req, bool defer, s32 res, u32 cflags)
+bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
{
struct io_ring_ctx *ctx = req->ctx;
u64 user_data = req->cqe.user_data;
struct io_uring_cqe *cqe;
lockdep_assert(!io_wq_current_is_worker());
-
- if (!defer)
- return __io_post_aux_cqe(ctx, user_data, res, cflags, false);
-
lockdep_assert_held(&ctx->uring_lock);
if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->completion_cqes)) {
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 7f921daae9c3..460290e1bdec 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -67,7 +67,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
-bool io_fill_cqe_req_aux(struct io_kiocb *req, bool defer, s32 res, u32 cflags);
+bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
diff --git a/io_uring/net.c b/io_uring/net.c
index 1e7665ff6ef7..ed798e185bbf 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -706,8 +706,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
* receive from this socket.
*/
if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
- io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
- *ret, cflags | IORING_CQE_F_MORE)) {
+ io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
@@ -1428,8 +1427,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
- if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
- ret, IORING_CQE_F_MORE))
+ if (io_req_post_cqe(req, ret, IORING_CQE_F_MORE))
goto retry;
io_req_set_res(req, ret, 0);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 8901dd118e50..5d55bbf1de15 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -322,8 +322,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
__poll_t mask = mangle_poll(req->cqe.res &
req->apoll_events);
- if (!io_fill_cqe_req_aux(req, true, mask,
- IORING_CQE_F_MORE)) {
+ if (!io_req_post_cqe(req, mask, IORING_CQE_F_MORE)) {
io_req_set_res(req, mask, 0);
return IOU_POLL_REMOVE_POLL_USE_RES;
}
diff --git a/io_uring/rw.c b/io_uring/rw.c
index c7f9246ff508..35216e8adc29 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -955,9 +955,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
cflags = io_put_kbuf(req, issue_flags);
rw->len = 0; /* similarly to above, reset len to 0 */
- if (io_fill_cqe_req_aux(req,
- issue_flags & IO_URING_F_COMPLETE_DEFER,
- ret, cflags | IORING_CQE_F_MORE)) {
+ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
if (issue_flags & IO_URING_F_MULTISHOT) {
/*
* Force retry, as we might have more data to
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 0a48e6acd0b2..3458ca550b83 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -72,7 +72,7 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
struct io_ring_ctx *ctx = req->ctx;
if (!io_timeout_finish(timeout, data)) {
- if (io_fill_cqe_req_aux(req, true, -ETIME, IORING_CQE_F_MORE)) {
+ if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
/* re-arm timer */
spin_lock_irq(&ctx->timeout_lock);
list_add(&timeout->list, ctx->timeout_list.prev);
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 10/13] io_uring: get rid of intermediate aux cqe caches
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (8 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 09/13] io_uring: refactor io_fill_cqe_req_aux Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 11/13] io_uring: remove current check from complete_post Pavel Begunkov
` (4 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
io_post_aux_cqe(), which is used for multishot requests, delays
completions by putting CQEs into a temporary array for the purpose
completion lock/flush batching.
DEFER_TASKRUN doesn't need any locking, so for it we can put completions
directly into the CQ and defer post completion handling with a flag.
That leaves !DEFER_TASKRUN, which is not that interesting / hot for
multishot requests, so have conditional locking with deferred flush
for them.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 3 +-
io_uring/io_uring.c | 62 +++++++---------------------------
io_uring/io_uring.h | 2 +-
3 files changed, 15 insertions(+), 52 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 5a2afbc93887..ea7e5488b3be 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -205,6 +205,7 @@ struct io_submit_state {
bool plug_started;
bool need_plug;
+ bool cq_flush;
unsigned short submit_nr;
unsigned int cqes_count;
struct blk_plug plug;
@@ -342,8 +343,6 @@ struct io_ring_ctx {
unsigned cq_last_tm_flush;
} ____cacheline_aligned_in_smp;
- struct io_uring_cqe completion_cqes[16];
-
spinlock_t completion_lock;
/* IRQ completion list, under ->completion_lock */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9a4cc46582b2..f5e2b5bef10f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -630,6 +630,12 @@ static inline void __io_cq_lock(struct io_ring_ctx *ctx)
spin_lock(&ctx->completion_lock);
}
+static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
+{
+ if (!ctx->lockless_cq)
+ spin_unlock(&ctx->completion_lock);
+}
+
static inline void io_cq_lock(struct io_ring_ctx *ctx)
__acquires(ctx->completion_lock)
{
@@ -882,31 +888,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
return false;
}
-static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
- __must_hold(&ctx->uring_lock)
-{
- struct io_submit_state *state = &ctx->submit_state;
- unsigned int i;
-
- lockdep_assert_held(&ctx->uring_lock);
- for (i = 0; i < state->cqes_count; i++) {
- struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
-
- if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
- if (ctx->lockless_cq) {
- spin_lock(&ctx->completion_lock);
- io_cqring_event_overflow(ctx, cqe->user_data,
- cqe->res, cqe->flags, 0, 0);
- spin_unlock(&ctx->completion_lock);
- } else {
- io_cqring_event_overflow(ctx, cqe->user_data,
- cqe->res, cqe->flags, 0, 0);
- }
- }
- }
- state->cqes_count = 0;
-}
-
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
bool filled;
@@ -927,31 +908,16 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
{
struct io_ring_ctx *ctx = req->ctx;
- u64 user_data = req->cqe.user_data;
- struct io_uring_cqe *cqe;
+ bool posted;
lockdep_assert(!io_wq_current_is_worker());
lockdep_assert_held(&ctx->uring_lock);
- if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->completion_cqes)) {
- __io_cq_lock(ctx);
- __io_flush_post_cqes(ctx);
- /* no need to flush - flush is deferred */
- __io_cq_unlock_post(ctx);
- }
-
- /* For defered completions this is not as strict as it is otherwise,
- * however it's main job is to prevent unbounded posted completions,
- * and in that it works just as well.
- */
- if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
- return false;
-
- cqe = &ctx->completion_cqes[ctx->submit_state.cqes_count++];
- cqe->user_data = user_data;
- cqe->res = res;
- cqe->flags = cflags;
- return true;
+ __io_cq_lock(ctx);
+ posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+ ctx->submit_state.cq_flush = true;
+ __io_cq_unlock_post(ctx);
+ return posted;
}
static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
@@ -1545,9 +1511,6 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_wq_work_node *node;
__io_cq_lock(ctx);
- /* must come first to preserve CQE ordering in failure cases */
- if (state->cqes_count)
- __io_flush_post_cqes(ctx);
__wq_list_for_each(node, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
@@ -1569,6 +1532,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
io_free_batch_list(ctx, state->compl_reqs.first);
INIT_WQ_LIST(&state->compl_reqs);
}
+ ctx->submit_state.cq_flush = false;
}
static unsigned io_cqring_events(struct io_ring_ctx *ctx)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 460290e1bdec..5119265a11c2 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -156,7 +156,7 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
{
if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
- ctx->submit_state.cqes_count)
+ ctx->submit_state.cq_flush)
__io_submit_flush_completions(ctx);
}
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 11/13] io_uring: remove current check from complete_post
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (9 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 10/13] io_uring: get rid of intermediate aux cqe caches Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 12/13] io_uring: refactor io_req_complete_post() Pavel Begunkov
` (3 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
task_work execution is now always locked, and we shouldn't get into
io_req_complete_post() from them. That means that complete_post() is
always called out of the original task context and we don't even need to
check current.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f5e2b5bef10f..077c65757281 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -972,7 +972,7 @@ void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
- if (ctx->task_complete && ctx->submitter_task != current) {
+ if (ctx->task_complete) {
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) ||
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 12/13] io_uring: refactor io_req_complete_post()
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (10 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 11/13] io_uring: remove current check from complete_post Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-18 22:00 ` [PATCH v3 13/13] io_uring: clean up io_lockdep_assert_cq_locked Pavel Begunkov
` (2 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
Make io_req_complete_post() to push all IORING_SETUP_IOPOLL requests
to task_work, it's much cleaner and should normally happen. We couldn't
do it before because there was a possibility of looping in
complete_post() -> tw -> complete_post() -> ...
Also, unexport the function and inline __io_req_complete_post().
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 29 +++++++++++------------------
io_uring/io_uring.h | 1 -
2 files changed, 11 insertions(+), 19 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 077c65757281..b07ab65591bf 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -920,11 +920,21 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
return posted;
}
-static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
+static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_rsrc_node *rsrc_node = NULL;
+ /*
+ * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires
+ * the submitter task context, IOPOLL protects with uring_lock.
+ */
+ if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL)) {
+ req->io_task_work.func = io_req_task_complete;
+ io_req_task_work_add(req);
+ return;
+ }
+
io_cq_lock(ctx);
if (!(req->flags & REQ_F_CQE_SKIP)) {
if (!io_fill_cqe_req(ctx, req))
@@ -968,23 +978,6 @@ static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
}
}
-void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (ctx->task_complete) {
- req->io_task_work.func = io_req_task_complete;
- io_req_task_work_add(req);
- } else if (!(issue_flags & IO_URING_F_UNLOCKED) ||
- !(ctx->flags & IORING_SETUP_IOPOLL)) {
- __io_req_complete_post(req, issue_flags);
- } else {
- mutex_lock(&ctx->uring_lock);
- __io_req_complete_post(req, issue_flags & ~IO_URING_F_UNLOCKED);
- mutex_unlock(&ctx->uring_lock);
- }
-}
-
void io_req_defer_failed(struct io_kiocb *req, s32 res)
__must_hold(&ctx->uring_lock)
{
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 5119265a11c2..f694e7e6fb25 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -65,7 +65,6 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow);
void io_req_cqe_overflow(struct io_kiocb *req);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
-void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3 13/13] io_uring: clean up io_lockdep_assert_cq_locked
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (11 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 12/13] io_uring: refactor io_req_complete_post() Pavel Begunkov
@ 2024-03-18 22:00 ` Pavel Begunkov
2024-03-19 1:42 ` [PATCH v3 00/13] Remove aux CQE caches Ming Lei
2024-03-19 2:19 ` Jens Axboe
14 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2024-03-18 22:00 UTC (permalink / raw)
To: io-uring; +Cc: linux-block, Jens Axboe, asml.silence, Kanchan Joshi, Ming Lei
Move CONFIG_PROVE_LOCKING checks inside of io_lockdep_assert_cq_locked()
and kill the else branch.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.h | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index f694e7e6fb25..bae8c1e937c1 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -119,9 +119,9 @@ enum {
void io_eventfd_ops(struct rcu_head *rcu);
void io_activate_pollwq(struct io_ring_ctx *ctx);
-#if defined(CONFIG_PROVE_LOCKING)
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
{
+#if defined(CONFIG_PROVE_LOCKING)
lockdep_assert(in_task());
if (ctx->flags & IORING_SETUP_IOPOLL) {
@@ -140,12 +140,8 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
else
lockdep_assert(current == ctx->submitter_task);
}
-}
-#else
-static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
-{
-}
#endif
+}
static inline void io_req_task_work_add(struct io_kiocb *req)
{
--
2.44.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v3 00/13] Remove aux CQE caches
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (12 preceding siblings ...)
2024-03-18 22:00 ` [PATCH v3 13/13] io_uring: clean up io_lockdep_assert_cq_locked Pavel Begunkov
@ 2024-03-19 1:42 ` Ming Lei
2024-03-19 2:19 ` Jens Axboe
14 siblings, 0 replies; 21+ messages in thread
From: Ming Lei @ 2024-03-19 1:42 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: io-uring, linux-block, Jens Axboe, Kanchan Joshi
On Mon, Mar 18, 2024 at 10:00:22PM +0000, Pavel Begunkov wrote:
> Mandate ctx locking for task_work. Then, remove aux CQE caches mostly
> used by multishot requests and post them directly into the CQ.
>
> It's leaving some of the pre existing issue_flags and state conversion
> non conformant chunks, which will need to clean up later.
>
> v3: pass IO_URING_F_COMPLETE_DEFER to the cmd cancellation path
> drop patch moving request cancellation list removal to tw
> drop all other ublk changes
For the series, pass ublksrv test.
Tested-by: Ming Lei <[email protected]>
Thanks,
Ming
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v3 00/13] Remove aux CQE caches
2024-03-18 22:00 [PATCH v3 00/13] Remove aux CQE caches Pavel Begunkov
` (13 preceding siblings ...)
2024-03-19 1:42 ` [PATCH v3 00/13] Remove aux CQE caches Ming Lei
@ 2024-03-19 2:19 ` Jens Axboe
14 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2024-03-19 2:19 UTC (permalink / raw)
To: io-uring, Pavel Begunkov; +Cc: linux-block, Kanchan Joshi, Ming Lei
On Mon, 18 Mar 2024 22:00:22 +0000, Pavel Begunkov wrote:
> Mandate ctx locking for task_work. Then, remove aux CQE caches mostly
> used by multishot requests and post them directly into the CQ.
>
> It's leaving some of the pre existing issue_flags and state conversion
> non conformant chunks, which will need to clean up later.
>
> v3: pass IO_URING_F_COMPLETE_DEFER to the cmd cancellation path
> drop patch moving request cancellation list removal to tw
> drop all other ublk changes
>
> [...]
Applied, thanks!
[01/13] io_uring/cmd: move io_uring_try_cancel_uring_cmd()
commit: c877857e86396576260d12eaae2f777fa4fd835f
[02/13] io_uring/cmd: kill one issue_flags to tw conversion
commit: c6740905f9862b2780cc9a9a3e1714ea153d6c74
[03/13] io_uring/cmd: fix tw <-> issue_flags conversion
commit: 17c8e3f66f16256d33a10555d0a63d64405ab046
[04/13] io_uring/cmd: introduce io_uring_cmd_complete
commit: 23b0bed538ac9b73518c670925ebb64f0239b54f
[05/13] nvme/io_uring: don't hard code IO_URING_F_UNLOCKED
commit: 1a587b0d65d09d61bd8f0db728923ab68d8fb9c2
[06/13] io_uring/rw: avoid punting to io-wq directly
commit: 7f3b8125c3aee86b5dea06d9a9738b16aa55cbdd
[07/13] io_uring: force tw ctx locking
commit: 2a475207f98db597043251be32bb8f16d3617af9
[08/13] io_uring: remove struct io_tw_state::locked
commit: ccb464aeb6e563d1df179aacbb7c514369ceb8f0
[09/13] io_uring: refactor io_fill_cqe_req_aux
commit: 39c25ce47d211f4decc47f09f9561b8630aab84e
[10/13] io_uring: get rid of intermediate aux cqe caches
commit: 1a7520889e02de50c8334e215a3f187ff9a92456
[11/13] io_uring: remove current check from complete_post
commit: 7af89eaee7d17c2f15e483c859d4fcc09dda6dce
[12/13] io_uring: refactor io_req_complete_post()
commit: 838070b49a0b1466156661b85f6c97dc4033902b
[13/13] io_uring: clean up io_lockdep_assert_cq_locked
commit: 3cba2895da4a77b5f555f29821487db42f084324
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread