* [PATCH for-next v2 01/13] io_uring: merge io_req_tw_post and io_req_task_complete
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Merge these functions that have the same logic
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2260fb7aa7f2..e40d7b3404eb 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1223,15 +1223,18 @@ int io_run_local_work(struct io_ring_ctx *ctx)
return ret;
}
-static void io_req_tw_post(struct io_kiocb *req, bool *locked)
+void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
- io_req_complete_post(req);
+ if (*locked)
+ io_req_complete_defer(req);
+ else
+ io_req_complete_post(req);
}
void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags)
{
io_req_set_res(req, res, cflags);
- req->io_task_work.func = io_req_tw_post;
+ req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
}
@@ -1460,14 +1463,6 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
return ret;
}
-void io_req_task_complete(struct io_kiocb *req, bool *locked)
-{
- if (*locked)
- io_req_complete_defer(req);
- else
- io_req_complete_post(req);
-}
-
/*
* After the iocb has been issued, it's safe to be found on the poll list.
* Adding the kiocb to the list AFTER submission ensures that we don't
--
2.30.2
* [PATCH for-next v2 02/13] io_uring: __io_req_complete should defer if available
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
For consistency, always defer the completion when IO_URING_F_COMPLETE_DEFER is set in the issue flags.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e40d7b3404eb..0741a728fb6a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -858,7 +858,10 @@ void io_req_complete_post(struct io_kiocb *req)
inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags)
{
- io_req_complete_post(req);
+ if (issue_flags & IO_URING_F_COMPLETE_DEFER)
+ io_req_complete_defer(req);
+ else
+ io_req_complete_post(req);
}
void io_req_complete_failed(struct io_kiocb *req, s32 res)
--
2.30.2
* [PATCH for-next v2 03/13] io_uring: split io_req_complete_failed into post/defer
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Different use cases might want to defer the failure completion when deferral
is available, or post the completion immediately when the lock is not
definitely taken.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 28 ++++++++++++++++++++--------
io_uring/io_uring.h | 2 +-
io_uring/poll.c | 2 +-
3 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0741a728fb6a..1e23adb7b0c5 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -864,7 +864,7 @@ inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags)
io_req_complete_post(req);
}
-void io_req_complete_failed(struct io_kiocb *req, s32 res)
+static inline void io_req_prep_failed(struct io_kiocb *req, s32 res)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -872,6 +872,18 @@ void io_req_complete_failed(struct io_kiocb *req, s32 res)
io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
if (def->fail)
def->fail(req);
+}
+
+static void io_req_defer_failed(struct io_kiocb *req, s32 res)
+ __must_hold(&ctx->uring_lock)
+{
+ io_req_prep_failed(req, res);
+ io_req_complete_defer(req);
+}
+
+void io_req_post_failed(struct io_kiocb *req, s32 res)
+{
+ io_req_prep_failed(req, res);
io_req_complete_post(req);
}
@@ -1245,7 +1257,7 @@ static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
{
/* not needed for normal modes, but SQPOLL depends on it */
io_tw_lock(req->ctx, locked);
- io_req_complete_failed(req, req->cqe.res);
+ io_req_defer_failed(req, req->cqe.res);
}
void io_req_task_submit(struct io_kiocb *req, bool *locked)
@@ -1255,7 +1267,7 @@ void io_req_task_submit(struct io_kiocb *req, bool *locked)
if (likely(!(req->task->flags & PF_EXITING)))
io_queue_sqe(req);
else
- io_req_complete_failed(req, -EFAULT);
+ io_req_defer_failed(req, -EFAULT);
}
void io_req_task_queue_fail(struct io_kiocb *req, int ret)
@@ -1633,7 +1645,7 @@ static __cold void io_drain_req(struct io_kiocb *req)
ret = io_req_prep_async(req);
if (ret) {
fail:
- io_req_complete_failed(req, ret);
+ io_req_defer_failed(req, ret);
return;
}
io_prep_async_link(req);
@@ -1863,7 +1875,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
struct io_kiocb *linked_timeout;
if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
- io_req_complete_failed(req, ret);
+ io_req_defer_failed(req, ret);
return;
}
@@ -1913,14 +1925,14 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
*/
req->flags &= ~REQ_F_HARDLINK;
req->flags |= REQ_F_LINK;
- io_req_complete_failed(req, req->cqe.res);
+ io_req_defer_failed(req, req->cqe.res);
} else if (unlikely(req->ctx->drain_active)) {
io_drain_req(req);
} else {
int ret = io_req_prep_async(req);
if (unlikely(ret))
- io_req_complete_failed(req, ret);
+ io_req_defer_failed(req, ret);
else
io_queue_iowq(req, NULL);
}
@@ -2847,7 +2859,7 @@ static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
while (!list_empty(&list)) {
de = list_first_entry(&list, struct io_defer_entry, list);
list_del_init(&de->list);
- io_req_complete_failed(de->req, -ECANCELED);
+ io_req_post_failed(de->req, -ECANCELED);
kfree(de);
}
return true;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 83013ee584d6..4d2d0926a42b 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -30,7 +30,7 @@ bool io_req_cqe_overflow(struct io_kiocb *req);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
-void io_req_complete_failed(struct io_kiocb *req, s32 res);
+void io_req_post_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
void io_req_complete_post(struct io_kiocb *req);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
diff --git a/io_uring/poll.c b/io_uring/poll.c
index cd4d98d622d2..ceb8255b54eb 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -316,7 +316,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
else if (ret == IOU_POLL_DONE)
io_req_task_submit(req, locked);
else
- io_req_complete_failed(req, ret);
+ io_req_post_failed(req, ret);
}
static void __io_poll_execute(struct io_kiocb *req, int mask)
--
2.30.2
* [PATCH for-next v2 04/13] io_uring: lock on remove in io_apoll_task_func
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
This allows using io_req_defer_failed rather than post in all cases. The
alternative would be to branch based on *locked and decide whether to post
or defer the completion.
However, all of the non-error paths in io_poll_check_events that do not
return IOU_POLL_NO_ACTION end up locking anyway, and locking here does
reduce the logic complexity, so it seems reasonable to always lock and
then also always defer the completion on failure.
This also means that only io_req_defer_failed needs exporting from
io_uring.h.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 4 ++--
io_uring/io_uring.h | 2 +-
io_uring/poll.c | 5 +++--
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 1e23adb7b0c5..5a620001df2e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -874,14 +874,14 @@ static inline void io_req_prep_failed(struct io_kiocb *req, s32 res)
def->fail(req);
}
-static void io_req_defer_failed(struct io_kiocb *req, s32 res)
+void io_req_defer_failed(struct io_kiocb *req, s32 res)
__must_hold(&ctx->uring_lock)
{
io_req_prep_failed(req, res);
io_req_complete_defer(req);
}
-void io_req_post_failed(struct io_kiocb *req, s32 res)
+static void io_req_post_failed(struct io_kiocb *req, s32 res)
{
io_req_prep_failed(req, res);
io_req_complete_post(req);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 4d2d0926a42b..ffab0d2d33c0 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -30,7 +30,7 @@ bool io_req_cqe_overflow(struct io_kiocb *req);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
-void io_req_post_failed(struct io_kiocb *req, s32 res);
+void io_req_defer_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
void io_req_complete_post(struct io_kiocb *req);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
diff --git a/io_uring/poll.c b/io_uring/poll.c
index ceb8255b54eb..4bd43e6f5b72 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -308,15 +308,16 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
if (ret == IOU_POLL_NO_ACTION)
return;
+ io_tw_lock(req->ctx, locked);
io_poll_remove_entries(req);
io_poll_tw_hash_eject(req, locked);
if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
- io_req_complete_post(req);
+ io_req_task_complete(req, locked);
else if (ret == IOU_POLL_DONE)
io_req_task_submit(req, locked);
else
- io_req_post_failed(req, ret);
+ io_req_defer_failed(req, ret);
}
static void __io_poll_execute(struct io_kiocb *req, int mask)
--
2.30.2
* [PATCH for-next v2 05/13] io_uring: timeout should use io_req_task_complete
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Allow timeouts to defer completions if the ring is locked
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/timeout.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index e8a8c2099480..26b61e62aa9a 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -282,12 +282,11 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
}
io_req_set_res(req, ret ?: -ETIME, 0);
- io_req_complete_post(req);
io_put_req(prev);
} else {
io_req_set_res(req, -ETIME, 0);
- io_req_complete_post(req);
}
+ io_req_task_complete(req, locked);
}
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
--
2.30.2
* [PATCH for-next v2 06/13] io_uring: simplify io_issue_sqe
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
io_issue_sqe can reuse __io_req_complete for completion logic
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5a620001df2e..912f6fefc665 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1738,12 +1738,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
if (creds)
revert_creds(creds);
- if (ret == IOU_OK) {
- if (issue_flags & IO_URING_F_COMPLETE_DEFER)
- io_req_complete_defer(req);
- else
- io_req_complete_post(req);
- } else if (ret != IOU_ISSUE_SKIP_COMPLETE)
+ if (ret == IOU_OK)
+ __io_req_complete(req, issue_flags);
+ else if (ret != IOU_ISSUE_SKIP_COMPLETE)
return ret;
/* If the op doesn't have a file, we're not polling for it */
--
2.30.2
* [PATCH for-next v2 07/13] io_uring: make io_req_complete_post static
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
This is only called from two functions in io_uring.c so remove the header
export.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 2 +-
io_uring/io_uring.h | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 912f6fefc665..43db84fe001d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -845,7 +845,7 @@ static void __io_req_complete_put(struct io_kiocb *req)
}
}
-void io_req_complete_post(struct io_kiocb *req)
+static void io_req_complete_post(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index ffab0d2d33c0..3c3a93493239 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -32,7 +32,6 @@ int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
-void io_req_complete_post(struct io_kiocb *req);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
--
2.30.2
* [PATCH for-next v2 08/13] io_uring: allow defer completion for aux posted cqes
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Multishot ops cannot use the compl_reqs list as the request must stay in
the poll list, but that means they need to run each completion without
benefiting from batching.
Here, introduce batching infrastructure for only small (i.e. 16-byte)
CQEs. This restriction is OK because there are no use cases posting
32-byte CQEs.
Keep a batch of up to 16 posted results in the ring, and flush it in the
same way as compl_reqs.
The value 16 was chosen through experimentation on a microbenchmark ([1]),
as well as by trying not to increase the size of the ring too much. This
increases the ring size from 1216 to 1472 bytes.
[1]: https://github.com/DylanZA/liburing/commit/9ac66b36bcf4477bfafeff1c5f107896b7ae31cf
Run with $ make -j && ./benchmark/reg.b -s 1 -t 2000 -r 10
Gives results:
baseline 8309 k/s
8 18807 k/s
16 19338 k/s
32 20134 k/s
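As a back-of-the-envelope check of the size figures above (an illustrative
sketch, not part of this patch, assuming the new cqes_count field fits
into existing padding):

#include <assert.h>
#include <stdint.h>

/* Mirrors the layout of a small (16-byte) struct io_uring_cqe. */
struct small_cqe {
	uint64_t user_data;
	int32_t res;
	uint32_t flags;
};

int main(void)
{
	/* 16 cached CQEs of 16 bytes each account for the 1216 -> 1472 growth. */
	_Static_assert(sizeof(struct small_cqe) == 16, "small CQE is 16 bytes");
	assert(16 * sizeof(struct small_cqe) == 1472 - 1216);
	return 0;
}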
Suggested-by: Pavel Begunkov <[email protected]>
Signed-off-by: Dylan Yudaken <[email protected]>
---
include/linux/io_uring_types.h | 2 ++
io_uring/io_uring.c | 27 ++++++++++++++++++++++++---
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f5b687a787a3..accdfecee953 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -174,7 +174,9 @@ struct io_submit_state {
bool plug_started;
bool need_plug;
unsigned short submit_nr;
+ unsigned int cqes_count;
struct blk_plug plug;
+ struct io_uring_cqe cqes[16];
};
struct io_ev_fd {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 43db84fe001d..39f80d68d31c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -167,7 +167,8 @@ EXPORT_SYMBOL(io_uring_get_socket);
static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
{
- if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+ if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
+ ctx->submit_state.cqes_count)
__io_submit_flush_completions(ctx);
}
@@ -802,6 +803,21 @@ bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
return false;
}
+static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
+{
+ struct io_submit_state *state = &ctx->submit_state;
+ unsigned int i;
+
+ lockdep_assert_held(&ctx->uring_lock);
+ for (i = 0; i < state->cqes_count; i++) {
+ struct io_uring_cqe *cqe = &state->cqes[i];
+
+ io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags, true);
+ }
+ state->cqes_count = 0;
+}
+
bool io_post_aux_cqe(struct io_ring_ctx *ctx,
u64 user_data, s32 res, u32 cflags,
bool allow_overflow)
@@ -1348,6 +1364,9 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_submit_state *state = &ctx->submit_state;
io_cq_lock(ctx);
+ /* post must come first to preserve CQE ordering */
+ if (state->cqes_count)
+ __io_flush_post_cqes(ctx);
wq_list_for_each(node, prev, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
@@ -1357,8 +1376,10 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
}
io_cq_unlock_post(ctx);
- io_free_batch_list(ctx, state->compl_reqs.first);
- INIT_WQ_LIST(&state->compl_reqs);
+ if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
+ io_free_batch_list(ctx, state->compl_reqs.first);
+ INIT_WQ_LIST(&state->compl_reqs);
+ }
}
/*
--
2.30.2
* [PATCH for-next v2 09/13] io_uring: add io_aux_cqe which allows deferred completion
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Use the just-introduced deferred post CQE completion state when possible
in io_aux_cqe. If that is not possible, fall back to io_post_aux_cqe.
This introduces a complication because of allow_overflow. For deferred
completions we cannot know, without taking the completion_lock, whether
the CQE will overflow (and even if we took it, another post could sneak in
and cause this CQE to end up in overflow).
However, since overflow protection is mostly a best-effort defence in
depth to prevent infinite loops of CQEs for poll, just checking the
overflow bit is good enough and will result in at most 16 (the array size
of the deferred CQEs) overflows.
Suggested-by: Pavel Begunkov <[email protected]>
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 34 ++++++++++++++++++++++++++++++++++
io_uring/io_uring.h | 2 ++
io_uring/net.c | 7 ++++---
io_uring/poll.c | 4 ++--
4 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 39f80d68d31c..37b195d85f32 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -830,6 +830,40 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx,
return filled;
}
+bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
+ bool allow_overflow)
+{
+ struct io_uring_cqe *cqe;
+ unsigned int length;
+
+ if (!defer)
+ return io_post_aux_cqe(ctx, user_data, res, cflags, allow_overflow);
+
+ length = ARRAY_SIZE(ctx->submit_state.cqes);
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ if (ctx->submit_state.cqes_count == length) {
+ io_cq_lock(ctx);
+ __io_flush_post_cqes(ctx);
+ /* no need to flush - flush is deferred */
+ spin_unlock(&ctx->completion_lock);
+ }
+
+ /* For deferred completions this is not as strict as it is otherwise,
+ * however its main job is to prevent unbounded posted completions,
+ * and in that it works just as well.
+ */
+ if (!allow_overflow && test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
+ return false;
+
+ cqe = ctx->submit_state.cqes + ctx->submit_state.cqes_count++;
+ cqe->user_data = user_data;
+ cqe->res = res;
+ cqe->flags = cflags;
+ return true;
+}
+
static void __io_req_complete_put(struct io_kiocb *req)
{
/*
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 3c3a93493239..e075c4fb70c9 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -36,6 +36,8 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
bool allow_overflow);
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
+bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
+ bool allow_overflow);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
diff --git a/io_uring/net.c b/io_uring/net.c
index 0de6f78ad978..90342dcb6b1d 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -601,8 +601,8 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
}
if (!mshot_finished) {
- if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
- cflags | IORING_CQE_F_MORE, true)) {
+ if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
io_recv_prep_retry(req);
return false;
}
@@ -1320,7 +1320,8 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
- if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
+ if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
goto retry;
return -ECANCELED;
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 4bd43e6f5b72..922c1a366c41 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -252,8 +252,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
__poll_t mask = mangle_poll(req->cqe.res &
req->apoll_events);
- if (!io_post_aux_cqe(ctx, req->cqe.user_data,
- mask, IORING_CQE_F_MORE, false)) {
+ if (!io_aux_cqe(ctx, *locked, req->cqe.user_data,
+ mask, IORING_CQE_F_MORE, false)) {
io_req_set_res(req, mask, 0);
return IOU_POLL_REMOVE_POLL_USE_RES;
}
--
2.30.2
* [PATCH for-next v2 10/13] io_uring: make io_fill_cqe_aux static
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
This is only used in io_uring.c
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 4 ++--
io_uring/io_uring.h | 2 --
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 37b195d85f32..42c7383ead91 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -770,8 +770,8 @@ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow)
return &rings->cqes[off];
}
-bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
- bool allow_overflow)
+static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
+ bool allow_overflow)
{
struct io_uring_cqe *cqe;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index e075c4fb70c9..4519d91008de 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -34,8 +34,6 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
-bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
- bool allow_overflow);
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
--
2.30.2
* [PATCH for-next v2 11/13] io_uring: add lockdep assertion in io_fill_cqe_aux
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Add an assertion for the completion lock to io_fill_cqe_aux
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 42c7383ead91..6e1139a11fbf 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -775,6 +775,8 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32
{
struct io_uring_cqe *cqe;
+ lockdep_assert_held(&ctx->completion_lock);
+
ctx->cq_extra++;
/*
--
2.30.2
* [PATCH for-next v2 12/13] io_uring: remove overflow param from io_post_aux_cqe
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
The only call sites that would not allow overflow are also call sites
that would use io_aux_cqe, as they care about ordering.
So remove this parameter from io_post_aux_cqe.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 12 ++++++++----
io_uring/io_uring.h | 3 +--
io_uring/msg_ring.c | 4 ++--
io_uring/rsrc.c | 4 ++--
4 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6e1139a11fbf..87ea497590b5 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -820,9 +820,8 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
state->cqes_count = 0;
}
-bool io_post_aux_cqe(struct io_ring_ctx *ctx,
- u64 user_data, s32 res, u32 cflags,
- bool allow_overflow)
+static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
+ bool allow_overflow)
{
bool filled;
@@ -832,6 +831,11 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx,
return filled;
}
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
+{
+ return __io_post_aux_cqe(ctx, user_data, res, cflags, true);
+}
+
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
bool allow_overflow)
{
@@ -839,7 +843,7 @@ bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32
unsigned int length;
if (!defer)
- return io_post_aux_cqe(ctx, user_data, res, cflags, allow_overflow);
+ return __io_post_aux_cqe(ctx, user_data, res, cflags, allow_overflow);
length = ARRAY_SIZE(ctx->submit_state.cqes);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 4519d91008de..d61dd9c5030a 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -32,8 +32,7 @@ int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
-bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
- bool allow_overflow);
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 90d2fc6fd80e..afb543aab9f6 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -31,7 +31,7 @@ static int io_msg_ring_data(struct io_kiocb *req)
if (msg->src_fd || msg->dst_fd || msg->flags)
return -EINVAL;
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true))
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
return 0;
return -EOVERFLOW;
@@ -116,7 +116,7 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
* completes with -EOVERFLOW, then the sender must ensure that a
* later IORING_OP_MSG_RING delivers the message.
*/
- if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true))
+ if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
out_unlock:
io_double_unlock_ctx(ctx, target_ctx, issue_flags);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 187f1c83e779..133608200769 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -170,10 +170,10 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
if (prsrc->tag) {
if (ctx->flags & IORING_SETUP_IOPOLL) {
mutex_lock(&ctx->uring_lock);
- io_post_aux_cqe(ctx, prsrc->tag, 0, 0, true);
+ io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
mutex_unlock(&ctx->uring_lock);
} else {
- io_post_aux_cqe(ctx, prsrc->tag, 0, 0, true);
+ io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
}
}
--
2.30.2
* [PATCH for-next v2 13/13] io_uring: allow multishot polled reqs to defer completion
From: Dylan Yudaken @ 2022-11-23 11:06 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov; +Cc: io-uring, kernel-team, Dylan Yudaken
Until now there was no reason for multishot polled requests to defer
completions, as there was no functional difference. However, now this will
actually defer the completions, for a performance win.
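For illustration, a minimal userspace sketch (assuming liburing >= 2.3,
not part of this patch) of a setup that exercises this path: a
SINGLE_ISSUER + DEFER_TASKRUN ring with a multishot accept, whose
per-connection CQEs now get flushed in batches from locked task work.

#include <liburing.h>

/* Userspace sketch (assumes liburing >= 2.3), not part of this patch.
 * With SINGLE_ISSUER + DEFER_TASKRUN the multishot completions run from
 * task work that holds the ring lock, so they take the deferred path.
 */
int setup_multishot_accept(struct io_uring *ring, int listen_fd)
{
	struct io_uring_params p = { 0 };
	struct io_uring_sqe *sqe;
	int ret;

	p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
	ret = io_uring_queue_init_params(256, ring, &p);
	if (ret < 0)
		return ret;

	sqe = io_uring_get_sqe(ring);
	/* One CQE with IORING_CQE_F_MORE is posted per accepted connection. */
	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
	sqe->user_data = 1;
	return io_uring_submit(ring);
}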
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 87ea497590b5..e3f0b4728db3 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1816,7 +1816,8 @@ int io_poll_issue(struct io_kiocb *req, bool *locked)
io_tw_lock(req->ctx, locked);
if (unlikely(req->task->flags & PF_EXITING))
return -EFAULT;
- return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT);
+ return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT|
+ IO_URING_F_COMPLETE_DEFER);
}
struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
--
2.30.2