* [PATCH 1/3] io_uring: split off IOPOLL argument verification
2022-03-22 14:07 [PATCH 0/3] optimise submit+iopoll mutex locking Pavel Begunkov
@ 2022-03-22 14:07 ` Pavel Begunkov
2022-03-22 14:07 ` [PATCH 2/3] io_uring: pre-calculate syscall iopolling decision Pavel Begunkov
2022-03-22 14:07 ` [PATCH 3/3] io_uring: optimise mutex locking for submit+iopoll Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2022-03-22 14:07 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence
IOPOLL doesn't use additional arguments like sigsets, but it still
needs some basic verification, which is currently done by
io_get_ext_arg(). This patch adds a separate function for the IOPOLL
path, which is a bit simpler and doesn't do extra work. This prepares
us for further patches, which would otherwise have hurt inlining in
the hot path.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9baa120a96f9..8d29ef2e552a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -10810,6 +10810,19 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
return 0;
}
+static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz)
+{
+ if (flags & IORING_ENTER_EXT_ARG) {
+ struct io_uring_getevents_arg arg;
+
+ if (argsz != sizeof(arg))
+ return -EINVAL;
+ if (copy_from_user(&arg, argp, sizeof(arg)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
struct __kernel_timespec __user **ts,
const sigset_t __user **sig)
@@ -10921,13 +10934,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
goto out;
}
if (flags & IORING_ENTER_GETEVENTS) {
- const sigset_t __user *sig;
- struct __kernel_timespec __user *ts;
-
- ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
- if (unlikely(ret))
- goto out;
-
min_complete = min(min_complete, ctx->cq_entries);
/*
@@ -10938,8 +10944,17 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
*/
if (ctx->flags & IORING_SETUP_IOPOLL &&
!(ctx->flags & IORING_SETUP_SQPOLL)) {
+ ret = io_validate_ext_arg(flags, argp, argsz);
+ if (unlikely(ret))
+ goto out;
ret = io_iopoll_check(ctx, min_complete);
} else {
+ const sigset_t __user *sig;
+ struct __kernel_timespec __user *ts;
+
+ ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+ if (unlikely(ret))
+ goto out;
ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
}
}
--
2.35.1
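To make the new contract concrete, here is a minimal userspace sketch of
a getevents-only enter on an IOPOLL ring, assuming a kernel with this
series applied; enter_getevents_iopoll() is an illustrative helper, not
a liburing API, and error handling is elided.

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int enter_getevents_iopoll(int ring_fd, unsigned int min_complete)
{
	struct io_uring_getevents_arg arg;

	/* IOPOLL ignores the sigmask/timeout in the ext arg, but
	 * io_validate_ext_arg() still checks the size and copies it in:
	 * a wrong argsz yields -EINVAL, an unreadable pointer -EFAULT.
	 */
	memset(&arg, 0, sizeof(arg));
	return syscall(__NR_io_uring_enter, ring_fd, 0, min_complete,
		       IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
		       &arg, sizeof(arg));
}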
* [PATCH 2/3] io_uring: pre-calculate syscall iopolling decision
2022-03-22 14:07 [PATCH 0/3] optimise submit+iopoll mutex locking Pavel Begunkov
2022-03-22 14:07 ` [PATCH 1/3] io_uring: split off IOPOLL argument verification Pavel Begunkov
@ 2022-03-22 14:07 ` Pavel Begunkov
2022-03-22 14:07 ` [PATCH 3/3] io_uring: optimise mutex locking for submit+iopoll Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2022-03-22 14:07 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence
The syscall should only iopoll for events when it's an IOPOLL ring and
not SQPOLL. Instead of checking both flags every time, save the
decision in the ring flags at creation time so it's easier to use. We
don't care much about the extra if there, but it would be inconvenient
to copy-paste this pair of checks in future patches.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8d29ef2e552a..d7ca4f28cfa4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -359,6 +359,7 @@ struct io_ring_ctx {
unsigned int drain_active: 1;
unsigned int drain_disabled: 1;
unsigned int has_evfd: 1;
+ unsigned int syscall_iopoll: 1;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -10936,14 +10937,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (flags & IORING_ENTER_GETEVENTS) {
min_complete = min(min_complete, ctx->cq_entries);
- /*
- * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
- * space applications don't need to do io completion events
- * polling again, they can rely on io_sq_thread to do polling
- * work, which can reduce cpu usage and uring_lock contention.
- */
- if (ctx->flags & IORING_SETUP_IOPOLL &&
- !(ctx->flags & IORING_SETUP_SQPOLL)) {
+ if (ctx->syscall_iopoll) {
ret = io_validate_ext_arg(flags, argp, argsz);
if (unlikely(ret))
goto out;
@@ -11281,6 +11275,17 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx = io_ring_ctx_alloc(p);
if (!ctx)
return -ENOMEM;
+
+ /*
+ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+ * space applications don't need to do io completion events
+ * polling again, they can rely on io_sq_thread to do polling
+ * work, which can reduce cpu usage and uring_lock contention.
+ */
+ if (ctx->flags & IORING_SETUP_IOPOLL &&
+ !(ctx->flags & IORING_SETUP_SQPOLL))
+ ctx->syscall_iopoll = 1;
+
ctx->compat = in_compat_syscall();
if (!capable(CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
--
2.35.1
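The precalculated bit is a pure function of the setup flags; the sketch
below mirrors it from the userspace side, assuming <linux/io_uring.h>
for the flag definitions (syscall_will_iopoll() is an illustrative
name, not a kernel or liburing API).

#include <linux/io_uring.h>
#include <stdbool.h>

static bool syscall_will_iopoll(unsigned int setup_flags)
{
	/* Only a plain IOPOLL ring polls from io_uring_enter() itself;
	 * with SQPOLL set, the sq thread does the polling instead, so
	 * ctx->syscall_iopoll stays 0.
	 */
	return (setup_flags & IORING_SETUP_IOPOLL) &&
	       !(setup_flags & IORING_SETUP_SQPOLL);
}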
* [PATCH 3/3] io_uring: optimise mutex locking for submit+iopoll
2022-03-22 14:07 [PATCH 0/3] optimise submit+iopoll mutex locking Pavel Begunkov
2022-03-22 14:07 ` [PATCH 1/3] io_uring: split off IOPOLL argument verification Pavel Begunkov
2022-03-22 14:07 ` [PATCH 2/3] io_uring: pre-calculate syscall iopolling decision Pavel Begunkov
@ 2022-03-22 14:07 ` Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2022-03-22 14:07 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence
Both submission and iopolling require holding uring_lock. IOPOLL users
can do them together in a single syscall, but it would still take two
pairs of lock/unlock. Optimise this case by combining the locking into
one lock/unlock pair, which is especially nice for low QD.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 40 +++++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d7ca4f28cfa4..c87a4b18e370 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2867,12 +2867,6 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
unsigned int nr_events = 0;
int ret = 0;
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
/*
* Don't enter poll loop if we already have events pending.
* If we do, we can potentially be spinning for commands that
@@ -2881,7 +2875,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
if (test_bit(0, &ctx->check_cq_overflow))
__io_cqring_overflow_flush(ctx, false);
if (io_cqring_events(ctx))
- goto out;
+ return 0;
do {
/*
* If a submit got punted to a workqueue, we can have the
@@ -2911,8 +2905,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
nr_events += ret;
ret = 0;
} while (nr_events < min && !need_resched());
-out:
- mutex_unlock(&ctx->uring_lock);
+
return ret;
}
@@ -10927,21 +10920,33 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
ret = io_uring_add_tctx_node(ctx);
if (unlikely(ret))
goto out;
+
mutex_lock(&ctx->uring_lock);
submitted = io_submit_sqes(ctx, to_submit);
- mutex_unlock(&ctx->uring_lock);
-
- if (submitted != to_submit)
+ if (submitted != to_submit) {
+ mutex_unlock(&ctx->uring_lock);
goto out;
+ }
+ if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
+ goto iopoll_locked;
+ mutex_unlock(&ctx->uring_lock);
}
if (flags & IORING_ENTER_GETEVENTS) {
- min_complete = min(min_complete, ctx->cq_entries);
-
if (ctx->syscall_iopoll) {
+ /*
+ * We disallow the app entering submit/complete with
+ * polling, but we still need to lock the ring to
+ * prevent racing with polled issue that got punted to
+ * a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
+iopoll_locked:
ret = io_validate_ext_arg(flags, argp, argsz);
- if (unlikely(ret))
- goto out;
- ret = io_iopoll_check(ctx, min_complete);
+ if (likely(!ret)) {
+ min_complete = min(min_complete, ctx->cq_entries);
+ ret = io_iopoll_check(ctx, min_complete);
+ }
+ mutex_unlock(&ctx->uring_lock);
} else {
const sigset_t __user *sig;
struct __kernel_timespec __user *ts;
@@ -10949,6 +10954,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
if (unlikely(ret))
goto out;
+ min_complete = min(min_complete, ctx->cq_entries);
ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
}
}
--
2.35.1
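For reference, the pattern this patch targets is the single
submit-and-reap enter below: a minimal raw-syscall sketch, assuming an
IOPOLL (non-SQPOLL) ring set up elsewhere; submit_and_reap() is an
illustrative helper and SQE preparation is elided. With this series,
such a call takes uring_lock once instead of twice.

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static int submit_and_reap(int ring_fd, unsigned int to_submit)
{
	/* Submission happens under uring_lock; on an IOPOLL ring the
	 * GETEVENTS path now falls through to io_iopoll_check() via
	 * the iopoll_locked label without dropping the mutex.
	 */
	return syscall(__NR_io_uring_enter, ring_fd, to_submit,
		       to_submit, IORING_ENTER_GETEVENTS, NULL, 0);
}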