From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH for-next v2 09/12] io_uring: get rid of double locking
Date: Wed, 7 Dec 2022 03:53:34 +0000 [thread overview]
Message-ID: <a80ecc2bc99c3b3f2cf20015d618b7c51419a797.1670384893.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
We don't need to take both uring_locks at once, msg_ring can be split in
two parts, first getting a file from the filetable of the first ring and
then installing it into the second one.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/msg_ring.c | 85 ++++++++++++++++++++++++++-------------------
io_uring/msg_ring.h | 1 +
io_uring/opdef.c | 1 +
3 files changed, 51 insertions(+), 36 deletions(-)
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index c7d6586164ca..387c45a58654 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -15,6 +15,7 @@
struct io_msg {
struct file *file;
+ struct file *src_file;
u64 user_data;
u32 len;
u32 cmd;
@@ -23,6 +24,17 @@ struct io_msg {
u32 flags;
};
+void io_msg_ring_cleanup(struct io_kiocb *req)
+{
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+
+ if (WARN_ON_ONCE(!msg->src_file))
+ return;
+
+ fput(msg->src_file);
+ msg->src_file = NULL;
+}
+
static int io_msg_ring_data(struct io_kiocb *req)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -37,17 +49,13 @@ static int io_msg_ring_data(struct io_kiocb *req)
return -EOVERFLOW;
}
-static void io_double_unlock_ctx(struct io_ring_ctx *ctx,
- struct io_ring_ctx *octx,
+static void io_double_unlock_ctx(struct io_ring_ctx *octx,
unsigned int issue_flags)
{
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_unlock(&ctx->uring_lock);
mutex_unlock(&octx->uring_lock);
}
-static int io_double_lock_ctx(struct io_ring_ctx *ctx,
- struct io_ring_ctx *octx,
+static int io_double_lock_ctx(struct io_ring_ctx *octx,
unsigned int issue_flags)
{
/*
@@ -60,17 +68,28 @@ static int io_double_lock_ctx(struct io_ring_ctx *ctx,
return -EAGAIN;
return 0;
}
+ mutex_lock(&octx->uring_lock);
+ return 0;
+}
- /* Always grab smallest value ctx first. We know ctx != octx. */
- if (ctx < octx) {
- mutex_lock(&ctx->uring_lock);
- mutex_lock(&octx->uring_lock);
- } else {
- mutex_lock(&octx->uring_lock);
- mutex_lock(&ctx->uring_lock);
+static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ struct io_ring_ctx *ctx = req->ctx;
+ struct file *file = NULL;
+ unsigned long file_ptr;
+ int idx = msg->src_fd;
+
+ io_ring_submit_lock(ctx, issue_flags);
+ if (likely(idx < ctx->nr_user_files)) {
+ idx = array_index_nospec(idx, ctx->nr_user_files);
+ file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
+ file = (struct file *) (file_ptr & FFS_MASK);
+ if (file)
+ get_file(file);
}
-
- return 0;
+ io_ring_submit_unlock(ctx, issue_flags);
+ return file;
}
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
@@ -78,34 +97,27 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *target_ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
struct io_ring_ctx *ctx = req->ctx;
- unsigned long file_ptr;
- struct file *src_file;
+ struct file *src_file = msg->src_file;
int ret;
if (target_ctx == ctx)
return -EINVAL;
+ if (!src_file) {
+ src_file = io_msg_grab_file(req, issue_flags);
+ if (!src_file)
+ return -EBADF;
+ msg->src_file = src_file;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
- ret = io_double_lock_ctx(ctx, target_ctx, issue_flags);
- if (unlikely(ret))
- return ret;
-
- ret = -EBADF;
- if (unlikely(msg->src_fd >= ctx->nr_user_files))
- goto out_unlock;
-
- msg->src_fd = array_index_nospec(msg->src_fd, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, msg->src_fd)->file_ptr;
- if (!file_ptr)
- goto out_unlock;
-
- src_file = (struct file *) (file_ptr & FFS_MASK);
- get_file(src_file);
+ if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+ return -EAGAIN;
ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
- if (ret < 0) {
- fput(src_file);
+ if (ret < 0)
goto out_unlock;
- }
+ msg->src_file = NULL;
+ req->flags &= ~REQ_F_NEED_CLEANUP;
if (msg->flags & IORING_MSG_RING_CQE_SKIP)
goto out_unlock;
@@ -119,7 +131,7 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
out_unlock:
- io_double_unlock_ctx(ctx, target_ctx, issue_flags);
+ io_double_unlock_ctx(target_ctx, issue_flags);
return ret;
}
@@ -130,6 +142,7 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(sqe->buf_index || sqe->personality))
return -EINVAL;
+ msg->src_file = NULL;
msg->user_data = READ_ONCE(sqe->off);
msg->len = READ_ONCE(sqe->len);
msg->cmd = READ_ONCE(sqe->addr);
diff --git a/io_uring/msg_ring.h b/io_uring/msg_ring.h
index fb9601f202d0..3987ee6c0e5f 100644
--- a/io_uring/msg_ring.h
+++ b/io_uring/msg_ring.h
@@ -2,3 +2,4 @@
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags);
+void io_msg_ring_cleanup(struct io_kiocb *req);
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 04dd2c983fce..3aa0d65c50e3 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -445,6 +445,7 @@ const struct io_op_def io_op_defs[] = {
.name = "MSG_RING",
.prep = io_msg_ring_prep,
.issue = io_msg_ring,
+ .cleanup = io_msg_ring_cleanup,
},
[IORING_OP_FSETXATTR] = {
.needs_file = 1,
--
2.38.1
next prev parent reply other threads:[~2022-12-07 3:54 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-07 3:53 [PATCH for-next v2 00/12] CQ locking optimisation Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 01/12] io_uring: dont remove file from msg_ring reqs Pavel Begunkov
2022-12-07 13:52 ` Jens Axboe
2022-12-07 21:12 ` Pavel Begunkov
2022-12-07 21:23 ` Jens Axboe
2022-12-07 3:53 ` [PATCH for-next v2 02/12] io_uring: improve io_double_lock_ctx fail handling Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 03/12] io_uring: skip overflow CQE posting for dying ring Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 04/12] io_uring: don't check overflow flush failures Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 05/12] io_uring: complete all requests in task context Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 06/12] io_uring: force multishot CQEs into " Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 07/12] io_uring: use tw for putting rsrc Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 08/12] io_uring: never run tw and fallback in parallel Pavel Begunkov
2022-12-07 3:53 ` Pavel Begunkov [this message]
2022-12-07 3:53 ` [PATCH for-next v2 10/12] io_uring: extract a io_msg_install_complete helper Pavel Begunkov
2022-12-07 3:53 ` [PATCH for-next v2 11/12] io_uring: do msg_ring in target task via tw Pavel Begunkov
2022-12-07 15:31 ` Jens Axboe
2022-12-07 15:51 ` Jens Axboe
2022-12-07 21:18 ` Pavel Begunkov
2022-12-07 21:22 ` Jens Axboe
2022-12-07 3:53 ` [PATCH for-next v2 12/12] io_uring: skip spinlocking for ->task_complete Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a80ecc2bc99c3b3f2cf20015d618b7c51419a797.1670384893.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox