From: Jens Axboe <[email protected]>
To: [email protected]
Cc: [email protected], Jens Axboe <[email protected]>
Subject: [PATCH 5/5] io_uring: defer file assignment for links
Date: Tue, 29 Mar 2022 11:07:42 -0600 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
If an application uses direct open or accept, it knows in advance what
direct descriptor value it will get as it picks it itself. This allows
combined requests such as:
sqe = io_uring_get_sqe(ring);
io_uring_prep_openat_direct(sqe, ..., file_slot);
sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;
sqe = io_uring_get_sqe(ring);
io_uring_prep_read(sqe, file_slot, buf, buf_size, 0);
sqe->flags |= IOSQE_FIXED_FILE;
io_uring_submit(ring);
where we prepare both a file open and read, and only get a completion
event for the read when both have completed successfully.
Currently links are fully prepared before the head is issued, but that
fails if the dependent link needs a file assigned that isn't valid until
the head has completed.
Allow deferral of file setup, which makes this documented case work.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 44 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 39 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 52fa0613b442..067ca76651b0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -784,6 +784,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_DEFERRED_FILE_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -848,6 +849,8 @@ enum {
REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
/* request has already done partial IO */
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
+ /* request has file assignment deferred */
+ REQ_F_DEFERRED_FILE = BIT(REQ_F_DEFERRED_FILE_BIT),
};
struct async_poll {
@@ -2096,6 +2099,21 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
return __io_fill_cqe(ctx, user_data, res, cflags);
}
+static void io_assign_file(struct io_kiocb *req)
+{
+ if (req->file || !io_op_defs[req->opcode].needs_file)
+ return;
+ if (!(req->flags & REQ_F_DEFERRED_FILE)) {
+ req_set_fail(req);
+ return;
+ }
+ req->flags &= ~REQ_F_DEFERRED_FILE;
+ req->file = io_file_get(req->ctx, req, req->result,
+ req->flags & REQ_F_FIXED_FILE);
+ if (!req->file)
+ req_set_fail(req);
+}
+
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
@@ -2112,6 +2130,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
if (req->flags & IO_DISARM_MASK)
io_disarm_next(req);
if (req->link) {
+ io_assign_file(req->link);
io_req_task_queue(req->link);
req->link = NULL;
}
@@ -2423,7 +2442,11 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
__io_req_find_next_prep(req);
nxt = req->link;
req->link = NULL;
- return nxt;
+ if (nxt) {
+ io_assign_file(nxt);
+ return nxt;
+ }
+ return NULL;
}
static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
@@ -2626,6 +2649,10 @@ static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
static void io_req_task_queue(struct io_kiocb *req)
{
+ if (unlikely(req->flags & REQ_F_FAIL)) {
+ io_req_task_queue_fail(req, -ECANCELED);
+ return;
+ }
req->io_task_work.func = io_req_task_submit;
io_req_task_work_add(req, false);
}
@@ -2640,8 +2667,10 @@ static inline void io_queue_next(struct io_kiocb *req)
{
struct io_kiocb *nxt = io_req_find_next(req);
- if (nxt)
+ if (nxt) {
+ io_assign_file(nxt);
+ io_req_task_queue(nxt);
+ }
}
static void io_free_req(struct io_kiocb *req)
@@ -7722,6 +7751,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
+ int fd = READ_ONCE(sqe->fd);
/*
* Plug now if we have more than 2 IO left after this, and the
@@ -7733,10 +7763,14 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
blk_start_plug_nr_ios(&state->plug, state->submit_nr);
}
- req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
+ req->file = io_file_get(ctx, req, fd,
(sqe_flags & IOSQE_FIXED_FILE));
- if (unlikely(!req->file))
- return -EBADF;
+ if (unlikely(!req->file)) {
+ if (!ctx->submit_state.link.head)
+ return -EBADF;
+ req->result = fd;
+ req->flags |= REQ_F_DEFERRED_FILE;
+ }
}
personality = READ_ONCE(sqe->personality);
--
2.35.1
next prev parent reply other threads:[~2022-03-29 17:07 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-29 17:07 [PATCHSET 0/5] Fix early file assignment for links Jens Axboe
2022-03-29 17:07 ` [PATCH 1/5] io_uring: fail links if msg-ring doesn't succeeed Jens Axboe
2022-03-29 17:07 ` [PATCH 2/5] io_uring: defer msg-ring file validity check until command issue Jens Axboe
2022-03-29 17:07 ` [PATCH 3/5] io_uring: defer splice/tee " Jens Axboe
2022-03-29 17:07 ` [PATCH 4/5] io_uring: move read/write file prep state into actual opcode handler Jens Axboe
2022-03-29 17:07 ` Jens Axboe [this message]
-- strict thread matches above, loose matches on Subject: below --
2022-03-30 17:14 [PATCHSET v3 0/5] Fix early file assignment for links or drain Jens Axboe
2022-03-30 17:14 ` [PATCH 5/5] io_uring: defer file assignment for links Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox