* [PATCH 1/5] io_uring: fail links if msg-ring doesn't succeeed
2022-03-29 17:07 [PATCHSET 0/5] Fix early file assignment for links Jens Axboe
@ 2022-03-29 17:07 ` Jens Axboe
2022-03-29 17:07 ` [PATCH 2/5] io_uring: defer msg-ring file validity check until command issue Jens Axboe
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2022-03-29 17:07 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
We must always call req_set_fail() if the request is failed, otherwise
we won't sever links for dependent chains correctly.
Fixes: 4f57f06ce218 ("io_uring: add support for IORING_OP_MSG_RING command")
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 39a9ff31dbc5..923410937dc7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4500,6 +4500,8 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
ret = 0;
}
+ if (ret < 0)
+ req_set_fail(req);
__io_req_complete(req, issue_flags, ret, 0);
return 0;
}
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/5] io_uring: defer msg-ring file validity check until command issue
2022-03-29 17:07 [PATCHSET 0/5] Fix early file assignment for links Jens Axboe
2022-03-29 17:07 ` [PATCH 1/5] io_uring: fail links if msg-ring doesn't succeeed Jens Axboe
@ 2022-03-29 17:07 ` Jens Axboe
2022-03-29 17:07 ` [PATCH 3/5] io_uring: defer splice/tee " Jens Axboe
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2022-03-29 17:07 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
In preparation for not using the file at prep time, defer checking if this
file refers to a valid io_uring instance until issue time.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 923410937dc7..072a31400e6a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4473,9 +4473,6 @@ static int io_msg_ring_prep(struct io_kiocb *req,
sqe->splice_fd_in || sqe->buf_index || sqe->personality))
return -EINVAL;
- if (req->file->f_op != &io_uring_fops)
- return -EBADFD;
-
req->msg.user_data = READ_ONCE(sqe->off);
req->msg.len = READ_ONCE(sqe->len);
return 0;
@@ -4485,9 +4482,14 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx;
struct io_msg *msg = &req->msg;
- int ret = -EOVERFLOW;
bool filled;
+ int ret;
+ ret = -EBADF;
+ if (req->file->f_op != &io_uring_fops)
+ goto done;
+
+ ret = -EOVERFLOW;
target_ctx = req->file->private_data;
spin_lock(&target_ctx->completion_lock);
@@ -4500,6 +4502,7 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
ret = 0;
}
+done:
if (ret < 0)
req_set_fail(req);
__io_req_complete(req, issue_flags, ret, 0);
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/5] io_uring: defer splice/tee file validity check until command issue
2022-03-29 17:07 [PATCHSET 0/5] Fix early file assignment for links Jens Axboe
2022-03-29 17:07 ` [PATCH 1/5] io_uring: fail links if msg-ring doesn't succeeed Jens Axboe
2022-03-29 17:07 ` [PATCH 2/5] io_uring: defer msg-ring file validity check until command issue Jens Axboe
@ 2022-03-29 17:07 ` Jens Axboe
2022-03-29 17:07 ` [PATCH 4/5] io_uring: move read/write file prep state into actual opcode handler Jens Axboe
2022-03-29 17:07 ` [PATCH 5/5] io_uring: defer file assignment for links Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2022-03-29 17:07 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
In preparation for not using the file at prep time, defer checking if this
file refers to a valid io_uring instance until issue time.
This also means we can get rid of the cleanup flag for splice and tee.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 49 +++++++++++++++++++++----------------------------
1 file changed, 21 insertions(+), 28 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 072a31400e6a..900049cb6a82 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -655,10 +655,10 @@ struct io_epoll {
struct io_splice {
struct file *file_out;
- struct file *file_in;
loff_t off_out;
loff_t off_in;
u64 len;
+ int splice_fd_in;
unsigned int flags;
};
@@ -1688,14 +1688,6 @@ static void io_prep_async_work(struct io_kiocb *req)
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
-
- switch (req->opcode) {
- case IORING_OP_SPLICE:
- case IORING_OP_TEE:
- if (!S_ISREG(file_inode(req->splice.file_in)->i_mode))
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- break;
- }
}
static void io_prep_async_link(struct io_kiocb *req)
@@ -4369,18 +4361,11 @@ static int __io_splice_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- sp->file_in = NULL;
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);
-
if (unlikely(sp->flags & ~valid_flags))
return -EINVAL;
-
- sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
- (sp->flags & SPLICE_F_FD_IN_FIXED));
- if (!sp->file_in)
- return -EBADF;
- req->flags |= REQ_F_NEED_CLEANUP;
+ sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in);
return 0;
}
@@ -4395,20 +4380,27 @@ static int io_tee_prep(struct io_kiocb *req,
static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_splice *sp = &req->splice;
- struct file *in = sp->file_in;
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+ struct file *in;
long ret = 0;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
+
+ in = io_file_get(req->ctx, req, sp->splice_fd_in,
+ (sp->flags & SPLICE_F_FD_IN_FIXED));
+ if (!in) {
+ ret = -EBADF;
+ goto done;
+ }
+
if (sp->len)
ret = do_tee(in, out, sp->len, flags);
if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
io_put_file(in);
- req->flags &= ~REQ_F_NEED_CLEANUP;
-
+done:
if (ret != sp->len)
req_set_fail(req);
io_req_complete(req, ret);
@@ -4427,15 +4419,22 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_splice *sp = &req->splice;
- struct file *in = sp->file_in;
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
loff_t *poff_in, *poff_out;
+ struct file *in;
long ret = 0;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
+ in = io_file_get(req->ctx, req, sp->splice_fd_in,
+ (sp->flags & SPLICE_F_FD_IN_FIXED));
+ if (!in) {
+ ret = -EBADF;
+ goto done;
+ }
+
poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
@@ -4444,8 +4443,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
io_put_file(in);
- req->flags &= ~REQ_F_NEED_CLEANUP;
-
+done:
if (ret != sp->len)
req_set_fail(req);
io_req_complete(req, ret);
@@ -7165,11 +7163,6 @@ static void io_clean_op(struct io_kiocb *req)
kfree(io->free_iov);
break;
}
- case IORING_OP_SPLICE:
- case IORING_OP_TEE:
- if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED))
- io_put_file(req->splice.file_in);
- break;
case IORING_OP_OPENAT:
case IORING_OP_OPENAT2:
if (req->open.filename)
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/5] io_uring: move read/write file prep state into actual opcode handler
2022-03-29 17:07 [PATCHSET 0/5] Fix early file assignment for links Jens Axboe
` (2 preceding siblings ...)
2022-03-29 17:07 ` [PATCH 3/5] io_uring: defer splice/tee " Jens Axboe
@ 2022-03-29 17:07 ` Jens Axboe
2022-03-29 17:07 ` [PATCH 5/5] io_uring: defer file assignment for links Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2022-03-29 17:07 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
In preparation for not necessarily having a file assigned at prep time,
defer any initialization associated with the file to when the opcode
handler is run.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 100 ++++++++++++++++++++++++++------------------------
1 file changed, 52 insertions(+), 48 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 900049cb6a82..52fa0613b442 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3177,42 +3177,12 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req)
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw.kiocb;
- struct file *file = req->file;
unsigned ioprio;
int ret;
- if (!io_req_ffs_set(req))
- req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
-
kiocb->ki_pos = READ_ONCE(sqe->off);
- kiocb->ki_flags = iocb_flags(file);
- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
- if (unlikely(ret))
- return ret;
-
- /*
- * If the file is marked O_NONBLOCK, still allow retry for it if it
- * supports async. Otherwise it's impossible to use O_NONBLOCK files
- * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
- */
- if ((kiocb->ki_flags & IOCB_NOWAIT) ||
- ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
- req->flags |= REQ_F_NOWAIT;
-
- if (ctx->flags & IORING_SETUP_IOPOLL) {
- if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
- return -EOPNOTSUPP;
-
- kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
- kiocb->ki_complete = io_complete_rw_iopoll;
- req->iopoll_completed = 0;
- } else {
- if (kiocb->ki_flags & IOCB_HIPRI)
- return -EINVAL;
- kiocb->ki_complete = io_complete_rw;
- }
+ kiocb->ki_flags = READ_ONCE(sqe->rw_flags);
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -3731,13 +3701,6 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
return 0;
}
-static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- if (unlikely(!(req->file->f_mode & FMODE_READ)))
- return -EBADF;
- return io_prep_rw(req, sqe);
-}
-
/*
* This is our waitqueue callback handler, registered through __folio_lock_async()
* when we initially tried to do the IO with the iocb armed our waitqueue.
@@ -3825,6 +3788,50 @@ static bool need_read_all(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
+static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
+{
+ struct kiocb *kiocb = &req->rw.kiocb;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct file *file = req->file;
+ int rw_flags = kiocb->ki_flags;
+ int ret;
+
+ if (unlikely(!(file->f_mode & mode)))
+ return -EBADF;
+
+ if (!io_req_ffs_set(req))
+ req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
+
+ kiocb->ki_flags = iocb_flags(file);
+ ret = kiocb_set_rw_flags(kiocb, rw_flags);
+ if (unlikely(ret))
+ return ret;
+
+ /*
+ * If the file is marked O_NONBLOCK, still allow retry for it if it
+ * supports async. Otherwise it's impossible to use O_NONBLOCK files
+ * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
+ */
+ if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+ ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
+ req->flags |= REQ_F_NOWAIT;
+
+ if (ctx->flags & IORING_SETUP_IOPOLL) {
+ if (!(kiocb->ki_flags & IOCB_DIRECT))
+ return -EOPNOTSUPP;
+
+ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
+ kiocb->ki_complete = io_complete_rw_iopoll;
+ req->iopoll_completed = 0;
+ } else {
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EINVAL;
+ kiocb->ki_complete = io_complete_rw;
+ }
+
+ return 0;
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rw_state __s, *s = &__s;
@@ -3860,6 +3867,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_restore(&s->iter, &s->iter_state);
iovec = NULL;
}
+ ret = io_rw_init_file(req, FMODE_READ);
+ if (unlikely(ret))
+ goto done;
req->result = iov_iter_count(&s->iter);
if (force_nonblock) {
@@ -3963,14 +3973,6 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
- return -EBADF;
- req->rw.kiocb.ki_hint = ki_hint_validate(file_write_hint(req->file));
- return io_prep_rw(req, sqe);
-}
-
static int io_write(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rw_state __s, *s = &__s;
@@ -3991,6 +3993,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_restore(&s->iter, &s->iter_state);
iovec = NULL;
}
+ ret = io_rw_init_file(req, FMODE_WRITE);
+ if (unlikely(ret))
+ goto done;
req->result = iov_iter_count(&s->iter);
if (force_nonblock) {
@@ -6976,11 +6981,10 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
case IORING_OP_READ:
- return io_read_prep(req, sqe);
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
case IORING_OP_WRITE:
- return io_write_prep(req, sqe);
+ return io_prep_rw(req, sqe);
case IORING_OP_POLL_ADD:
return io_poll_add_prep(req, sqe);
case IORING_OP_POLL_REMOVE:
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 5/5] io_uring: defer file assignment for links
2022-03-29 17:07 [PATCHSET 0/5] Fix early file assignment for links Jens Axboe
` (3 preceding siblings ...)
2022-03-29 17:07 ` [PATCH 4/5] io_uring: move read/write file prep state into actual opcode handler Jens Axboe
@ 2022-03-29 17:07 ` Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2022-03-29 17:07 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Jens Axboe
If an application uses direct open or accept, it knows in advance what
direct descriptor value it will get as it picks it itself. This allows
combined requests such as:
sqe = io_uring_get_sqe(ring);
io_uring_prep_openat_direct(sqe, ..., file_slot);
sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;
sqe = io_uring_get_sqe(ring);
io_uring_prep_read(sqe,file_slot, buf, buf_size, 0);
sqe->flags |= IOSQE_FIXED_FILE;
io_uring_submit(ring);
where we prepare both a file open and read, and only get a completion
event for the read when both have completed successfully.
Currently links are fully prepared before the head is issued, but that
fails if the dependent link needs a file assigned that isn't valid until
the head has completed.
Allow deferral of file setup, which makes this documented case work.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 44 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 39 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 52fa0613b442..067ca76651b0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -784,6 +784,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_DEFERRED_FILE_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -848,6 +849,8 @@ enum {
REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
/* request has already done partial IO */
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
+ /* request has file assignment deferred */
+ REQ_F_DEFERRED_FILE = BIT(REQ_F_DEFERRED_FILE_BIT),
};
struct async_poll {
@@ -2096,6 +2099,21 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
return __io_fill_cqe(ctx, user_data, res, cflags);
}
+static void io_assign_file(struct io_kiocb *req)
+{
+ if (req->file || !io_op_defs[req->opcode].needs_file)
+ return;
+ if (!(req->flags & REQ_F_DEFERRED_FILE)) {
+ req_set_fail(req);
+ return;
+ }
+ req->flags &= ~REQ_F_DEFERRED_FILE;
+ req->file = io_file_get(req->ctx, req, req->result,
+ req->flags & REQ_F_FIXED_FILE);
+ if (!req->file)
+ req_set_fail(req);
+}
+
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
@@ -2112,6 +2130,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
if (req->flags & IO_DISARM_MASK)
io_disarm_next(req);
if (req->link) {
+ io_assign_file(req->link);
io_req_task_queue(req->link);
req->link = NULL;
}
@@ -2423,7 +2442,11 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
__io_req_find_next_prep(req);
nxt = req->link;
req->link = NULL;
- return nxt;
+ if (nxt) {
+ io_assign_file(nxt);
+ return nxt;
+ }
+ return NULL;
}
static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
@@ -2626,6 +2649,10 @@ static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
static void io_req_task_queue(struct io_kiocb *req)
{
+ if (unlikely(req->flags & REQ_F_FAIL)) {
+ io_req_task_queue_fail(req, -ECANCELED);
+ return;
+ }
req->io_task_work.func = io_req_task_submit;
io_req_task_work_add(req, false);
}
@@ -2640,8 +2667,10 @@ static inline void io_queue_next(struct io_kiocb *req)
{
struct io_kiocb *nxt = io_req_find_next(req);
- if (nxt)
+ if (nxt) {
+ io_assign_file(req);
io_req_task_queue(nxt);
+ }
}
static void io_free_req(struct io_kiocb *req)
@@ -7722,6 +7751,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
+ int fd = READ_ONCE(sqe->fd);
/*
* Plug now if we have more than 2 IO left after this, and the
@@ -7733,10 +7763,14 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
blk_start_plug_nr_ios(&state->plug, state->submit_nr);
}
- req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
+ req->file = io_file_get(ctx, req, fd,
(sqe_flags & IOSQE_FIXED_FILE));
- if (unlikely(!req->file))
- return -EBADF;
+ if (unlikely(!req->file)) {
+ if (!ctx->submit_state.link.head)
+ return -EBADF;
+ req->result = fd;
+ req->flags |= REQ_F_DEFERRED_FILE;
+ }
}
personality = READ_ONCE(sqe->personality);
--
2.35.1
^ permalink raw reply related [flat|nested] 6+ messages in thread