From: Jens Axboe <[email protected]>
To: Linus Torvalds <[email protected]>
Cc: io-uring <[email protected]>
Subject: Re: [GIT PULL] io_uring fix for 5.16-rc7
Date: Thu, 23 Dec 2021 17:55:12 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <CAHk-=wgDGpBVOEH+JJS-byQE3adrZEpC9Sn8VCLr7ZhzDQv01w@mail.gmail.com>
On 12/23/21 4:43 PM, Linus Torvalds wrote:
> On Thu, Dec 23, 2021 at 3:39 PM Linus Torvalds
> <[email protected]> wrote:
>>
>> I don't think any of this is right.
>>
>> You can't use f_pos without using fdget_pos() to actually get the lock to it.
>>
>> Which you don't do in io_uring.
>
> I've pulled it because it's still an improvement on what it used to
> be, but f_pos use in io_uring does seem very broken.
Totally untested, but something like this should make it saner. This
grabs the lock (if needed) and position at issue time, rather than at
prep time. We could potentially handle the unlock in the handlers
themselves too, but this stores the state in a flag and makes it part of
the slower path cleanup instead. That ensures we always end up performing
it, even if the request is errored.
Might be straightforward enough for 5.16 in fact, but considering it's
not a new regression, deferring to 5.17 will be saner imho.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fb2a0cb4aaf8..a80f9f8f2cd0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,7 @@
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
- REQ_F_ASYNC_DATA)
+ REQ_F_ASYNC_DATA | REQ_F_CUR_POS_LOCK)
#define IO_TCTX_REFS_CACHE_NR (1U << 10)
@@ -726,6 +726,7 @@ enum {
REQ_F_FAIL_BIT = 8,
REQ_F_INFLIGHT_BIT,
REQ_F_CUR_POS_BIT,
+ REQ_F_CUR_POS_LOCK_BIT,
REQ_F_NOWAIT_BIT,
REQ_F_LINK_TIMEOUT_BIT,
REQ_F_NEED_CLEANUP_BIT,
@@ -765,6 +766,8 @@ enum {
REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
/* read/write uses file position */
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
+ /* request is holding file position lock */
+ REQ_F_CUR_POS_LOCK = BIT(REQ_F_CUR_POS_LOCK_BIT),
/* must not punt to workers */
REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
/* has or had linked timeout */
@@ -2892,12 +2895,15 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kiocb->ki_pos = READ_ONCE(sqe->off);
if (kiocb->ki_pos == -1) {
- if (!(file->f_mode & FMODE_STREAM)) {
+ /*
+ * If we end up using the current file position, just set the
+ * flag and the actual file position will be read in the op
+ * handler themselves.
+ */
+ if (!(file->f_mode & FMODE_STREAM))
req->flags |= REQ_F_CUR_POS;
- kiocb->ki_pos = file->f_pos;
- } else {
+ else
kiocb->ki_pos = 0;
- }
}
kiocb->ki_flags = iocb_flags(file);
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
@@ -3515,6 +3521,25 @@ static bool need_read_all(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
+static bool io_file_pos_lock(struct io_kiocb *req, bool force_nonblock)
+{
+ struct file *file = req->file;
+
+ if (!(req->flags & REQ_F_CUR_POS))
+ return false;
+ if (file->f_mode & FMODE_ATOMIC_POS && file_count(file) > 1) {
+ if (force_nonblock) {
+ if (!mutex_trylock(&file->f_pos_lock))
+ return true;
+ } else {
+ mutex_lock(&file->f_pos_lock);
+ }
+ req->flags |= REQ_F_CUR_POS_LOCK;
+ }
+ req->rw.kiocb.ki_pos = file->f_pos;
+ return false;
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rw_state __s, *s = &__s;
@@ -3543,7 +3568,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
- if (unlikely(!io_file_supports_nowait(req))) {
+ if (unlikely(!io_file_supports_nowait(req) ||
+ io_file_pos_lock(req, true))) {
ret = io_setup_async_rw(req, iovec, s, true);
return ret ?: -EAGAIN;
}
@@ -3551,6 +3577,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
} else {
/* Ensure we clear previously set non-block flag */
kiocb->ki_flags &= ~IOCB_NOWAIT;
+ io_file_pos_lock(req, false);
}
ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -3668,7 +3695,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
- if (unlikely(!io_file_supports_nowait(req)))
+ if (unlikely(!io_file_supports_nowait(req) ||
+ io_file_pos_lock(req, true)))
goto copy_iov;
/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3680,6 +3708,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
} else {
/* Ensure we clear previously set non-block flag */
kiocb->ki_flags &= ~IOCB_NOWAIT;
+ io_file_pos_lock(req, false);
}
ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -6584,6 +6613,8 @@ static void io_clean_op(struct io_kiocb *req)
kfree(req->kbuf);
req->kbuf = NULL;
}
+ if (req->flags & REQ_F_CUR_POS_LOCK)
+ __f_unlock_pos(req->file);
if (req->flags & REQ_F_NEED_CLEANUP) {
switch (req->opcode) {
--
Jens Axboe
next prev parent reply other threads:[~2021-12-24 0:55 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-23 21:11 [GIT PULL] io_uring fix for 5.16-rc7 Jens Axboe
2021-12-23 23:39 ` Linus Torvalds
2021-12-23 23:43 ` Linus Torvalds
2021-12-24 0:55 ` Jens Axboe [this message]
2021-12-23 23:49 ` Jens Axboe
2021-12-23 23:46 ` pr-tracker-bot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox