From: Dylan Yudaken <[email protected]>
To: Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>,
	<[email protected]>
Cc: <[email protected]>, Dylan Yudaken <[email protected]>
Subject: [PATCH v3 4/4] io_uring: pre-increment f_pos on rw
Date: Tue, 22 Feb 2022 02:55:04 -0800
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

In read/write ops, pre-increment f_pos when no offset is specified, and
then attempt to fix up the position after IO completes if it completed
less than expected. This fixes the problem where multiple queued-up IOs
would all obtain the same f_pos, and so perform the same read/write.
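
To illustrate the idea, a minimal sketch of the claim/roll-back pattern
(illustrative only, not the kernel code; the names and the plain pthread
mutex standing in for f_pos_lock are hypothetical):

    #include <pthread.h>
    #include <stdint.h>

    struct pos_state {
            pthread_mutex_t lock;   /* stands in for f_pos_lock */
            uint64_t pos;           /* stands in for file->f_pos */
    };

    /* Claim [start, start + expected) by pre-incrementing the shared pos. */
    static uint64_t claim_pos(struct pos_state *s, uint64_t expected)
    {
            uint64_t start;

            pthread_mutex_lock(&s->lock);
            start = s->pos;
            s->pos += expected;     /* optimistic: assume a full transfer */
            pthread_mutex_unlock(&s->lock);
            return start;
    }

    /* On a short IO, roll pos back, but only if nobody moved it since. */
    static void done_pos(struct pos_state *s, uint64_t start,
                         uint64_t expected, uint64_t actual)
    {
            if (actual >= expected)
                    return;
            if (pthread_mutex_trylock(&s->lock))
                    return; /* contended: pos is changing anyway */
            if (s->pos == start + expected)
                    s->pos = start + actual;
            pthread_mutex_unlock(&s->lock);
    }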

This is still not as consistent as sync r/w, as the pre-increment can
advance the file offset past the end of the file. Working around this
limitation, for example by keeping track of concurrent operations, would
likely be a significant performance hit, and the downside does not seem
too problematic.

The attempt to fix up f_pos afterwards at least means that when a single
operation runs at a time, the position will be consistent.
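
As a concrete example of the fix-up arithmetic (illustrative numbers):
with f_pos initially 0 and a request length (expected) of 4096 that
completes with actual = 100:

    claim:     ki_pos = 0, f_pos = 0 + 4096 = 4096
    complete:  ki_pos has advanced to 0 + 100 = 100
    check:     f_pos == ki_pos + (expected - actual)
               4096  == 100 + (4096 - 100)           -> true
    roll back: f_pos = ki_pos = 100

If another submission moved f_pos in the meantime, the check fails and
f_pos is left alone.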

Co-developed-by: Jens Axboe <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Dylan Yudaken <[email protected]>
---
 fs/io_uring.c | 95 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 82 insertions(+), 13 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8954d82def36..adb15234e53c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3066,22 +3066,86 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 	}
 }
 
-static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
+static bool __io_kiocb_update_pos(struct io_kiocb *req, loff_t **ppos,
+				u64 expected, bool force_nonblock)
+{
+	struct kiocb *kiocb = &req->rw.kiocb;
+
+	WARN_ON(req->flags & REQ_F_CUR_POS);
+	if (req->file->f_mode & FMODE_ATOMIC_POS) {
+		if (force_nonblock) {
+			if (!mutex_trylock(&req->file->f_pos_lock))
+				return true;
+		} else {
+			mutex_lock(&req->file->f_pos_lock);
+		}
+	}
+	kiocb->ki_pos = req->file->f_pos;
+	req->file->f_pos += expected;
+	if (req->file->f_mode & FMODE_ATOMIC_POS)
+		mutex_unlock(&req->file->f_pos_lock);
+
+	*ppos = &kiocb->ki_pos;
+	req->flags |= REQ_F_CUR_POS;
+	return false;
+}
+
+static inline bool io_kiocb_update_pos(struct io_kiocb *req, loff_t **ppos,
+				u64 expected, bool force_nonblock)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	bool is_stream = req->file->f_mode & FMODE_STREAM;
 
 	if (kiocb->ki_pos == -1) {
 		if (!is_stream) {
-			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = req->file->f_pos;
-			return &kiocb->ki_pos;
+			return __io_kiocb_update_pos(req, ppos, expected,
+						force_nonblock);
 		} else {
 			kiocb->ki_pos = 0;
-			return NULL;
+			*ppos = NULL;
+			return false;
 		}
 	}
-	return is_stream ? NULL : &kiocb->ki_pos;
+	*ppos = is_stream ? NULL : &kiocb->ki_pos;
+	return false;
+}
+
+static void __io_kiocb_done_pos(struct io_kiocb *req, u64 actual)
+{
+	struct kiocb *kiocb = &req->rw.kiocb;
+	u64 expected;
+
+	expected = req->rw.len;
+	if (actual >= expected)
+		return;
+
+	/*
+	 * It's not necessarily safe to block on the lock here: if we
+	 * cannot take the position lock, assume the position is being
+	 * changed, and if it is changing we cannot update it anyway.
+	 */
+	if (req->file->f_mode & FMODE_ATOMIC_POS
+		&& !mutex_trylock(&req->file->f_pos_lock))
+		return;
+
+	/*
+	 * Now move the position back, but only if everything is still
+	 * consistent with how we left it originally.
+	 */
+	if (req->file->f_pos == kiocb->ki_pos + (expected - actual))
+		req->file->f_pos = kiocb->ki_pos;
+
+	/* else something else messed with f_pos and we can't do anything */
+
+	if (req->file->f_mode & FMODE_ATOMIC_POS)
+		mutex_unlock(&req->file->f_pos_lock);
+}
+
+static inline void io_kiocb_done_pos(struct io_kiocb *req, u64 actual)
+{
+	if (likely(!(req->flags & REQ_F_CUR_POS)))
+		return;
+	__io_kiocb_done_pos(req, actual);
 }
 
 static void kiocb_done(struct io_kiocb *req, ssize_t ret,
@@ -3097,8 +3161,7 @@ static void kiocb_done(struct io_kiocb *req, ssize_t ret,
 			ret += io->bytes_done;
 	}
 
-	if (req->flags & REQ_F_CUR_POS)
-		req->file->f_pos = req->rw.kiocb.ki_pos;
+	io_kiocb_done_pos(req, ret >= 0 ? ret : 0);
 	if (ret >= 0 && (req->rw.kiocb.ki_complete == io_complete_rw))
 		__io_complete_rw(req, ret, issue_flags);
 	else
@@ -3663,21 +3726,23 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req))) {
+		if (unlikely(!io_file_supports_nowait(req) ||
+				io_kiocb_update_pos(req, &ppos,
+						req->rw.len, true))) {
 			ret = io_setup_async_rw(req, iovec, s, true);
 			return ret ?: -EAGAIN;
 		}
 		kiocb->ki_flags |= IOCB_NOWAIT;
 	} else {
+		io_kiocb_update_pos(req, &ppos, req->rw.len, false);
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
-	ppos = io_kiocb_update_pos(req);
-
 	ret = rw_verify_area(READ, req->file, ppos, req->result);
 	if (unlikely(ret)) {
 		kfree(iovec);
+		io_kiocb_done_pos(req, 0);
 		return ret;
 	}
 
@@ -3799,14 +3864,17 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		    (req->flags & REQ_F_ISREG))
 			goto copy_iov;
 
+		/* if we cannot lock the file position then punt */
+		if (unlikely(io_kiocb_update_pos(req, &ppos, req->rw.len, true)))
+			goto copy_iov;
+
 		kiocb->ki_flags |= IOCB_NOWAIT;
 	} else {
+		io_kiocb_update_pos(req, &ppos, req->rw.len, false);
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
-	ppos = io_kiocb_update_pos(req);
-
 	ret = rw_verify_area(WRITE, req->file, ppos, req->result);
 	if (unlikely(ret))
 		goto out_free;
@@ -3859,6 +3927,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		return ret ?: -EAGAIN;
 	}
 out_free:
+	io_kiocb_done_pos(req, 0);
 	/* it's reportedly faster than delegating the null check to kfree() */
 	if (iovec)
 		kfree(iovec);
-- 
2.30.2
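
For completeness, a minimal userspace sketch using liburing that
exercises the case this patch addresses (the file name, buffer sizes and
minimal error handling are illustrative; an SQE offset of -1 means "use
the file position"): two reads are queued against the same file, which
before this series could both start from the same f_pos.

    #include <fcntl.h>
    #include <liburing.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            struct io_uring ring;
            struct io_uring_cqe *cqe;
            char buf1[16], buf2[16];
            int fd, i;

            if (io_uring_queue_init(4, &ring, 0) < 0)
                    return 1;
            fd = open("testfile", O_RDONLY);  /* illustrative file */
            if (fd < 0)
                    return 1;

            /* Two reads at offset -1: both use (and advance) f_pos. */
            io_uring_prep_read(io_uring_get_sqe(&ring), fd,
                               buf1, sizeof(buf1), -1);
            io_uring_prep_read(io_uring_get_sqe(&ring), fd,
                               buf2, sizeof(buf2), -1);
            io_uring_submit(&ring);

            for (i = 0; i < 2; i++) {
                    if (io_uring_wait_cqe(&ring, &cqe) < 0)
                            return 1;
                    printf("read %d: res=%d\n", i, cqe->res);
                    io_uring_cqe_seen(&ring, cqe);
            }

            close(fd);
            io_uring_queue_exit(&ring);
            return 0;
    }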

