[PATCHSET 0/2] io_uring: handle short reads internally

public inbox for [email protected]
 help / color / mirror / Atom feed

* [PATCHSET 0/2] io_uring: handle short reads internally
@ 2020-08-13 17:56 Jens Axboe
  2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: david, jmoyer

Since we've had a few cases of applications not dealing with this
appopriately, I believe the safest course of action is to ensure that
we don't return short reads when we really don't have to.

The first patch is just a prep patch that retains iov_iter state over
retries, while the second one actually enables just doing retries if
we get a short read back.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls
  2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe
@ 2020-08-13 17:56 ` Jens Axboe
  2020-08-13 17:56 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe
  2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer
  2 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: david, jmoyer, Jens Axboe

Instead of maintaining (and setting/remembering) iov_iter size and
segment counts, just put the iov_iter in the async part of the IO
structure.

This is mostly a preparation patch for doing appropriate internal retries
for short reads, but it also cleans up the state handling nicely and
simplifies it quite a bit.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 118 ++++++++++++++++++++++++--------------------------
 1 file changed, 56 insertions(+), 62 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1ec25ee71372..a20fccf91d76 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -508,9 +508,7 @@ struct io_async_msghdr {
 
 struct io_async_rw {
 	struct iovec			fast_iov[UIO_FASTIOV];
-	struct iovec			*iov;
-	ssize_t				nr_segs;
-	ssize_t				size;
+	struct iov_iter			iter;
 	struct wait_page_queue		wpq;
 };
 
@@ -915,9 +913,8 @@ static void io_file_put_work(struct work_struct *work);
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter,
 			       bool needs_lock);
-static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
-			     struct iovec *iovec, struct iovec *fast_iov,
-			     struct iov_iter *iter);
+static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
+			     struct iovec *fast_iov, struct iov_iter *iter);
 
 static struct kmem_cache *req_cachep;
 
@@ -2299,7 +2296,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	ret = io_import_iovec(rw, req, &iovec, &iter, false);
 	if (ret < 0)
 		goto end_req;
-	ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);
+	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
 	if (!ret)
 		return true;
 	kfree(iovec);
@@ -2830,6 +2827,13 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 	if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
 		return -EINVAL;
 
+	if (req->io) {
+		struct io_async_rw *iorw = &req->io->rw;
+
+		*iovec = NULL;
+		return iov_iter_count(&iorw->iter);
+	}
+
 	if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
 		if (req->flags & REQ_F_BUFFER_SELECT) {
 			buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
@@ -2845,14 +2849,6 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 		return ret < 0 ? ret : sqe_len;
 	}
 
-	if (req->io) {
-		struct io_async_rw *iorw = &req->io->rw;
-
-		iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
-		*iovec = NULL;
-		return iorw->size;
-	}
-
 	if (req->flags & REQ_F_BUFFER_SELECT) {
 		ret = io_iov_buffer_select(req, *iovec, needs_lock);
 		if (!ret) {
@@ -2930,21 +2926,19 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
 	return ret;
 }
 
-static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
-			  struct iovec *iovec, struct iovec *fast_iov,
-			  struct iov_iter *iter)
+static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
+			  struct iovec *fast_iov, struct iov_iter *iter)
 {
 	struct io_async_rw *rw = &req->io->rw;
 
-	rw->nr_segs = iter->nr_segs;
-	rw->size = io_size;
+	memcpy(&rw->iter, iter, sizeof(*iter));
 	if (!iovec) {
-		rw->iov = rw->fast_iov;
-		if (rw->iov != fast_iov)
-			memcpy(rw->iov, fast_iov,
+		rw->iter.iov = rw->fast_iov;
+		if (rw->iter.iov != fast_iov)
+			memcpy((void *) rw->iter.iov, fast_iov,
 			       sizeof(struct iovec) * iter->nr_segs);
 	} else {
-		rw->iov = iovec;
+		rw->iter.iov = iovec;
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 }
@@ -2963,9 +2957,8 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 	return  __io_alloc_async_ctx(req);
 }
 
-static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
-			     struct iovec *iovec, struct iovec *fast_iov,
-			     struct iov_iter *iter)
+static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
+			     struct iovec *fast_iov, struct iov_iter *iter)
 {
 	if (!io_op_defs[req->opcode].async_ctx)
 		return 0;
@@ -2973,7 +2966,7 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 		if (__io_alloc_async_ctx(req))
 			return -ENOMEM;
 
-		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
+		io_req_map_rw(req, iovec, fast_iov, iter);
 	}
 	return 0;
 }
@@ -2981,18 +2974,19 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
 				   bool force_nonblock)
 {
-	struct io_async_ctx *io = req->io;
-	struct iov_iter iter;
+	struct io_async_rw *iorw = &req->io->rw;
 	ssize_t ret;
 
-	io->rw.iov = io->rw.fast_iov;
+	iorw->iter.iov = iorw->fast_iov;
+	/* reset ->io around the iovec import, we don't want to use it */
 	req->io = NULL;
-	ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
-	req->io = io;
+	ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov,
+				&iorw->iter, !force_nonblock);
+	req->io = container_of(iorw, struct io_async_ctx, rw);
 	if (unlikely(ret < 0))
 		return ret;
 
-	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+	io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter);
 	return 0;
 }
 
@@ -3090,7 +3084,8 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req)
+static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
+			       struct iovec *fast_iov, struct iov_iter *iter)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	int ret;
@@ -3113,8 +3108,11 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	 * If request type doesn't require req->io to defer in general,
 	 * we need to allocate it here
 	 */
-	if (!req->io && __io_alloc_async_ctx(req))
-		return false;
+	if (!req->io) {
+		if (__io_alloc_async_ctx(req))
+			return false;
+		io_req_map_rw(req, iovec, fast_iov, iter);
+	}
 
 	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
 						io_async_buf_func, req);
@@ -3141,12 +3139,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
-	struct iov_iter iter;
+	struct iov_iter __iter, *iter = &__iter;
 	size_t iov_count;
 	ssize_t io_size, ret, ret2;
-	unsigned long nr_segs;
 
-	ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
+	if (req->io)
+		iter = &req->io->rw.iter;
+
+	ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
 	if (ret < 0)
 		return ret;
 	io_size = ret;
@@ -3160,30 +3160,26 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 	if (force_nonblock && !io_file_supports_async(req->file, READ))
 		goto copy_iov;
 
-	iov_count = iov_iter_count(&iter);
-	nr_segs = iter.nr_segs;
+	iov_count = iov_iter_count(iter);
 	ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
 	if (unlikely(ret))
 		goto out_free;
 
-	ret2 = io_iter_do_read(req, &iter);
+	ret2 = io_iter_do_read(req, iter);
 
 	/* Catch -EAGAIN return for forced non-blocking submission */
 	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
 		kiocb_done(kiocb, ret2, cs);
 	} else {
-		iter.count = iov_count;
-		iter.nr_segs = nr_segs;
 copy_iov:
-		ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
-					&iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
 		iovec = NULL;
 		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req)) {
-			ret2 = io_iter_do_read(req, &iter);
+		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
+			ret2 = io_iter_do_read(req, iter);
 			if (ret2 == -EIOCBQUEUED) {
 				goto out_free;
 			} else if (ret2 != -EAGAIN) {
@@ -3223,12 +3219,14 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
-	struct iov_iter iter;
+	struct iov_iter __iter, *iter = &__iter;
 	size_t iov_count;
 	ssize_t ret, ret2, io_size;
-	unsigned long nr_segs;
 
-	ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock);
+	if (req->io)
+		iter = &req->io->rw.iter;
+
+	ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
 	if (ret < 0)
 		return ret;
 	io_size = ret;
@@ -3247,8 +3245,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 	    (req->flags & REQ_F_ISREG))
 		goto copy_iov;
 
-	iov_count = iov_iter_count(&iter);
-	nr_segs = iter.nr_segs;
+	iov_count = iov_iter_count(iter);
 	ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
 	if (unlikely(ret))
 		goto out_free;
@@ -3269,9 +3266,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 	kiocb->ki_flags |= IOCB_WRITE;
 
 	if (req->file->f_op->write_iter)
-		ret2 = call_write_iter(req->file, kiocb, &iter);
+		ret2 = call_write_iter(req->file, kiocb, iter);
 	else if (req->file->f_op->write)
-		ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+		ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter);
 	else
 		ret2 = -EINVAL;
 
@@ -3284,11 +3281,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 	if (!force_nonblock || ret2 != -EAGAIN) {
 		kiocb_done(kiocb, ret2, cs);
 	} else {
-		iter.count = iov_count;
-		iter.nr_segs = nr_segs;
 copy_iov:
-		ret = io_setup_async_rw(req, io_size, iovec, inline_vecs,
-					&iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
@@ -5583,8 +5577,8 @@ static void __io_clean_op(struct io_kiocb *req)
 		case IORING_OP_WRITEV:
 		case IORING_OP_WRITE_FIXED:
 		case IORING_OP_WRITE:
-			if (io->rw.iov != io->rw.fast_iov)
-				kfree(io->rw.iov);
+			if (io->rw.iter.iov != io->rw.fast_iov)
+				kfree(io->rw.iter.iov);
 			break;
 		case IORING_OP_RECVMSG:
 		case IORING_OP_SENDMSG:
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/2] io_uring: internally retry short reads
  2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe
  2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe
@ 2020-08-13 17:56 ` Jens Axboe
  2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer
  2 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: david, jmoyer, Jens Axboe

We've had a few application cases of not handling short reads properly,
and it is understandable as short reads aren't really expected if the
application isn't doing non-blocking IO.

Now that we retain the iov_iter over retries, we can implement internal
retry pretty trivially. This ensures that we don't return a short read,
even for buffered reads on page cache conflicts.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 72 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a20fccf91d76..c6a9f1b11e85 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -509,6 +509,7 @@ struct io_async_msghdr {
 struct io_async_rw {
 	struct iovec			fast_iov[UIO_FASTIOV];
 	struct iov_iter			iter;
+	size_t				bytes_done;
 	struct wait_page_queue		wpq;
 };
 
@@ -914,7 +915,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter,
 			       bool needs_lock);
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iovec *fast_iov, struct iov_iter *iter);
+			     struct iovec *fast_iov, struct iov_iter *iter,
+			     bool force);
 
 static struct kmem_cache *req_cachep;
 
@@ -2296,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	ret = io_import_iovec(rw, req, &iovec, &iter, false);
 	if (ret < 0)
 		goto end_req;
-	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
+	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
 	if (!ret)
 		return true;
 	kfree(iovec);
@@ -2586,6 +2588,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+	/* add previously done IO, if any */
+	if (req->io && req->io->rw.bytes_done > 0) {
+		if (ret < 0)
+			ret = req->io->rw.bytes_done;
+		else
+			ret += req->io->rw.bytes_done;
+	}
+
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
@@ -2932,6 +2942,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
 	struct io_async_rw *rw = &req->io->rw;
 
 	memcpy(&rw->iter, iter, sizeof(*iter));
+	rw->bytes_done = 0;
 	if (!iovec) {
 		rw->iter.iov = rw->fast_iov;
 		if (rw->iter.iov != fast_iov)
@@ -2958,9 +2969,10 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 }
 
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iovec *fast_iov, struct iov_iter *iter)
+			     struct iovec *fast_iov, struct iov_iter *iter,
+			     bool force)
 {
-	if (!io_op_defs[req->opcode].async_ctx)
+	if (!force && !io_op_defs[req->opcode].async_ctx)
 		return 0;
 	if (!req->io) {
 		if (__io_alloc_async_ctx(req))
@@ -3084,8 +3096,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
-			       struct iovec *fast_iov, struct iov_iter *iter)
+static bool io_rw_should_retry(struct io_kiocb *req)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	int ret;
@@ -3094,8 +3105,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (req->flags & REQ_F_NOWAIT)
 		return false;
 
-	/* already tried, or we're doing O_DIRECT */
-	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+	/* Only for buffered IO */
+	if (kiocb->ki_flags & IOCB_DIRECT)
 		return false;
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
@@ -3104,16 +3115,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
-	/*
-	 * If request type doesn't require req->io to defer in general,
-	 * we need to allocate it here
-	 */
-	if (!req->io) {
-		if (__io_alloc_async_ctx(req))
-			return false;
-		io_req_map_rw(req, iovec, fast_iov, iter);
-	}
-
 	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
 						io_async_buf_func, req);
 	if (!ret) {
@@ -3167,29 +3168,46 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 
 	ret2 = io_iter_do_read(req, iter);
 
-	/* Catch -EAGAIN return for forced non-blocking submission */
-	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
-		kiocb_done(kiocb, ret2, cs);
-	} else {
+	if (!iov_iter_count(iter))
+		goto done;
+
+	if (ret2 >= 0) {
+		/* successful read, but partial */
+		if (req->flags & REQ_F_NOWAIT)
+			goto done;
+		io_size -= ret2;
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
 		iovec = NULL;
+		/* now use our persistent iterator, if we aren't already */
+		iter = &req->io->rw.iter;
+
+		if (ret2 > 0) {
+retry:
+			req->io->rw.bytes_done += ret2;
+		}
 		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
+		if (io_rw_should_retry(req)) {
 			ret2 = io_iter_do_read(req, iter);
 			if (ret2 == -EIOCBQUEUED) {
 				goto out_free;
+			} else if (ret2 == io_size) {
+				goto done;
 			} else if (ret2 != -EAGAIN) {
-				kiocb_done(kiocb, ret2, cs);
-				goto out_free;
+				/* we got some bytes, but not all. retry. */
+				if (ret2 > 0)
+					goto retry;
+				goto done;
 			}
 		}
 		kiocb->ki_flags &= ~IOCB_WAITQ;
 		return -EAGAIN;
 	}
+done:
+	kiocb_done(kiocb, ret2, cs);
 out_free:
 	if (iovec)
 		kfree(iovec);
@@ -3282,7 +3300,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 		kiocb_done(kiocb, ret2, cs);
 	} else {
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe
  2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe
  2020-08-13 17:56 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe
@ 2020-08-13 20:25 ` Jeff Moyer
  2020-08-13 20:33   ` Jens Axboe
  2 siblings, 1 reply; 18+ messages in thread
From: Jeff Moyer @ 2020-08-13 20:25 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, david

Jens Axboe <[email protected]> writes:

> Since we've had a few cases of applications not dealing with this
> appopriately, I believe the safest course of action is to ensure that
> we don't return short reads when we really don't have to.
>
> The first patch is just a prep patch that retains iov_iter state over
> retries, while the second one actually enables just doing retries if
> we get a short read back.

Have you run this through the liburing regression tests?

Tests  <eeed8b54e0df-test> <timeout-overflow> <read-write> failed

I'll take a look at the failures, but wanted to bring it to your
attention sooner rather than later.  I was using your io_uring-5.9
branch.

-Jeff


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer
@ 2020-08-13 20:33   ` Jens Axboe
  2020-08-13 20:37     ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 20:33 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/13/20 2:25 PM, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
> 
>> Since we've had a few cases of applications not dealing with this
>> appopriately, I believe the safest course of action is to ensure that
>> we don't return short reads when we really don't have to.
>>
>> The first patch is just a prep patch that retains iov_iter state over
>> retries, while the second one actually enables just doing retries if
>> we get a short read back.
> 
> Have you run this through the liburing regression tests?
> 
> Tests  <eeed8b54e0df-test> <timeout-overflow> <read-write> failed
> 
> I'll take a look at the failures, but wanted to bring it to your
> attention sooner rather than later.  I was using your io_uring-5.9
> branch.

The eed8b54e0df-test failure is known with this one, pretty sure it
was always racy, but I'm looking into it.

The timeout-overflow test needs fixing, it's just an ordering thing
with the batched completions done through submit. Not new with these
patches.

The read-write one I'm interested in, what did you run it on? And
what was the failure?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 20:33   ` Jens Axboe
@ 2020-08-13 20:37     ` Jens Axboe
  2020-08-13 20:41       ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 20:37 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/13/20 2:33 PM, Jens Axboe wrote:
> On 8/13/20 2:25 PM, Jeff Moyer wrote:
>> Jens Axboe <[email protected]> writes:
>>
>>> Since we've had a few cases of applications not dealing with this
>>> appopriately, I believe the safest course of action is to ensure that
>>> we don't return short reads when we really don't have to.
>>>
>>> The first patch is just a prep patch that retains iov_iter state over
>>> retries, while the second one actually enables just doing retries if
>>> we get a short read back.
>>
>> Have you run this through the liburing regression tests?
>>
>> Tests  <eeed8b54e0df-test> <timeout-overflow> <read-write> failed
>>
>> I'll take a look at the failures, but wanted to bring it to your
>> attention sooner rather than later.  I was using your io_uring-5.9
>> branch.
> 
> The eed8b54e0df-test failure is known with this one, pretty sure it
> was always racy, but I'm looking into it.
> 
> The timeout-overflow test needs fixing, it's just an ordering thing
> with the batched completions done through submit. Not new with these
> patches.
> 
> The read-write one I'm interested in, what did you run it on? And
> what was the failure?

BTW, what git sha did you run?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 20:37     ` Jens Axboe
@ 2020-08-13 20:41       ` Jens Axboe
  2020-08-13 20:49         ` Jeff Moyer
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 20:41 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/13/20 2:37 PM, Jens Axboe wrote:
> On 8/13/20 2:33 PM, Jens Axboe wrote:
>> On 8/13/20 2:25 PM, Jeff Moyer wrote:
>>> Jens Axboe <[email protected]> writes:
>>>
>>>> Since we've had a few cases of applications not dealing with this
>>>> appopriately, I believe the safest course of action is to ensure that
>>>> we don't return short reads when we really don't have to.
>>>>
>>>> The first patch is just a prep patch that retains iov_iter state over
>>>> retries, while the second one actually enables just doing retries if
>>>> we get a short read back.
>>>
>>> Have you run this through the liburing regression tests?
>>>
>>> Tests  <eeed8b54e0df-test> <timeout-overflow> <read-write> failed
>>>
>>> I'll take a look at the failures, but wanted to bring it to your
>>> attention sooner rather than later.  I was using your io_uring-5.9
>>> branch.
>>
>> The eed8b54e0df-test failure is known with this one, pretty sure it
>> was always racy, but I'm looking into it.
>>
>> The timeout-overflow test needs fixing, it's just an ordering thing
>> with the batched completions done through submit. Not new with these
>> patches.
>>
>> The read-write one I'm interested in, what did you run it on? And
>> what was the failure?
> 
> BTW, what git sha did you run?

I do see a failure with dm on that, I'll take a look.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 20:41       ` Jens Axboe
@ 2020-08-13 20:49         ` Jeff Moyer
  2020-08-13 22:08           ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Moyer @ 2020-08-13 20:49 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, david

Jens Axboe <[email protected]> writes:

> On 8/13/20 2:37 PM, Jens Axboe wrote:
>> On 8/13/20 2:33 PM, Jens Axboe wrote:
>>> On 8/13/20 2:25 PM, Jeff Moyer wrote:
>>>> Jens Axboe <[email protected]> writes:
>>>>
>>>>> Since we've had a few cases of applications not dealing with this
>>>>> appopriately, I believe the safest course of action is to ensure that
>>>>> we don't return short reads when we really don't have to.
>>>>>
>>>>> The first patch is just a prep patch that retains iov_iter state over
>>>>> retries, while the second one actually enables just doing retries if
>>>>> we get a short read back.
>>>>
>>>> Have you run this through the liburing regression tests?
>>>>
>>>> Tests  <eeed8b54e0df-test> <timeout-overflow> <read-write> failed
>>>>
>>>> I'll take a look at the failures, but wanted to bring it to your
>>>> attention sooner rather than later.  I was using your io_uring-5.9
>>>> branch.
>>>
>>> The eed8b54e0df-test failure is known with this one, pretty sure it
>>> was always racy, but I'm looking into it.
>>>
>>> The timeout-overflow test needs fixing, it's just an ordering thing
>>> with the batched completions done through submit. Not new with these
>>> patches.

OK.

>>> The read-write one I'm interested in, what did you run it on? And
>>> what was the failure?
>> 
>> BTW, what git sha did you run?
>
> I do see a failure with dm on that, I'll take a look.

I ran it on a file system atop nvme with 8 poll queues.

liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20

The error I saw was:

Running test read-write:
Non-vectored IO not supported, skipping
cqe res -22, wanted 2048
test_buf_select_short vec failed
Test read-write failed with ret 1

But I don't think it was due to these two commits.

Thanks,
Jeff


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 20:49         ` Jeff Moyer
@ 2020-08-13 22:08           ` Jens Axboe
  2020-08-13 22:21             ` Jeff Moyer
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 22:08 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/13/20 2:49 PM, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
> 
>> On 8/13/20 2:37 PM, Jens Axboe wrote:
>>> On 8/13/20 2:33 PM, Jens Axboe wrote:
>>>> On 8/13/20 2:25 PM, Jeff Moyer wrote:
>>>>> Jens Axboe <[email protected]> writes:
>>>>>
>>>>>> Since we've had a few cases of applications not dealing with this
>>>>>> appopriately, I believe the safest course of action is to ensure that
>>>>>> we don't return short reads when we really don't have to.
>>>>>>
>>>>>> The first patch is just a prep patch that retains iov_iter state over
>>>>>> retries, while the second one actually enables just doing retries if
>>>>>> we get a short read back.
>>>>>
>>>>> Have you run this through the liburing regression tests?
>>>>>
>>>>> Tests  <eeed8b54e0df-test> <timeout-overflow> <read-write> failed
>>>>>
>>>>> I'll take a look at the failures, but wanted to bring it to your
>>>>> attention sooner rather than later.  I was using your io_uring-5.9
>>>>> branch.
>>>>
>>>> The eed8b54e0df-test failure is known with this one, pretty sure it
>>>> was always racy, but I'm looking into it.
>>>>
>>>> The timeout-overflow test needs fixing, it's just an ordering thing
>>>> with the batched completions done through submit. Not new with these
>>>> patches.
> 
> OK.
> 
>>>> The read-write one I'm interested in, what did you run it on? And
>>>> what was the failure?
>>>
>>> BTW, what git sha did you run?
>>
>> I do see a failure with dm on that, I'll take a look.
> 
> I ran it on a file system atop nvme with 8 poll queues.
> 
> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20

Fixed it, and actually enabled a further cleanup.

> The error I saw was:
> 
> Running test read-write:
> Non-vectored IO not supported, skipping
> cqe res -22, wanted 2048
> test_buf_select_short vec failed
> Test read-write failed with ret 1
> 
> But I don't think it was due to these two commits.

Yeah don't think so either.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 22:08           ` Jens Axboe
@ 2020-08-13 22:21             ` Jeff Moyer
  2020-08-13 22:31               ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Moyer @ 2020-08-13 22:21 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, david

Jens Axboe <[email protected]> writes:

>>>> BTW, what git sha did you run?
>>>
>>> I do see a failure with dm on that, I'll take a look.
>> 
>> I ran it on a file system atop nvme with 8 poll queues.
>> 
>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20
>
> Fixed it, and actually enabled a further cleanup.

Great, thanks!  Did you push that out somewhere?

-Jeff


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 22:21             ` Jeff Moyer
@ 2020-08-13 22:31               ` Jens Axboe
  2020-08-17 20:55                 ` Jeff Moyer
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-13 22:31 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/13/20 4:21 PM, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
> 
>>>>> BTW, what git sha did you run?
>>>>
>>>> I do see a failure with dm on that, I'll take a look.
>>>
>>> I ran it on a file system atop nvme with 8 poll queues.
>>>
>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20
>>
>> Fixed it, and actually enabled a further cleanup.
> 
> Great, thanks!  Did you push that out somewhere?

It's pushed to io_uring-5.9, current sha is:

ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085

FWIW, the issue was just for fixed buffers. It's running through the
usual testing now.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-13 22:31               ` Jens Axboe
@ 2020-08-17 20:55                 ` Jeff Moyer
  2020-08-17 20:57                   ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Moyer @ 2020-08-17 20:55 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, david

Jens Axboe <[email protected]> writes:

> On 8/13/20 4:21 PM, Jeff Moyer wrote:
>> Jens Axboe <[email protected]> writes:
>> 
>>>>>> BTW, what git sha did you run?
>>>>>
>>>>> I do see a failure with dm on that, I'll take a look.
>>>>
>>>> I ran it on a file system atop nvme with 8 poll queues.
>>>>
>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20
>>>
>>> Fixed it, and actually enabled a further cleanup.
>> 
>> Great, thanks!  Did you push that out somewhere?
>
> It's pushed to io_uring-5.9, current sha is:
>
> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085
>
> FWIW, the issue was just for fixed buffers. It's running through the
> usual testing now.

OK.  Since it was an unrelated problem, I was expecting a separate
commit for it.  What was the exact issue?  Is it something that needs
backporting to -stable?

-Jeff


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-17 20:55                 ` Jeff Moyer
@ 2020-08-17 20:57                   ` Jens Axboe
  2020-08-18 17:55                     ` Jeff Moyer
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-17 20:57 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/17/20 1:55 PM, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
> 
>> On 8/13/20 4:21 PM, Jeff Moyer wrote:
>>> Jens Axboe <[email protected]> writes:
>>>
>>>>>>> BTW, what git sha did you run?
>>>>>>
>>>>>> I do see a failure with dm on that, I'll take a look.
>>>>>
>>>>> I ran it on a file system atop nvme with 8 poll queues.
>>>>>
>>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
>>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20
>>>>
>>>> Fixed it, and actually enabled a further cleanup.
>>>
>>> Great, thanks!  Did you push that out somewhere?
>>
>> It's pushed to io_uring-5.9, current sha is:
>>
>> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085
>>
>> FWIW, the issue was just for fixed buffers. It's running through the
>> usual testing now.
> 
> OK.  Since it was an unrelated problem, I was expecting a separate
> commit for it.  What was the exact issue?  Is it something that needs
> backporting to -stable?

No, it was a bug in the posted patch, so I just folded in the fix.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-17 20:57                   ` Jens Axboe
@ 2020-08-18 17:55                     ` Jeff Moyer
  2020-08-18 18:00                       ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Moyer @ 2020-08-18 17:55 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, david

Jens Axboe <[email protected]> writes:

> On 8/17/20 1:55 PM, Jeff Moyer wrote:
>> Jens Axboe <[email protected]> writes:
>> 
>>> On 8/13/20 4:21 PM, Jeff Moyer wrote:
>>>> Jens Axboe <[email protected]> writes:
>>>>
>>>>>>>> BTW, what git sha did you run?
>>>>>>>
>>>>>>> I do see a failure with dm on that, I'll take a look.
>>>>>>
>>>>>> I ran it on a file system atop nvme with 8 poll queues.
>>>>>>
>>>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
>>>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20
>>>>>
>>>>> Fixed it, and actually enabled a further cleanup.
>>>>
>>>> Great, thanks!  Did you push that out somewhere?
>>>
>>> It's pushed to io_uring-5.9, current sha is:
>>>
>>> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085
>>>
>>> FWIW, the issue was just for fixed buffers. It's running through the
>>> usual testing now.
>> 
>> OK.  Since it was an unrelated problem, I was expecting a separate
>> commit for it.  What was the exact issue?  Is it something that needs
>> backporting to -stable?
>
> No, it was a bug in the posted patch, so I just folded in the fix.

We must be hitting different problems, then.  I just tested your
5.7-stable branch (running the test suite from an xfs file system on an
nvme partition with polling enabled), and the read-write test fails:

Running test read-write:
Non-vectored IO not supported, skipping
cqe res -22, wanted 2048
test_buf_select_short vec failed
Test read-write failed with ret 1

That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad

-Jeff


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-18 17:55                     ` Jeff Moyer
@ 2020-08-18 18:00                       ` Jens Axboe
  2020-08-18 18:07                         ` Jeff Moyer
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-18 18:00 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/18/20 10:55 AM, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
> 
>> On 8/17/20 1:55 PM, Jeff Moyer wrote:
>>> Jens Axboe <[email protected]> writes:
>>>
>>>> On 8/13/20 4:21 PM, Jeff Moyer wrote:
>>>>> Jens Axboe <[email protected]> writes:
>>>>>
>>>>>>>>> BTW, what git sha did you run?
>>>>>>>>
>>>>>>>> I do see a failure with dm on that, I'll take a look.
>>>>>>>
>>>>>>> I ran it on a file system atop nvme with 8 poll queues.
>>>>>>>
>>>>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158
>>>>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20
>>>>>>
>>>>>> Fixed it, and actually enabled a further cleanup.
>>>>>
>>>>> Great, thanks!  Did you push that out somewhere?
>>>>
>>>> It's pushed to io_uring-5.9, current sha is:
>>>>
>>>> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085
>>>>
>>>> FWIW, the issue was just for fixed buffers. It's running through the
>>>> usual testing now.
>>>
>>> OK.  Since it was an unrelated problem, I was expecting a separate
>>> commit for it.  What was the exact issue?  Is it something that needs
>>> backporting to -stable?
>>
>> No, it was a bug in the posted patch, so I just folded in the fix.
> 
> We must be hitting different problems, then.  I just tested your
> 5.7-stable branch (running the test suite from an xfs file system on an
> nvme partition with polling enabled), and the read-write test fails:
> 
> Running test read-write:
> Non-vectored IO not supported, skipping
> cqe res -22, wanted 2048
> test_buf_select_short vec failed
> Test read-write failed with ret 1
> 
> That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad

Not sure what this is, haven't seen that here and my regular liburing
runs include both xfs-on-nvme(with poll queues) as one of the test
points. Seems to me like there's two oddities in the above:

1) Saying that Non-vectored isn't supported, that is not true on 5.7.
   This is due to an -EINVAL return.
2) The test_buf_select_short_vec failure

I'll see if I can reproduce this. Anything special otherwise enabled?
Scheduler on the nvme device? nr_requests? XFS options?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-18 18:00                       ` Jens Axboe
@ 2020-08-18 18:07                         ` Jeff Moyer
  2020-08-18 18:10                           ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Moyer @ 2020-08-18 18:07 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, david

Jens Axboe <[email protected]> writes:

>> We must be hitting different problems, then.  I just tested your
>> 5.7-stable branch (running the test suite from an xfs file system on an
>> nvme partition with polling enabled), and the read-write test fails:
>> 
>> Running test read-write:
>> Non-vectored IO not supported, skipping
>> cqe res -22, wanted 2048
>> test_buf_select_short vec failed
>> Test read-write failed with ret 1
>> 
>> That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad
>
> Not sure what this is, haven't seen that here and my regular liburing
> runs include both xfs-on-nvme(with poll queues) as one of the test
> points. Seems to me like there's two oddities in the above:
>
> 1) Saying that Non-vectored isn't supported, that is not true on 5.7.
>    This is due to an -EINVAL return.
> 2) The test_buf_select_short_vec failure
>
> I'll see if I can reproduce this. Anything special otherwise enabled?
> Scheduler on the nvme device? nr_requests? XFS options?

No changes from defaults.

/dev/nvme0n1p1 on /mnt/test type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,noquota)

# xfs_info /dev/nvme0n1p1
meta-data=/dev/nvme0n1p1         isize=512    agcount=4, agsize=22893222 blks
         =                       sectsz=4096  attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1
data     =                       bsize=4096   blocks=91572885, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=44713, version=2
         =                       sectsz=4096  sunit=1 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

# cat /sys/block/nvme0n1/queue/scheduler 
[none] mq-deadline kyber bfq 

# cat /sys/block/nvme0n1/queue/nr_requests 
1023

# cat /sys/module/nvme/parameters/poll_queues 
8

I'll see if I can figure out what's going on.

-Jeff


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCHSET 0/2] io_uring: handle short reads internally
  2020-08-18 18:07                         ` Jeff Moyer
@ 2020-08-18 18:10                           ` Jens Axboe
  0 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2020-08-18 18:10 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: io-uring, david

On 8/18/20 11:07 AM, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
> 
>>> We must be hitting different problems, then.  I just tested your
>>> 5.7-stable branch (running the test suite from an xfs file system on an
>>> nvme partition with polling enabled), and the read-write test fails:
>>>
>>> Running test read-write:
>>> Non-vectored IO not supported, skipping
>>> cqe res -22, wanted 2048
>>> test_buf_select_short vec failed
>>> Test read-write failed with ret 1
>>>
>>> That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad
>>
>> Not sure what this is, haven't seen that here and my regular liburing
>> runs include both xfs-on-nvme(with poll queues) as one of the test
>> points. Seems to me like there's two oddities in the above:
>>
>> 1) Saying that Non-vectored isn't supported, that is not true on 5.7.
>>    This is due to an -EINVAL return.
>> 2) The test_buf_select_short_vec failure
>>
>> I'll see if I can reproduce this. Anything special otherwise enabled?
>> Scheduler on the nvme device? nr_requests? XFS options?
> 
> No changes from defaults.
> 
> /dev/nvme0n1p1 on /mnt/test type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,noquota)
> 
> # xfs_info /dev/nvme0n1p1
> meta-data=/dev/nvme0n1p1         isize=512    agcount=4, agsize=22893222 blks
>          =                       sectsz=4096  attr=2, projid32bit=1
>          =                       crc=1        finobt=1, sparse=1, rmapbt=0
>          =                       reflink=1
> data     =                       bsize=4096   blocks=91572885, imaxpct=25
>          =                       sunit=0      swidth=0 blks
> naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
> log      =internal log           bsize=4096   blocks=44713, version=2
>          =                       sectsz=4096  sunit=1 blks, lazy-count=1
> realtime =none                   extsz=4096   blocks=0, rtextents=0
> 
> # cat /sys/block/nvme0n1/queue/scheduler 
> [none] mq-deadline kyber bfq 
> 
> # cat /sys/block/nvme0n1/queue/nr_requests 
> 1023
> 
> # cat /sys/module/nvme/parameters/poll_queues 
> 8
> 
> I'll see if I can figure out what's going on.

Thanks, I'll be a bit busy the next 24h, but let me know how it goes and I'll
dig into this too when I can.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCHSET v2 0/2] io_uring: handle short reads internally
@ 2020-08-14 19:54 Jens Axboe
  2020-08-14 19:54 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2020-08-14 19:54 UTC (permalink / raw)
  To: io-uring; +Cc: david, jmoyer

Hi,

Since we've had a few cases of applications not dealing with this
appopriately, I believe the safest course of action is to ensure that
we don't return short reads when we really don't have to.

The first patch is just a prep patch that retains iov_iter state over
retries, while the second one actually enables just doing retries if
we get a short read back.

This passes all my testing, both liburing regression tests but also
tests that explicitly trigger internal short reads and hence retry
based on current state. No short reads are passed back to the
application.

Changes since v1:

- Fixed an issue with fixed/registered buffers
- Rewrite io_read() code flow to be a lot more readable
- Add comments

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/2] io_uring: internally retry short reads
  2020-08-14 19:54 [PATCHSET v2 " Jens Axboe
@ 2020-08-14 19:54 ` Jens Axboe
  0 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2020-08-14 19:54 UTC (permalink / raw)
  To: io-uring; +Cc: david, jmoyer, Jens Axboe

We've had a few application cases of not handling short reads properly,
and it is understandable as short reads aren't really expected if the
application isn't doing non-blocking IO.

Now that we retain the iov_iter over retries, we can implement internal
retry pretty trivially. This ensures that we don't return a short read,
even for buffered reads on page cache conflicts.

Cleanup the deep nesting and hard to read nature of io_read() as well,
it's much more straight forward now to read and understand. Added a
few comments explaining the logic as well.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 111 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 71 insertions(+), 40 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 409cfa1d6d90..e1aacc0b9bd0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -509,6 +509,7 @@ struct io_async_msghdr {
 struct io_async_rw {
 	struct iovec			fast_iov[UIO_FASTIOV];
 	struct iov_iter			iter;
+	size_t				bytes_done;
 	struct wait_page_queue		wpq;
 };
 
@@ -914,7 +915,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter,
 			       bool needs_lock);
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iov_iter *iter);
+			     struct iov_iter *iter, bool force);
 
 static struct kmem_cache *req_cachep;
 
@@ -2296,7 +2297,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	ret = io_import_iovec(rw, req, &iovec, &iter, false);
 	if (ret < 0)
 		goto end_req;
-	ret = io_setup_async_rw(req, iovec, &iter);
+	ret = io_setup_async_rw(req, iovec, &iter, false);
 	if (!ret)
 		return true;
 	kfree(iovec);
@@ -2586,6 +2587,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+	/* add previously done IO, if any */
+	if (req->io && req->io->rw.bytes_done > 0) {
+		if (ret < 0)
+			ret = req->io->rw.bytes_done;
+		else
+			ret += req->io->rw.bytes_done;
+	}
+
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
@@ -2932,6 +2941,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
 	struct io_async_rw *rw = &req->io->rw;
 
 	memcpy(&rw->iter, iter, sizeof(*iter));
+	rw->bytes_done = 0;
 	if (!iovec) {
 		rw->iter.iov = rw->fast_iov;
 		if (rw->fast_iov != iter->iov)
@@ -2958,9 +2968,9 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 }
 
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iov_iter *iter)
+			     struct iov_iter *iter, bool force)
 {
-	if (!io_op_defs[req->opcode].async_ctx)
+	if (!force && !io_op_defs[req->opcode].async_ctx)
 		return 0;
 	if (!req->io) {
 		if (__io_alloc_async_ctx(req))
@@ -3084,8 +3094,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
-			       struct iovec *fast_iov, struct iov_iter *iter)
+static bool io_rw_should_retry(struct io_kiocb *req)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	int ret;
@@ -3094,8 +3103,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (req->flags & REQ_F_NOWAIT)
 		return false;
 
-	/* already tried, or we're doing O_DIRECT */
-	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+	/* Only for buffered IO */
+	if (kiocb->ki_flags & IOCB_DIRECT)
 		return false;
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
@@ -3104,16 +3113,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
-	/*
-	 * If request type doesn't require req->io to defer in general,
-	 * we need to allocate it here
-	 */
-	if (!req->io) {
-		if (__io_alloc_async_ctx(req))
-			return false;
-		io_req_map_rw(req, iovec, iter);
-	}
-
 	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
 						io_async_buf_func, req);
 	if (!ret) {
@@ -3140,8 +3139,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter __iter, *iter = &__iter;
+	ssize_t io_size, ret, ret2;
 	size_t iov_count;
-	ssize_t io_size, ret, ret2 = 0;
 
 	if (req->io)
 		iter = &req->io->rw.iter;
@@ -3151,6 +3150,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 		return ret;
 	io_size = ret;
 	req->result = io_size;
+	ret = 0;
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
@@ -3165,31 +3165,62 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 	if (unlikely(ret))
 		goto out_free;
 
-	ret2 = io_iter_do_read(req, iter);
+	ret = io_iter_do_read(req, iter);
 
-	/* Catch -EAGAIN return for forced non-blocking submission */
-	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
-		kiocb_done(kiocb, ret2, cs);
-	} else {
+	if (!ret) {
+		goto done;
+	} else if (ret == -EIOCBQUEUED) {
+		ret = 0;
+		goto out_free;
+	} else if (ret == -EAGAIN) {
+		ret2 = io_setup_async_rw(req, iovec, iter, false);
+		if (ret2 < 0)
+			ret = ret2;
+		goto out_free;
+	} else if (ret < 0) {
+		goto out_free;
+	}
+
+	/* read it all, or we did blocking attempt. no retry. */
+	if (!iov_iter_count(iter) || !force_nonblock)
+		goto done;
+
+	io_size -= ret;
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, iter);
-		if (ret)
-			goto out_free;
-		/* it's copied and will be cleaned with ->io */
-		iovec = NULL;
-		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
-			ret2 = io_iter_do_read(req, iter);
-			if (ret2 == -EIOCBQUEUED) {
-				goto out_free;
-			} else if (ret2 != -EAGAIN) {
-				kiocb_done(kiocb, ret2, cs);
-				goto out_free;
-			}
-		}
+	ret2 = io_setup_async_rw(req, iovec, iter, true);
+	if (ret2) {
+		ret = ret2;
+		goto out_free;
+	}
+	/* it's copied and will be cleaned with ->io */
+	iovec = NULL;
+	/* now use our persistent iterator, if we aren't already */
+	iter = &req->io->rw.iter;
+retry:
+	req->io->rw.bytes_done += ret;
+	/* if we can retry, do so with the callbacks armed */
+	if (!io_rw_should_retry(req)) {
 		kiocb->ki_flags &= ~IOCB_WAITQ;
 		return -EAGAIN;
 	}
+
+	/*
+	 * Now retry read with the IOCB_WAITQ parts set in the iocb. If we
+	 * get -EIOCBQUEUED, then we'll get a notification when the desired
+	 * page gets unlocked. We can also get a partial read here, and if we
+	 * do, then just retry at the new offset.
+	 */
+	ret = io_iter_do_read(req, iter);
+	if (ret == -EIOCBQUEUED) {
+		ret = 0;
+		goto out_free;
+	} else if (ret > 0 && ret < io_size) {
+		/* we got some bytes, but not all. retry. */
+		goto retry;
+	}
+done:
+	kiocb_done(kiocb, ret, cs);
+	ret = 0;
 out_free:
 	if (iovec)
 		kfree(iovec);
@@ -3282,7 +3313,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 		kiocb_done(kiocb, ret2, cs);
 	} else {
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, iter);
+		ret = io_setup_async_rw(req, iovec, iter, false);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-08-18 18:10 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe
2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe
2020-08-13 17:56 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe
2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer
2020-08-13 20:33   ` Jens Axboe
2020-08-13 20:37     ` Jens Axboe
2020-08-13 20:41       ` Jens Axboe
2020-08-13 20:49         ` Jeff Moyer
2020-08-13 22:08           ` Jens Axboe
2020-08-13 22:21             ` Jeff Moyer
2020-08-13 22:31               ` Jens Axboe
2020-08-17 20:55                 ` Jeff Moyer
2020-08-17 20:57                   ` Jens Axboe
2020-08-18 17:55                     ` Jeff Moyer
2020-08-18 18:00                       ` Jens Axboe
2020-08-18 18:07                         ` Jeff Moyer
2020-08-18 18:10                           ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2020-08-14 19:54 [PATCHSET v2 " Jens Axboe
2020-08-14 19:54 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox