* [PATCHSET 0/2] io_uring: handle short reads internally @ 2020-08-13 17:56 Jens Axboe 2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe ` (2 more replies) 0 siblings, 3 replies; 17+ messages in thread From: Jens Axboe @ 2020-08-13 17:56 UTC (permalink / raw) To: io-uring; +Cc: david, jmoyer Since we've had a few cases of applications not dealing with this appopriately, I believe the safest course of action is to ensure that we don't return short reads when we really don't have to. The first patch is just a prep patch that retains iov_iter state over retries, while the second one actually enables just doing retries if we get a short read back. -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls 2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe @ 2020-08-13 17:56 ` Jens Axboe 2020-08-13 17:56 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe 2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer 2 siblings, 0 replies; 17+ messages in thread From: Jens Axboe @ 2020-08-13 17:56 UTC (permalink / raw) To: io-uring; +Cc: david, jmoyer, Jens Axboe Instead of maintaining (and setting/remembering) iov_iter size and segment counts, just put the iov_iter in the async part of the IO structure. This is mostly a preparation patch for doing appropriate internal retries for short reads, but it also cleans up the state handling nicely and simplifies it quite a bit. Signed-off-by: Jens Axboe <[email protected]> --- fs/io_uring.c | 118 ++++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 62 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1ec25ee71372..a20fccf91d76 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -508,9 +508,7 @@ struct io_async_msghdr { struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; - struct iovec *iov; - ssize_t nr_segs; - ssize_t size; + struct iov_iter iter; struct wait_page_queue wpq; }; @@ -915,9 +913,8 @@ static void io_file_put_work(struct work_struct *work); static ssize_t io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, struct iov_iter *iter, bool needs_lock); -static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, - struct iovec *iovec, struct iovec *fast_iov, - struct iov_iter *iter); +static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec, + struct iovec *fast_iov, struct iov_iter *iter); static struct kmem_cache *req_cachep; @@ -2299,7 +2296,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) ret = io_import_iovec(rw, req, &iovec, &iter, false); if (ret < 0) goto end_req; - ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, &iter); if (!ret) return true; kfree(iovec); @@ -2830,6 +2827,13 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; + if (req->io) { + struct io_async_rw *iorw = &req->io->rw; + + *iovec = NULL; + return iov_iter_count(&iorw->iter); + } + if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { if (req->flags & REQ_F_BUFFER_SELECT) { buf = io_rw_buffer_select(req, &sqe_len, needs_lock); @@ -2845,14 +2849,6 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, return ret < 0 ? ret : sqe_len; } - if (req->io) { - struct io_async_rw *iorw = &req->io->rw; - - iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size); - *iovec = NULL; - return iorw->size; - } - if (req->flags & REQ_F_BUFFER_SELECT) { ret = io_iov_buffer_select(req, *iovec, needs_lock); if (!ret) { @@ -2930,21 +2926,19 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb, return ret; } -static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size, - struct iovec *iovec, struct iovec *fast_iov, - struct iov_iter *iter) +static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, + struct iovec *fast_iov, struct iov_iter *iter) { struct io_async_rw *rw = &req->io->rw; - rw->nr_segs = iter->nr_segs; - rw->size = io_size; + memcpy(&rw->iter, iter, sizeof(*iter)); if (!iovec) { - rw->iov = rw->fast_iov; - if (rw->iov != fast_iov) - memcpy(rw->iov, fast_iov, + rw->iter.iov = rw->fast_iov; + if (rw->iter.iov != fast_iov) + memcpy((void *) rw->iter.iov, fast_iov, sizeof(struct iovec) * iter->nr_segs); } else { - rw->iov = iovec; + rw->iter.iov = iovec; req->flags |= REQ_F_NEED_CLEANUP; } } @@ -2963,9 +2957,8 @@ static int io_alloc_async_ctx(struct io_kiocb *req) return __io_alloc_async_ctx(req); } -static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, - struct iovec *iovec, struct iovec *fast_iov, - struct iov_iter *iter) +static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec, + struct iovec *fast_iov, struct iov_iter *iter) { if (!io_op_defs[req->opcode].async_ctx) return 0; @@ -2973,7 +2966,7 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, if (__io_alloc_async_ctx(req)) return -ENOMEM; - io_req_map_rw(req, io_size, iovec, fast_iov, iter); + io_req_map_rw(req, iovec, fast_iov, iter); } return 0; } @@ -2981,18 +2974,19 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, static inline int io_rw_prep_async(struct io_kiocb *req, int rw, bool force_nonblock) { - struct io_async_ctx *io = req->io; - struct iov_iter iter; + struct io_async_rw *iorw = &req->io->rw; ssize_t ret; - io->rw.iov = io->rw.fast_iov; + iorw->iter.iov = iorw->fast_iov; + /* reset ->io around the iovec import, we don't want to use it */ req->io = NULL; - ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock); - req->io = io; + ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, + &iorw->iter, !force_nonblock); + req->io = container_of(iorw, struct io_async_ctx, rw); if (unlikely(ret < 0)) return ret; - io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter); + io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter); return 0; } @@ -3090,7 +3084,8 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, * succeed, or in rare cases where it fails, we then fall back to using the * async worker threads for a blocking retry. */ -static bool io_rw_should_retry(struct io_kiocb *req) +static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, + struct iovec *fast_iov, struct iov_iter *iter) { struct kiocb *kiocb = &req->rw.kiocb; int ret; @@ -3113,8 +3108,11 @@ static bool io_rw_should_retry(struct io_kiocb *req) * If request type doesn't require req->io to defer in general, * we need to allocate it here */ - if (!req->io && __io_alloc_async_ctx(req)) - return false; + if (!req->io) { + if (__io_alloc_async_ctx(req)) + return false; + io_req_map_rw(req, iovec, fast_iov, iter); + } ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq, io_async_buf_func, req); @@ -3141,12 +3139,14 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; - struct iov_iter iter; + struct iov_iter __iter, *iter = &__iter; size_t iov_count; ssize_t io_size, ret, ret2; - unsigned long nr_segs; - ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock); + if (req->io) + iter = &req->io->rw.iter; + + ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; io_size = ret; @@ -3160,30 +3160,26 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (force_nonblock && !io_file_supports_async(req->file, READ)) goto copy_iov; - iov_count = iov_iter_count(&iter); - nr_segs = iter.nr_segs; + iov_count = iov_iter_count(iter); ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count); if (unlikely(ret)) goto out_free; - ret2 = io_iter_do_read(req, &iter); + ret2 = io_iter_do_read(req, iter); /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) { kiocb_done(kiocb, ret2, cs); } else { - iter.count = iov_count; - iter.nr_segs = nr_segs; copy_iov: - ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, - &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, iter); if (ret) goto out_free; /* it's copied and will be cleaned with ->io */ iovec = NULL; /* if we can retry, do so with the callbacks armed */ - if (io_rw_should_retry(req)) { - ret2 = io_iter_do_read(req, &iter); + if (io_rw_should_retry(req, iovec, inline_vecs, iter)) { + ret2 = io_iter_do_read(req, iter); if (ret2 == -EIOCBQUEUED) { goto out_free; } else if (ret2 != -EAGAIN) { @@ -3223,12 +3219,14 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; - struct iov_iter iter; + struct iov_iter __iter, *iter = &__iter; size_t iov_count; ssize_t ret, ret2, io_size; - unsigned long nr_segs; - ret = io_import_iovec(WRITE, req, &iovec, &iter, !force_nonblock); + if (req->io) + iter = &req->io->rw.iter; + + ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; io_size = ret; @@ -3247,8 +3245,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, (req->flags & REQ_F_ISREG)) goto copy_iov; - iov_count = iov_iter_count(&iter); - nr_segs = iter.nr_segs; + iov_count = iov_iter_count(iter); ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count); if (unlikely(ret)) goto out_free; @@ -3269,9 +3266,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, kiocb->ki_flags |= IOCB_WRITE; if (req->file->f_op->write_iter) - ret2 = call_write_iter(req->file, kiocb, &iter); + ret2 = call_write_iter(req->file, kiocb, iter); else if (req->file->f_op->write) - ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter); + ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter); else ret2 = -EINVAL; @@ -3284,11 +3281,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, if (!force_nonblock || ret2 != -EAGAIN) { kiocb_done(kiocb, ret2, cs); } else { - iter.count = iov_count; - iter.nr_segs = nr_segs; copy_iov: - ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, - &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, iter); if (ret) goto out_free; /* it's copied and will be cleaned with ->io */ @@ -5583,8 +5577,8 @@ static void __io_clean_op(struct io_kiocb *req) case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: - if (io->rw.iov != io->rw.fast_iov) - kfree(io->rw.iov); + if (io->rw.iter.iov != io->rw.fast_iov) + kfree(io->rw.iter.iov); break; case IORING_OP_RECVMSG: case IORING_OP_SENDMSG: -- 2.28.0 ^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 2/2] io_uring: internally retry short reads 2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe 2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe @ 2020-08-13 17:56 ` Jens Axboe 2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer 2 siblings, 0 replies; 17+ messages in thread From: Jens Axboe @ 2020-08-13 17:56 UTC (permalink / raw) To: io-uring; +Cc: david, jmoyer, Jens Axboe We've had a few application cases of not handling short reads properly, and it is understandable as short reads aren't really expected if the application isn't doing non-blocking IO. Now that we retain the iov_iter over retries, we can implement internal retry pretty trivially. This ensures that we don't return a short read, even for buffered reads on page cache conflicts. Signed-off-by: Jens Axboe <[email protected]> --- fs/io_uring.c | 72 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a20fccf91d76..c6a9f1b11e85 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -509,6 +509,7 @@ struct io_async_msghdr { struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; struct iov_iter iter; + size_t bytes_done; struct wait_page_queue wpq; }; @@ -914,7 +915,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, struct iov_iter *iter, bool needs_lock); static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec, - struct iovec *fast_iov, struct iov_iter *iter); + struct iovec *fast_iov, struct iov_iter *iter, + bool force); static struct kmem_cache *req_cachep; @@ -2296,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) ret = io_import_iovec(rw, req, &iovec, &iter, false); if (ret < 0) goto end_req; - ret = io_setup_async_rw(req, iovec, inline_vecs, &iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false); if (!ret) return true; kfree(iovec); @@ -2586,6 +2588,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); + /* add previously done IO, if any */ + if (req->io && req->io->rw.bytes_done > 0) { + if (ret < 0) + ret = req->io->rw.bytes_done; + else + ret += req->io->rw.bytes_done; + } + if (req->flags & REQ_F_CUR_POS) req->file->f_pos = kiocb->ki_pos; if (ret >= 0 && kiocb->ki_complete == io_complete_rw) @@ -2932,6 +2942,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, struct io_async_rw *rw = &req->io->rw; memcpy(&rw->iter, iter, sizeof(*iter)); + rw->bytes_done = 0; if (!iovec) { rw->iter.iov = rw->fast_iov; if (rw->iter.iov != fast_iov) @@ -2958,9 +2969,10 @@ static int io_alloc_async_ctx(struct io_kiocb *req) } static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec, - struct iovec *fast_iov, struct iov_iter *iter) + struct iovec *fast_iov, struct iov_iter *iter, + bool force) { - if (!io_op_defs[req->opcode].async_ctx) + if (!force && !io_op_defs[req->opcode].async_ctx) return 0; if (!req->io) { if (__io_alloc_async_ctx(req)) @@ -3084,8 +3096,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, * succeed, or in rare cases where it fails, we then fall back to using the * async worker threads for a blocking retry. */ -static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, - struct iovec *fast_iov, struct iov_iter *iter) +static bool io_rw_should_retry(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; int ret; @@ -3094,8 +3105,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, if (req->flags & REQ_F_NOWAIT) return false; - /* already tried, or we're doing O_DIRECT */ - if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ)) + /* Only for buffered IO */ + if (kiocb->ki_flags & IOCB_DIRECT) return false; /* * just use poll if we can, and don't attempt if the fs doesn't @@ -3104,16 +3115,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec, if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) return false; - /* - * If request type doesn't require req->io to defer in general, - * we need to allocate it here - */ - if (!req->io) { - if (__io_alloc_async_ctx(req)) - return false; - io_req_map_rw(req, iovec, fast_iov, iter); - } - ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq, io_async_buf_func, req); if (!ret) { @@ -3167,29 +3168,46 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret2 = io_iter_do_read(req, iter); - /* Catch -EAGAIN return for forced non-blocking submission */ - if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) { - kiocb_done(kiocb, ret2, cs); - } else { + if (!iov_iter_count(iter)) + goto done; + + if (ret2 >= 0) { + /* successful read, but partial */ + if (req->flags & REQ_F_NOWAIT) + goto done; + io_size -= ret2; copy_iov: - ret = io_setup_async_rw(req, iovec, inline_vecs, iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true); if (ret) goto out_free; /* it's copied and will be cleaned with ->io */ iovec = NULL; + /* now use our persistent iterator, if we aren't already */ + iter = &req->io->rw.iter; + + if (ret2 > 0) { +retry: + req->io->rw.bytes_done += ret2; + } /* if we can retry, do so with the callbacks armed */ - if (io_rw_should_retry(req, iovec, inline_vecs, iter)) { + if (io_rw_should_retry(req)) { ret2 = io_iter_do_read(req, iter); if (ret2 == -EIOCBQUEUED) { goto out_free; + } else if (ret2 == io_size) { + goto done; } else if (ret2 != -EAGAIN) { - kiocb_done(kiocb, ret2, cs); - goto out_free; + /* we got some bytes, but not all. retry. */ + if (ret2 > 0) + goto retry; + goto done; } } kiocb->ki_flags &= ~IOCB_WAITQ; return -EAGAIN; } +done: + kiocb_done(kiocb, ret2, cs); out_free: if (iovec) kfree(iovec); @@ -3282,7 +3300,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, kiocb_done(kiocb, ret2, cs); } else { copy_iov: - ret = io_setup_async_rw(req, iovec, inline_vecs, iter); + ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (ret) goto out_free; /* it's copied and will be cleaned with ->io */ -- 2.28.0 ^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe 2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe 2020-08-13 17:56 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe @ 2020-08-13 20:25 ` Jeff Moyer 2020-08-13 20:33 ` Jens Axboe 2 siblings, 1 reply; 17+ messages in thread From: Jeff Moyer @ 2020-08-13 20:25 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, david Jens Axboe <[email protected]> writes: > Since we've had a few cases of applications not dealing with this > appopriately, I believe the safest course of action is to ensure that > we don't return short reads when we really don't have to. > > The first patch is just a prep patch that retains iov_iter state over > retries, while the second one actually enables just doing retries if > we get a short read back. Have you run this through the liburing regression tests? Tests <eeed8b54e0df-test> <timeout-overflow> <read-write> failed I'll take a look at the failures, but wanted to bring it to your attention sooner rather than later. I was using your io_uring-5.9 branch. -Jeff ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer @ 2020-08-13 20:33 ` Jens Axboe 2020-08-13 20:37 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-13 20:33 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/13/20 2:25 PM, Jeff Moyer wrote: > Jens Axboe <[email protected]> writes: > >> Since we've had a few cases of applications not dealing with this >> appopriately, I believe the safest course of action is to ensure that >> we don't return short reads when we really don't have to. >> >> The first patch is just a prep patch that retains iov_iter state over >> retries, while the second one actually enables just doing retries if >> we get a short read back. > > Have you run this through the liburing regression tests? > > Tests <eeed8b54e0df-test> <timeout-overflow> <read-write> failed > > I'll take a look at the failures, but wanted to bring it to your > attention sooner rather than later. I was using your io_uring-5.9 > branch. The eed8b54e0df-test failure is known with this one, pretty sure it was always racy, but I'm looking into it. The timeout-overflow test needs fixing, it's just an ordering thing with the batched completions done through submit. Not new with these patches. The read-write one I'm interested in, what did you run it on? And what was the failure? -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 20:33 ` Jens Axboe @ 2020-08-13 20:37 ` Jens Axboe 2020-08-13 20:41 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-13 20:37 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/13/20 2:33 PM, Jens Axboe wrote: > On 8/13/20 2:25 PM, Jeff Moyer wrote: >> Jens Axboe <[email protected]> writes: >> >>> Since we've had a few cases of applications not dealing with this >>> appopriately, I believe the safest course of action is to ensure that >>> we don't return short reads when we really don't have to. >>> >>> The first patch is just a prep patch that retains iov_iter state over >>> retries, while the second one actually enables just doing retries if >>> we get a short read back. >> >> Have you run this through the liburing regression tests? >> >> Tests <eeed8b54e0df-test> <timeout-overflow> <read-write> failed >> >> I'll take a look at the failures, but wanted to bring it to your >> attention sooner rather than later. I was using your io_uring-5.9 >> branch. > > The eed8b54e0df-test failure is known with this one, pretty sure it > was always racy, but I'm looking into it. > > The timeout-overflow test needs fixing, it's just an ordering thing > with the batched completions done through submit. Not new with these > patches. > > The read-write one I'm interested in, what did you run it on? And > what was the failure? BTW, what git sha did you run? -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 20:37 ` Jens Axboe @ 2020-08-13 20:41 ` Jens Axboe 2020-08-13 20:49 ` Jeff Moyer 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-13 20:41 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/13/20 2:37 PM, Jens Axboe wrote: > On 8/13/20 2:33 PM, Jens Axboe wrote: >> On 8/13/20 2:25 PM, Jeff Moyer wrote: >>> Jens Axboe <[email protected]> writes: >>> >>>> Since we've had a few cases of applications not dealing with this >>>> appopriately, I believe the safest course of action is to ensure that >>>> we don't return short reads when we really don't have to. >>>> >>>> The first patch is just a prep patch that retains iov_iter state over >>>> retries, while the second one actually enables just doing retries if >>>> we get a short read back. >>> >>> Have you run this through the liburing regression tests? >>> >>> Tests <eeed8b54e0df-test> <timeout-overflow> <read-write> failed >>> >>> I'll take a look at the failures, but wanted to bring it to your >>> attention sooner rather than later. I was using your io_uring-5.9 >>> branch. >> >> The eed8b54e0df-test failure is known with this one, pretty sure it >> was always racy, but I'm looking into it. >> >> The timeout-overflow test needs fixing, it's just an ordering thing >> with the batched completions done through submit. Not new with these >> patches. >> >> The read-write one I'm interested in, what did you run it on? And >> what was the failure? > > BTW, what git sha did you run? I do see a failure with dm on that, I'll take a look. -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 20:41 ` Jens Axboe @ 2020-08-13 20:49 ` Jeff Moyer 2020-08-13 22:08 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jeff Moyer @ 2020-08-13 20:49 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, david Jens Axboe <[email protected]> writes: > On 8/13/20 2:37 PM, Jens Axboe wrote: >> On 8/13/20 2:33 PM, Jens Axboe wrote: >>> On 8/13/20 2:25 PM, Jeff Moyer wrote: >>>> Jens Axboe <[email protected]> writes: >>>> >>>>> Since we've had a few cases of applications not dealing with this >>>>> appopriately, I believe the safest course of action is to ensure that >>>>> we don't return short reads when we really don't have to. >>>>> >>>>> The first patch is just a prep patch that retains iov_iter state over >>>>> retries, while the second one actually enables just doing retries if >>>>> we get a short read back. >>>> >>>> Have you run this through the liburing regression tests? >>>> >>>> Tests <eeed8b54e0df-test> <timeout-overflow> <read-write> failed >>>> >>>> I'll take a look at the failures, but wanted to bring it to your >>>> attention sooner rather than later. I was using your io_uring-5.9 >>>> branch. >>> >>> The eed8b54e0df-test failure is known with this one, pretty sure it >>> was always racy, but I'm looking into it. >>> >>> The timeout-overflow test needs fixing, it's just an ordering thing >>> with the batched completions done through submit. Not new with these >>> patches. OK. >>> The read-write one I'm interested in, what did you run it on? And >>> what was the failure? >> >> BTW, what git sha did you run? > > I do see a failure with dm on that, I'll take a look. I ran it on a file system atop nvme with 8 poll queues. liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 The error I saw was: Running test read-write: Non-vectored IO not supported, skipping cqe res -22, wanted 2048 test_buf_select_short vec failed Test read-write failed with ret 1 But I don't think it was due to these two commits. Thanks, Jeff ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 20:49 ` Jeff Moyer @ 2020-08-13 22:08 ` Jens Axboe 2020-08-13 22:21 ` Jeff Moyer 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-13 22:08 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/13/20 2:49 PM, Jeff Moyer wrote: > Jens Axboe <[email protected]> writes: > >> On 8/13/20 2:37 PM, Jens Axboe wrote: >>> On 8/13/20 2:33 PM, Jens Axboe wrote: >>>> On 8/13/20 2:25 PM, Jeff Moyer wrote: >>>>> Jens Axboe <[email protected]> writes: >>>>> >>>>>> Since we've had a few cases of applications not dealing with this >>>>>> appopriately, I believe the safest course of action is to ensure that >>>>>> we don't return short reads when we really don't have to. >>>>>> >>>>>> The first patch is just a prep patch that retains iov_iter state over >>>>>> retries, while the second one actually enables just doing retries if >>>>>> we get a short read back. >>>>> >>>>> Have you run this through the liburing regression tests? >>>>> >>>>> Tests <eeed8b54e0df-test> <timeout-overflow> <read-write> failed >>>>> >>>>> I'll take a look at the failures, but wanted to bring it to your >>>>> attention sooner rather than later. I was using your io_uring-5.9 >>>>> branch. >>>> >>>> The eed8b54e0df-test failure is known with this one, pretty sure it >>>> was always racy, but I'm looking into it. >>>> >>>> The timeout-overflow test needs fixing, it's just an ordering thing >>>> with the batched completions done through submit. Not new with these >>>> patches. > > OK. > >>>> The read-write one I'm interested in, what did you run it on? And >>>> what was the failure? >>> >>> BTW, what git sha did you run? >> >> I do see a failure with dm on that, I'll take a look. > > I ran it on a file system atop nvme with 8 poll queues. > > liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 > linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 Fixed it, and actually enabled a further cleanup. > The error I saw was: > > Running test read-write: > Non-vectored IO not supported, skipping > cqe res -22, wanted 2048 > test_buf_select_short vec failed > Test read-write failed with ret 1 > > But I don't think it was due to these two commits. Yeah don't think so either. -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 22:08 ` Jens Axboe @ 2020-08-13 22:21 ` Jeff Moyer 2020-08-13 22:31 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jeff Moyer @ 2020-08-13 22:21 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, david Jens Axboe <[email protected]> writes: >>>> BTW, what git sha did you run? >>> >>> I do see a failure with dm on that, I'll take a look. >> >> I ran it on a file system atop nvme with 8 poll queues. >> >> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 >> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 > > Fixed it, and actually enabled a further cleanup. Great, thanks! Did you push that out somewhere? -Jeff ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 22:21 ` Jeff Moyer @ 2020-08-13 22:31 ` Jens Axboe 2020-08-17 20:55 ` Jeff Moyer 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-13 22:31 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/13/20 4:21 PM, Jeff Moyer wrote: > Jens Axboe <[email protected]> writes: > >>>>> BTW, what git sha did you run? >>>> >>>> I do see a failure with dm on that, I'll take a look. >>> >>> I ran it on a file system atop nvme with 8 poll queues. >>> >>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 >>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 >> >> Fixed it, and actually enabled a further cleanup. > > Great, thanks! Did you push that out somewhere? It's pushed to io_uring-5.9, current sha is: ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085 FWIW, the issue was just for fixed buffers. It's running through the usual testing now. -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-13 22:31 ` Jens Axboe @ 2020-08-17 20:55 ` Jeff Moyer 2020-08-17 20:57 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jeff Moyer @ 2020-08-17 20:55 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, david Jens Axboe <[email protected]> writes: > On 8/13/20 4:21 PM, Jeff Moyer wrote: >> Jens Axboe <[email protected]> writes: >> >>>>>> BTW, what git sha did you run? >>>>> >>>>> I do see a failure with dm on that, I'll take a look. >>>> >>>> I ran it on a file system atop nvme with 8 poll queues. >>>> >>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 >>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 >>> >>> Fixed it, and actually enabled a further cleanup. >> >> Great, thanks! Did you push that out somewhere? > > It's pushed to io_uring-5.9, current sha is: > > ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085 > > FWIW, the issue was just for fixed buffers. It's running through the > usual testing now. OK. Since it was an unrelated problem, I was expecting a separate commit for it. What was the exact issue? Is it something that needs backporting to -stable? -Jeff ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-17 20:55 ` Jeff Moyer @ 2020-08-17 20:57 ` Jens Axboe 2020-08-18 17:55 ` Jeff Moyer 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-17 20:57 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/17/20 1:55 PM, Jeff Moyer wrote: > Jens Axboe <[email protected]> writes: > >> On 8/13/20 4:21 PM, Jeff Moyer wrote: >>> Jens Axboe <[email protected]> writes: >>> >>>>>>> BTW, what git sha did you run? >>>>>> >>>>>> I do see a failure with dm on that, I'll take a look. >>>>> >>>>> I ran it on a file system atop nvme with 8 poll queues. >>>>> >>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 >>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 >>>> >>>> Fixed it, and actually enabled a further cleanup. >>> >>> Great, thanks! Did you push that out somewhere? >> >> It's pushed to io_uring-5.9, current sha is: >> >> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085 >> >> FWIW, the issue was just for fixed buffers. It's running through the >> usual testing now. > > OK. Since it was an unrelated problem, I was expecting a separate > commit for it. What was the exact issue? Is it something that needs > backporting to -stable? No, it was a bug in the posted patch, so I just folded in the fix. -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-17 20:57 ` Jens Axboe @ 2020-08-18 17:55 ` Jeff Moyer 2020-08-18 18:00 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jeff Moyer @ 2020-08-18 17:55 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, david Jens Axboe <[email protected]> writes: > On 8/17/20 1:55 PM, Jeff Moyer wrote: >> Jens Axboe <[email protected]> writes: >> >>> On 8/13/20 4:21 PM, Jeff Moyer wrote: >>>> Jens Axboe <[email protected]> writes: >>>> >>>>>>>> BTW, what git sha did you run? >>>>>>> >>>>>>> I do see a failure with dm on that, I'll take a look. >>>>>> >>>>>> I ran it on a file system atop nvme with 8 poll queues. >>>>>> >>>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 >>>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 >>>>> >>>>> Fixed it, and actually enabled a further cleanup. >>>> >>>> Great, thanks! Did you push that out somewhere? >>> >>> It's pushed to io_uring-5.9, current sha is: >>> >>> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085 >>> >>> FWIW, the issue was just for fixed buffers. It's running through the >>> usual testing now. >> >> OK. Since it was an unrelated problem, I was expecting a separate >> commit for it. What was the exact issue? Is it something that needs >> backporting to -stable? > > No, it was a bug in the posted patch, so I just folded in the fix. We must be hitting different problems, then. I just tested your 5.7-stable branch (running the test suite from an xfs file system on an nvme partition with polling enabled), and the read-write test fails: Running test read-write: Non-vectored IO not supported, skipping cqe res -22, wanted 2048 test_buf_select_short vec failed Test read-write failed with ret 1 That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad -Jeff ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-18 17:55 ` Jeff Moyer @ 2020-08-18 18:00 ` Jens Axboe 2020-08-18 18:07 ` Jeff Moyer 0 siblings, 1 reply; 17+ messages in thread From: Jens Axboe @ 2020-08-18 18:00 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/18/20 10:55 AM, Jeff Moyer wrote: > Jens Axboe <[email protected]> writes: > >> On 8/17/20 1:55 PM, Jeff Moyer wrote: >>> Jens Axboe <[email protected]> writes: >>> >>>> On 8/13/20 4:21 PM, Jeff Moyer wrote: >>>>> Jens Axboe <[email protected]> writes: >>>>> >>>>>>>>> BTW, what git sha did you run? >>>>>>>> >>>>>>>> I do see a failure with dm on that, I'll take a look. >>>>>>> >>>>>>> I ran it on a file system atop nvme with 8 poll queues. >>>>>>> >>>>>>> liburing head: 9e1d69e078ee51f253a829ff421b17cfc996d158 >>>>>>> linux-block head: ff1353802d86a9d8e40ef1377efb12a1d3000a20 >>>>>> >>>>>> Fixed it, and actually enabled a further cleanup. >>>>> >>>>> Great, thanks! Did you push that out somewhere? >>>> >>>> It's pushed to io_uring-5.9, current sha is: >>>> >>>> ee6ac2d3d5cc50d58ca55a5967671c9c1f38b085 >>>> >>>> FWIW, the issue was just for fixed buffers. It's running through the >>>> usual testing now. >>> >>> OK. Since it was an unrelated problem, I was expecting a separate >>> commit for it. What was the exact issue? Is it something that needs >>> backporting to -stable? >> >> No, it was a bug in the posted patch, so I just folded in the fix. > > We must be hitting different problems, then. I just tested your > 5.7-stable branch (running the test suite from an xfs file system on an > nvme partition with polling enabled), and the read-write test fails: > > Running test read-write: > Non-vectored IO not supported, skipping > cqe res -22, wanted 2048 > test_buf_select_short vec failed > Test read-write failed with ret 1 > > That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad Not sure what this is, haven't seen that here and my regular liburing runs include both xfs-on-nvme(with poll queues) as one of the test points. Seems to me like there's two oddities in the above: 1) Saying that Non-vectored isn't supported, that is not true on 5.7. This is due to an -EINVAL return. 2) The test_buf_select_short_vec failure I'll see if I can reproduce this. Anything special otherwise enabled? Scheduler on the nvme device? nr_requests? XFS options? -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-18 18:00 ` Jens Axboe @ 2020-08-18 18:07 ` Jeff Moyer 2020-08-18 18:10 ` Jens Axboe 0 siblings, 1 reply; 17+ messages in thread From: Jeff Moyer @ 2020-08-18 18:07 UTC (permalink / raw) To: Jens Axboe; +Cc: io-uring, david Jens Axboe <[email protected]> writes: >> We must be hitting different problems, then. I just tested your >> 5.7-stable branch (running the test suite from an xfs file system on an >> nvme partition with polling enabled), and the read-write test fails: >> >> Running test read-write: >> Non-vectored IO not supported, skipping >> cqe res -22, wanted 2048 >> test_buf_select_short vec failed >> Test read-write failed with ret 1 >> >> That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad > > Not sure what this is, haven't seen that here and my regular liburing > runs include both xfs-on-nvme(with poll queues) as one of the test > points. Seems to me like there's two oddities in the above: > > 1) Saying that Non-vectored isn't supported, that is not true on 5.7. > This is due to an -EINVAL return. > 2) The test_buf_select_short_vec failure > > I'll see if I can reproduce this. Anything special otherwise enabled? > Scheduler on the nvme device? nr_requests? XFS options? No changes from defaults. /dev/nvme0n1p1 on /mnt/test type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,noquota) # xfs_info /dev/nvme0n1p1 meta-data=/dev/nvme0n1p1 isize=512 agcount=4, agsize=22893222 blks = sectsz=4096 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=0 = reflink=1 data = bsize=4096 blocks=91572885, imaxpct=25 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=44713, version=2 = sectsz=4096 sunit=1 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 # cat /sys/block/nvme0n1/queue/scheduler [none] mq-deadline kyber bfq # cat /sys/block/nvme0n1/queue/nr_requests 1023 # cat /sys/module/nvme/parameters/poll_queues 8 I'll see if I can figure out what's going on. -Jeff ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHSET 0/2] io_uring: handle short reads internally 2020-08-18 18:07 ` Jeff Moyer @ 2020-08-18 18:10 ` Jens Axboe 0 siblings, 0 replies; 17+ messages in thread From: Jens Axboe @ 2020-08-18 18:10 UTC (permalink / raw) To: Jeff Moyer; +Cc: io-uring, david On 8/18/20 11:07 AM, Jeff Moyer wrote: > Jens Axboe <[email protected]> writes: > >>> We must be hitting different problems, then. I just tested your >>> 5.7-stable branch (running the test suite from an xfs file system on an >>> nvme partition with polling enabled), and the read-write test fails: >>> >>> Running test read-write: >>> Non-vectored IO not supported, skipping >>> cqe res -22, wanted 2048 >>> test_buf_select_short vec failed >>> Test read-write failed with ret 1 >>> >>> That's with this head: a451911d530075352fbc7ef9bb2df68145a747ad >> >> Not sure what this is, haven't seen that here and my regular liburing >> runs include both xfs-on-nvme(with poll queues) as one of the test >> points. Seems to me like there's two oddities in the above: >> >> 1) Saying that Non-vectored isn't supported, that is not true on 5.7. >> This is due to an -EINVAL return. >> 2) The test_buf_select_short_vec failure >> >> I'll see if I can reproduce this. Anything special otherwise enabled? >> Scheduler on the nvme device? nr_requests? XFS options? > > No changes from defaults. > > /dev/nvme0n1p1 on /mnt/test type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,noquota) > > # xfs_info /dev/nvme0n1p1 > meta-data=/dev/nvme0n1p1 isize=512 agcount=4, agsize=22893222 blks > = sectsz=4096 attr=2, projid32bit=1 > = crc=1 finobt=1, sparse=1, rmapbt=0 > = reflink=1 > data = bsize=4096 blocks=91572885, imaxpct=25 > = sunit=0 swidth=0 blks > naming =version 2 bsize=4096 ascii-ci=0, ftype=1 > log =internal log bsize=4096 blocks=44713, version=2 > = sectsz=4096 sunit=1 blks, lazy-count=1 > realtime =none extsz=4096 blocks=0, rtextents=0 > > # cat /sys/block/nvme0n1/queue/scheduler > [none] mq-deadline kyber bfq > > # cat /sys/block/nvme0n1/queue/nr_requests > 1023 > > # cat /sys/module/nvme/parameters/poll_queues > 8 > > I'll see if I can figure out what's going on. Thanks, I'll be a bit busy the next 24h, but let me know how it goes and I'll dig into this too when I can. -- Jens Axboe ^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2020-08-18 18:10 UTC | newest] Thread overview: 17+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2020-08-13 17:56 [PATCHSET 0/2] io_uring: handle short reads internally Jens Axboe 2020-08-13 17:56 ` [PATCH 1/2] io_uring: retain iov_iter state over io_read/io_write calls Jens Axboe 2020-08-13 17:56 ` [PATCH 2/2] io_uring: internally retry short reads Jens Axboe 2020-08-13 20:25 ` [PATCHSET 0/2] io_uring: handle short reads internally Jeff Moyer 2020-08-13 20:33 ` Jens Axboe 2020-08-13 20:37 ` Jens Axboe 2020-08-13 20:41 ` Jens Axboe 2020-08-13 20:49 ` Jeff Moyer 2020-08-13 22:08 ` Jens Axboe 2020-08-13 22:21 ` Jeff Moyer 2020-08-13 22:31 ` Jens Axboe 2020-08-17 20:55 ` Jeff Moyer 2020-08-17 20:57 ` Jens Axboe 2020-08-18 17:55 ` Jeff Moyer 2020-08-18 18:00 ` Jens Axboe 2020-08-18 18:07 ` Jeff Moyer 2020-08-18 18:10 ` Jens Axboe
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox