public inbox for [email protected]
* [PATCHSET] io_uring: never block for block based IO submit
@ 2020-06-04 17:48 Jens Axboe
  2020-06-04 17:48 ` [PATCH 1/4] block: provide plug based way of signaling forced no-wait semantics Jens Axboe
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-04 17:48 UTC (permalink / raw)
  To: io-uring

We still have a case where resource starvation can cause us to block.
I've been running with a debug patch to detect cases where an io_uring
task can go uninterruptibly to sleep, and this is the main one.

This patchset provides a way for io_uring to have a holding area for
requests that should get retried, and a way to signal to the block stack
that we should be attempting to allocate requests with REQ_NOWAIT. When
we finish the block plug, we re-issue any requests that failed to get
allocated.
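
For reference, a rough sketch of the flow this enables, stitched together
from the patches below (illustrative only; for_each_sqe() and issue_sqe()
are placeholders, the other names are the ones used in the series):

	struct blk_plug plug;

	blk_start_plug(&plug);
	plug.nowait = true;			/* patch 1: bios get REQ_NOWAIT */
	INIT_LIST_HEAD(&plug.nowait_list);

	for_each_sqe(sqe)
		issue_sqe(sqe);			/* a request whose allocation would
						 * have blocked completes with -EAGAIN
						 * and is parked on plug.nowait_list
						 * (patch 4) */

	blk_finish_plug(&plug);
	if (!list_empty(&plug.nowait_list))
		io_resubmit_rw(&plug.nowait_list);	/* patch 4: retry async */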

-- 
Jens Axboe




* [PATCH 1/4] block: provide plug based way of signaling forced no-wait semantics
  2020-06-04 17:48 [PATCHSET] io_uring: never block for block based IO submit Jens Axboe
@ 2020-06-04 17:48 ` Jens Axboe
  2020-06-04 17:48 ` [PATCH 2/4] io_uring: always plug for any number of IOs Jens Axboe
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-04 17:48 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Provide a way for the caller to specify that IO should be marked
with REQ_NOWAIT to avoid blocking on allocation, as well as a list
head for caller use.

Signed-off-by: Jens Axboe <[email protected]>
---
 block/blk-core.c       | 6 ++++++
 include/linux/blkdev.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 03252af8c82c..62a4904db921 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -958,6 +958,7 @@ generic_make_request_checks(struct bio *bio)
 	struct request_queue *q;
 	int nr_sectors = bio_sectors(bio);
 	blk_status_t status = BLK_STS_IOERR;
+	struct blk_plug *plug;
 	char b[BDEVNAME_SIZE];
 
 	might_sleep();
@@ -971,6 +972,10 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	plug = blk_mq_plug(q, bio);
+	if (plug && plug->nowait)
+		bio->bi_opf |= REQ_NOWAIT;
+
 	/*
 	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
 	 * if queue is not a request based queue.
@@ -1800,6 +1805,7 @@ void blk_start_plug(struct blk_plug *plug)
 	INIT_LIST_HEAD(&plug->cb_list);
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
+	plug->nowait = false;
 
 	/*
 	 * Store ordering should not be needed here, since a potential
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fd900998b4e..27887bf36d50 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1187,8 +1187,10 @@ extern void blk_set_queue_dying(struct request_queue *);
 struct blk_plug {
 	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
+	struct list_head nowait_list;	/* caller use */
 	unsigned short rq_count;
 	bool multiple_queues;
+	bool nowait;
 };
 #define BLK_MAX_REQUEST_COUNT 16
 #define BLK_PLUG_FLUSH_SIZE (128 * 1024)
-- 
2.27.0
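
A short note on the contract these two fields set up (a sketch, not code
from the patch): plug->nowait makes generic_make_request_checks() OR
REQ_NOWAIT into every bio submitted under the plug, while plug->nowait_list
is never touched by the block core at all - the caller must initialize it
and is the only one who links requests onto it:

	blk_start_plug(&plug);
	plug.nowait = true;			/* block core reads this */
	INIT_LIST_HEAD(&plug.nowait_list);	/* block core ignores this;
						 * caller-owned storage */

	/* ... submit IO ... */

	blk_finish_plug(&plug);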



* [PATCH 2/4] io_uring: always plug for any number of IOs
  2020-06-04 17:48 [PATCHSET] io_uring: never block for block based IO submit Jens Axboe
  2020-06-04 17:48 ` [PATCH 1/4] block: provide plug based way of signaling forced no-wait semantics Jens Axboe
@ 2020-06-04 17:48 ` Jens Axboe
  2020-06-04 17:48 ` [PATCH 3/4] io_uring: catch -EIO from buffered issue request failure Jens Axboe
  2020-06-04 17:48 ` [PATCH 4/4] io_uring: re-issue plug based block requests that failed Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-04 17:48 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Currently we only plug if we're doing more than two requests. We're going
to be relying on always having the plug there to pass down information,
so plug unconditionally.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 70f0f2f940fb..b468fe2e8792 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -669,7 +669,6 @@ struct io_kiocb {
 	};
 };
 
-#define IO_PLUG_THRESHOLD		2
 #define IO_IOPOLL_BATCH			8
 
 struct io_submit_state {
@@ -5910,7 +5909,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct file *ring_file, int ring_fd)
 {
-	struct io_submit_state state, *statep = NULL;
+	struct io_submit_state state;
 	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 
@@ -5927,10 +5926,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	if (nr > IO_PLUG_THRESHOLD) {
-		io_submit_state_start(&state, nr);
-		statep = &state;
-	}
+	io_submit_state_start(&state, nr);
 
 	ctx->ring_fd = ring_fd;
 	ctx->ring_file = ring_file;
@@ -5945,14 +5941,14 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			io_consume_sqe(ctx);
 			break;
 		}
-		req = io_alloc_req(ctx, statep);
+		req = io_alloc_req(ctx, &state);
 		if (unlikely(!req)) {
 			if (!submitted)
 				submitted = -EAGAIN;
 			break;
 		}
 
-		err = io_init_req(ctx, req, sqe, statep);
+		err = io_init_req(ctx, req, sqe, &state);
 		io_consume_sqe(ctx);
 		/* will complete beyond this point, count as submitted */
 		submitted++;
@@ -5978,8 +5974,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	}
 	if (link)
 		io_queue_link_head(link);
-	if (statep)
-		io_submit_state_end(&state);
+	io_submit_state_end(&state);
 
 	 /* Commit SQ ring head once we've consumed and submitted all SQEs */
 	io_commit_sqring(ctx);
-- 
2.27.0
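
Why the unconditional plug matters for the rest of the series (a sketch;
the check below is what patch 4 adds on the completion side): a failed
request can only be parked for re-issue if current->plug is set, so a
1- or 2-SQE submission needs to take the plug too:

	plug = current->plug;
	if (plug && plug->nowait) {
		/* park the request for re-issue after blk_finish_plug() */
		list_add_tail(&req->list, &plug->nowait_list);
		return true;
	}
	return false;	/* no plug in effect: fail the request as before */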



* [PATCH 3/4] io_uring: catch -EIO from buffered issue request failure
  2020-06-04 17:48 [PATCHSET] io_uring: never block for block based IO submit Jens Axboe
  2020-06-04 17:48 ` [PATCH 1/4] block: provide plug based way of signaling forced no-wait semantics Jens Axboe
  2020-06-04 17:48 ` [PATCH 2/4] io_uring: always plug for any number of IOs Jens Axboe
@ 2020-06-04 17:48 ` Jens Axboe
  2020-06-04 17:48 ` [PATCH 4/4] io_uring: re-issue plug based block requests that failed Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-04 17:48 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

-EIO bubbles up like -EAGAIN if we fail to allocate a request at the
lower level. Play it safe and treat it like -EAGAIN in terms of sync
retry, to avoid passing back an errant -EIO.

Catch some of these early for block based files, as non-mq devices
generally do not support NOWAIT. That saves us some overhead by
not first trying, then retrying from async context. We can go straight
to async punt instead.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b468fe2e8792..625578715d37 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2053,6 +2053,15 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
 	return state->file;
 }
 
+static bool io_bdev_nowait(struct block_device *bdev)
+{
+#ifdef CONFIG_BLOCK
+	return !bdev || queue_is_mq(bdev_get_queue(bdev));
+#else
+	return true;
+#endif
+}
+
 /*
  * If we tracked the file through the SCM inflight mechanism, we could support
  * any file. For now, just ensure that anything potentially problematic is done
@@ -2062,10 +2071,19 @@ static bool io_file_supports_async(struct file *file, int rw)
 {
 	umode_t mode = file_inode(file)->i_mode;
 
-	if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISSOCK(mode))
-		return true;
-	if (S_ISREG(mode) && file->f_op != &io_uring_fops)
+	if (S_ISBLK(mode)) {
+		if (io_bdev_nowait(file->f_inode->i_bdev))
+			return true;
+		return false;
+	}
+	if (S_ISCHR(mode) || S_ISSOCK(mode))
 		return true;
+	if (S_ISREG(mode)) {
+		if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
+		    file->f_op != &io_uring_fops)
+			return true;
+		return false;
+	}
 
 	if (!(file->f_mode & FMODE_NOWAIT))
 		return false;
@@ -2611,7 +2629,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
-		ssize_t ret2;
+		ssize_t ret2 = 0;
 
 		if (req->file->f_op->read_iter)
 			ret2 = call_read_iter(req->file, kiocb, &iter);
@@ -2619,7 +2637,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 			ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
 
 		/* Catch -EAGAIN return for forced non-blocking submission */
-		if (!force_nonblock || ret2 != -EAGAIN) {
+		if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
 			kiocb_done(kiocb, ret2);
 		} else {
 copy_iov:
-- 
2.27.0
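
A simplified restatement of the new per-file-type policy (not a literal
copy of io_file_supports_async(), which continues with FMODE_NOWAIT and
further checks past the hunk shown above):

	if (S_ISBLK(mode))		/* raw block device */
		nowait_capable = io_bdev_nowait(file->f_inode->i_bdev);
	else if (S_ISCHR(mode) || S_ISSOCK(mode))
		nowait_capable = true;
	else if (S_ISREG(mode))		/* regular file: look at the backing device */
		nowait_capable = io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
				 file->f_op != &io_uring_fops;
	else
		nowait_capable = (file->f_mode & FMODE_NOWAIT) != 0;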



* [PATCH 4/4] io_uring: re-issue plug based block requests that failed
  2020-06-04 17:48 [PATCHSET] io_uring: never block for block based IO submit Jens Axboe
                   ` (2 preceding siblings ...)
  2020-06-04 17:48 ` [PATCH 3/4] io_uring: catch -EIO from buffered issue request failure Jens Axboe
@ 2020-06-04 17:48 ` Jens Axboe
  2020-06-05  3:20   ` [PATCH v2] " Jens Axboe
  3 siblings, 1 reply; 6+ messages in thread
From: Jens Axboe @ 2020-06-04 17:48 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Mark the plug with nowait == true, which causes requests to avoid blocking
on request allocation. If a request does fail because it would have had to
block, we catch it and add it to the plug list. Once we finish the plug,
we re-issue the requests that got caught.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 625578715d37..04b3571b21e9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1947,12 +1947,31 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 	__io_cqring_add_event(req, res, cflags);
 }
 
+static bool io_rw_reissue(struct io_kiocb *req, long res)
+{
+#ifdef CONFIG_BLOCK
+	struct blk_plug *plug;
+
+	if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
+		return false;
+
+	plug = current->plug;
+	if (plug && plug->nowait) {
+		list_add_tail(&req->list, &plug->nowait_list);
+		return true;
+	}
+#endif
+	return false;
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
-	io_complete_rw_common(kiocb, res);
-	io_put_req(req);
+	if (!io_rw_reissue(req, res)) {
+		io_complete_rw_common(kiocb, res);
+		io_put_req(req);
+	}
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -5789,12 +5808,30 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+#ifdef CONFIG_BLOCK
+static void io_resubmit_rw(struct list_head *list)
+{
+	struct io_kiocb *req;
+
+	while (!list_empty(list)) {
+		req = list_first_entry(list, struct io_kiocb, list);
+		list_del(&req->list);
+		refcount_inc(&req->refs);
+		io_queue_async_work(req);
+	}
+}
+#endif
+
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
 	blk_finish_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+	if (unlikely(!list_empty(&state->plug.nowait_list)))
+		io_resubmit_rw(&state->plug.nowait_list);
+#endif
 	io_state_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
@@ -5807,6 +5844,10 @@ static void io_submit_state_start(struct io_submit_state *state,
 				  unsigned int max_ios)
 {
 	blk_start_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+	INIT_LIST_HEAD(&state->plug.nowait_list);
+	state->plug.nowait = true;
+#endif
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
-- 
2.27.0
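
Worked example of the new path (a sketch of the flow, not code from the
patch): a READV is submitted while the request pool is exhausted.

	/*
	 *  submit:   io_read() -> ->read_iter() issues a bio that carries
	 *            REQ_NOWAIT (plug->nowait); allocation would block, so
	 *            the bio is ended with -EAGAIN instead of sleeping.
	 *  complete: io_complete_rw() -> io_rw_reissue() sees -EAGAIN and a
	 *            current->plug with nowait set, parks the request on
	 *            plug->nowait_list and posts no CQE yet.
	 *  unplug:   io_submit_state_end() -> blk_finish_plug(), then
	 *            io_resubmit_rw() bumps the ref and punts each parked
	 *            request to io-wq via io_queue_async_work(), where
	 *            blocking is allowed.
	 */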



* [PATCH v2] io_uring: re-issue plug based block requests that failed
  2020-06-04 17:48 ` [PATCH 4/4] io_uring: re-issue plug based block requests that failed Jens Axboe
@ 2020-06-05  3:20   ` Jens Axboe
  0 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-05  3:20 UTC (permalink / raw)
  To: io-uring

Mark the plug with nowait == true, which causes requests to avoid blocking
on request allocation. If a request does fail because it would have had to
block, we catch it and add it to the plug list. Once we finish the plug,
we re-issue the requests that got caught.

Signed-off-by: Jens Axboe <[email protected]>

---

Since v1:
- Properly re-prep on resubmit
- Sanity check for io_wq_current_is_worker()
- Sanity check we're just doing read/write for re-issue
- Ensure iov state is sane for resubmit
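
The re-prep and iov-state points deserve a brief illustration (a sketch of
the idea; the real code is in io_resubmit_prep() and the io_read()/io_write()
hunks below):

	/* ->read_iter() may have advanced the iterator before failing with
	 * -EAGAIN, so restore it before stashing the request for retry */
	iter.count = iov_count;
	iter.nr_segs = nr_segs;

	/* and before a parked request is punted to io-wq, re-import the
	 * iovec and copy it into async context so the worker does not
	 * depend on state from the original (now finished) submit call */
	ret = io_import_iovec(rw, req, &iovec, &iter, false);
	if (ret >= 0)
		ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);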

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 625578715d37..942984bda2f8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1947,12 +1947,31 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 	__io_cqring_add_event(req, res, cflags);
 }
 
+static bool io_rw_reissue(struct io_kiocb *req, long res)
+{
+#ifdef CONFIG_BLOCK
+	struct blk_plug *plug;
+
+	if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
+		return false;
+
+	plug = current->plug;
+	if (plug && plug->nowait) {
+		list_add_tail(&req->list, &plug->nowait_list);
+		return true;
+	}
+#endif
+	return false;
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
-	io_complete_rw_common(kiocb, res);
-	io_put_req(req);
+	if (!io_rw_reissue(req, res)) {
+		io_complete_rw_common(kiocb, res);
+		io_put_req(req);
+	}
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -2629,6 +2648,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
+		unsigned long nr_segs = iter.nr_segs;
 		ssize_t ret2 = 0;
 
 		if (req->file->f_op->read_iter)
@@ -2640,6 +2660,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 		if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
 			kiocb_done(kiocb, ret2);
 		} else {
+			iter.count = iov_count;
+			iter.nr_segs = nr_segs;
 copy_iov:
 			ret = io_setup_async_rw(req, io_size, iovec,
 						inline_vecs, &iter);
@@ -2726,6 +2748,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
+		unsigned long nr_segs = iter.nr_segs;
 		ssize_t ret2;
 
 		/*
@@ -2763,6 +2786,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
 		if (!force_nonblock || ret2 != -EAGAIN) {
 			kiocb_done(kiocb, ret2);
 		} else {
+			iter.count = iov_count;
+			iter.nr_segs = nr_segs;
 copy_iov:
 			ret = io_setup_async_rw(req, io_size, iovec,
 						inline_vecs, &iter);
@@ -5789,12 +5814,70 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+#ifdef CONFIG_BLOCK
+static bool io_resubmit_prep(struct io_kiocb *req)
+{
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+	struct iov_iter iter;
+	ssize_t ret;
+	int rw;
+
+	switch (req->opcode) {
+	case IORING_OP_READV:
+	case IORING_OP_READ_FIXED:
+	case IORING_OP_READ:
+		rw = READ;
+		break;
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+	case IORING_OP_WRITE:
+		rw = WRITE;
+		break;
+	default:
+		printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n",
+				req->opcode);
+		goto end_req;
+	}
+
+	ret = io_import_iovec(rw, req, &iovec, &iter, false);
+	if (ret < 0)
+		goto end_req;
+	ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);
+	if (!ret)
+		return true;
+	kfree(iovec);
+end_req:
+	io_cqring_add_event(req, ret);
+	req_set_fail_links(req);
+	io_put_req(req);
+	return false;
+}
+
+static void io_resubmit_rw(struct list_head *list)
+{
+	struct io_kiocb *req;
+
+	while (!list_empty(list)) {
+		req = list_first_entry(list, struct io_kiocb, list);
+		list_del(&req->list);
+		if (io_resubmit_prep(req)) {
+			refcount_inc(&req->refs);
+			io_queue_async_work(req);
+		}
+	}
+}
+#endif
+
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
 	blk_finish_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+	if (unlikely(!list_empty(&state->plug.nowait_list)))
+		io_resubmit_rw(&state->plug.nowait_list);
+#endif
 	io_state_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
@@ -5807,6 +5890,10 @@ static void io_submit_state_start(struct io_submit_state *state,
 				  unsigned int max_ios)
 {
 	blk_start_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+	INIT_LIST_HEAD(&state->plug.nowait_list);
+	state->plug.nowait = true;
+#endif
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;

-- 
Jens Axboe




Thread overview: 6+ messages
2020-06-04 17:48 [PATCHSET] io_uring: never block for block based IO submit Jens Axboe
2020-06-04 17:48 ` [PATCH 1/4] block: provide plug based way of signaling forced no-wait semantics Jens Axboe
2020-06-04 17:48 ` [PATCH 2/4] io_uring: always plug for any number of IOs Jens Axboe
2020-06-04 17:48 ` [PATCH 3/4] io_uring: catch -EIO from buffered issue request failure Jens Axboe
2020-06-04 17:48 ` [PATCH 4/4] io_uring: re-issue plug based block requests that failed Jens Axboe
2020-06-05  3:20   ` [PATCH v2] " Jens Axboe
