From: Jens Axboe <[email protected]>
To: [email protected]
Subject: Re: [PATCH RFC] io_uring: limit inflight IO
Date: Thu, 7 Nov 2019 17:19:58 -0700
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
On 11/7/19 4:21 PM, Jens Axboe wrote:
> I'd like some feedback on this one. Even with the overflow backpressure
> patch, we still have a potentially large gap where applications can
> submit IO before we get any dropped events in the CQ ring. This is
> especially true if the execution time of those requests is long
> (unbounded).
>
> This adds IORING_SETUP_INFLIGHT, which if set, will return -EBUSY if we
> have more IO pending than we can feasibly support. This is normally the
> CQ ring size, but if IORING_SETUP_CQ_NODROP is enabled, then it's twice
> the CQ ring size.
>
> This helps manage the pending queue size instead of letting it grow
> indefinitely.
>
> Note that we could potentially just make this the default behavior -
> applications need to handle -EAGAIN returns already, in case we run out
> of memory, and if we change this to return -EAGAIN as well, then it
> doesn't introduce any new failure cases. I'm tempted to do that...
>
> Anyway, comments solicited!
After a little deliberation, I think we should go with the one that
doesn't require users to opt in. As mentioned, let's change it to
-EAGAIN to not introduce a new errno for this. They essentially mean
the same thing anyway.
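
For reference, here's roughly what handling that looks like on the
application side - just an illustrative sketch using liburing (not part
of this patch), where a submit that fails with -EAGAIN reaps a
completion to make room and then retries:

    /*
     * Illustrative only: retry submission when the kernel reports that
     * too much IO is already inflight.
     */
    #include <errno.h>
    #include <liburing.h>

    static int submit_with_backoff(struct io_uring *ring)
    {
            struct io_uring_cqe *cqe;
            int ret;

            for (;;) {
                    ret = io_uring_submit(ring);
                    if (ret >= 0 || ret != -EAGAIN)
                            return ret;
                    /* Too much inflight: reap one completion, then retry */
                    ret = io_uring_wait_cqe(ring, &cqe);
                    if (ret < 0)
                            return ret;
                    io_uring_cqe_seen(ring, cqe);
            }
    }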
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f8344f95817e..4c488bf6e889 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -203,6 +203,7 @@ struct io_ring_ctx {
unsigned sq_mask;
unsigned sq_thread_idle;
unsigned cached_sq_dropped;
+ atomic_t cached_cq_overflow;
struct io_uring_sqe *sq_sqes;
struct list_head defer_list;
@@ -221,13 +222,12 @@ struct io_ring_ctx {
struct {
unsigned cached_cq_tail;
- atomic_t cached_cq_overflow;
unsigned cq_entries;
unsigned cq_mask;
+ atomic_t cq_timeouts;
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
struct eventfd_ctx *cq_ev_fd;
- atomic_t cq_timeouts;
} ____cacheline_aligned_in_smp;
struct io_rings *rings;
@@ -705,16 +705,39 @@ static void io_cqring_add_event(struct io_kiocb *req, long res)
io_cqring_ev_posted(ctx);
}
+static bool io_req_over_limit(struct io_ring_ctx *ctx)
+{
+ unsigned limit, inflight;
+
+ /*
+ * This doesn't need to be super precise, so only check every once
+ * in a while.
+ */
+ if (ctx->cached_sq_head & ctx->sq_mask)
+ return false;
+
+ if (ctx->flags & IORING_SETUP_CQ_NODROP)
+ limit = 2 * ctx->cq_entries;
+ else
+ limit = ctx->cq_entries;
+
+ inflight = ctx->cached_sq_head -
+ (ctx->cached_cq_tail + atomic_read(&ctx->cached_cq_overflow));
+ return inflight >= limit;
+}
+
static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
- struct io_submit_state *state)
+ struct io_submit_state *state, bool force)
{
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct io_kiocb *req;
if (!percpu_ref_tryget(&ctx->refs))
- return NULL;
+ return ERR_PTR(-ENXIO);
if (!state) {
+ if (unlikely(!force && io_req_over_limit(ctx)))
+ goto out;
req = kmem_cache_alloc(req_cachep, gfp);
if (unlikely(!req))
goto out;
@@ -722,6 +745,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
size_t sz;
int ret;
+ if (unlikely(!force && io_req_over_limit(ctx)))
+ goto out;
sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
@@ -754,7 +779,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
return req;
out:
percpu_ref_put(&ctx->refs);
- return NULL;
+ return ERR_PTR(-EAGAIN);
}
static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
@@ -2963,10 +2988,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
struct io_kiocb *req;
unsigned int sqe_flags;
- req = io_get_req(ctx, statep);
- if (unlikely(!req)) {
+ req = io_get_req(ctx, statep, false);
+ if (unlikely(IS_ERR(req))) {
if (!submitted)
- submitted = -EAGAIN;
+ submitted = PTR_ERR(req);
+ req = NULL;
break;
}
if (!io_get_sqring(ctx, &req->submit)) {
@@ -2986,9 +3012,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
if (link && (sqe_flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
- shadow_req = io_get_req(ctx, NULL);
- if (unlikely(!shadow_req))
+ shadow_req = io_get_req(ctx, NULL, true);
+ if (unlikely(IS_ERR(shadow_req))) {
+ shadow_req = NULL;
goto out;
+ }
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
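
To spell out the bound: inflight here is cached_sq_head minus
(cached_cq_tail + cached_cq_overflow), i.e. SQEs consumed minus CQEs
posted (counting overflowed ones). So a ring set up with 128 CQ entries
starts seeing -EAGAIN at 128 pending requests, or 256 with
IORING_SETUP_CQ_NODROP. Purely illustrative, mirroring the kernel-side
check above:

    /* Illustration only - mirrors io_req_over_limit() in the patch */
    static int over_limit(unsigned sq_head, unsigned cq_tail,
                          unsigned cq_overflow, unsigned cq_entries,
                          int nodrop)
    {
            unsigned limit = nodrop ? 2 * cq_entries : cq_entries;
            unsigned inflight = sq_head - (cq_tail + cq_overflow);

            return inflight >= limit;
    }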
--
Jens Axboe