From: Jens Axboe <[email protected]>
To: [email protected]
Subject: [PATCH] io_uring: provide fallback request for OOM situations
Date: Fri, 8 Nov 2019 08:22:06 -0700
Message-ID: <[email protected]>
One thing that really sucks for userspace APIs is if the kernel passes
back -ENOMEM/-EAGAIN for resource shortages. The application really has
no idea what to do in those cases. Should it try and reap completions?
Probably a good idea. Will that solve the issue? Who knows.

This patch adds a simple fallback mechanism if we fail to allocate
memory for a request. We first try the atomic pool and see if we can
get memory that way; if that fails, we punt to a pre-allocated request.
There's just one of these per ring, but the important part is that if
we ever return -EBUSY to the application, the application knows it can
wait for events and make forward progress when events have completed.
-EBUSY is only returned when requests are already in flight (either
we're over the inflight limit, or the lone fallback request is in use),
so waiting for completions is guaranteed to make progress.
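
To illustrate the contract from the application side, here's a rough
sketch of a submit loop that treats -EBUSY as "reap a completion and
retry". It uses liburing's helpers for brevity; this is an example of
the intended usage, not code from this patch:

#include <liburing.h>

int submit_with_retry(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	int ret;

	while ((ret = io_uring_submit(ring)) == -EBUSY) {
		/*
		 * The kernel is out of request memory, but -EBUSY means
		 * requests are in flight, so a completion will arrive.
		 */
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret < 0)
			return ret;
		/* a real caller would consume cqe->res here */
		io_uring_cqe_seen(ring, cqe);
	}
	return ret;
}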
Signed-off-by: Jens Axboe <[email protected]>
---
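As a quick map of the allocation strategy in the hunks below (a
simplified sketch of the control flow, not the literal code): the
normal slab allocation is tried first, then io_get_fallback_req()
tries GFP_ATOMIC, and finally falls back to the single pre-allocated
request, which is claimed with test_and_set_bit_lock() and released
again in __io_free_req() via clear_bit_unlock():

	req = kmem_cache_alloc(req_cachep, gfp);
	if (!req) {
		/* atomic alloc, then the one pre-allocated request */
		req = io_get_fallback_req(ctx);
		if (!req) {
			/* tell the app to reap completions and retry */
			return ERR_PTR(-EBUSY);
		}
	}
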
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 94ec44caac00..fb25cce9d580 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -238,6 +238,9 @@ struct io_ring_ctx {
 	/* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
 	struct completion	*completions;
 
+	/* if all else fails... */
+	struct io_kiocb		*fallback_req;
+
 #if defined(CONFIG_UNIX)
 	struct socket		*ring_sock;
 #endif
@@ -407,6 +410,10 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (!ctx)
 		return NULL;
 
+	ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
+	if (!ctx->fallback_req)
+		goto err;
+
 	ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL);
 	if (!ctx->completions)
 		goto err;
@@ -432,6 +439,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->inflight_list);
 	return ctx;
 err:
+	if (ctx->fallback_req)
+		kmem_cache_free(req_cachep, ctx->fallback_req);
 	kfree(ctx->completions);
 	kfree(ctx);
 	return NULL;
@@ -732,6 +741,27 @@ static bool io_req_over_limit(struct io_ring_ctx *ctx)
 	return inflight >= limit;
 }
 
+static inline bool io_is_fallback_req(struct io_kiocb *req)
+{
+	return req == (struct io_kiocb *)
+			((unsigned long) req->ctx->fallback_req & ~1UL);
+}
+
+static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req;
+
+	req = kmem_cache_alloc(req_cachep, GFP_ATOMIC | __GFP_NOWARN);
+	if (req)
+		return req;
+
+	req = ctx->fallback_req;
+	if (!test_and_set_bit_lock(0, (unsigned long *) ctx->fallback_req))
+		return req;
+
+	return NULL;
+}
+
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 				   struct io_submit_state *state, bool force)
 {
@@ -742,21 +772,17 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		return ERR_PTR(-ENXIO);
 
 	if (!state) {
-		if (unlikely(!force && io_req_over_limit(ctx))) {
-			req = ERR_PTR(-EBUSY);
+		if (unlikely(!force && io_req_over_limit(ctx)))
 			goto out_limit;
-		}
 		req = kmem_cache_alloc(req_cachep, gfp);
 		if (unlikely(!req))
-			goto out;
+			goto fallback;
 	} else if (!state->free_reqs) {
 		size_t sz;
 		int ret;
 
-		if (unlikely(!force && io_req_over_limit(ctx))) {
-			req = ERR_PTR(-EBUSY);
+		if (unlikely(!force && io_req_over_limit(ctx)))
 			goto out_limit;
-		}
 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
 
@@ -767,7 +793,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		if (unlikely(ret <= 0)) {
 			state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
 			if (!state->reqs[0])
-				goto out;
+				goto fallback;
 			ret = 1;
 		}
 		state->free_reqs = ret - 1;
@@ -779,6 +805,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		state->cur_req++;
 	}
 
+got_it:
 	req->file = NULL;
 	req->ctx = ctx;
 	req->flags = 0;
@@ -787,11 +814,13 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	req->result = 0;
 	INIT_IO_WORK(&req->work, io_wq_submit_work);
 	return req;
-out:
-	req = ERR_PTR(-EAGAIN);
+fallback:
+	req = io_get_fallback_req(ctx);
+	if (req)
+		goto got_it;
 out_limit:
 	percpu_ref_put(&ctx->refs);
-	return req;
+	return ERR_PTR(-EBUSY);
 }
 
 static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
@@ -819,7 +848,10 @@ static void __io_free_req(struct io_kiocb *req)
 		spin_unlock_irqrestore(&ctx->inflight_lock, flags);
 	}
 	percpu_ref_put(&ctx->refs);
-	kmem_cache_free(req_cachep, req);
+	if (likely(!io_is_fallback_req(req)))
+		kmem_cache_free(req_cachep, req);
+	else
+		clear_bit_unlock(0, (unsigned long *) ctx->fallback_req);
 }
 
 static bool io_link_cancel_timeout(struct io_kiocb *req)
@@ -1025,8 +1057,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			 * completions for those, only batch free for fixed
 			 * file and non-linked commands.
 			 */
-			if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
-			    REQ_F_FIXED_FILE) {
+			if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+			    REQ_F_FIXED_FILE) && !io_is_fallback_req(req)) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
 					io_free_req_many(ctx, reqs, &to_free);
@@ -4143,6 +4175,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 			ring_pages(ctx->sq_entries, ctx->cq_entries));
 	free_uid(ctx->user);
 	kfree(ctx->completions);
+	kmem_cache_free(req_cachep, ctx->fallback_req);
 	kfree(ctx);
 }
 
--
Jens Axboe