From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH v4 07/26] io_uring: ctx-wide rsrc nodes
Date: Thu, 1 Apr 2021 15:43:46 +0100 [thread overview]
Message-ID: <7e9c693b4b9a2f47aa784b616ce29843021bb65a.1617287883.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
If we're ever going to support multiple types of resources, we need
shared rsrc nodes so as not to bloat requests; this patch implements
them. It also gives a nicer API and saves one pointer dereference
in io_req_set_rsrc_node().
We may say that all requests bound to a resource belong to one and only
one rsrc node, and considering that nodes are removed and recycled
strictly in-order, this separates requests into generations, where
the generation changes on each node switch (i.e. io_rsrc_node_switch()).
The API is simple: io_rsrc_node_switch() switches to a new generation if
needed, and also optionally kills a passed-in io_rsrc_data. Each call to
io_rsrc_node_switch() has to be preceded by
io_rsrc_node_switch_start(). The start function is idempotent and need
not necessarily be followed by a switch.
One difference is that once a node has been set, the ctx will always retain
a valid rsrc node, even on unregister. It may be a nuisance at the moment,
but makes a lot of sense for multiple types of resources. Another change
is that nodes are now bound to (associated with) an io_rsrc_data later,
just before killing (i.e. switching).
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 73 ++++++++++++++++++++++++++-------------------------
1 file changed, 37 insertions(+), 36 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5dc4f6bb643a..47c76ec422ba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -235,7 +235,6 @@ struct io_rsrc_data {
struct io_ring_ctx *ctx;
rsrc_put_fn *do_put;
- struct io_rsrc_node *node;
struct percpu_ref refs;
struct completion done;
bool quiesce;
@@ -448,6 +447,7 @@ struct io_ring_ctx {
struct llist_head rsrc_put_llist;
struct list_head rsrc_ref_list;
spinlock_t rsrc_ref_lock;
+ struct io_rsrc_node *rsrc_node;
struct io_rsrc_node *rsrc_backup_node;
struct io_restriction restrictions;
@@ -1077,7 +1077,7 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
if (!req->fixed_rsrc_refs) {
- req->fixed_rsrc_refs = &ctx->file_data->node->refs;
+ req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
percpu_ref_get(req->fixed_rsrc_refs);
}
}
@@ -7075,36 +7075,32 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
spin_unlock_bh(&ctx->rsrc_ref_lock);
}
-static void io_rsrc_node_set(struct io_ring_ctx *ctx,
- struct io_rsrc_data *rsrc_data)
+static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *data_to_kill)
{
- struct io_rsrc_node *rsrc_node = ctx->rsrc_backup_node;
+ WARN_ON_ONCE(!ctx->rsrc_backup_node);
+ WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
- WARN_ON_ONCE(!rsrc_node);
+ if (data_to_kill) {
+ struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
- ctx->rsrc_backup_node = NULL;
- rsrc_node->rsrc_data = rsrc_data;
+ rsrc_node->rsrc_data = data_to_kill;
+ io_rsrc_ref_lock(ctx);
+ list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
+ io_rsrc_ref_unlock(ctx);
- io_rsrc_ref_lock(ctx);
- rsrc_data->node = rsrc_node;
- list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
- io_rsrc_ref_unlock(ctx);
- percpu_ref_get(&rsrc_data->refs);
-}
-
-static void io_rsrc_node_kill(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
-{
- struct io_rsrc_node *ref_node = NULL;
+ percpu_ref_get(&data_to_kill->refs);
+ percpu_ref_kill(&rsrc_node->refs);
+ ctx->rsrc_node = NULL;
+ }
- io_rsrc_ref_lock(ctx);
- ref_node = data->node;
- data->node = NULL;
- io_rsrc_ref_unlock(ctx);
- if (ref_node)
- percpu_ref_kill(&ref_node->refs);
+ if (!ctx->rsrc_node) {
+ ctx->rsrc_node = ctx->rsrc_backup_node;
+ ctx->rsrc_backup_node = NULL;
+ }
}
-static int io_rsrc_node_prealloc(struct io_ring_ctx *ctx)
+static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
@@ -7121,10 +7117,11 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
data->quiesce = true;
do {
- ret = io_rsrc_node_prealloc(ctx);
+ ret = io_rsrc_node_switch_start(ctx);
if (ret)
break;
- io_rsrc_node_kill(ctx, data);
+ io_rsrc_node_switch(ctx, data);
+
percpu_ref_kill(&data->refs);
flush_delayed_work(&ctx->rsrc_put_work);
@@ -7133,7 +7130,6 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
break;
percpu_ref_resurrect(&data->refs);
- io_rsrc_node_set(ctx, data);
reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
@@ -7614,7 +7610,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
- ret = io_rsrc_node_prealloc(ctx);
+ ret = io_rsrc_node_switch_start(ctx);
if (ret)
return ret;
@@ -7676,7 +7672,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
- io_rsrc_node_set(ctx, file_data);
+ io_rsrc_node_switch(ctx, NULL);
return ret;
out_fput:
for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7765,7 +7761,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
return -EOVERFLOW;
if (done > ctx->nr_user_files)
return -EINVAL;
- err = io_rsrc_node_prealloc(ctx);
+ err = io_rsrc_node_switch_start(ctx);
if (err)
return err;
@@ -7784,7 +7780,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (*file_slot) {
file = (struct file *) ((unsigned long) *file_slot & FFS_MASK);
- err = io_queue_rsrc_removal(data, data->node, file);
+ err = io_queue_rsrc_removal(data, ctx->rsrc_node, file);
if (err)
break;
*file_slot = NULL;
@@ -7819,10 +7815,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
}
}
- if (needs_switch) {
- percpu_ref_kill(&data->node->refs);
- io_rsrc_node_set(ctx, data);
- }
+ if (needs_switch)
+ io_rsrc_node_switch(ctx, data);
return done ? done : err;
}
@@ -8496,8 +8490,15 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_eventfd_unregister(ctx);
io_destroy_buffers(ctx);
+ /* there are no registered resources left, nobody uses it */
+ if (ctx->rsrc_node)
+ io_rsrc_node_destroy(ctx->rsrc_node);
if (ctx->rsrc_backup_node)
io_rsrc_node_destroy(ctx->rsrc_backup_node);
+ flush_delayed_work(&ctx->rsrc_put_work);
+
+ WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
+ WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
--
2.24.0
next prev parent reply other threads:[~2021-04-01 18:19 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 01/26] io_uring: name rsrc bits consistently Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 02/26] io_uring: simplify io_rsrc_node_ref_zero Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 03/26] io_uring: use rsrc prealloc infra for files reg Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 04/26] io_uring: encapsulate rsrc node manipulations Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 05/26] io_uring: move rsrc_put callback into io_rsrc_data Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 06/26] io_uring: refactor io_queue_rsrc_removal() Pavel Begunkov
2021-04-01 14:43 ` Pavel Begunkov [this message]
2021-04-01 14:43 ` [PATCH v4 08/26] io_uring: reuse io_rsrc_node_destroy() Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 09/26] io_uring: remove useless is_dying check on quiesce Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 10/26] io_uring: refactor rw reissue Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 11/26] io_uring: combine lock/unlock sections on exit Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 12/26] io_uring: better ref handling in poll_remove_one Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 13/26] io_uring: remove unused hash_wait Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 14/26] io_uring: refactor io_async_cancel() Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 15/26] io_uring: improve import_fixed overflow checks Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 16/26] io_uring: store reg buffer end instead of length Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 17/26] io_uring: kill unused forward decls Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 18/26] io_uring: lock annotate timeouts and poll Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 19/26] io_uring: simplify overflow handling Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 20/26] io_uring: put link timeout req consistently Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 21/26] io_uring: deduplicate NOSIGNAL setting Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 22/26] io_uring: set proper FFS* flags on reg file update Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 23/26] io_uring: don't quiesce intial files register Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 24/26] io_uring: refactor file tables alloc/free Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 25/26] io_uring: encapsulate fixed files into struct Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 26/26] io_uring: kill outdated comment about splice punt Pavel Begunkov
2021-04-04 19:16 ` [PATCH v4 00/26] ctx wide rsrc nodes + Jens Axboe
2021-04-04 19:22 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7e9c693b4b9a2f47aa784b616ce29843021bb65a.1617287883.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox