From: Pavel Begunkov <[email protected]>
To: Dylan Yudaken <[email protected]>, Jens Axboe <[email protected]>
Cc: [email protected], [email protected]
Subject: Re: [PATCH for-next 01/12] io_uring: infrastructure for retargeting rsrc nodes
Date: Wed, 2 Nov 2022 11:20:31 +0000
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
On 10/31/22 13:41, Dylan Yudaken wrote:
> rsrc node cleanup can be indefinitely delayed when there are long lived
> requests. For example, if a file is located in the same rsrc node as a
> long lived socket with multishot poll, then even if the file is
> unregistered it will not be closed while the poll request is still active.
>
> Introduce a timer when the rsrc node is switched, so that these long
> lived requests can periodically be retargeted to the newest node. That
> allows the old nodes to be cleaned up, freeing resources.
>
> Signed-off-by: Dylan Yudaken <[email protected]>
> ---
> include/linux/io_uring_types.h | 2 +
> io_uring/io_uring.c | 1 +
> io_uring/opdef.h | 1 +
> io_uring/rsrc.c | 92 ++++++++++++++++++++++++++++++++++
> io_uring/rsrc.h | 1 +
> 5 files changed, 97 insertions(+)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index f5b687a787a3..1d4eff4e632c 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -327,6 +327,8 @@ struct io_ring_ctx {
> struct llist_head rsrc_put_llist;
> struct list_head rsrc_ref_list;
> spinlock_t rsrc_ref_lock;
> + struct delayed_work rsrc_retarget_work;
> + bool rsrc_retarget_scheduled;
>
> struct list_head io_buffers_pages;
>
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 6cc16e39b27f..ea2260359c56 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -320,6 +320,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> spin_lock_init(&ctx->rsrc_ref_lock);
> INIT_LIST_HEAD(&ctx->rsrc_ref_list);
> INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
> + INIT_DELAYED_WORK(&ctx->rsrc_retarget_work, io_rsrc_retarget_work);
> init_llist_head(&ctx->rsrc_put_llist);
> init_llist_head(&ctx->work_llist);
> INIT_LIST_HEAD(&ctx->tctx_list);
> diff --git a/io_uring/opdef.h b/io_uring/opdef.h
> index 3efe06d25473..1b72b14cb5ab 100644
> --- a/io_uring/opdef.h
> +++ b/io_uring/opdef.h
> @@ -37,6 +37,7 @@ struct io_op_def {
> int (*prep_async)(struct io_kiocb *);
> void (*cleanup)(struct io_kiocb *);
> void (*fail)(struct io_kiocb *);
> + bool (*can_retarget_rsrc)(struct io_kiocb *);
side note: this table needs to be split at some point into two tables
depending on hotness, we want better cache locality for ->issue and ->prep
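roughly what I have in mind (hypothetical names, just a sketch):

	/* hot: dereferenced on every prep/issue, keep it small and dense */
	struct io_issue_def {
		int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
		int (*issue)(struct io_kiocb *, unsigned int issue_flags);
	};

	/* cold: only touched on errors, teardown and retargeting */
	struct io_cold_def {
		int (*prep_async)(struct io_kiocb *);
		void (*cleanup)(struct io_kiocb *);
		void (*fail)(struct io_kiocb *);
		bool (*can_retarget_rsrc)(struct io_kiocb *);
	};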
> };
>
> extern const struct io_op_def io_op_defs[];
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 55d4ab96fb92..106210e0d5d5 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -15,6 +15,7 @@
> #include "io_uring.h"
> #include "openclose.h"
> #include "rsrc.h"
> +#include "opdef.h"
>
> struct io_rsrc_update {
> struct file *file;
> @@ -204,6 +205,95 @@ void io_rsrc_put_work(struct work_struct *work)
> }
> }
>
> +
> +static unsigned int io_rsrc_retarget_req(struct io_ring_ctx *ctx,
> + struct io_kiocb *req)
> + __must_hold(&ctx->uring_lock)
> +{
> + if (!req->rsrc_node ||
> + req->rsrc_node == ctx->rsrc_node)
> + return 0;
> + if (!io_op_defs[req->opcode].can_retarget_rsrc)
> + return 0;
> + if (!(*io_op_defs[req->opcode].can_retarget_rsrc)(req))
> + return 0;
nit: there should be no need to deref the fptr here, a plain call works:

	if (!io_op_defs[req->opcode].can_retarget_rsrc(req))
		return 0;
> +
> + io_rsrc_put_node(req->rsrc_node, 1);
> + req->rsrc_node = ctx->rsrc_node;
> + return 1;
> +}
> +
> +static unsigned int io_rsrc_retarget_table(struct io_ring_ctx *ctx,
> + struct io_hash_table *table)
> +{
> + unsigned int nr_buckets = 1U << table->hash_bits;
> + unsigned int refs = 0;
> + struct io_kiocb *req;
> + int i;
> +
> + for (i = 0; i < nr_buckets; i++) {
> + struct io_hash_bucket *hb = &table->hbs[i];
> +
> + spin_lock(&hb->lock);
> + hlist_for_each_entry(req, &hb->list, hash_node)
> + refs += io_rsrc_retarget_req(ctx, req);
> + spin_unlock(&hb->lock);
> + }
> + return refs;
> +}
> +
> +static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
> + __must_hold(&ctx->uring_lock)
> +{
> + percpu_ref_get(&ctx->refs);
> + mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 * HZ);
> + ctx->rsrc_retarget_scheduled = true;
> +}
> +
> +static void __io_rsrc_retarget_work(struct io_ring_ctx *ctx)
> + __must_hold(&ctx->uring_lock)
> +{
> + struct io_rsrc_node *node;
> + unsigned int refs;
> + bool any_waiting;
> +
> + if (!ctx->rsrc_node)
> + return;
> +
> + spin_lock_irq(&ctx->rsrc_ref_lock);
> + any_waiting = false;
> + list_for_each_entry(node, &ctx->rsrc_ref_list, node) {
> + if (!node->done) {
> + any_waiting = true;
> + break;
> + }
> + }
> + spin_unlock_irq(&ctx->rsrc_ref_lock);
> +
> + if (!any_waiting)
> + return;
> +
> + refs = io_rsrc_retarget_table(ctx, &ctx->cancel_table);
> + refs += io_rsrc_retarget_table(ctx, &ctx->cancel_table_locked);
> +
> + ctx->rsrc_cached_refs -= refs;
> + while (unlikely(ctx->rsrc_cached_refs < 0))
> + io_rsrc_refs_refill(ctx);
We can charge ->rsrc_cached_refs after setting up nodes in prep / submission
without underflowing the actual refs, because we know those requests are not
yet submitted and so won't consume refs in the meantime. This one looks more
troublesome.
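fwiw, the prep/submission pattern I mean is roughly this (from memory, the
tree may differ in details):

	/* under ->uring_lock, the request is not yet submitted */
	req->rsrc_node = ctx->rsrc_node;
	ctx->rsrc_cached_refs--;
	if (unlikely(ctx->rsrc_cached_refs < 0))
		io_rsrc_refs_refill(ctx);

the cached counter may briefly go negative there, but that's fine because
nothing can put the node's refs before the request is submitted. In the
retargeting path the requests are already live, so that window isn't as
clear cut.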
> +}
> +
> +void io_rsrc_retarget_work(struct work_struct *work)
> +{
> + struct io_ring_ctx *ctx;
> +
> + ctx = container_of(work, struct io_ring_ctx, rsrc_retarget_work.work);
> +
> + mutex_lock(&ctx->uring_lock);
> + ctx->rsrc_retarget_scheduled = false;
> + __io_rsrc_retarget_work(ctx);
> + mutex_unlock(&ctx->uring_lock);
> + percpu_ref_put(&ctx->refs);
> +}
> +
> void io_wait_rsrc_data(struct io_rsrc_data *data)
> {
> if (data && !atomic_dec_and_test(&data->refs))
> @@ -285,6 +375,8 @@ void io_rsrc_node_switch(struct io_ring_ctx *ctx,
> atomic_inc(&data_to_kill->refs);
> percpu_ref_kill(&rsrc_node->refs);
> ctx->rsrc_node = NULL;
> + if (!ctx->rsrc_retarget_scheduled)
> + io_rsrc_retarget_schedule(ctx);
> }
>
> if (!ctx->rsrc_node) {
> diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
> index 81445a477622..2b94df8fd9e8 100644
> --- a/io_uring/rsrc.h
> +++ b/io_uring/rsrc.h
> @@ -54,6 +54,7 @@ struct io_mapped_ubuf {
> };
>
> void io_rsrc_put_work(struct work_struct *work);
> +void io_rsrc_retarget_work(struct work_struct *work);
> void io_rsrc_refs_refill(struct io_ring_ctx *ctx);
> void io_wait_rsrc_data(struct io_rsrc_data *data);
> void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);
--
Pavel Begunkov