From: Pavel Begunkov <[email protected]>
To: Dylan Yudaken <[email protected]>, Jens Axboe <[email protected]>
Cc: [email protected], [email protected]
Subject: Re: [PATCH for-next 01/12] io_uring: infrastructure for retargeting rsrc nodes
Date: Wed, 2 Nov 2022 11:20:31 +0000	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 10/31/22 13:41, Dylan Yudaken wrote:
> rsrc node cleanup can be indefinitely delayed when there are long lived
> requests. For example, if a file is located in the same rsrc node as a
> long lived socket with multishot poll, then even after unregistering the
> file it will not be closed while the poll request is still active.
> 
> Introduce a timer when the rsrc node is switched, so that we can
> periodically retarget these long lived requests to the newest node. That
> allows the old nodes to be cleaned up, freeing resources.
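
For context, the scenario described above can be sketched from userspace
with liburing roughly as below (illustrative only: error handling is
omitted, the file path is made up, and a pipe stands in for the socket):

#include <liburing.h>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	int pipefd[2], fds[2], unreg = -1;

	pipe(pipefd);			/* stands in for a long lived socket */
	fds[0] = pipefd[0];
	fds[1] = open("/tmp/some-file", O_CREAT | O_RDWR, 0600);

	io_uring_queue_init(8, &ring, 0);
	io_uring_register_files(&ring, fds, 2);

	/* multishot poll on fixed slot 0; the armed request holds a ref
	 * on the rsrc node backing the registered file table */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_multishot(sqe, 0, POLLIN);
	sqe->flags |= IOSQE_FIXED_FILE;
	io_uring_submit(&ring);

	/* remove the file in slot 1: without retargeting, its final
	 * fput() waits on the old rsrc node, which the still-armed
	 * poll request keeps alive indefinitely */
	io_uring_register_files_update(&ring, 1, &unreg, 1);
	pause();
	return 0;
}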
> 
> Signed-off-by: Dylan Yudaken <[email protected]>
> ---
>   include/linux/io_uring_types.h |  2 +
>   io_uring/io_uring.c            |  1 +
>   io_uring/opdef.h               |  1 +
>   io_uring/rsrc.c                | 92 ++++++++++++++++++++++++++++++++++
>   io_uring/rsrc.h                |  1 +
>   5 files changed, 97 insertions(+)
> 
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index f5b687a787a3..1d4eff4e632c 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -327,6 +327,8 @@ struct io_ring_ctx {
>   	struct llist_head		rsrc_put_llist;
>   	struct list_head		rsrc_ref_list;
>   	spinlock_t			rsrc_ref_lock;
> +	struct delayed_work		rsrc_retarget_work;
> +	bool				rsrc_retarget_scheduled;
>   
>   	struct list_head		io_buffers_pages;
>   
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 6cc16e39b27f..ea2260359c56 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -320,6 +320,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>   	spin_lock_init(&ctx->rsrc_ref_lock);
>   	INIT_LIST_HEAD(&ctx->rsrc_ref_list);
>   	INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
> +	INIT_DELAYED_WORK(&ctx->rsrc_retarget_work, io_rsrc_retarget_work);
>   	init_llist_head(&ctx->rsrc_put_llist);
>   	init_llist_head(&ctx->work_llist);
>   	INIT_LIST_HEAD(&ctx->tctx_list);
> diff --git a/io_uring/opdef.h b/io_uring/opdef.h
> index 3efe06d25473..1b72b14cb5ab 100644
> --- a/io_uring/opdef.h
> +++ b/io_uring/opdef.h
> @@ -37,6 +37,7 @@ struct io_op_def {
>   	int (*prep_async)(struct io_kiocb *);
>   	void (*cleanup)(struct io_kiocb *);
>   	void (*fail)(struct io_kiocb *);
> +	bool (*can_retarget_rsrc)(struct io_kiocb *);

side note: this needs to be split at some point into two tables based on
hotness; we want better caching for ->issue and ->prep
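
Roughly what such a split might look like (purely a sketch, struct names
invented here; the exact member placement would need measuring):

struct io_issue_def {
	/* hot: touched on every prep/issue */
	int (*issue)(struct io_kiocb *, unsigned int);
	int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
	/* per-op flags ... */
};

struct io_cold_def {
	/* cold: slow paths only */
	const char *name;
	int (*prep_async)(struct io_kiocb *);
	void (*cleanup)(struct io_kiocb *);
	void (*fail)(struct io_kiocb *);
	bool (*can_retarget_rsrc)(struct io_kiocb *);
};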

>   };
>   
>   extern const struct io_op_def io_op_defs[];
> diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
> index 55d4ab96fb92..106210e0d5d5 100644
> --- a/io_uring/rsrc.c
> +++ b/io_uring/rsrc.c
> @@ -15,6 +15,7 @@
>   #include "io_uring.h"
>   #include "openclose.h"
>   #include "rsrc.h"
> +#include "opdef.h"
>   
>   struct io_rsrc_update {
>   	struct file			*file;
> @@ -204,6 +205,95 @@ void io_rsrc_put_work(struct work_struct *work)
>   	}
>   }
>   
> +
> +static unsigned int io_rsrc_retarget_req(struct io_ring_ctx *ctx,
> +					 struct io_kiocb *req)
> +	__must_hold(&ctx->uring_lock)
> +{
> +	if (!req->rsrc_node ||
> +	     req->rsrc_node == ctx->rsrc_node)
> +		return 0;
> +	if (!io_op_defs[req->opcode].can_retarget_rsrc)
> +		return 0;
> +	if (!(*io_op_defs[req->opcode].can_retarget_rsrc)(req))
> +		return 0;

nit: there should be no need to explicitly deref the function pointer:

if (!io_op_defs[req->opcode].can_retarget_rsrc(req)) ...
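
Or, hoisting the table lookup as well (just a sketch):

	const struct io_op_def *def = &io_op_defs[req->opcode];

	if (!def->can_retarget_rsrc || !def->can_retarget_rsrc(req))
		return 0;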

> +
> +	io_rsrc_put_node(req->rsrc_node, 1);
> +	req->rsrc_node = ctx->rsrc_node;
> +	return 1;
> +}
> +
> +static unsigned int io_rsrc_retarget_table(struct io_ring_ctx *ctx,
> +				   struct io_hash_table *table)
> +{
> +	unsigned int nr_buckets = 1U << table->hash_bits;
> +	unsigned int refs = 0;
> +	struct io_kiocb *req;
> +	int i;
> +
> +	for (i = 0; i < nr_buckets; i++) {
> +		struct io_hash_bucket *hb = &table->hbs[i];
> +
> +		spin_lock(&hb->lock);
> +		hlist_for_each_entry(req, &hb->list, hash_node)
> +			refs += io_rsrc_retarget_req(ctx, req);
> +		spin_unlock(&hb->lock);
> +	}
> +	return refs;
> +}
> +
> +static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
> +	__must_hold(&ctx->uring_lock)
> +{
> +	percpu_ref_get(&ctx->refs);
> +	mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 * HZ);
> +	ctx->rsrc_retarget_scheduled = true;
> +}
> +
> +static void __io_rsrc_retarget_work(struct io_ring_ctx *ctx)
> +	__must_hold(&ctx->uring_lock)
> +{
> +	struct io_rsrc_node *node;
> +	unsigned int refs;
> +	bool any_waiting;
> +
> +	if (!ctx->rsrc_node)
> +		return;
> +
> +	spin_lock_irq(&ctx->rsrc_ref_lock);
> +	any_waiting = false;
> +	list_for_each_entry(node, &ctx->rsrc_ref_list, node) {
> +		if (!node->done) {
> +			any_waiting = true;
> +			break;
> +		}
> +	}
> +	spin_unlock_irq(&ctx->rsrc_ref_lock);
> +
> +	if (!any_waiting)
> +		return;
> +
> +	refs = io_rsrc_retarget_table(ctx, &ctx->cancel_table);
> +	refs += io_rsrc_retarget_table(ctx, &ctx->cancel_table_locked);
> +
> +	ctx->rsrc_cached_refs -= refs;
> +	while (unlikely(ctx->rsrc_cached_refs < 0))
> +		io_rsrc_refs_refill(ctx);

We can charge ->rsrc_cached_refs after setting up nodes in prep / submission
without underflowing the actual refs, because we know those requests are not
yet submitted and so won't consume refs. This one looks more troublesome.
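
For reference, the batched ref scheme being consumed here looks roughly
like this (paraphrased from rsrc.c/rsrc.h of this era; the constant is
approximate):

#define IO_RSRC_REF_BATCH 100

void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
	__must_hold(&ctx->uring_lock)
{
	ctx->rsrc_cached_refs += IO_RSRC_REF_BATCH;
	percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH);
}

At prep/submission time the cache can be charged lazily since unsubmitted
requests cannot drop refs yet; the retargeted requests here are already
live, so a deficit has to be backed by real percpu gets immediately, as
the while loop above does.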


> +}
> +
> +void io_rsrc_retarget_work(struct work_struct *work)
> +{
> +	struct io_ring_ctx *ctx;
> +
> +	ctx = container_of(work, struct io_ring_ctx, rsrc_retarget_work.work);
> +
> +	mutex_lock(&ctx->uring_lock);
> +	ctx->rsrc_retarget_scheduled = false;
> +	__io_rsrc_retarget_work(ctx);
> +	mutex_unlock(&ctx->uring_lock);
> +	percpu_ref_put(&ctx->refs);
> +}
> +
>   void io_wait_rsrc_data(struct io_rsrc_data *data)
>   {
>   	if (data && !atomic_dec_and_test(&data->refs))
> @@ -285,6 +375,8 @@ void io_rsrc_node_switch(struct io_ring_ctx *ctx,
>   		atomic_inc(&data_to_kill->refs);
>   		percpu_ref_kill(&rsrc_node->refs);
>   		ctx->rsrc_node = NULL;
> +		if (!ctx->rsrc_retarget_scheduled)
> +			io_rsrc_retarget_schedule(ctx);
>   	}
>   
>   	if (!ctx->rsrc_node) {
> diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
> index 81445a477622..2b94df8fd9e8 100644
> --- a/io_uring/rsrc.h
> +++ b/io_uring/rsrc.h
> @@ -54,6 +54,7 @@ struct io_mapped_ubuf {
>   };
>   
>   void io_rsrc_put_work(struct work_struct *work);
> +void io_rsrc_retarget_work(struct work_struct *work);
>   void io_rsrc_refs_refill(struct io_ring_ctx *ctx);
>   void io_wait_rsrc_data(struct io_rsrc_data *data);
>   void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);

-- 
Pavel Begunkov
