From: Dylan Yudaken <[email protected]>
To: Jens Axboe <[email protected]>, Pavel Begunkov <[email protected]>
Cc: <[email protected]>, <[email protected]>,
	Dylan Yudaken <[email protected]>
Subject: [PATCH for-next 01/12] io_uring: infrastructure for retargeting rsrc nodes
Date: Mon, 31 Oct 2022 06:41:15 -0700
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

rsrc node cleanup can be delayed indefinitely when there are long lived
requests. For example, if a file is located in the same rsrc node as a
long lived socket with a multishot poll, then even after the file has
been unregistered it will not be closed while the poll request is still
active.
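
As a rough illustration, a liburing-style sketch of the scenario (the
setup of ring/sock_fd/file_fd is assumed, error handling omitted):

	int fds[2] = { sock_fd, file_fd };	/* both land in one rsrc node */
	struct io_uring_sqe *sqe;

	io_uring_register_files(&ring, fds, 2);

	/* long lived multishot poll on the socket pins the current node */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_multishot(sqe, sock_fd, POLLIN);
	io_uring_submit(&ring);
	close(file_fd);

	/*
	 * This kills the old node, but file_fd's file is not released
	 * until the poll request completes, as the request still holds
	 * a reference to the node it was submitted against.
	 */
	io_uring_unregister_files(&ring);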

Introduce a timer when the rsrc node is switched, so that long lived
requests can periodically be retargeted to the newest node. That allows
the old nodes to be cleaned up, freeing their resources.
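
Per-opcode handlers are wired up later in this series. As a rough
sketch only (helper and field names assumed here; the fixed file
peeking helper is introduced in a later patch), a handler for an op
that may hold a fixed file might look like:

	static bool io_poll_can_retarget_rsrc(struct io_kiocb *req)
	{
		if (req->flags & REQ_F_FIXED_FILE) {
			struct file *file;

			/*
			 * Only allow retargeting if the same file is
			 * still present at the original slot in the
			 * current table.
			 */
			file = io_file_peek_fixed(req, req->cqe.fd);
			if (file != req->file)
				return false;
		}
		return true;
	}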

Signed-off-by: Dylan Yudaken <[email protected]>
---
 include/linux/io_uring_types.h |  2 +
 io_uring/io_uring.c            |  1 +
 io_uring/opdef.h               |  1 +
 io_uring/rsrc.c                | 92 ++++++++++++++++++++++++++++++++++
 io_uring/rsrc.h                |  1 +
 5 files changed, 97 insertions(+)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f5b687a787a3..1d4eff4e632c 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -327,6 +327,8 @@ struct io_ring_ctx {
 	struct llist_head		rsrc_put_llist;
 	struct list_head		rsrc_ref_list;
 	spinlock_t			rsrc_ref_lock;
+	struct delayed_work		rsrc_retarget_work;
+	bool				rsrc_retarget_scheduled;
 
 	struct list_head		io_buffers_pages;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6cc16e39b27f..ea2260359c56 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -320,6 +320,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	spin_lock_init(&ctx->rsrc_ref_lock);
 	INIT_LIST_HEAD(&ctx->rsrc_ref_list);
 	INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
+	INIT_DELAYED_WORK(&ctx->rsrc_retarget_work, io_rsrc_retarget_work);
 	init_llist_head(&ctx->rsrc_put_llist);
 	init_llist_head(&ctx->work_llist);
 	INIT_LIST_HEAD(&ctx->tctx_list);
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index 3efe06d25473..1b72b14cb5ab 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -37,6 +37,7 @@ struct io_op_def {
 	int (*prep_async)(struct io_kiocb *);
 	void (*cleanup)(struct io_kiocb *);
 	void (*fail)(struct io_kiocb *);
+	bool (*can_retarget_rsrc)(struct io_kiocb *);
 };
 
 extern const struct io_op_def io_op_defs[];
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 55d4ab96fb92..106210e0d5d5 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -15,6 +15,7 @@
 #include "io_uring.h"
 #include "openclose.h"
 #include "rsrc.h"
+#include "opdef.h"
 
 struct io_rsrc_update {
 	struct file			*file;
@@ -204,6 +205,95 @@ void io_rsrc_put_work(struct work_struct *work)
 	}
 }
 
+
+static unsigned int io_rsrc_retarget_req(struct io_ring_ctx *ctx,
+					 struct io_kiocb *req)
+	__must_hold(&ctx->uring_lock)
+{
+	if (!req->rsrc_node ||
+	     req->rsrc_node == ctx->rsrc_node)
+		return 0;
+	if (!io_op_defs[req->opcode].can_retarget_rsrc)
+		return 0;
+	if (!(*io_op_defs[req->opcode].can_retarget_rsrc)(req))
+		return 0;
+
+	io_rsrc_put_node(req->rsrc_node, 1);
+	req->rsrc_node = ctx->rsrc_node;
+	return 1;
+}
+
+static unsigned int io_rsrc_retarget_table(struct io_ring_ctx *ctx,
+				   struct io_hash_table *table)
+{
+	unsigned int nr_buckets = 1U << table->hash_bits;
+	unsigned int refs = 0;
+	struct io_kiocb *req;
+	int i;
+
+	for (i = 0; i < nr_buckets; i++) {
+		struct io_hash_bucket *hb = &table->hbs[i];
+
+		spin_lock(&hb->lock);
+		hlist_for_each_entry(req, &hb->list, hash_node)
+			refs += io_rsrc_retarget_req(ctx, req);
+		spin_unlock(&hb->lock);
+	}
+	return refs;
+}
+
+static void io_rsrc_retarget_schedule(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	percpu_ref_get(&ctx->refs);
+	mod_delayed_work(system_wq, &ctx->rsrc_retarget_work, 60 * HZ);
+	ctx->rsrc_retarget_scheduled = true;
+}
+
+static void __io_rsrc_retarget_work(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	struct io_rsrc_node *node;
+	unsigned int refs;
+	bool any_waiting;
+
+	if (!ctx->rsrc_node)
+		return;
+
+	spin_lock_irq(&ctx->rsrc_ref_lock);
+	any_waiting = false;
+	list_for_each_entry(node, &ctx->rsrc_ref_list, node) {
+		if (!node->done) {
+			any_waiting = true;
+			break;
+		}
+	}
+	spin_unlock_irq(&ctx->rsrc_ref_lock);
+
+	if (!any_waiting)
+		return;
+
+	refs = io_rsrc_retarget_table(ctx, &ctx->cancel_table);
+	refs += io_rsrc_retarget_table(ctx, &ctx->cancel_table_locked);
+
+	ctx->rsrc_cached_refs -= refs;
+	while (unlikely(ctx->rsrc_cached_refs < 0))
+		io_rsrc_refs_refill(ctx);
+}
+
+void io_rsrc_retarget_work(struct work_struct *work)
+{
+	struct io_ring_ctx *ctx;
+
+	ctx = container_of(work, struct io_ring_ctx, rsrc_retarget_work.work);
+
+	mutex_lock(&ctx->uring_lock);
+	ctx->rsrc_retarget_scheduled = false;
+	__io_rsrc_retarget_work(ctx);
+	mutex_unlock(&ctx->uring_lock);
+	percpu_ref_put(&ctx->refs);
+}
+
 void io_wait_rsrc_data(struct io_rsrc_data *data)
 {
 	if (data && !atomic_dec_and_test(&data->refs))
@@ -285,6 +375,8 @@ void io_rsrc_node_switch(struct io_ring_ctx *ctx,
 		atomic_inc(&data_to_kill->refs);
 		percpu_ref_kill(&rsrc_node->refs);
 		ctx->rsrc_node = NULL;
+		if (!ctx->rsrc_retarget_scheduled)
+			io_rsrc_retarget_schedule(ctx);
 	}
 
 	if (!ctx->rsrc_node) {
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 81445a477622..2b94df8fd9e8 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -54,6 +54,7 @@ struct io_mapped_ubuf {
 };
 
 void io_rsrc_put_work(struct work_struct *work);
+void io_rsrc_retarget_work(struct work_struct *work);
 void io_rsrc_refs_refill(struct io_ring_ctx *ctx);
 void io_wait_rsrc_data(struct io_rsrc_data *data);
 void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);
-- 
2.30.2


Thread overview: 30+ messages
2022-10-31 13:41 [PATCH for-next 00/12] io_uring: retarget rsrc nodes periodically Dylan Yudaken
2022-10-31 13:41 ` Dylan Yudaken [this message]
2022-10-31 16:02   ` [PATCH for-next 01/12] io_uring: infrastructure for retargeting rsrc nodes Jens Axboe
2022-10-31 16:45     ` Dylan Yudaken
2022-11-02 11:20   ` Pavel Begunkov
2022-10-31 13:41 ` [PATCH for-next 02/12] io_uring: io-wq helper to iterate all work Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 03/12] io_uring: support retargeting rsrc on requests in the io-wq Dylan Yudaken
2022-10-31 18:19   ` Jens Axboe
2022-10-31 13:41 ` [PATCH for-next 04/12] io_uring: reschedule retargeting at shutdown of ring Dylan Yudaken
2022-10-31 16:02   ` Jens Axboe
2022-10-31 16:44     ` Dylan Yudaken
2022-10-31 19:13       ` Jens Axboe
2022-11-01 12:09         ` Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 05/12] io_uring: add tracing for io_uring_rsrc_retarget Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 06/12] io_uring: add fixed file peeking function Dylan Yudaken
2022-10-31 16:04   ` Jens Axboe
2022-10-31 16:47     ` Dylan Yudaken
2022-11-02 11:23   ` Pavel Begunkov
2022-10-31 13:41 ` [PATCH for-next 07/12] io_uring: split send_zc specific struct out of io_sr_msg Dylan Yudaken
2022-11-02 11:32   ` Pavel Begunkov
2022-11-02 13:45     ` Jens Axboe
2022-11-02 14:09       ` Pavel Begunkov
2022-11-02 14:12         ` Jens Axboe
2022-10-31 13:41 ` [PATCH for-next 08/12] io_uring: recv/recvmsg retarget_rsrc support Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 09/12] io_uring: accept " Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 10/12] io_uring: read " Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 11/12] io_uring: read_fixed " Dylan Yudaken
2022-10-31 13:41 ` [PATCH for-next 12/12] io_uring: poll_add " Dylan Yudaken
2022-11-02 11:44 ` [PATCH for-next 00/12] io_uring: retarget rsrc nodes periodically Pavel Begunkov
2022-11-02 13:08   ` Dylan Yudaken
