From: Keith Busch <[email protected]>
To: <[email protected]>, <[email protected]>,
<[email protected]>, <[email protected]>,
<[email protected]>
Cc: <[email protected]>, <[email protected]>,
Keith Busch <[email protected]>
Subject: [PATCHv8 6/6] io_uring: cache nodes and mapped buffers
Date: Thu, 27 Feb 2025 14:39:16 -0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
From: Keith Busch <[email protected]>
Frequent alloc/free cycles on these is pretty costly. Use an io cache to
more efficiently reuse these buffers.
Signed-off-by: Keith Busch <[email protected]>
---
include/linux/io_uring_types.h | 2 +
io_uring/filetable.c | 2 +-
io_uring/io_uring.c | 2 +
io_uring/rsrc.c | 70 +++++++++++++++++++++++++++-------
io_uring/rsrc.h | 4 +-
5 files changed, 64 insertions(+), 16 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index c0fe8a00fe53a..3ce87dcd99eec 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -294,6 +294,8 @@ struct io_ring_ctx {
struct io_file_table file_table;
struct io_rsrc_data buf_table;
+ struct io_alloc_cache node_cache;
+ struct io_alloc_cache imu_cache;
struct io_submit_state submit_state;
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index dd8eeec97acf6..a21660e3145ab 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (slot_index >= ctx->file_table.data.nr)
return -EINVAL;
- node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node)
return -ENOMEM;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2f5dd47e7dbf5..8f542a5f20a60 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -292,6 +292,7 @@ static void io_free_alloc_caches(struct io_ring_ctx *ctx)
io_alloc_cache_free(&ctx->uring_cache, kfree);
io_alloc_cache_free(&ctx->msg_cache, kfree);
io_futex_cache_free(ctx);
+ io_rsrc_cache_free(ctx);
}
static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
@@ -339,6 +340,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_kiocb), 0);
ret |= io_futex_cache_init(ctx);
+ ret |= io_rsrc_cache_init(ctx);
if (ret)
goto free_ref;
init_completion(&ctx->ref_comp);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 0eceaf2e03777..450b4c039334d 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -33,6 +33,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
#define IORING_MAX_FIXED_FILES (1U << 20)
#define IORING_MAX_REG_BUFFERS (1U << 14)
+#define IO_CACHED_BVECS_SEGS 32
+
int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
{
unsigned long page_limit, cur_pages, new_pages;
@@ -111,6 +113,22 @@ static void io_release_ubuf(void *priv)
unpin_user_page(imu->bvec[i].bv_page);
}
+static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
+ int nr_bvecs)
+{
+ if (nr_bvecs <= IO_CACHED_BVECS_SEGS)
+ return io_cache_alloc(&ctx->imu_cache, GFP_KERNEL);
+ return kvmalloc(struct_size_t(struct io_mapped_ubuf, bvec, nr_bvecs),
+ GFP_KERNEL);
+}
+
+static void io_free_imu(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
+{
+ if (imu->nr_bvecs > IO_CACHED_BVECS_SEGS ||
+ !io_alloc_cache_put(&ctx->imu_cache, imu))
+ kvfree(imu);
+}
+
static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
{
if (!refcount_dec_and_test(&imu->refs))
@@ -119,22 +137,44 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
if (imu->acct_pages)
io_unaccount_mem(ctx, imu->acct_pages);
imu->release(imu->priv);
- kvfree(imu);
}
-struct io_rsrc_node *io_rsrc_node_alloc(int type)
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
{
struct io_rsrc_node *node;
- node = kzalloc(sizeof(*node), GFP_KERNEL);
+ node = io_cache_alloc(&ctx->node_cache, GFP_KERNEL);
if (node) {
node->type = type;
node->refs = 1;
+ node->tag = 0;
+ node->file_ptr = 0;
}
return node;
}
-__cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
+bool io_rsrc_cache_init(struct io_ring_ctx *ctx)
+{
+ const int imu_cache_size = struct_size_t(struct io_mapped_ubuf, bvec,
+ IO_CACHED_BVECS_SEGS);
+ const int node_size = sizeof(struct io_rsrc_node);
+ bool ret;
+
+ ret = io_alloc_cache_init(&ctx->node_cache, IO_ALLOC_CACHE_MAX,
+ node_size, 0);
+ ret |= io_alloc_cache_init(&ctx->imu_cache, IO_ALLOC_CACHE_MAX,
+ imu_cache_size, 0);
+ return ret;
+}
+
+void io_rsrc_cache_free(struct io_ring_ctx *ctx)
+{
+ io_alloc_cache_free(&ctx->node_cache, kfree);
+ io_alloc_cache_free(&ctx->imu_cache, kfree);
+}
+
+__cold void io_rsrc_data_free(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *data)
{
if (!data->nr)
return;
@@ -207,7 +247,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node) {
err = -ENOMEM;
fput(file);
@@ -465,7 +505,8 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
break;
}
- kfree(node);
+ if (!io_alloc_cache_put(&ctx->node_cache, node))
+ kvfree(node);
}
int io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -527,7 +568,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto fail;
}
ret = -ENOMEM;
- node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node) {
fput(file);
goto fail;
@@ -732,7 +773,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
if (!iov->iov_base)
return NULL;
- node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!node)
return ERR_PTR(-ENOMEM);
node->buf = NULL;
@@ -752,10 +793,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
coalesced = io_coalesce_buffer(&pages, &nr_pages, &data);
}
- imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+ imu = io_alloc_imu(ctx, nr_pages);
if (!imu)
goto done;
+ imu->nr_bvecs = nr_pages;
ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
if (ret) {
unpin_user_pages(pages, nr_pages);
@@ -766,7 +808,6 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->len = iov->iov_len;
- imu->nr_bvecs = nr_pages;
imu->folio_shift = PAGE_SHIFT;
imu->release = io_release_ubuf;
imu->priv = imu;
@@ -789,7 +830,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
}
done:
if (ret) {
- kvfree(imu);
+ if (imu)
+ io_free_imu(ctx, imu);
if (node)
io_put_rsrc_node(ctx, node);
node = ERR_PTR(ret);
@@ -893,14 +935,14 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
goto unlock;
}
- node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!node) {
ret = -ENOMEM;
goto unlock;
}
nr_bvecs = blk_rq_nr_phys_segments(rq);
- imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
+ imu = io_alloc_imu(ctx, nr_bvecs);
if (!imu) {
kfree(node);
ret = -ENOMEM;
@@ -1137,7 +1179,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (!src_node) {
dst_node = NULL;
} else {
- dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+ dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) {
ret = -ENOMEM;
goto out_free;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 7600e2736eeb3..27e545694d01e 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -48,7 +48,9 @@ struct io_imu_folio_data {
unsigned int nr_folios;
};
-struct io_rsrc_node *io_rsrc_node_alloc(int type);
+bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
+void io_rsrc_cache_free(struct io_ring_ctx *ctx);
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
--
2.43.5
next prev parent reply other threads:[~2025-02-27 22:39 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-27 22:39 [PATCHv8 0/6] ublk zero copy support Keith Busch
2025-02-27 22:39 ` [PATCHv8 1/6] io_uring/rw: move buffer_select outside generic prep Keith Busch
2025-02-28 8:10 ` Ming Lei
2025-02-27 22:39 ` [PATCHv8 2/6] io_uring/rw: move fixed buffer import to issue path Keith Busch
2025-02-28 8:11 ` Ming Lei
2025-02-27 22:39 ` [PATCHv8 3/6] nvme: map uring_cmd data even if address is 0 Keith Busch
2025-02-27 22:39 ` [PATCHv8 4/6] io_uring: add support for kernel registered bvecs Keith Busch
2025-02-28 8:13 ` Ming Lei
2025-02-27 22:39 ` [PATCHv8 5/6] ublk: zc register/unregister bvec Keith Busch
2025-02-28 8:14 ` Ming Lei
2025-02-27 22:39 ` Keith Busch [this message]
2025-02-27 23:50 ` [PATCHv8 6/6] io_uring: cache nodes and mapped buffers Jens Axboe
2025-02-28 14:06 ` [PATCHv8 0/6] ublk zero copy support Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox