public inbox for [email protected]
From: Jens Axboe <[email protected]>
To: [email protected]
Cc: [email protected], Jens Axboe <[email protected]>
Subject: [PATCH 07/16] io_uring: get rid of hashed provided buffer groups
Date: Sun,  1 May 2022 14:56:44 -0600
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

Use a plain array for any group ID below 64, and punt anything beyond
that to an xarray. 64 entries fit in a page even for 4KB page sizes,
and that still holds with the planned additions to the structure.
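
As a quick size sanity check (a sketch assuming a 64-bit kernel, not
part of the patch itself): struct io_buffer_list is now a 16-byte
list_head plus a __u16 bgid, padded to 24 bytes, so the 64-entry array
needs 1536 bytes; even if the planned additions grow each entry to 64
bytes, the array still exactly fills a 4KB page. The BUILD_BUG_ON added
at the end of this patch enforces that bound at compile time:

	BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE);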

This makes lookups for the expected (low) group IDs faster by avoiding
the hash computation and list walk to find our list, and it uses less
memory upfront by not allocating any memory for provided buffers unless
they are actually used.
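
From the application side nothing changes, but group IDs below 64 now
hit the array fast path. A minimal liburing sketch (assumes an already
set-up ring, a contiguous buffer region bufs, and a socket sockfd; the
IDs are made up for illustration):

	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

	/* Provide 8 buffers of 4KB each in group 0; group IDs < 64 are
	 * served from the flat array rather than the xarray.
	 */
	io_uring_prep_provide_buffers(sqe, bufs, 4096, 8, 0, 0);
	io_uring_submit(&ring);

	/* Later: a recv that selects a buffer from group 0 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_recv(sqe, sockfd, NULL, 4096, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = 0;
	io_uring_submit(&ring);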

Suggested-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 97 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index eba18685a705..7efe2de5ce81 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -283,7 +283,6 @@ struct io_rsrc_data {
 };
 
 struct io_buffer_list {
-	struct list_head list;
 	struct list_head buf_list;
 	__u16 bgid;
 };
@@ -358,7 +357,7 @@ struct io_ev_fd {
 	struct rcu_head		rcu;
 };
 
-#define IO_BUFFERS_HASH_BITS	5
+#define BGID_ARRAY	64
 
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
@@ -414,7 +413,8 @@ struct io_ring_ctx {
 		struct list_head	timeout_list;
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
-		struct list_head	*io_buffers;
+		struct io_buffer_list	*io_bl;
+		struct xarray		io_bl_xa;
 		struct list_head	io_buffers_cache;
 		struct list_head	apoll_cache;
 		struct xarray		personalities;
@@ -1613,15 +1613,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 						 unsigned int bgid)
 {
-	struct list_head *hash_list;
-	struct io_buffer_list *bl;
-
-	hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
-	list_for_each_entry(bl, hash_list, list)
-		if (bl->bgid == bgid || bgid == -1U)
-			return bl;
+	if (ctx->io_bl && bgid < BGID_ARRAY)
+		return &ctx->io_bl[bgid];
 
-	return NULL;
+	return xa_load(&ctx->io_bl_xa, bgid);
 }
 
 static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
@@ -1727,12 +1722,14 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
-	int i, hash_bits;
+	int hash_bits;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return NULL;
 
+	xa_init(&ctx->io_bl_xa);
+
 	/*
 	 * Use 5 bits less than the max cq entries, that should give us around
 	 * 32 entries per hash list if totally full and uniformly spread.
@@ -1754,13 +1751,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	/* set invalid range, so io_import_fixed() fails meeting it */
 	ctx->dummy_ubuf->ubuf = -1UL;
 
-	ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
-					sizeof(struct list_head), GFP_KERNEL);
-	if (!ctx->io_buffers)
-		goto err;
-	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
-		INIT_LIST_HEAD(&ctx->io_buffers[i]);
-
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
@@ -1796,7 +1786,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 err:
 	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
-	kfree(ctx->io_buffers);
+	kfree(ctx->io_bl);
+	xa_destroy(&ctx->io_bl_xa);
 	kfree(ctx);
 	return NULL;
 }
@@ -3560,15 +3551,14 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
 	return __io_import_fixed(req, rw, iter, imu);
 }
 
-static void io_buffer_add_list(struct io_ring_ctx *ctx,
-			       struct io_buffer_list *bl, unsigned int bgid)
+static int io_buffer_add_list(struct io_ring_ctx *ctx,
+			      struct io_buffer_list *bl, unsigned int bgid)
 {
-	struct list_head *list;
-
-	list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
-	INIT_LIST_HEAD(&bl->buf_list);
 	bl->bgid = bgid;
-	list_add(&bl->list, list);
+	if (bgid < BGID_ARRAY)
+		return 0;
+
+	return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
 }
 
 static void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
@@ -5318,6 +5308,23 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 	return i ? 0 : -ENOMEM;
 }
 
+static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
+{
+	int i;
+
+	ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list),
+				GFP_KERNEL);
+	if (!ctx->io_bl)
+		return -ENOMEM;
+
+	for (i = 0; i < BGID_ARRAY; i++) {
+		INIT_LIST_HEAD(&ctx->io_bl[i].buf_list);
+		ctx->io_bl[i].bgid = i;
+	}
+
+	return 0;
+}
+
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
@@ -5327,6 +5334,12 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	io_ring_submit_lock(ctx, issue_flags);
 
+	if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
+		ret = io_init_bl_list(ctx);
+		if (ret)
+			goto err;
+	}
+
 	bl = io_buffer_get_list(ctx, p->bgid);
 	if (unlikely(!bl)) {
 		bl = kmalloc(sizeof(*bl), GFP_KERNEL);
@@ -5334,7 +5347,11 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 			ret = -ENOMEM;
 			goto err;
 		}
-		io_buffer_add_list(ctx, bl, p->bgid);
+		ret = io_buffer_add_list(ctx, bl, p->bgid);
+		if (ret) {
+			kfree(bl);
+			goto err;
+		}
 	}
 
 	ret = io_add_buffers(ctx, p, bl);
@@ -10437,19 +10454,19 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
+	struct io_buffer_list *bl;
+	unsigned long index;
 	int i;
 
-	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
-		struct list_head *list = &ctx->io_buffers[i];
-
-		while (!list_empty(list)) {
-			struct io_buffer_list *bl;
+	for (i = 0; i < BGID_ARRAY; i++) {
+		if (!ctx->io_bl)
+			break;
+		__io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
+	}
 
-			bl = list_first_entry(list, struct io_buffer_list, list);
-			__io_remove_buffers(ctx, bl, -1U);
-			list_del(&bl->list);
-			kfree(bl);
-		}
+	xa_for_each(&ctx->io_bl_xa, index, bl) {
+		xa_erase(&ctx->io_bl_xa, bl->bgid);
+		__io_remove_buffers(ctx, bl, -1U);
 	}
 
 	while (!list_empty(&ctx->io_buffers_pages)) {
@@ -10558,7 +10575,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		io_wq_put_hash(ctx->hash_map);
 	kfree(ctx->cancel_hash);
 	kfree(ctx->dummy_ubuf);
-	kfree(ctx->io_buffers);
+	kfree(ctx->io_bl);
+	xa_destroy(&ctx->io_bl_xa);
 	kfree(ctx);
 }
 
@@ -12467,6 +12485,7 @@ static int __init io_uring_init(void)
 
 	/* ->buf_index is u16 */
 	BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+	BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE);
 
 	/* should fit into one byte */
 	BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
-- 
2.35.1
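
A note on the xarray error handling in io_buffer_add_list() above,
since the idiom is easy to misread: xa_store() returns the previous
entry on success and an error-encoded pointer on failure, and xa_err()
maps that to 0 or a negative errno. A standalone sketch of the same
idiom (the function name is hypothetical):

	static int stash_entry(struct xarray *xa, unsigned long id, void *obj)
	{
		/* xa_store() can fail with e.g. -ENOMEM; xa_err() turns
		 * its return value into 0 on success or -errno on error,
		 * so the result can be propagated directly.
		 */
		return xa_err(xa_store(xa, id, obj, GFP_KERNEL));
	}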



Thread overview: 23+ messages
2022-05-01 20:56 [PATCHSET v4 0/16] Add support for ring mapped provided buffers Jens Axboe
2022-05-01 20:56 ` [PATCH 01/16] io_uring: kill io_recv_buffer_select() wrapper Jens Axboe
2022-05-01 20:56 ` [PATCH 02/16] io_uring: use 'sr' vs 'req->sr_msg' consistently Jens Axboe
2022-05-01 20:56 ` [PATCH 03/16] io_uring: make io_buffer_select() return the user address directly Jens Axboe
2022-05-09 12:06   ` Dylan Yudaken
2022-05-09 12:12     ` Dylan Yudaken
2022-05-09 12:28       ` Jens Axboe
2022-05-09 12:43         ` Dylan Yudaken
2022-05-09 12:46           ` Jens Axboe
2022-05-09 12:21     ` Jens Axboe
2022-05-01 20:56 ` [PATCH 04/16] io_uring: kill io_rw_buffer_select() wrapper Jens Axboe
2022-05-01 20:56 ` [PATCH 05/16] io_uring: ignore ->buf_index if REQ_F_BUFFER_SELECT isn't set Jens Axboe
2022-05-01 20:56 ` [PATCH 06/16] io_uring: always use req->buf_index for the provided buffer group Jens Axboe
2022-05-01 20:56 ` Jens Axboe [this message]
2022-05-01 20:56 ` [PATCH 08/16] io_uring: never call io_buffer_select() for a buffer re-select Jens Axboe
2022-05-01 20:56 ` [PATCH 09/16] io_uring: abstract out provided buffer list selection Jens Axboe
2022-05-01 20:56 ` [PATCH 10/16] io_uring: move provided and fixed buffers into the same io_kiocb area Jens Axboe
2022-05-01 20:56 ` [PATCH 11/16] io_uring: move provided buffer state closer to submit state Jens Axboe
2022-05-01 20:56 ` [PATCH 12/16] io_uring: eliminate the need to track provided buffer ID separately Jens Axboe
2022-05-01 20:56 ` [PATCH 13/16] io_uring: don't clear req->kbuf when buffer selection is done Jens Axboe
2022-05-01 20:56 ` [PATCH 14/16] io_uring: add buffer selection support to IORING_OP_NOP Jens Axboe
2022-05-01 20:56 ` [PATCH 15/16] io_uring: add io_pin_pages() helper Jens Axboe
2022-05-01 20:56 ` [PATCH 16/16] io_uring: add support for ring mapped supplied buffers Jens Axboe
