From: Joanne Koong <joannelkoong@gmail.com>
To: axboe@kernel.dk, io-uring@vger.kernel.org
Cc: csander@purestorage.com, bernd@bsbernd.com, hch@infradead.org,
	asml.silence@gmail.com
Subject: [PATCH v2 1/9] io_uring/memmap: chunk allocations in io_region_allocate_pages()
Date: Tue, 17 Feb 2026 18:51:59 -0800
Message-ID: <20260218025207.1425553-2-joannelkoong@gmail.com>
In-Reply-To: <20260218025207.1425553-1-joannelkoong@gmail.com>

Currently, io_region_allocate_pages() tries a single compound allocation
for the entire region, and falls back to alloc_pages_bulk_node() if that
fails.

When allocating a large region, a single compound allocation for the
entire region is often unrealistic, while allocating page by page is
inefficient and leads to worse TLB performance.

Rework io_region_allocate_pages() to allocate memory in 2MB chunks,
attempting a compound allocation for each chunk.
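
In outline, the allocation loop becomes the following (a simplified
sketch of the diff below; the unwinding done when the bulk fallback only
partially succeeds is omitted):

  chunk_nr_pages = SZ_2M >> PAGE_SHIFT;
  while (pages_left) {
          unsigned int nr_pages = min(pages_left, chunk_nr_pages);

          /* try a single compound allocation covering this chunk */
          if (io_mem_alloc_compound(cur_pages, nr_pages,
                                    nr_pages << PAGE_SHIFT, gfp)) {
                  mr->flags |= IO_REGION_F_COMPOUND_PAGES;
          } else {
                  /* otherwise fill the chunk with order-0 pages */
                  alloc_pages_bulk_node(gfp, NUMA_NO_NODE, nr_pages,
                                        cur_pages);
          }
          cur_pages += nr_pages;
          pages_left -= nr_pages;
  }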

Replace IO_REGION_F_SINGLE_REF with IO_REGION_F_COMPOUND_PAGES to
reflect that the page array may contain tail pages from multiple
compound allocations.
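
For example, assuming 4K pages, a 5MB region for which both 2MB chunks
come from compound allocations and the remaining 1MB falls back to the
bulk allocator ends up with a page array laid out as:

  pages[0]            head of the first 2MB compound allocation (ref'ed)
  pages[1..511]       its tail pages (no reference held)
  pages[512]          head of the second 2MB compound allocation (ref'ed)
  pages[513..1023]    its tail pages (no reference held)
  pages[1024..1279]   order-0 pages from the bulk fallback (one ref each)

When IO_REGION_F_COMPOUND_PAGES is set, freeing walks the array head by
head: put_page() the head (or order-0 page), then skip compound_nr()
entries.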

Currently, alloc_pages_bulk_node() fails when the GFP_KERNEL_ACCOUNT gfp
flag is set. Until that is fixed, this change is needed in order to use
kernel-managed ring buffers, which allocate large regions.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 io_uring/memmap.c | 87 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 64 insertions(+), 23 deletions(-)

diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 89f56609e50a..6e91960aa8fc 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -15,6 +15,28 @@
 #include "rsrc.h"
 #include "zcrx.h"
 
+static void release_compound_pages(struct page **pages, unsigned long nr_pages)
+{
+	struct page *page;
+	unsigned int nr, i = 0;
+
+	while (nr_pages) {
+		page = pages[i];
+
+		if (!page || WARN_ON_ONCE(page != compound_head(page)))
+			return;
+
+		nr = compound_nr(page);
+		put_page(page);
+
+		if (nr >= nr_pages)
+			return;
+
+		i += nr;
+		nr_pages -= nr;
+	}
+}
+
 static bool io_mem_alloc_compound(struct page **pages, int nr_pages,
 				  size_t size, gfp_t gfp)
 {
@@ -84,22 +106,19 @@ enum {
 	IO_REGION_F_VMAP			= 1,
 	/* memory is provided by user and pinned by the kernel */
 	IO_REGION_F_USER_PROVIDED		= 2,
-	/* only the first page in the array is ref'ed */
-	IO_REGION_F_SINGLE_REF			= 4,
+	/* memory may contain tail pages from compound allocations */
+	IO_REGION_F_COMPOUND_PAGES		= 4,
 };
 
 void io_free_region(struct user_struct *user, struct io_mapped_region *mr)
 {
 	if (mr->pages) {
-		long nr_refs = mr->nr_pages;
-
-		if (mr->flags & IO_REGION_F_SINGLE_REF)
-			nr_refs = 1;
-
 		if (mr->flags & IO_REGION_F_USER_PROVIDED)
-			unpin_user_pages(mr->pages, nr_refs);
+			unpin_user_pages(mr->pages, mr->nr_pages);
+		else if (mr->flags & IO_REGION_F_COMPOUND_PAGES)
+			release_compound_pages(mr->pages, mr->nr_pages);
 		else
-			release_pages(mr->pages, nr_refs);
+			release_pages(mr->pages, mr->nr_pages);
 
 		kvfree(mr->pages);
 	}
@@ -154,28 +173,50 @@ static int io_region_allocate_pages(struct io_mapped_region *mr,
 				    unsigned long mmap_offset)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
-	size_t size = io_region_size(mr);
 	unsigned long nr_allocated;
-	struct page **pages;
+	struct page **pages, **cur_pages;
+	unsigned int chunk_size, chunk_nr_pages;
+	unsigned int pages_left;
 
 	pages = kvmalloc_array(mr->nr_pages, sizeof(*pages), gfp);
 	if (!pages)
 		return -ENOMEM;
 
-	if (io_mem_alloc_compound(pages, mr->nr_pages, size, gfp)) {
-		mr->flags |= IO_REGION_F_SINGLE_REF;
-		goto done;
-	}
+	chunk_size = SZ_2M;
+	chunk_nr_pages = chunk_size >> PAGE_SHIFT;
+	pages_left = mr->nr_pages;
+	cur_pages = pages;
+
+	while (pages_left) {
+		unsigned int nr_pages = min(pages_left,
+					    chunk_nr_pages);
+
+		if (io_mem_alloc_compound(cur_pages, nr_pages,
+					  nr_pages << PAGE_SHIFT, gfp)) {
+			mr->flags |= IO_REGION_F_COMPOUND_PAGES;
+			cur_pages += nr_pages;
+			pages_left -= nr_pages;
+			continue;
+		}
 
-	nr_allocated = alloc_pages_bulk_node(gfp, NUMA_NO_NODE,
-					     mr->nr_pages, pages);
-	if (nr_allocated != mr->nr_pages) {
-		if (nr_allocated)
-			release_pages(pages, nr_allocated);
-		kvfree(pages);
-		return -ENOMEM;
+		nr_allocated = alloc_pages_bulk_node(gfp, NUMA_NO_NODE,
+						     nr_pages, cur_pages);
+		if (nr_allocated != nr_pages) {
+			unsigned int total =
+				(cur_pages - pages) + nr_allocated;
+
+			if (mr->flags & IO_REGION_F_COMPOUND_PAGES)
+				release_compound_pages(pages, total);
+			else
+				release_pages(pages, total);
+			kvfree(pages);
+			return -ENOMEM;
+		}
+
+		cur_pages += nr_pages;
+		pages_left -= nr_pages;
 	}
-done:
+
 	reg->mmap_offset = mmap_offset;
 	mr->pages = pages;
 	return 0;
-- 
2.47.3


Thread overview: 10+ messages
2026-02-18  2:51 [PATCH v2 0/9] io_uring: add kernel-managed buffer rings Joanne Koong
2026-02-18  2:51 ` Joanne Koong [this message]
2026-02-18  2:52 ` [PATCH v2 2/9] io_uring/kbuf: add support for " Joanne Koong
2026-02-18  2:52 ` [PATCH v2 3/9] io_uring/kbuf: support kernel-managed buffer rings in buffer selection Joanne Koong
2026-02-18  2:52 ` [PATCH v2 4/9] io_uring/kbuf: add buffer ring pinning/unpinning Joanne Koong
2026-02-18  2:52 ` [PATCH v2 5/9] io_uring/kbuf: return buffer id in buffer selection Joanne Koong
2026-02-18  2:52 ` [PATCH v2 6/9] io_uring/kbuf: add recycling for kernel managed buffer rings Joanne Koong
2026-02-18  2:52 ` [PATCH v2 7/9] io_uring/kbuf: add io_uring_is_kmbuf_ring() Joanne Koong
2026-02-18  2:52 ` [PATCH v2 8/9] io_uring/kbuf: export io_ring_buffer_select() Joanne Koong
2026-02-18  2:52 ` [PATCH v2 9/9] io_uring/cmd: set selected buffer index in __io_uring_cmd_done() Joanne Koong
