From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected]
Subject: [PATCH v2 14/18] io_uring: use region api for CQ
Date: Sun, 24 Nov 2024 21:12:31 +0000 [thread overview]
Message-ID: <42b3eda88aed4b3542534747cb0ce22744042d98.1732481694.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
Convert internal parts of the CQ/SQ array management to the region API.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 8 +----
io_uring/io_uring.c | 36 +++++++---------------
io_uring/memmap.c | 55 +++++-----------------------------
io_uring/memmap.h | 4 ---
io_uring/register.c | 35 ++++++++++------------
5 files changed, 36 insertions(+), 102 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3f353f269c6e..2db252841509 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -426,14 +426,8 @@ struct io_ring_ctx {
*/
struct mutex mmap_lock;
- /*
- * If IORING_SETUP_NO_MMAP is used, then the below holds
- * the gup'ed pages for the two rings, and the sqes.
- */
- unsigned short n_ring_pages;
- struct page **ring_pages;
-
struct io_mapped_region sq_region;
+ struct io_mapped_region ring_region;
/* used for optimised request parameter and wait argument passing */
struct io_mapped_region param_region;
};
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a1dca7bce54a..b346a1f5f353 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2626,26 +2626,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
}
-static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
- size_t size)
-{
- return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
- size);
-}
-
static void io_rings_free(struct io_ring_ctx *ctx)
{
- if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
- io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
- true);
- } else {
- io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
- ctx->n_ring_pages = 0;
- vunmap(ctx->rings);
- }
-
io_free_region(ctx, &ctx->sq_region);
-
+ io_free_region(ctx, &ctx->ring_region);
ctx->rings = NULL;
ctx->sq_sqes = NULL;
}
@@ -3476,15 +3460,17 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
if (size == SIZE_MAX)
return -EOVERFLOW;
- if (!(ctx->flags & IORING_SETUP_NO_MMAP))
- rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
- else
- rings = io_rings_map(ctx, p->cq_off.user_addr, size);
-
- if (IS_ERR(rings))
- return PTR_ERR(rings);
+ memset(&rd, 0, sizeof(rd));
+ rd.size = PAGE_ALIGN(size);
+ if (ctx->flags & IORING_SETUP_NO_MMAP) {
+ rd.user_addr = p->cq_off.user_addr;
+ rd.flags |= IORING_MEM_REGION_TYPE_USER;
+ }
+ ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING);
+ if (ret)
+ return ret;
+ ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
- ctx->rings = rings;
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
rings->sq_ring_mask = p->sq_entries - 1;
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 0a2d03bd312b..52afe0576be6 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -118,18 +118,6 @@ void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
*npages = 0;
}
-void io_pages_free(struct page ***pages, int npages)
-{
- struct page **page_array = *pages;
-
- if (!page_array)
- return;
-
- unpin_user_pages(page_array, npages);
- kvfree(page_array);
- *pages = NULL;
-}
-
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
{
unsigned long start, end, nr_pages;
@@ -167,34 +155,6 @@ struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
return ERR_PTR(ret);
}
-void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
- unsigned long uaddr, size_t size)
-{
- struct page **page_array;
- unsigned int nr_pages;
- void *page_addr;
-
- *npages = 0;
-
- if (uaddr & (PAGE_SIZE - 1) || !size)
- return ERR_PTR(-EINVAL);
-
- nr_pages = 0;
- page_array = io_pin_pages(uaddr, size, &nr_pages);
- if (IS_ERR(page_array))
- return page_array;
-
- page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
- if (page_addr) {
- *pages = page_array;
- *npages = nr_pages;
- return page_addr;
- }
-
- io_pages_free(&page_array, nr_pages);
- return ERR_PTR(-ENOMEM);
-}
-
enum {
IO_REGION_F_VMAP = 1,
IO_REGION_F_USER_PINNED = 2,
@@ -383,9 +343,10 @@ static void *io_region_validate_mmap(struct io_ring_ctx *ctx,
static int io_region_mmap(struct io_ring_ctx *ctx,
struct io_mapped_region *mr,
- struct vm_area_struct *vma)
+ struct vm_area_struct *vma,
+ unsigned max_pages)
{
- unsigned long nr_pages = mr->nr_pages;
+ unsigned long nr_pages = min(mr->nr_pages, max_pages);
vm_flags_set(vma, VM_DONTEXPAND);
return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
@@ -449,7 +410,7 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
struct io_ring_ctx *ctx = file->private_data;
size_t sz = vma->vm_end - vma->vm_start;
long offset = vma->vm_pgoff << PAGE_SHIFT;
- unsigned int npages;
+ unsigned int page_limit;
void *ptr;
guard(mutex)(&ctx->mmap_lock);
@@ -461,14 +422,14 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
switch (offset & IORING_OFF_MMAP_MASK) {
case IORING_OFF_SQ_RING:
case IORING_OFF_CQ_RING:
- npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
- return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
+ page_limit = (sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return io_region_mmap(ctx, &ctx->ring_region, vma, page_limit);
case IORING_OFF_SQES:
- return io_region_mmap(ctx, &ctx->sq_region, vma);
+ return io_region_mmap(ctx, &ctx->sq_region, vma, UINT_MAX);
case IORING_OFF_PBUF_RING:
return io_pbuf_mmap(file, vma);
case IORING_MAP_OFF_PARAM_REGION:
- return io_region_mmap(ctx, &ctx->param_region, vma);
+ return io_region_mmap(ctx, &ctx->param_region, vma, UINT_MAX);
}
return -EINVAL;
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 2402bca3d700..7395996eb353 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -4,7 +4,6 @@
#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
-void io_pages_free(struct page ***pages, int npages);
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
struct page **pages, int npages);
@@ -13,9 +12,6 @@ void *io_pages_map(struct page ***out_pages, unsigned short *npages,
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
bool put_pages);
-void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
- unsigned long uaddr, size_t size);
-
#ifndef CONFIG_MMU
unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
#endif
diff --git a/io_uring/register.c b/io_uring/register.c
index 44cd64923d31..f1698c18c7cb 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -367,26 +367,19 @@ static int io_register_clock(struct io_ring_ctx *ctx,
* either mapping or freeing.
*/
struct io_ring_ctx_rings {
- unsigned short n_ring_pages;
- struct page **ring_pages;
struct io_rings *rings;
-
struct io_uring_sqe *sq_sqes;
+
struct io_mapped_region sq_region;
+ struct io_mapped_region ring_region;
};
static void io_register_free_rings(struct io_ring_ctx *ctx,
struct io_uring_params *p,
struct io_ring_ctx_rings *r)
{
- if (!(p->flags & IORING_SETUP_NO_MMAP)) {
- io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
- true);
- } else {
- io_pages_free(&r->ring_pages, r->n_ring_pages);
- vunmap(r->rings);
- }
io_free_region(ctx, &r->sq_region);
+ io_free_region(ctx, &r->ring_region);
}
#define swap_old(ctx, o, n, field) \
@@ -436,13 +429,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (size == SIZE_MAX)
return -EOVERFLOW;
- if (!(p.flags & IORING_SETUP_NO_MMAP))
- n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
- else
- n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
- p.cq_off.user_addr, size);
- if (IS_ERR(n.rings))
- return PTR_ERR(n.rings);
+ memset(&rd, 0, sizeof(rd));
+ rd.size = PAGE_ALIGN(size);
+ if (p.flags & IORING_SETUP_NO_MMAP) {
+ rd.user_addr = p.cq_off.user_addr;
+ rd.flags |= IORING_MEM_REGION_TYPE_USER;
+ }
+ ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
+ if (ret) {
+ io_register_free_rings(ctx, &p, &n);
+ return ret;
+ }
+ n.rings = io_region_get_ptr(&n.ring_region);
n.rings->sq_ring_mask = p.sq_entries - 1;
n.rings->cq_ring_mask = p.cq_entries - 1;
@@ -552,8 +550,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
ctx->rings = n.rings;
ctx->sq_sqes = n.sq_sqes;
- swap_old(ctx, o, n, n_ring_pages);
- swap_old(ctx, o, n, ring_pages);
+ swap_old(ctx, o, n, ring_region);
swap_old(ctx, o, n, sq_region);
to_free = &o;
ret = 0;
--
2.46.0
next prev parent reply other threads:[~2024-11-24 21:12 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-24 21:12 [PATCH v2 00/18] kernel allocated regions and convert memmap to regions Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 01/18] io_uring: rename ->resize_lock Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 02/18] io_uring/rsrc: export io_check_coalesce_buffer Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 03/18] io_uring/memmap: add internal region flags Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 04/18] io_uring/memmap: flag regions with user pages Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 05/18] io_uring/memmap: account memory before pinning Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 06/18] io_uring/memmap: reuse io_free_region for failure path Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 07/18] io_uring/memmap: optimise single folio regions Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 08/18] io_uring/memmap: helper for pinning region pages Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 09/18] io_uring/memmap: add IO_REGION_F_SINGLE_REF Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 10/18] io_uring/memmap: implement kernel allocated regions Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 11/18] io_uring/memmap: implement mmap for regions Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 12/18] io_uring: pass ctx to io_register_free_rings Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 13/18] io_uring: use region api for SQ Pavel Begunkov
2024-11-24 21:12 ` Pavel Begunkov [this message]
2024-11-24 21:12 ` [PATCH v2 15/18] io_uring/kbuf: use mmap_lock to sync with mmap Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 16/18] io_uring/kbuf: remove pbuf ring refcounting Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 17/18] io_uring/kbuf: use region api for pbuf rings Pavel Begunkov
2024-11-24 21:12 ` [PATCH v2 18/18] io_uring/memmap: unify io_uring mmap'ing code Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=42b3eda88aed4b3542534747cb0ce22744042d98.1732481694.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox