From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected]
Subject: [PATCH 11/11] io_uring/memmap: implement mmap for regions
Date: Wed, 20 Nov 2024 23:33:34 +0000 [thread overview]
Message-ID: <461a81aac8d96a14c3054585faef7b3bdaa2a759.1732144783.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
The patch implements mmap for the param region and enables the kernel
allocation mode. Internally it uses a fixed mmap offset; however, the
user must use the offset returned in
struct io_uring_region_desc::mmap_offset.
Note that mmap doesn't and can't take ->uring_lock; the region / ring
lookup is protected by ->mmap_lock instead, and mmap peeks directly at
ctx->param_region. We can't protect io_create_region() with the
mmap_lock as it'd deadlock, which is why io_create_region_mmap_safe()
initialises the region in a temporary variable and then publishes it
with the lock taken. It's intentionally decoupled from the main region
helpers, and in the future we might want to have a list of active
regions, which then could be protected by the ->mmap_lock.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/memmap.c | 61 +++++++++++++++++++++++++++++++++++++++++----
io_uring/memmap.h | 10 +++++++-
io_uring/register.c | 6 ++---
3 files changed, 67 insertions(+), 10 deletions(-)
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 8598770bc385..5d971ba33d5a 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -265,7 +265,8 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx,
static int io_region_allocate_pages(struct io_ring_ctx *ctx,
struct io_mapped_region *mr,
- struct io_uring_region_desc *reg)
+ struct io_uring_region_desc *reg,
+ unsigned long mmap_offset)
{
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
unsigned long size = mr->nr_pages << PAGE_SHIFT;
@@ -280,8 +281,7 @@ static int io_region_allocate_pages(struct io_ring_ctx *ctx,
p = io_mem_alloc_compound(pages, mr->nr_pages, size, gfp);
if (!IS_ERR(p)) {
mr->flags |= IO_REGION_F_SINGLE_REF;
- mr->pages = pages;
- return 0;
+ goto done;
}
nr_allocated = alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL,
@@ -292,12 +292,15 @@ static int io_region_allocate_pages(struct io_ring_ctx *ctx,
kvfree(pages);
return -ENOMEM;
}
+done:
+ reg->mmap_offset = mmap_offset;
mr->pages = pages;
return 0;
}
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
- struct io_uring_region_desc *reg)
+ struct io_uring_region_desc *reg,
+ unsigned long mmap_offset)
{
int nr_pages, ret;
u64 end;
@@ -331,7 +334,7 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
if (reg->flags & IORING_MEM_REGION_TYPE_USER)
ret = io_region_pin_pages(ctx, mr, reg);
else
- ret = io_region_allocate_pages(ctx, mr, reg);
+ ret = io_region_allocate_pages(ctx, mr, reg, mmap_offset);
if (ret)
goto out_free;
@@ -344,6 +347,50 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
return ret;
}
+int io_create_region_mmap_safe(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
+ struct io_uring_region_desc *reg,
+ unsigned long mmap_offset)
+{
+ struct io_mapped_region tmp_mr;
+ int ret;
+
+ memcpy(&tmp_mr, mr, sizeof(tmp_mr));
+ ret = io_create_region(ctx, &tmp_mr, reg, mmap_offset);
+ if (ret)
+ return ret;
+
+ /*
+ * Once published, mmap can find it while holding only the ->mmap_lock
+ * and not ->uring_lock.
+ */
+ guard(mutex)(&ctx->mmap_lock);
+ memcpy(mr, &tmp_mr, sizeof(tmp_mr));
+ return 0;
+}
+
+static void *io_region_validate_mmap(struct io_ring_ctx *ctx,
+ struct io_mapped_region *mr)
+{
+ lockdep_assert_held(&ctx->mmap_lock);
+
+ if (!io_region_is_set(mr))
+ return ERR_PTR(-EINVAL);
+ if (mr->flags & IO_REGION_F_USER_PINNED)
+ return ERR_PTR(-EINVAL);
+
+ return io_region_get_ptr(mr);
+}
+
+static int io_region_mmap(struct io_ring_ctx *ctx,
+ struct io_mapped_region *mr,
+ struct vm_area_struct *vma)
+{
+ unsigned long nr_pages = mr->nr_pages;
+
+ vm_flags_set(vma, VM_DONTEXPAND);
+ return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
+}
+
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
size_t sz)
{
@@ -379,6 +426,8 @@ static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
io_put_bl(ctx, bl);
return ptr;
}
+ case IORING_MAP_OFF_PARAM_REGION:
+ return io_region_validate_mmap(ctx, &ctx->param_region);
}
return ERR_PTR(-EINVAL);
@@ -419,6 +468,8 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
ctx->n_sqe_pages);
case IORING_OFF_PBUF_RING:
return io_pbuf_mmap(file, vma);
+ case IORING_MAP_OFF_PARAM_REGION:
+ return io_region_mmap(ctx, &ctx->param_region, vma);
}
return -EINVAL;
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 2096a8427277..2402bca3d700 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -1,6 +1,8 @@
#ifndef IO_URING_MEMMAP_H
#define IO_URING_MEMMAP_H
+#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
+
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
void io_pages_free(struct page ***pages, int npages);
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
@@ -24,7 +26,13 @@ int io_uring_mmap(struct file *file, struct vm_area_struct *vma);
void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr);
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
- struct io_uring_region_desc *reg);
+ struct io_uring_region_desc *reg,
+ unsigned long mmap_offset);
+
+int io_create_region_mmap_safe(struct io_ring_ctx *ctx,
+ struct io_mapped_region *mr,
+ struct io_uring_region_desc *reg,
+ unsigned long mmap_offset);
static inline void *io_region_get_ptr(struct io_mapped_region *mr)
{
diff --git a/io_uring/register.c b/io_uring/register.c
index f043d3f6b026..5b099ec36d00 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -585,9 +585,6 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
rd_uptr = u64_to_user_ptr(reg.region_uptr);
if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
return -EFAULT;
-
- if (!(rd.flags & IORING_MEM_REGION_TYPE_USER))
- return -EINVAL;
if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
return -EINVAL;
if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
@@ -602,7 +599,8 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
!(ctx->flags & IORING_SETUP_R_DISABLED))
return -EINVAL;
- ret = io_create_region(ctx, &ctx->param_region, &rd);
+ ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd,
+ IORING_MAP_OFF_PARAM_REGION);
if (ret)
return ret;
if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
--
2.46.0
next prev parent reply other threads:[~2024-11-20 23:33 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-20 23:33 [PATCH 00/11] support kernel allocated regions Pavel Begunkov
2024-11-20 23:33 ` [PATCH 01/11] io_uring: rename ->resize_lock Pavel Begunkov
2024-11-20 23:33 ` [PATCH 02/11] io_uring/rsrc: export io_check_coalesce_buffer Pavel Begunkov
2024-11-20 23:33 ` [PATCH 03/11] io_uring/memmap: add internal region flags Pavel Begunkov
2024-11-20 23:33 ` [PATCH 04/11] io_uring/memmap: flag regions with user pages Pavel Begunkov
2024-11-20 23:33 ` [PATCH 05/11] io_uring/memmap: account memory before pinning Pavel Begunkov
2024-11-20 23:33 ` [PATCH 06/11] io_uring/memmap: reuse io_free_region for failure path Pavel Begunkov
2024-11-20 23:33 ` [PATCH 07/11] io_uring/memmap: optimise single folio regions Pavel Begunkov
2024-11-20 23:33 ` [PATCH 08/11] io_uring/memmap: helper for pinning region pages Pavel Begunkov
2024-11-20 23:33 ` [PATCH 09/11] io_uring/memmap: add IO_REGION_F_SINGLE_REF Pavel Begunkov
2024-11-20 23:33 ` [PATCH 10/11] io_uring/memmap: implement kernel allocated regions Pavel Begunkov
2024-11-20 23:33 ` Pavel Begunkov [this message]
2024-11-21 1:28 ` [PATCH 00/11] support " Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=461a81aac8d96a14c3054585faef7b3bdaa2a759.1732144783.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox