From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected]
Subject: [PATCH 4/6] io_uring: introduce memory regions
Date: Thu, 14 Nov 2024 04:14:23 +0000 [thread overview]
Message-ID: <cd8e0927651ecdb99776503e50aa3554573b9a61.1731556844.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
We've got a good number of mappings we share with userspace, including
the main rings, provided buffer rings and at least a couple more types.
All of them duplicate some of the code for page pinning, mmap'ing and
attempts to optimise it with huge pages.
Introduce a notion of regions. For userspace it's just a new structure
called struct io_uring_region_desc, which is supposed to parameterise all
such mapping / queue creations. It either represents user provided
memory, in which case the user_addr field should point to it, or a
request for the kernel to create the memory, in which case the user is
supposed to mmap it afterwards using the offset returned in the
mmap_offset field. With a uniform userspace API we can avoid additional
boilerplate code, and when we add an optimisation it will apply to all
mapping types.
Internally, there is a new structure, struct io_mapped_region, holding all
relevant runtime information, and some helpers to work with it. This
patch limits it to user provided regions; that will be extended in
follow-up work.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 6 ++++
include/uapi/linux/io_uring.h | 13 +++++++
io_uring/memmap.c | 65 ++++++++++++++++++++++++++++++++++
io_uring/memmap.h | 14 ++++++++
4 files changed, 98 insertions(+)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 52a5da99a205..1d3a37234ace 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -75,6 +75,12 @@ struct io_hash_table {
unsigned hash_bits;
};
+struct io_mapped_region { /* runtime state of one region, filled in by io_create_region() */
+ struct page **pages; /* array of the pinned pages backing the region */
+ void *vmap_ptr; /* kernel address returned by vmap() for those pages */
+ size_t nr_pages; /* number of entries in pages; non-zero once the region is set */
+};
+
/*
* Arbitrary limit, can be raised if need be
*/
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 132f5db3d4e8..7ceeccbbf4cb 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -647,6 +647,19 @@ struct io_uring_files_update {
__aligned_u64 /* __s32 * */ fds;
};
+enum { /* values accepted in io_uring_region_desc::flags */
+ /* initialise with user memory pointed to by user_addr */
+ IORING_REGION_USER_MEM = 1,
+};
+
+struct io_uring_region_desc { /* userspace description of a memory region */
+ __u64 user_addr; /* address of the user provided memory; non-zero, page aligned */
+ __u64 size; /* region size in bytes; non-zero, page aligned */
+ __u64 flags; /* IORING_REGION_* */
+ __u64 mmap_offset; /* offset to mmap kernel-created memory with; must be 0 for user memory */
+ __u64 __resv[4]; /* reserved, must be zero */
+};
+
/*
* Register a fully sparse file space, rather than pass in an array of all
* -1 file descriptors.
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 6ab59c60dfd0..6b03f5641ef3 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -12,6 +12,7 @@
#include "memmap.h"
#include "kbuf.h"
+#include "rsrc.h"
static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
size_t size, gfp_t gfp)
@@ -194,6 +195,70 @@ void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
return ERR_PTR(-ENOMEM);
}
+/*
+ * io_free_region() - release everything io_create_region() attached to @mr.
+ * @ctx: ring the region belongs to; ctx->user, if set, is un-charged
+ * @mr:  region to tear down
+ *
+ * Leaves @mr zeroed, i.e. in the empty state io_create_region() expects.
+ * Every step is skipped for fields that are already zero, so calling this on
+ * an empty region only re-zeroes it.
+ */
+void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
+{
+	/*
+	 * Release the pages through io_pages_free(), the helper
+	 * io_create_region() uses on its error path for the very same array.
+	 * Only unpinning the pages here would leak the array itself once the
+	 * pointer is wiped by the memset() below.
+	 * NOTE(review): assumes io_pages_free() both drops the pins and frees
+	 * the array, as its use in io_create_region() implies - confirm.
+	 */
+	if (mr->pages)
+		io_pages_free(&mr->pages, mr->nr_pages);
+	if (mr->vmap_ptr)
+		vunmap(mr->vmap_ptr);
+	if (mr->nr_pages && ctx->user)
+		__io_unaccount_mem(ctx->user, mr->nr_pages);
+
+	memset(mr, 0, sizeof(*mr));
+}
+
+/*
+ * io_create_region() - set up @mr from the userspace description in @reg.
+ * @ctx: ring the region belongs to; ctx->user, if set, is charged for the pages
+ * @mr:  region to fill in, must be empty on entry
+ * @reg: description of the region
+ *
+ * Only user provided memory (IORING_REGION_USER_MEM) is accepted. The pages
+ * behind [user_addr, user_addr + size) are pinned, accounted and mapped into
+ * the kernel with vmap().
+ *
+ * Returns 0 on success and a negative errno otherwise. On failure @mr is not
+ * modified and whatever was acquired along the way is released again.
+ */
+int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
+		     struct io_uring_region_desc *reg)
+{
+	int pages_accounted = 0;
+	struct page **pages;
+	int nr_pages, ret;
+	void *vptr;
+	u64 end;
+
+	/* Re-initialising a live region would lose its pages and mapping. */
+	if (WARN_ON_ONCE(mr->pages || mr->vmap_ptr || mr->nr_pages))
+		return -EFAULT;
+	if (memchr_inv(&reg->__resv, 0, sizeof(reg->__resv)))
+		return -EINVAL;
+	if (reg->flags != IORING_REGION_USER_MEM)
+		return -EINVAL;
+	if (!reg->user_addr)
+		return -EFAULT;
+	if (!reg->size || reg->mmap_offset)
+		return -EINVAL;
+	/* The page count is kept in an int below. */
+	if ((reg->size >> PAGE_SHIFT) > INT_MAX)
+		return -E2BIG;
+	if ((reg->user_addr | reg->size) & ~PAGE_MASK)
+		return -EINVAL;
+	/* end is only needed as the overflow check's output. */
+	if (check_add_overflow(reg->user_addr, reg->size, &end))
+		return -EOVERFLOW;
+
+	pages = io_pin_pages(reg->user_addr, reg->size, &nr_pages);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	if (ctx->user) {
+		ret = __io_account_mem(ctx->user, nr_pages);
+		if (ret)
+			goto out_free;
+		pages_accounted = nr_pages;
+	}
+
+	vptr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!vptr) {
+		/*
+		 * ret is either 0 from the accounting above or was never
+		 * assigned, so the failure has to be recorded explicitly.
+		 */
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	mr->pages = pages;
+	mr->vmap_ptr = vptr;
+	mr->nr_pages = nr_pages;
+	return 0;
+out_free:
+	if (pages_accounted)
+		__io_unaccount_mem(ctx->user, pages_accounted);
+	io_pages_free(&pages, nr_pages);
+	return ret;
+}
+
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
size_t sz)
{
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 5cec5b7ac49a..f361a635b6c7 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -22,4 +22,18 @@ unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags);
int io_uring_mmap(struct file *file, struct vm_area_struct *vma);
+void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr);
+int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
+ struct io_uring_region_desc *reg);
+
+/*
+ * Kernel address of the region's mapping, as stored in vmap_ptr by
+ * io_create_region(); NULL while the region is empty.
+ */
+static inline void *io_region_get_ptr(struct io_mapped_region *mr)
+{
+	return mr->vmap_ptr;
+}
+
+/* True once the region holds pages, i.e. its page count is non-zero. */
+static inline bool io_region_is_set(struct io_mapped_region *mr)
+{
+	return mr->nr_pages != 0;
+}
+
#endif
--
2.46.0
next prev parent reply other threads:[~2024-11-14 4:14 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-14 4:14 [PATCH 0/6] regions, param pre-mapping and reg waits extension Pavel Begunkov
2024-11-14 4:14 ` [PATCH 1/6] io_uring: fortify io_pin_pages with a warning Pavel Begunkov
2024-11-14 4:14 ` [PATCH 2/6] io_uring: disable ENTER_EXT_ARG_REG for IOPOLL Pavel Begunkov
2024-11-14 4:14 ` [PATCH 3/6] io_uring: temporarily disable registered waits Pavel Begunkov
2024-11-14 4:14 ` Pavel Begunkov [this message]
2024-11-15 14:44 ` [PATCH 4/6] io_uring: introduce memory regions Jens Axboe
2024-11-15 15:54 ` Pavel Begunkov
2024-11-14 4:14 ` [PATCH 5/6] io_uring: add parameter region registration Pavel Begunkov
2024-11-14 4:14 ` [PATCH 6/6] io_uring: enable IORING_ENTER_EXT_ARG_REG back Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cd8e0927651ecdb99776503e50aa3554573b9a61.1731556844.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox