From: Pavel Begunkov <asml.silence@gmail.com>
To: [email protected]
Cc: [email protected]
Subject: [PATCH 5/6] io_uring: add parameter region registration
Date: Thu, 14 Nov 2024 04:14:24 +0000 [thread overview]
Message-ID: <481f7a4973b86038f6b03f0d1e9ce4e127ced315.1731556844.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
Allow the user to pre-register a region for passing various parameters.
To use it for passing wait loop arguments, which is wired in the
following commit, the region has to be registered with the
IORING_PARAM_REGION_WAIT_ARG flag set. The flag also requires the
context to be currently disabled, i.e. IORING_SETUP_R_DISABLED, to avoid
races with otherwise potentially running waiters.
This will also be useful in the future for various request / SQE
arguments like iovec, the meta read/write API, and also for BPF.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
include/linux/io_uring_types.h | 6 ++++
include/uapi/linux/io_uring.h | 13 ++++++++
io_uring/io_uring.c | 1 +
io_uring/register.c | 59 ++++++++++++++++++++++++++++++++++
4 files changed, 79 insertions(+)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 1d3a37234ace..aa5f5ea98076 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -324,6 +324,9 @@ struct io_ring_ctx {
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
unsigned cq_extra;
+
+ void *cq_wait_arg;
+ size_t cq_wait_size;
} ____cacheline_aligned_in_smp;
/*
@@ -429,6 +432,9 @@ struct io_ring_ctx {
unsigned short n_sqe_pages;
struct page **ring_pages;
struct page **sqe_pages;
+
+ /* used for optimised request parameter and wait argument passing */
+ struct io_mapped_region param_region;
};
struct io_tw_state {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 7ceeccbbf4cb..49b94029c137 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -627,6 +627,8 @@ enum io_uring_register_op {
/* resize CQ ring */
IORING_REGISTER_RESIZE_RINGS = 33,
+ IORING_REGISTER_PARAM_REGION = 34,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -660,6 +662,17 @@ struct io_uring_region_desc {
__u64 __resv[4];
};
+enum {
+ /* expose the region as registered wait arguments */
+ IORING_PARAM_REGION_WAIT_ARG = 1,
+};
+
+struct io_uring_param_region_reg {
+ __u64 region_uptr; /* struct io_uring_region_desc * */
+ __u64 flags;
+ __u64 __resv[2];
+};
+
/*
* Register a fully sparse file space, rather than pass in an array of all
* -1 file descriptors.
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 286b7bb73978..c640b8a4ceee 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2709,6 +2709,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
io_futex_cache_free(ctx);
io_destroy_buffers(ctx);
+ io_free_region(ctx, &ctx->param_region);
mutex_unlock(&ctx->uring_lock);
if (ctx->sq_creds)
put_cred(ctx->sq_creds);
diff --git a/io_uring/register.c b/io_uring/register.c
index 3c5a3cfb186b..d1ba14da37ea 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -570,6 +570,59 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
return ret;
}
+/*
+ * Register a page holding N entries of struct io_uring_reg_wait, which can
+ * be used via io_uring_enter(2) if IORING_GETEVENTS_EXT_ARG_REG is set.
+ * If that is set with IORING_GETEVENTS_EXT_ARG, then instead of passing
+ * in a pointer for a struct io_uring_getevents_arg, an index into this
+ * registered array is passed, avoiding two (arg + timeout) copies per
+ * invocation.
+ */
+static int io_register_mapped_heap(struct io_ring_ctx *ctx, void __user *uarg)
+{
+ struct io_uring_param_region_reg __user *reg_uptr = uarg;
+ struct io_uring_param_region_reg reg;
+ struct io_uring_region_desc __user *rd_uptr;
+ struct io_uring_region_desc rd;
+ int ret;
+
+ if (io_region_is_set(&ctx->param_region))
+ return -EBUSY;
+ if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
+ return -EFAULT;
+ rd_uptr = u64_to_user_ptr(reg.region_uptr);
+ if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
+ return -EFAULT;
+
+ if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
+ return -EINVAL;
+ if (reg.flags != IORING_PARAM_REGION_WAIT_ARG)
+ return -EINVAL;
+
+ /*
+ * This ensures there are no waiters. Waiters are unlocked and it's
+ * hard to synchronise with them, especially if we need to initialise
+ * the region.
+ */
+ if ((reg.flags & IORING_PARAM_REGION_WAIT_ARG) &&
+ !(ctx->flags & IORING_SETUP_R_DISABLED))
+ return -EINVAL;
+
+ ret = io_create_region(ctx, &ctx->param_region, &rd);
+ if (ret)
+ return ret;
+ if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
+ io_free_region(ctx, &ctx->param_region);
+ return -EFAULT;
+ }
+
+ if (reg.flags & IORING_PARAM_REGION_WAIT_ARG) {
+ ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
+ ctx->cq_wait_size = rd.size;
+ }
+ return 0;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -764,6 +817,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_resize_rings(ctx, arg);
break;
+ case IORING_REGISTER_PARAM_REGION:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_mapped_heap(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
--
2.46.0
next prev parent reply other threads:[~2024-11-14 4:14 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-14 4:14 [PATCH 0/6] regions, param pre-mapping and reg waits extension Pavel Begunkov
2024-11-14 4:14 ` [PATCH 1/6] io_uring: fortify io_pin_pages with a warning Pavel Begunkov
2024-11-14 4:14 ` [PATCH 2/6] io_uring: disable ENTER_EXT_ARG_REG for IOPOLL Pavel Begunkov
2024-11-14 4:14 ` [PATCH 3/6] io_uring: temporarily disable registered waits Pavel Begunkov
2024-11-14 4:14 ` [PATCH 4/6] io_uring: introduce memory regions Pavel Begunkov
2024-11-15 14:44 ` Jens Axboe
2024-11-15 15:54 ` Pavel Begunkov
2024-11-14 4:14 ` Pavel Begunkov [this message]
2024-11-14 4:14 ` [PATCH 6/6] io_uring: enable IORING_ENTER_EXT_ARG_REG back Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=481f7a4973b86038f6b03f0d1e9ce4e127ced315.1731556844.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox