public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 2/2] io_uring/rsrc: allow cloning with node replacements
Date: Wed, 30 Oct 2024 19:44:56 -0600	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

Currently cloning a buffer table will fail if the destination already has
a table. But it should be possible to use it to replace existing elements.
Add an IORING_REGISTER_DST_REPLACE cloning flag which, if set, will allow
the destination to already have a buffer table. If that is the case,
then entries designated by offset + nr buffers will be replaced if they
already exist.

Note that it's allowed to use IORING_REGISTER_DST_REPLACE and not have
an existing table, in which case it'll work just like not having the
flag set and an empty table - it'll just assign the newly created table
for that case.

Signed-off-by: Jens Axboe <[email protected]>
---
 include/uapi/linux/io_uring.h |  3 +-
 io_uring/rsrc.c               | 66 +++++++++++++++++++++++++++--------
 2 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index cc8dbe78c126..ce58c4590de6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -713,7 +713,8 @@ struct io_uring_clock_register {
 };
 
 enum {
-	IORING_REGISTER_SRC_REGISTERED = 1,
+	IORING_REGISTER_SRC_REGISTERED	= (1U << 0),
+	IORING_REGISTER_DST_REPLACE	= (1U << 1),
 };
 
 struct io_uring_clone_buffers {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d00870128bb9..673ff00da727 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -927,8 +927,40 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
 static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
 			    struct io_uring_clone_buffers *arg)
 {
-	int i, ret, nbufs, off, nr;
 	struct io_rsrc_data data;
+	int i, ret, off, nr;
+	unsigned int nbufs;
+
+	/* if offsets are given, must have nr specified too */
+	if (!arg->nr && (arg->dst_off || arg->src_off))
+		return -EINVAL;
+	/* not allowed unless REPLACE is set */
+	if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
+		return -EBUSY;
+
+	nbufs = READ_ONCE(src_ctx->buf_table.nr);
+	if (!arg->nr)
+		arg->nr = nbufs;
+	else if (arg->nr > nbufs)
+		return -EINVAL;
+	else if (arg->nr > IORING_MAX_REG_BUFFERS)
+		return -EINVAL;
+	if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
+		return -EOVERFLOW;
+
+	ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr));
+	if (ret)
+		return ret;
+
+	/* Fill entries in data from dst that won't overlap with src */
+	for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) {
+		struct io_rsrc_node *src_node = ctx->buf_table.nodes[i];
+
+		if (src_node) {
+			data.nodes[i] = src_node;
+			src_node->refs++;
+		}
+	}
 
 	/*
 	 * Drop our own lock here. We'll setup the data we need and reference
@@ -951,14 +983,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		goto out_unlock;
 	if (off > nbufs)
 		goto out_unlock;
-	if (check_add_overflow(arg->nr, arg->dst_off, &off))
-		goto out_unlock;
-	ret = -EINVAL;
-	if (off > IORING_MAX_REG_BUFFERS)
-		goto out_unlock;
-	ret = io_rsrc_data_alloc(&data, off);
-	if (ret)
-		goto out_unlock;
 
 	off = arg->dst_off;
 	i = arg->src_off;
@@ -986,6 +1010,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
 	mutex_unlock(&src_ctx->uring_lock);
 	mutex_lock(&ctx->uring_lock);
+
+	/*
+	 * If asked for replace, put the old table. data.nodes[] holds both
+	 * old and new nodes at this point.
+	 */
+	if (arg->flags & IORING_REGISTER_DST_REPLACE)
+		io_rsrc_data_free(&ctx->buf_table);
+
+	/*
+	 * ctx->buf_table should be empty now - either the contents are being
+	 * replaced and we just freed the table, or someone raced setting up
+	 * a buffer table while the clone was happening. If not empty, fall
+	 * through to failure handling.
+	 */
 	if (!ctx->buf_table.nr) {
 		ctx->buf_table = data;
 		return 0;
@@ -995,14 +1033,14 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	mutex_lock(&src_ctx->uring_lock);
 	/* someone raced setting up buffers, dump ours */
 	ret = -EBUSY;
-	i = nbufs;
 out_put_free:
+	i = data.nr;
 	while (i--) {
 		io_buffer_unmap(src_ctx, data.nodes[i]);
 		kfree(data.nodes[i]);
 	}
-	io_rsrc_data_free(&data);
 out_unlock:
+	io_rsrc_data_free(&data);
 	mutex_unlock(&src_ctx->uring_lock);
 	mutex_lock(&ctx->uring_lock);
 	return ret;
@@ -1022,12 +1060,12 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
 	struct file *file;
 	int ret;
 
-	if (ctx->buf_table.nr)
-		return -EBUSY;
 	if (copy_from_user(&buf, arg, sizeof(buf)))
 		return -EFAULT;
-	if (buf.flags & ~IORING_REGISTER_SRC_REGISTERED)
+	if (buf.flags & ~(IORING_REGISTER_SRC_REGISTERED|IORING_REGISTER_DST_REPLACE))
 		return -EINVAL;
+	if (!(buf.flags & IORING_REGISTER_DST_REPLACE) && ctx->buf_table.nr)
+		return -EBUSY;
 	if (memchr_inv(buf.pad, 0, sizeof(buf.pad)))
 		return -EINVAL;
 
-- 
2.45.2


  parent reply	other threads:[~2024-10-31  1:46 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-31  1:44 [PATCHSET v2 0/2] Add support for cloning partial buffer sets Jens Axboe
2024-10-31  1:44 ` [PATCH 1/2] io_uring/rsrc: allow cloning at an offset Jens Axboe
2024-10-31  1:44 ` Jens Axboe [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-10-30 16:54 [PATCHSET 0/2] Add support for cloning partial buffer sets Jens Axboe
2024-10-30 16:54 ` [PATCH 2/2] io_uring/rsrc: allow cloning with node replacements Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox