public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] buffer table registration cleanup
@ 2025-04-04 16:22 Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 1/4] io_uring/rsrc: avoid assigning buf table on failure Pavel Begunkov
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Pavel Begunkov @ 2025-04-04 16:22 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Cleanup up buffer table registration, and deduplicate it with updates.
Similar series can be done for file tables.

Pavel Begunkov (4):
  io_uring/rsrc: avoid assigning buf table on failure
  io_uring: deduplicate node tagging
  io_uring: early return for sparse buf table registration
  io_uring: reuse buffer updates for registration

 io_uring/rsrc.c | 111 ++++++++++++++++++------------------------------
 1 file changed, 41 insertions(+), 70 deletions(-)

-- 
2.48.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/4] io_uring/rsrc: avoid assigning buf table on failure
  2025-04-04 16:22 [PATCH 0/4] buffer table registration cleanup Pavel Begunkov
@ 2025-04-04 16:22 ` Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 2/4] io_uring: deduplicate node tagging Pavel Begunkov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2025-04-04 16:22 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

The fail path of io_sqe_buffers_register() assigns ->buf_table just to
implicitly pass it into io_sqe_buffers_unregister(). Be more explicit
by using io_rsrc_data_free() for destruction and passing the table to it
directly.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/rsrc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index b36c8825550e..3c6e6e396052 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -914,11 +914,12 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		data.nodes[i] = node;
 	}
 
-	ctx->buf_table = data;
 	if (ret) {
-		io_clear_table_tags(&ctx->buf_table);
-		io_sqe_buffers_unregister(ctx);
+		io_clear_table_tags(&data);
+		io_rsrc_data_free(ctx, &data);
+		return ret;
 	}
+	ctx->buf_table = data;
 	return ret;
 }
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/4] io_uring: deduplicate node tagging
  2025-04-04 16:22 [PATCH 0/4] buffer table registration cleanup Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 1/4] io_uring/rsrc: avoid assigning buf table on failure Pavel Begunkov
@ 2025-04-04 16:22 ` Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 3/4] io_uring: early return for sparse buf table registration Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 4/4] io_uring: reuse buffer updates for registration Pavel Begunkov
  3 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2025-04-04 16:22 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Move tag handling into io_sqe_buffer_register() so that we don't need to
duplicate it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/rsrc.c | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3c6e6e396052..d5e29536466c 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -27,7 +27,7 @@ struct io_rsrc_update {
 };
 
 static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
-			struct iovec *iov, struct page **last_hpage);
+			struct iovec *iov, struct page **last_hpage, u64 tag);
 
 /* only define max */
 #define IORING_MAX_FIXED_FILES	(1U << 20)
@@ -311,18 +311,11 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		err = io_buffer_validate(iov);
 		if (err)
 			break;
-		node = io_sqe_buffer_register(ctx, iov, &last_hpage);
+		node = io_sqe_buffer_register(ctx, iov, &last_hpage, tag);
 		if (IS_ERR(node)) {
 			err = PTR_ERR(node);
 			break;
 		}
-		if (tag) {
-			if (!node) {
-				err = -EINVAL;
-				break;
-			}
-			node->tag = tag;
-		}
 		i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
 		io_reset_rsrc_node(ctx, &ctx->buf_table, i);
 		ctx->buf_table.nodes[i] = node;
@@ -771,7 +764,8 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
 
 static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 						   struct iovec *iov,
-						   struct page **last_hpage)
+						   struct page **last_hpage,
+						   u64 tag)
 {
 	struct io_mapped_ubuf *imu = NULL;
 	struct page **pages = NULL;
@@ -782,12 +776,16 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	struct io_imu_folio_data data;
 	bool coalesced = false;
 
-	if (!iov->iov_base)
+	if (!iov->iov_base) {
+		if (tag)
+			return ERR_PTR(-EINVAL);
 		return NULL;
+	}
 
 	node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 	if (!node)
 		return ERR_PTR(-ENOMEM);
+	node->tag = tag;
 
 	ret = -ENOMEM;
 	pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
@@ -899,18 +897,11 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			}
 		}
 
-		node = io_sqe_buffer_register(ctx, iov, &last_hpage);
+		node = io_sqe_buffer_register(ctx, iov, &last_hpage, tag);
 		if (IS_ERR(node)) {
 			ret = PTR_ERR(node);
 			break;
 		}
-		if (tag) {
-			if (!node) {
-				ret = -EINVAL;
-				break;
-			}
-			node->tag = tag;
-		}
 		data.nodes[i] = node;
 	}
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/4] io_uring: early return for sparse buf table registration
  2025-04-04 16:22 [PATCH 0/4] buffer table registration cleanup Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 1/4] io_uring/rsrc: avoid assigning buf table on failure Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 2/4] io_uring: deduplicate node tagging Pavel Begunkov
@ 2025-04-04 16:22 ` Pavel Begunkov
  2025-04-04 16:22 ` [PATCH 4/4] io_uring: reuse buffer updates for registration Pavel Begunkov
  3 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2025-04-04 16:22 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

We don't need to do anything after io_rsrc_data_alloc() if the goal is
to create an empty buffer table, so just return earlier and clean up the
rest of io_sqe_buffers_register(). It should be particularly nice with
IORING_RSRC_REGISTER_SPARSE.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/rsrc.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d5e29536466c..958eee7b4a47 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -867,28 +867,28 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	if (ret)
 		return ret;
 
-	if (!arg)
-		memset(iov, 0, sizeof(*iov));
+	if (!arg) {
+		ctx->buf_table = data;
+		return 0;
+	}
 
 	for (i = 0; i < nr_args; i++) {
 		struct io_rsrc_node *node;
 		u64 tag = 0;
 
-		if (arg) {
-			uvec = (struct iovec __user *) arg;
-			iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
-			if (IS_ERR(iov)) {
-				ret = PTR_ERR(iov);
-				break;
-			}
-			ret = io_buffer_validate(iov);
-			if (ret)
-				break;
-			if (ctx->compat)
-				arg += sizeof(struct compat_iovec);
-			else
-				arg += sizeof(struct iovec);
+		uvec = (struct iovec __user *) arg;
+		iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
+		if (IS_ERR(iov)) {
+			ret = PTR_ERR(iov);
+			break;
 		}
+		ret = io_buffer_validate(iov);
+		if (ret)
+			break;
+		if (ctx->compat)
+			arg += sizeof(struct compat_iovec);
+		else
+			arg += sizeof(struct iovec);
 
 		if (tags) {
 			if (copy_from_user(&tag, &tags[i], sizeof(tag))) {
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/4] io_uring: reuse buffer updates for registration
  2025-04-04 16:22 [PATCH 0/4] buffer table registration cleanup Pavel Begunkov
                   ` (2 preceding siblings ...)
  2025-04-04 16:22 ` [PATCH 3/4] io_uring: early return for sparse buf table registration Pavel Begunkov
@ 2025-04-04 16:22 ` Pavel Begunkov
  2025-04-04 16:38   ` Jens Axboe
  3 siblings, 1 reply; 8+ messages in thread
From: Pavel Begunkov @ 2025-04-04 16:22 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

After registering an empty buffer table, we can reuse the code for
buffer updates to actually register buffers.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/rsrc.c | 85 +++++++++++++++++++------------------------------
 1 file changed, 32 insertions(+), 53 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 958eee7b4a47..6b5ec1504dcd 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -277,9 +277,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 	return done ? done : err;
 }
 
-static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
-				   struct io_uring_rsrc_update2 *up,
-				   unsigned int nr_args)
+static int io_buffer_table_update(struct io_ring_ctx *ctx,
+				  struct io_rsrc_data *buf_table,
+				  struct io_uring_rsrc_update2 *up,
+				  unsigned int nr_args,
+				  unsigned *last_error)
 {
 	u64 __user *tags = u64_to_user_ptr(up->tags);
 	struct iovec fast_iov, *iov;
@@ -289,9 +291,9 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 	__u32 done;
 	int i, err;
 
-	if (!ctx->buf_table.nr)
+	if (!buf_table->nr)
 		return -ENXIO;
-	if (up->offset + nr_args > ctx->buf_table.nr)
+	if (up->offset + nr_args > buf_table->nr)
 		return -EINVAL;
 
 	for (done = 0; done < nr_args; done++) {
@@ -316,17 +318,26 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 			err = PTR_ERR(node);
 			break;
 		}
-		i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
-		io_reset_rsrc_node(ctx, &ctx->buf_table, i);
-		ctx->buf_table.nodes[i] = node;
+		i = array_index_nospec(up->offset + done, buf_table->nr);
+		io_reset_rsrc_node(ctx, buf_table, i);
+		buf_table->nodes[i] = node;
 		if (ctx->compat)
 			user_data += sizeof(struct compat_iovec);
 		else
 			user_data += sizeof(struct iovec);
 	}
+	if (last_error)
+		*last_error = err;
 	return done ? done : err;
 }
 
+static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
+				   struct io_uring_rsrc_update2 *up,
+				   unsigned int nr_args)
+{
+	return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, NULL);
+}
+
 static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				     struct io_uring_rsrc_update2 *up,
 				     unsigned nr_args)
@@ -851,11 +862,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			    unsigned int nr_args, u64 __user *tags)
 {
-	struct page *last_hpage = NULL;
 	struct io_rsrc_data data;
-	struct iovec fast_iov, *iov = &fast_iov;
-	const struct iovec __user *uvec;
-	int i, ret;
+	int ret, err;
 
 	BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
 
@@ -867,51 +875,22 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	if (ret)
 		return ret;
 
-	if (!arg) {
-		ctx->buf_table = data;
-		return 0;
-	}
-
-	for (i = 0; i < nr_args; i++) {
-		struct io_rsrc_node *node;
-		u64 tag = 0;
-
-		uvec = (struct iovec __user *) arg;
-		iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
-		if (IS_ERR(iov)) {
-			ret = PTR_ERR(iov);
-			break;
-		}
-		ret = io_buffer_validate(iov);
-		if (ret)
-			break;
-		if (ctx->compat)
-			arg += sizeof(struct compat_iovec);
-		else
-			arg += sizeof(struct iovec);
-
-		if (tags) {
-			if (copy_from_user(&tag, &tags[i], sizeof(tag))) {
-				ret = -EFAULT;
-				break;
-			}
-		}
+	if (arg) {
+		struct io_uring_rsrc_update2 update_arg = {
+			.tags = (u64)(unsigned long)tags,
+			.data = (u64)(unsigned long)arg,
+			.offset = 0,
+		};
 
-		node = io_sqe_buffer_register(ctx, iov, &last_hpage, tag);
-		if (IS_ERR(node)) {
-			ret = PTR_ERR(node);
-			break;
+		ret = io_buffer_table_update(ctx, &data, &update_arg, nr_args, &err);
+		if (ret != nr_args) {
+			io_clear_table_tags(&data);
+			io_rsrc_data_free(ctx, &data);
+			return err;
 		}
-		data.nodes[i] = node;
-	}
-
-	if (ret) {
-		io_clear_table_tags(&data);
-		io_rsrc_data_free(ctx, &data);
-		return ret;
 	}
 	ctx->buf_table = data;
-	return ret;
+	return 0;
 }
 
 int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] io_uring: reuse buffer updates for registration
  2025-04-04 16:22 ` [PATCH 4/4] io_uring: reuse buffer updates for registration Pavel Begunkov
@ 2025-04-04 16:38   ` Jens Axboe
  2025-04-04 21:38     ` Pavel Begunkov
  0 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2025-04-04 16:38 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 4/4/25 10:22 AM, Pavel Begunkov wrote:
> @@ -316,17 +318,26 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>  			err = PTR_ERR(node);
>  			break;
>  		}
> -		i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
> -		io_reset_rsrc_node(ctx, &ctx->buf_table, i);
> -		ctx->buf_table.nodes[i] = node;
> +		i = array_index_nospec(up->offset + done, buf_table->nr);
> +		io_reset_rsrc_node(ctx, buf_table, i);
> +		buf_table->nodes[i] = node;
>  		if (ctx->compat)
>  			user_data += sizeof(struct compat_iovec);
>  		else
>  			user_data += sizeof(struct iovec);
>  	}
> +	if (last_error)
> +		*last_error = err;
>  	return done ? done : err;
>  }
>  
> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
> +				   struct io_uring_rsrc_update2 *up,
> +				   unsigned int nr_args)
> +{
> +	return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, NULL);
> +}

Minor style preference, but just do:

	unsigned last_err;
	return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, &last_err);

and skip that last_error could be conditionally set?

Outside of that, no comments on the series, nice cleanups.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] io_uring: reuse buffer updates for registration
  2025-04-04 16:38   ` Jens Axboe
@ 2025-04-04 21:38     ` Pavel Begunkov
  2025-04-04 22:00       ` Jens Axboe
  0 siblings, 1 reply; 8+ messages in thread
From: Pavel Begunkov @ 2025-04-04 21:38 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 4/4/25 17:38, Jens Axboe wrote:
> On 4/4/25 10:22 AM, Pavel Begunkov wrote:
>> @@ -316,17 +318,26 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>>   			err = PTR_ERR(node);
>>   			break;
>>   		}
>> -		i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
>> -		io_reset_rsrc_node(ctx, &ctx->buf_table, i);
>> -		ctx->buf_table.nodes[i] = node;
>> +		i = array_index_nospec(up->offset + done, buf_table->nr);
>> +		io_reset_rsrc_node(ctx, buf_table, i);
>> +		buf_table->nodes[i] = node;
>>   		if (ctx->compat)
>>   			user_data += sizeof(struct compat_iovec);
>>   		else
>>   			user_data += sizeof(struct iovec);
>>   	}
>> +	if (last_error)
>> +		*last_error = err;
>>   	return done ? done : err;
>>   }
>>   
>> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>> +				   struct io_uring_rsrc_update2 *up,
>> +				   unsigned int nr_args)
>> +{
>> +	return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, NULL);
>> +}
> 
> Minor style preference, but just do:
> 
> 	unsigned last_err;
> 	return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, &last_err);
> 
> and skip that last_error could be conditionally set?

I can't say I see how that's better though

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] io_uring: reuse buffer updates for registration
  2025-04-04 21:38     ` Pavel Begunkov
@ 2025-04-04 22:00       ` Jens Axboe
  0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2025-04-04 22:00 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 4/4/25 3:38 PM, Pavel Begunkov wrote:
> On 4/4/25 17:38, Jens Axboe wrote:
>> On 4/4/25 10:22 AM, Pavel Begunkov wrote:
>>> @@ -316,17 +318,26 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>>>               err = PTR_ERR(node);
>>>               break;
>>>           }
>>> -        i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
>>> -        io_reset_rsrc_node(ctx, &ctx->buf_table, i);
>>> -        ctx->buf_table.nodes[i] = node;
>>> +        i = array_index_nospec(up->offset + done, buf_table->nr);
>>> +        io_reset_rsrc_node(ctx, buf_table, i);
>>> +        buf_table->nodes[i] = node;
>>>           if (ctx->compat)
>>>               user_data += sizeof(struct compat_iovec);
>>>           else
>>>               user_data += sizeof(struct iovec);
>>>       }
>>> +    if (last_error)
>>> +        *last_error = err;
>>>       return done ? done : err;
>>>   }
>>>   +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>>> +                   struct io_uring_rsrc_update2 *up,
>>> +                   unsigned int nr_args)
>>> +{
>>> +    return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, NULL);
>>> +}
>>
>> Minor style preference, but just do:
>>
>>     unsigned last_err;
>>     return io_buffer_table_update(ctx, &ctx->buf_table, up, nr_args, &last_err);
>>
>> and skip that last_error could be conditionally set?
> 
> I can't say I see how that's better though

I think so though, it makes the store unconditional and there's no risk
of anything forgetting that last_error is sometimes valid, sometimes
not. The last part is the most annoying to me, it's just more fragile
imho.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-04-04 22:01 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-04 16:22 [PATCH 0/4] buffer table registration cleanup Pavel Begunkov
2025-04-04 16:22 ` [PATCH 1/4] io_uring/rsrc: avoid assigning buf table on failure Pavel Begunkov
2025-04-04 16:22 ` [PATCH 2/4] io_uring: deduplicate node tagging Pavel Begunkov
2025-04-04 16:22 ` [PATCH 3/4] io_uring: early return for sparse buf table registration Pavel Begunkov
2025-04-04 16:22 ` [PATCH 4/4] io_uring: reuse buffer updates for registration Pavel Begunkov
2025-04-04 16:38   ` Jens Axboe
2025-04-04 21:38     ` Pavel Begunkov
2025-04-04 22:00       ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox