public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH 5.12] io_uring: Convert personality_idr to XArray
@ 2021-03-08 14:16 Pavel Begunkov
  2021-03-08 14:22 ` Pavel Begunkov
  2021-03-09 20:53 ` Jens Axboe
  0 siblings, 2 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-03-08 14:16 UTC (permalink / raw)
  To: Jens Axboe, io-uring
  Cc: Matthew Wilcox, yangerkun, asml.silence, Stefan Metzmacher

From: "Matthew Wilcox (Oracle)" <[email protected]>

You can't call idr_remove() from within a idr_for_each() callback,
but you can call xa_erase() from an xa_for_each() loop, so switch the
entire personality_idr from the IDR to the XArray.  This manifests as a
use-after-free as idr_for_each() attempts to walk the rest of the node
after removing the last entry from it.

Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
Cc: [email protected] # 5.6+
Reported-by: yangerkun <[email protected]>
Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
[Pavel: rebased (creds load was moved into io_init_req())]
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

p.s. passes liburing tests well

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ef9f836cccc..5505e19f1391 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -407,7 +407,8 @@ struct io_ring_ctx {
 
 	struct idr		io_buffer_idr;
 
-	struct idr		personality_idr;
+	struct xarray		personalities;
+	u32			pers_next;
 
 	struct {
 		unsigned		cached_cq_tail;
@@ -1138,7 +1139,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_completion(&ctx->ref_comp);
 	init_completion(&ctx->sq_thread_comp);
 	idr_init(&ctx->io_buffer_idr);
-	idr_init(&ctx->personality_idr);
+	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
@@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->work.list.next = NULL;
 	personality = READ_ONCE(sqe->personality);
 	if (personality) {
-		req->work.creds = idr_find(&ctx->personality_idr, personality);
+		req->work.creds = xa_load(&ctx->personalities, personality);
 		if (!req->work.creds)
 			return -EINVAL;
 		get_cred(req->work.creds);
@@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 	io_eventfd_unregister(ctx);
 	io_destroy_buffers(ctx);
-	idr_destroy(&ctx->personality_idr);
 
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock) {
@@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
 {
 	const struct cred *creds;
 
-	creds = idr_remove(&ctx->personality_idr, id);
+	creds = xa_erase(&ctx->personalities, id);
 	if (creds) {
 		put_cred(creds);
 		return 0;
@@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
 	return -EINVAL;
 }
 
-static int io_remove_personalities(int id, void *p, void *data)
-{
-	struct io_ring_ctx *ctx = data;
-
-	io_unregister_personality(ctx, id);
-	return 0;
-}
-
 static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
 {
 	struct callback_head *work, *next;
@@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct work_struct *work)
 
 static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
+	unsigned long index;
+	struct creds *creds;
+
 	mutex_lock(&ctx->uring_lock);
 	percpu_ref_kill(&ctx->refs);
 	/* if force is set, the ring is going away. always drop after that */
 	ctx->cq_overflow_flushed = 1;
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
-	idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
+	xa_for_each(&ctx->personalities, index, creds)
+		io_unregister_personality(ctx, index);
 	mutex_unlock(&ctx->uring_lock);
 
 	io_kill_timeouts(ctx, NULL, NULL);
@@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 }
 
 #ifdef CONFIG_PROC_FS
-static int io_uring_show_cred(int id, void *p, void *data)
+static int io_uring_show_cred(struct seq_file *m, unsigned int id,
+		const struct cred *cred)
 {
-	const struct cred *cred = p;
-	struct seq_file *m = data;
 	struct user_namespace *uns = seq_user_ns(m);
 	struct group_info *gi;
 	kernel_cap_t cap;
@@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
 						(unsigned int) buf->len);
 	}
-	if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
+	if (has_lock && !xa_empty(&ctx->personalities)) {
+		unsigned long index;
+		const struct cred *cred;
+
 		seq_printf(m, "Personalities:\n");
-		idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
+		xa_for_each(&ctx->personalities, index, cred)
+			io_uring_show_cred(m, index, cred);
 	}
 	seq_printf(m, "PollList:\n");
 	spin_lock_irq(&ctx->completion_lock);
@@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 static int io_register_personality(struct io_ring_ctx *ctx)
 {
 	const struct cred *creds;
+	u32 id;
 	int ret;
 
 	creds = get_current_cred();
 
-	ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
-				USHRT_MAX, GFP_KERNEL);
-	if (ret < 0)
-		put_cred(creds);
+	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
+			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
+	if (!ret)
+		return id;
+	put_cred(creds);
 	return ret;
 }
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-08 14:16 [PATCH 5.12] io_uring: Convert personality_idr to XArray Pavel Begunkov
@ 2021-03-08 14:22 ` Pavel Begunkov
  2021-03-08 16:16   ` Matthew Wilcox
  2021-03-09 11:23   ` yangerkun
  2021-03-09 20:53 ` Jens Axboe
  1 sibling, 2 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-03-08 14:22 UTC (permalink / raw)
  To: Jens Axboe, io-uring
  Cc: Matthew Wilcox, yangerkun, Stefan Metzmacher, yi.zhang

On 08/03/2021 14:16, Pavel Begunkov wrote:
> From: "Matthew Wilcox (Oracle)" <[email protected]>
> 
> You can't call idr_remove() from within a idr_for_each() callback,
> but you can call xa_erase() from an xa_for_each() loop, so switch the
> entire personality_idr from the IDR to the XArray.  This manifests as a
> use-after-free as idr_for_each() attempts to walk the rest of the node
> after removing the last entry from it.

yangerkun, can you test it and similarly take care of buffer idr?


> 
> Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
> Cc: [email protected] # 5.6+
> Reported-by: yangerkun <[email protected]>
> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
> [Pavel: rebased (creds load was moved into io_init_req())]
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>  fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
>  1 file changed, 24 insertions(+), 23 deletions(-)
> 
> p.s. passes liburing tests well
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5ef9f836cccc..5505e19f1391 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -407,7 +407,8 @@ struct io_ring_ctx {
>  
>  	struct idr		io_buffer_idr;
>  
> -	struct idr		personality_idr;
> +	struct xarray		personalities;
> +	u32			pers_next;
>  
>  	struct {
>  		unsigned		cached_cq_tail;
> @@ -1138,7 +1139,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>  	init_completion(&ctx->ref_comp);
>  	init_completion(&ctx->sq_thread_comp);
>  	idr_init(&ctx->io_buffer_idr);
> -	idr_init(&ctx->personality_idr);
> +	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>  	mutex_init(&ctx->uring_lock);
>  	init_waitqueue_head(&ctx->wait);
>  	spin_lock_init(&ctx->completion_lock);
> @@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>  	req->work.list.next = NULL;
>  	personality = READ_ONCE(sqe->personality);
>  	if (personality) {
> -		req->work.creds = idr_find(&ctx->personality_idr, personality);
> +		req->work.creds = xa_load(&ctx->personalities, personality);
>  		if (!req->work.creds)
>  			return -EINVAL;
>  		get_cred(req->work.creds);
> @@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
>  	mutex_unlock(&ctx->uring_lock);
>  	io_eventfd_unregister(ctx);
>  	io_destroy_buffers(ctx);
> -	idr_destroy(&ctx->personality_idr);
>  
>  #if defined(CONFIG_UNIX)
>  	if (ctx->ring_sock) {
> @@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
>  {
>  	const struct cred *creds;
>  
> -	creds = idr_remove(&ctx->personality_idr, id);
> +	creds = xa_erase(&ctx->personalities, id);
>  	if (creds) {
>  		put_cred(creds);
>  		return 0;
> @@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
>  	return -EINVAL;
>  }
>  
> -static int io_remove_personalities(int id, void *p, void *data)
> -{
> -	struct io_ring_ctx *ctx = data;
> -
> -	io_unregister_personality(ctx, id);
> -	return 0;
> -}
> -
>  static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
>  {
>  	struct callback_head *work, *next;
> @@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct work_struct *work)
>  
>  static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
>  {
> +	unsigned long index;
> +	struct creds *creds;
> +
>  	mutex_lock(&ctx->uring_lock);
>  	percpu_ref_kill(&ctx->refs);
>  	/* if force is set, the ring is going away. always drop after that */
>  	ctx->cq_overflow_flushed = 1;
>  	if (ctx->rings)
>  		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
> -	idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
> +	xa_for_each(&ctx->personalities, index, creds)
> +		io_unregister_personality(ctx, index);
>  	mutex_unlock(&ctx->uring_lock);
>  
>  	io_kill_timeouts(ctx, NULL, NULL);
> @@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
>  }
>  
>  #ifdef CONFIG_PROC_FS
> -static int io_uring_show_cred(int id, void *p, void *data)
> +static int io_uring_show_cred(struct seq_file *m, unsigned int id,
> +		const struct cred *cred)
>  {
> -	const struct cred *cred = p;
> -	struct seq_file *m = data;
>  	struct user_namespace *uns = seq_user_ns(m);
>  	struct group_info *gi;
>  	kernel_cap_t cap;
> @@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
>  		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
>  						(unsigned int) buf->len);
>  	}
> -	if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
> +	if (has_lock && !xa_empty(&ctx->personalities)) {
> +		unsigned long index;
> +		const struct cred *cred;
> +
>  		seq_printf(m, "Personalities:\n");
> -		idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
> +		xa_for_each(&ctx->personalities, index, cred)
> +			io_uring_show_cred(m, index, cred);
>  	}
>  	seq_printf(m, "PollList:\n");
>  	spin_lock_irq(&ctx->completion_lock);
> @@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
>  static int io_register_personality(struct io_ring_ctx *ctx)
>  {
>  	const struct cred *creds;
> +	u32 id;
>  	int ret;
>  
>  	creds = get_current_cred();
>  
> -	ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
> -				USHRT_MAX, GFP_KERNEL);
> -	if (ret < 0)
> -		put_cred(creds);
> +	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
> +			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
> +	if (!ret)
> +		return id;
> +	put_cred(creds);
>  	return ret;
>  }
>  
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-08 14:22 ` Pavel Begunkov
@ 2021-03-08 16:16   ` Matthew Wilcox
  2021-03-09 11:23   ` yangerkun
  1 sibling, 0 replies; 12+ messages in thread
From: Matthew Wilcox @ 2021-03-08 16:16 UTC (permalink / raw)
  To: Pavel Begunkov
  Cc: Jens Axboe, io-uring, yangerkun, Stefan Metzmacher, yi.zhang

On Mon, Mar 08, 2021 at 02:22:10PM +0000, Pavel Begunkov wrote:
> On 08/03/2021 14:16, Pavel Begunkov wrote:
> > From: "Matthew Wilcox (Oracle)" <[email protected]>
> > 
> > You can't call idr_remove() from within a idr_for_each() callback,
> > but you can call xa_erase() from an xa_for_each() loop, so switch the
> > entire personality_idr from the IDR to the XArray.  This manifests as a
> > use-after-free as idr_for_each() attempts to walk the rest of the node
> > after removing the last entry from it.
> 
> yangerkun, can you test it and similarly take care of buffer idr?

FWIW, I did a fairly naive conversion of the personalities IDR, because
efficiency really isn't the most important (you don't have a lot of
personalities, generally).  the buffer_idr seems like it might see a
lot more action than the personalities, so you might want to consider
something like:

+++ b/fs/io_uring.c
@@ -8543,7 +8543,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
        if (ctx->rings)
                __io_cqring_overflow_flush(ctx, true, NULL, NULL);
        xa_for_each(&ctx->personalities, index, creds)
-               io_unregister_personality(ctx, index);
+               put_cred(creds);
+       xa_destroy(&ctx->personalities);
        mutex_unlock(&ctx->uring_lock);
 
        io_kill_timeouts(ctx, NULL, NULL);

to be more efficient.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-08 14:22 ` Pavel Begunkov
  2021-03-08 16:16   ` Matthew Wilcox
@ 2021-03-09 11:23   ` yangerkun
  2021-03-13  8:02     ` yangerkun
  1 sibling, 1 reply; 12+ messages in thread
From: yangerkun @ 2021-03-09 11:23 UTC (permalink / raw)
  To: Pavel Begunkov, Jens Axboe, io-uring
  Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang



在 2021/3/8 22:22, Pavel Begunkov 写道:
> On 08/03/2021 14:16, Pavel Begunkov wrote:
>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>
>> You can't call idr_remove() from within a idr_for_each() callback,
>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>> entire personality_idr from the IDR to the XArray.  This manifests as a
>> use-after-free as idr_for_each() attempts to walk the rest of the node
>> after removing the last entry from it.
> 
> yangerkun, can you test it and similarly take care of buffer idr?

Will try it latter :)

> 
> 
>>
>> Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
>> Cc: [email protected] # 5.6+
>> Reported-by: yangerkun <[email protected]>
>> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
>> [Pavel: rebased (creds load was moved into io_init_req())]
>> Signed-off-by: Pavel Begunkov <[email protected]>
>> ---
>>   fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
>>   1 file changed, 24 insertions(+), 23 deletions(-)
>>
>> p.s. passes liburing tests well
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 5ef9f836cccc..5505e19f1391 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -407,7 +407,8 @@ struct io_ring_ctx {
>>   
>>   	struct idr		io_buffer_idr;
>>   
>> -	struct idr		personality_idr;
>> +	struct xarray		personalities;
>> +	u32			pers_next;
>>   
>>   	struct {
>>   		unsigned		cached_cq_tail;
>> @@ -1138,7 +1139,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>>   	init_completion(&ctx->ref_comp);
>>   	init_completion(&ctx->sq_thread_comp);
>>   	idr_init(&ctx->io_buffer_idr);
>> -	idr_init(&ctx->personality_idr);
>> +	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>>   	mutex_init(&ctx->uring_lock);
>>   	init_waitqueue_head(&ctx->wait);
>>   	spin_lock_init(&ctx->completion_lock);
>> @@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>   	req->work.list.next = NULL;
>>   	personality = READ_ONCE(sqe->personality);
>>   	if (personality) {
>> -		req->work.creds = idr_find(&ctx->personality_idr, personality);
>> +		req->work.creds = xa_load(&ctx->personalities, personality);
>>   		if (!req->work.creds)
>>   			return -EINVAL;
>>   		get_cred(req->work.creds);
>> @@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
>>   	mutex_unlock(&ctx->uring_lock);
>>   	io_eventfd_unregister(ctx);
>>   	io_destroy_buffers(ctx);
>> -	idr_destroy(&ctx->personality_idr);
>>   
>>   #if defined(CONFIG_UNIX)
>>   	if (ctx->ring_sock) {
>> @@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
>>   {
>>   	const struct cred *creds;
>>   
>> -	creds = idr_remove(&ctx->personality_idr, id);
>> +	creds = xa_erase(&ctx->personalities, id);
>>   	if (creds) {
>>   		put_cred(creds);
>>   		return 0;
>> @@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
>>   	return -EINVAL;
>>   }
>>   
>> -static int io_remove_personalities(int id, void *p, void *data)
>> -{
>> -	struct io_ring_ctx *ctx = data;
>> -
>> -	io_unregister_personality(ctx, id);
>> -	return 0;
>> -}
>> -
>>   static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
>>   {
>>   	struct callback_head *work, *next;
>> @@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct work_struct *work)
>>   
>>   static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
>>   {
>> +	unsigned long index;
>> +	struct creds *creds;
>> +
>>   	mutex_lock(&ctx->uring_lock);
>>   	percpu_ref_kill(&ctx->refs);
>>   	/* if force is set, the ring is going away. always drop after that */
>>   	ctx->cq_overflow_flushed = 1;
>>   	if (ctx->rings)
>>   		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
>> -	idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
>> +	xa_for_each(&ctx->personalities, index, creds)
>> +		io_unregister_personality(ctx, index);
>>   	mutex_unlock(&ctx->uring_lock);
>>   
>>   	io_kill_timeouts(ctx, NULL, NULL);
>> @@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
>>   }
>>   
>>   #ifdef CONFIG_PROC_FS
>> -static int io_uring_show_cred(int id, void *p, void *data)
>> +static int io_uring_show_cred(struct seq_file *m, unsigned int id,
>> +		const struct cred *cred)
>>   {
>> -	const struct cred *cred = p;
>> -	struct seq_file *m = data;
>>   	struct user_namespace *uns = seq_user_ns(m);
>>   	struct group_info *gi;
>>   	kernel_cap_t cap;
>> @@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
>>   		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
>>   						(unsigned int) buf->len);
>>   	}
>> -	if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
>> +	if (has_lock && !xa_empty(&ctx->personalities)) {
>> +		unsigned long index;
>> +		const struct cred *cred;
>> +
>>   		seq_printf(m, "Personalities:\n");
>> -		idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
>> +		xa_for_each(&ctx->personalities, index, cred)
>> +			io_uring_show_cred(m, index, cred);
>>   	}
>>   	seq_printf(m, "PollList:\n");
>>   	spin_lock_irq(&ctx->completion_lock);
>> @@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
>>   static int io_register_personality(struct io_ring_ctx *ctx)
>>   {
>>   	const struct cred *creds;
>> +	u32 id;
>>   	int ret;
>>   
>>   	creds = get_current_cred();
>>   
>> -	ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
>> -				USHRT_MAX, GFP_KERNEL);
>> -	if (ret < 0)
>> -		put_cred(creds);
>> +	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
>> +			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
>> +	if (!ret)
>> +		return id;
>> +	put_cred(creds);
>>   	return ret;
>>   }
>>   
>>
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-08 14:16 [PATCH 5.12] io_uring: Convert personality_idr to XArray Pavel Begunkov
  2021-03-08 14:22 ` Pavel Begunkov
@ 2021-03-09 20:53 ` Jens Axboe
  1 sibling, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2021-03-09 20:53 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring; +Cc: Matthew Wilcox, yangerkun, Stefan Metzmacher

On 3/8/21 7:16 AM, Pavel Begunkov wrote:
> From: "Matthew Wilcox (Oracle)" <[email protected]>
> 
> You can't call idr_remove() from within a idr_for_each() callback,
> but you can call xa_erase() from an xa_for_each() loop, so switch the
> entire personality_idr from the IDR to the XArray.  This manifests as a
> use-after-free as idr_for_each() attempts to walk the rest of the node
> after removing the last entry from it.

I applied this one yesterday, just forgot to reply here. I agree with
Matthew's optimization, though I suspect we'll have a few creds at
most and it won't make much of a difference in real life. Buffers would
likely be a lot more plentiful, so it's worth keeping in mind for
that one.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-09 11:23   ` yangerkun
@ 2021-03-13  8:02     ` yangerkun
  2021-03-13 15:34       ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: yangerkun @ 2021-03-13  8:02 UTC (permalink / raw)
  To: Pavel Begunkov, Jens Axboe, io-uring
  Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang



在 2021/3/9 19:23, yangerkun 写道:
> 
> 
> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>
>>> You can't call idr_remove() from within a idr_for_each() callback,
>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>> entire personality_idr from the IDR to the XArray.  This manifests as a
>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>> after removing the last entry from it.
>>
>> yangerkun, can you test it and similarly take care of buffer idr?
> 
> Will try it latter :)

Sorry for the latter reply. The patch pass the testcase.

Besides, should we apply this patch first to deal with the same UAF for
io_buffer_idr before convert to XArray?

https://lore.kernel.org/io-uring/[email protected]/T/#u

> 
>>
>>
>>>
>>> Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
>>> Cc: [email protected] # 5.6+
>>> Reported-by: yangerkun <[email protected]>
>>> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
>>> [Pavel: rebased (creds load was moved into io_init_req())]
>>> Signed-off-by: Pavel Begunkov <[email protected]>
>>> ---
>>>   fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
>>>   1 file changed, 24 insertions(+), 23 deletions(-)
>>>
>>> p.s. passes liburing tests well
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index 5ef9f836cccc..5505e19f1391 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -407,7 +407,8 @@ struct io_ring_ctx {
>>>       struct idr        io_buffer_idr;
>>> -    struct idr        personality_idr;
>>> +    struct xarray        personalities;
>>> +    u32            pers_next;
>>>       struct {
>>>           unsigned        cached_cq_tail;
>>> @@ -1138,7 +1139,7 @@ static struct io_ring_ctx 
>>> *io_ring_ctx_alloc(struct io_uring_params *p)
>>>       init_completion(&ctx->ref_comp);
>>>       init_completion(&ctx->sq_thread_comp);
>>>       idr_init(&ctx->io_buffer_idr);
>>> -    idr_init(&ctx->personality_idr);
>>> +    xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>>>       mutex_init(&ctx->uring_lock);
>>>       init_waitqueue_head(&ctx->wait);
>>>       spin_lock_init(&ctx->completion_lock);
>>> @@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, 
>>> struct io_kiocb *req,
>>>       req->work.list.next = NULL;
>>>       personality = READ_ONCE(sqe->personality);
>>>       if (personality) {
>>> -        req->work.creds = idr_find(&ctx->personality_idr, personality);
>>> +        req->work.creds = xa_load(&ctx->personalities, personality);
>>>           if (!req->work.creds)
>>>               return -EINVAL;
>>>           get_cred(req->work.creds);
>>> @@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx 
>>> *ctx)
>>>       mutex_unlock(&ctx->uring_lock);
>>>       io_eventfd_unregister(ctx);
>>>       io_destroy_buffers(ctx);
>>> -    idr_destroy(&ctx->personality_idr);
>>>   #if defined(CONFIG_UNIX)
>>>       if (ctx->ring_sock) {
>>> @@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct 
>>> io_ring_ctx *ctx, unsigned id)
>>>   {
>>>       const struct cred *creds;
>>> -    creds = idr_remove(&ctx->personality_idr, id);
>>> +    creds = xa_erase(&ctx->personalities, id);
>>>       if (creds) {
>>>           put_cred(creds);
>>>           return 0;
>>> @@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct 
>>> io_ring_ctx *ctx, unsigned id)
>>>       return -EINVAL;
>>>   }
>>> -static int io_remove_personalities(int id, void *p, void *data)
>>> -{
>>> -    struct io_ring_ctx *ctx = data;
>>> -
>>> -    io_unregister_personality(ctx, id);
>>> -    return 0;
>>> -}
>>> -
>>>   static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
>>>   {
>>>       struct callback_head *work, *next;
>>> @@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct 
>>> work_struct *work)
>>>   static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
>>>   {
>>> +    unsigned long index;
>>> +    struct creds *creds;
>>> +
>>>       mutex_lock(&ctx->uring_lock);
>>>       percpu_ref_kill(&ctx->refs);
>>>       /* if force is set, the ring is going away. always drop after 
>>> that */
>>>       ctx->cq_overflow_flushed = 1;
>>>       if (ctx->rings)
>>>           __io_cqring_overflow_flush(ctx, true, NULL, NULL);
>>> -    idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
>>> +    xa_for_each(&ctx->personalities, index, creds)
>>> +        io_unregister_personality(ctx, index);
>>>       mutex_unlock(&ctx->uring_lock);
>>>       io_kill_timeouts(ctx, NULL, NULL);
>>> @@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, 
>>> fd, u32, to_submit,
>>>   }
>>>   #ifdef CONFIG_PROC_FS
>>> -static int io_uring_show_cred(int id, void *p, void *data)
>>> +static int io_uring_show_cred(struct seq_file *m, unsigned int id,
>>> +        const struct cred *cred)
>>>   {
>>> -    const struct cred *cred = p;
>>> -    struct seq_file *m = data;
>>>       struct user_namespace *uns = seq_user_ns(m);
>>>       struct group_info *gi;
>>>       kernel_cap_t cap;
>>> @@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct 
>>> io_ring_ctx *ctx, struct seq_file *m)
>>>           seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
>>>                           (unsigned int) buf->len);
>>>       }
>>> -    if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
>>> +    if (has_lock && !xa_empty(&ctx->personalities)) {
>>> +        unsigned long index;
>>> +        const struct cred *cred;
>>> +
>>>           seq_printf(m, "Personalities:\n");
>>> -        idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
>>> +        xa_for_each(&ctx->personalities, index, cred)
>>> +            io_uring_show_cred(m, index, cred);
>>>       }
>>>       seq_printf(m, "PollList:\n");
>>>       spin_lock_irq(&ctx->completion_lock);
>>> @@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, 
>>> void __user *arg, unsigned nr_args)
>>>   static int io_register_personality(struct io_ring_ctx *ctx)
>>>   {
>>>       const struct cred *creds;
>>> +    u32 id;
>>>       int ret;
>>>       creds = get_current_cred();
>>> -    ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
>>> -                USHRT_MAX, GFP_KERNEL);
>>> -    if (ret < 0)
>>> -        put_cred(creds);
>>> +    ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
>>> +            XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
>>> +    if (!ret)
>>> +        return id;
>>> +    put_cred(creds);
>>>       return ret;
>>>   }
>>>
>>
> .

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-13  8:02     ` yangerkun
@ 2021-03-13 15:34       ` Jens Axboe
  2021-03-13 19:01         ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 15:34 UTC (permalink / raw)
  To: yangerkun, Pavel Begunkov, io-uring
  Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang

On 3/13/21 1:02 AM, yangerkun wrote:
> 
> 
> 在 2021/3/9 19:23, yangerkun 写道:
>>
>>
>> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>>
>>>> You can't call idr_remove() from within a idr_for_each() callback,
>>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>>> entire personality_idr from the IDR to the XArray.  This manifests as a
>>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>>> after removing the last entry from it.
>>>
>>> yangerkun, can you test it and similarly take care of buffer idr?
>>
>> Will try it latter :)
> 
> Sorry for the latter reply. The patch pass the testcase.
> 
> Besides, should we apply this patch first to deal with the same UAF for
> io_buffer_idr before convert to XArray?
> 
> https://lore.kernel.org/io-uring/[email protected]/T/#u

Agree, and then defer an xarray conversion to 5.13. I'll take a look at
your patch and get it applied.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-13 15:34       ` Jens Axboe
@ 2021-03-13 19:01         ` Jens Axboe
  2021-03-13 19:30           ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 19:01 UTC (permalink / raw)
  To: yangerkun, Pavel Begunkov, io-uring
  Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang

On 3/13/21 8:34 AM, Jens Axboe wrote:
> On 3/13/21 1:02 AM, yangerkun wrote:
>>
>>
>> 在 2021/3/9 19:23, yangerkun 写道:
>>>
>>>
>>> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>>>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>>>
>>>>> You can't call idr_remove() from within a idr_for_each() callback,
>>>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>>>> entire personality_idr from the IDR to the XArray.  This manifests as a
>>>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>>>> after removing the last entry from it.
>>>>
>>>> yangerkun, can you test it and similarly take care of buffer idr?
>>>
>>> Will try it latter :)
>>
>> Sorry for the latter reply. The patch pass the testcase.
>>
>> Besides, should we apply this patch first to deal with the same UAF for
>> io_buffer_idr before convert to XArray?
>>
>> https://lore.kernel.org/io-uring/[email protected]/T/#u
> 
> Agree, and then defer an xarray conversion to 5.13. I'll take a look at
> your patch and get it applied.

That one is very broken, it both fails removal cases and it's got leak
issues too.

I'm going to take a look at just doing xarray instead.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-13 19:01         ` Jens Axboe
@ 2021-03-13 19:30           ` Jens Axboe
  2021-03-13 19:54             ` Matthew Wilcox
  0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 19:30 UTC (permalink / raw)
  To: yangerkun, Pavel Begunkov, io-uring
  Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang

On 3/13/21 12:01 PM, Jens Axboe wrote:
> On 3/13/21 8:34 AM, Jens Axboe wrote:
>> On 3/13/21 1:02 AM, yangerkun wrote:
>>>
>>>
>>> 在 2021/3/9 19:23, yangerkun 写道:
>>>>
>>>>
>>>> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>>>>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>>>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>>>>
>>>>>> You can't call idr_remove() from within a idr_for_each() callback,
>>>>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>>>>> entire personality_idr from the IDR to the XArray.  This manifests as a
>>>>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>>>>> after removing the last entry from it.
>>>>>
>>>>> yangerkun, can you test it and similarly take care of buffer idr?
>>>>
>>>> Will try it latter :)
>>>
>>> Sorry for the latter reply. The patch pass the testcase.
>>>
>>> Besides, should we apply this patch first to deal with the same UAF for
>>> io_buffer_idr before convert to XArray?
>>>
>>> https://lore.kernel.org/io-uring/[email protected]/T/#u
>>
>> Agree, and then defer an xarray conversion to 5.13. I'll take a look at
>> your patch and get it applied.
> 
> That one is very broken, it both fails removal cases and it's got leak
> issues too.
> 
> I'm going to take a look at just doing xarray instead.

Something like the below - tested as working for me, and has no leaks.


From: Jens Axboe <[email protected]>
Subject: [PATCH] io_uring: convert io_buffer_idr to XArray

Like we did for the personality idr, convert the IO buffer idr to use
XArray. This avoids a use-after-free on removal of entries, since idr
doesn't like doing so from inside an iterator.

Fixes: 5a2e745d4d43 ("io_uring: buffer registration infrastructure")
Cc: [email protected]
Reported-by: Hulk Robot <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 49 ++++++++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05adc4887ef3..3d259a120f0d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -402,7 +402,8 @@ struct io_ring_ctx {
 	struct socket		*ring_sock;
 #endif
 
-	struct idr		io_buffer_idr;
+	struct xarray		io_buffer;
+	u32			io_buffer_next;
 
 	struct xarray		personalities;
 	u32			pers_next;
@@ -1135,7 +1136,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	init_completion(&ctx->ref_comp);
-	idr_init(&ctx->io_buffer_idr);
+	xa_init_flags(&ctx->io_buffer, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
@@ -2843,7 +2844,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 
 	lockdep_assert_held(&req->ctx->uring_lock);
 
-	head = idr_find(&req->ctx->io_buffer_idr, bgid);
+	head = xa_load(&req->ctx->io_buffer, bgid);
 	if (head) {
 		if (!list_empty(&head->list)) {
 			kbuf = list_last_entry(&head->list, struct io_buffer,
@@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 			list_del(&kbuf->list);
 		} else {
 			kbuf = head;
-			idr_remove(&req->ctx->io_buffer_idr, bgid);
+			__xa_erase(&req->ctx->io_buffer, bgid);
 		}
 		if (*len > kbuf->len)
 			*len = kbuf->len;
@@ -3892,7 +3893,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 	}
 	i++;
 	kfree(buf);
-	idr_remove(&ctx->io_buffer_idr, bgid);
+	__xa_erase(&ctx->io_buffer, bgid);
 
 	return i;
 }
@@ -3910,7 +3911,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
 
 	ret = -ENOENT;
-	head = idr_find(&ctx->io_buffer_idr, p->bgid);
+	head = xa_load(&ctx->io_buffer, p->bgid);
 	if (head)
 		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
 	if (ret < 0)
@@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	lockdep_assert_held(&ctx->uring_lock);
 
-	list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+	list = head = xa_load(&ctx->io_buffer, p->bgid);
 
 	ret = io_add_buffers(p, &head);
-	if (ret < 0)
-		goto out;
+	if (ret >= 0 && !list) {
+		u32 id = -1U;
 
-	if (!list) {
-		ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
-					GFP_KERNEL);
-		if (ret < 0) {
+		ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
+					XA_LIMIT(0, USHRT_MAX),
+					&ctx->io_buffer_next, GFP_KERNEL);
+		if (ret < 0)
 			__io_remove_buffers(ctx, head, p->bgid, -1U);
-			goto out;
-		}
+		else
+			ret = id;
 	}
-out:
 	if (ret < 0)
 		req_set_fail_links(req);
 
@@ -8333,19 +8333,14 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	return -ENXIO;
 }
 
-static int __io_destroy_buffers(int id, void *p, void *data)
-{
-	struct io_ring_ctx *ctx = data;
-	struct io_buffer *buf = p;
-
-	__io_remove_buffers(ctx, buf, id, -1U);
-	return 0;
-}
-
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-	idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
-	idr_destroy(&ctx->io_buffer_idr);
+	struct io_buffer *buf;
+	unsigned long index;
+
+	xa_for_each(&ctx->io_buffer, index, buf)
+		__io_remove_buffers(ctx, buf, index, -1U);
+	xa_destroy(&ctx->io_buffer);
 }
 
 static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
-- 
2.30.2


-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-13 19:30           ` Jens Axboe
@ 2021-03-13 19:54             ` Matthew Wilcox
  2021-03-13 20:13               ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2021-03-13 19:54 UTC (permalink / raw)
  To: Jens Axboe
  Cc: yangerkun, Pavel Begunkov, io-uring, Stefan Metzmacher, yi.zhang

On Sat, Mar 13, 2021 at 12:30:14PM -0700, Jens Axboe wrote:
> @@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
>  			list_del(&kbuf->list);
>  		} else {
>  			kbuf = head;
> -			idr_remove(&req->ctx->io_buffer_idr, bgid);
> +			__xa_erase(&req->ctx->io_buffer, bgid);

Umm ... __xa_erase()?  Did you enable all the lockdep infrastructure?
This should have tripped some of the debugging code because I don't think
you're holding the xa_lock.

> @@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
>  
>  	lockdep_assert_held(&ctx->uring_lock);
>  
> -	list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
> +	list = head = xa_load(&ctx->io_buffer, p->bgid);
>  
>  	ret = io_add_buffers(p, &head);
> -	if (ret < 0)
> -		goto out;
> +	if (ret >= 0 && !list) {
> +		u32 id = -1U;
>  
> -	if (!list) {
> -		ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
> -					GFP_KERNEL);
> -		if (ret < 0) {
> +		ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
> +					XA_LIMIT(0, USHRT_MAX),
> +					&ctx->io_buffer_next, GFP_KERNEL);

I don't understand why this works.  The equivalent transformation here
would have been:

		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);

with various options to handle it differently.

>  static void io_destroy_buffers(struct io_ring_ctx *ctx)
>  {
> -	idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
> -	idr_destroy(&ctx->io_buffer_idr);
> +	struct io_buffer *buf;
> +	unsigned long index;
> +
> +	xa_for_each(&ctx->io_buffer, index, buf)
> +		__io_remove_buffers(ctx, buf, index, -1U);
> +	xa_destroy(&ctx->io_buffer);

Honestly, I'd do BUG_ON(!xa_empty(&ctx->io_buffers)) if anything.  If that
loop didn't empty the array, something is terribly wrong and we should
know about it somehow instead of making the memory leak harder to find.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-13 19:54             ` Matthew Wilcox
@ 2021-03-13 20:13               ` Jens Axboe
  2021-03-13 20:22                 ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 20:13 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: yangerkun, Pavel Begunkov, io-uring, Stefan Metzmacher, yi.zhang

On 3/13/21 12:54 PM, Matthew Wilcox wrote:
> On Sat, Mar 13, 2021 at 12:30:14PM -0700, Jens Axboe wrote:
>> @@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
>>  			list_del(&kbuf->list);
>>  		} else {
>>  			kbuf = head;
>> -			idr_remove(&req->ctx->io_buffer_idr, bgid);
>> +			__xa_erase(&req->ctx->io_buffer, bgid);
> 
> Umm ... __xa_erase()?  Did you enable all the lockdep infrastructure?
> This should have tripped some of the debugging code because I don't think
> you're holding the xa_lock.

Not run with lockdep - and probably my misunderstanding, do we need xa_lock()
if we provide our own locking?

>> @@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
>>  
>>  	lockdep_assert_held(&ctx->uring_lock);
>>  
>> -	list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
>> +	list = head = xa_load(&ctx->io_buffer, p->bgid);
>>  
>>  	ret = io_add_buffers(p, &head);
>> -	if (ret < 0)
>> -		goto out;
>> +	if (ret >= 0 && !list) {
>> +		u32 id = -1U;
>>  
>> -	if (!list) {
>> -		ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
>> -					GFP_KERNEL);
>> -		if (ret < 0) {
>> +		ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
>> +					XA_LIMIT(0, USHRT_MAX),
>> +					&ctx->io_buffer_next, GFP_KERNEL);
> 
> I don't understand why this works.  The equivalent transformation here
> would have been:
> 
> 		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
> 
> with various options to handle it differently.

True, that does look kinda weird (and wrong). I'll fix that up.

>>  static void io_destroy_buffers(struct io_ring_ctx *ctx)
>>  {
>> -	idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
>> -	idr_destroy(&ctx->io_buffer_idr);
>> +	struct io_buffer *buf;
>> +	unsigned long index;
>> +
>> +	xa_for_each(&ctx->io_buffer, index, buf)
>> +		__io_remove_buffers(ctx, buf, index, -1U);
>> +	xa_destroy(&ctx->io_buffer);
> 
> Honestly, I'd do BUG_ON(!xa_empty(&ctx->io_buffers)) if anything.  If that
> loop didn't empty the array, something is terribly wrong and we should
> know about it somehow instead of making the memory leak harder to find.

Probably also my misunderstanding - do I not need to call xa_destroy()
if I prune all the members? Assumed we needed it to free some internal
state, but maybe that's not the case?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
  2021-03-13 20:13               ` Jens Axboe
@ 2021-03-13 20:22                 ` Jens Axboe
  0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 20:22 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: yangerkun, Pavel Begunkov, io-uring, Stefan Metzmacher, yi.zhang

On 3/13/21 1:13 PM, Jens Axboe wrote:
> On 3/13/21 12:54 PM, Matthew Wilcox wrote:
>> On Sat, Mar 13, 2021 at 12:30:14PM -0700, Jens Axboe wrote:
>>> @@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
>>>  			list_del(&kbuf->list);
>>>  		} else {
>>>  			kbuf = head;
>>> -			idr_remove(&req->ctx->io_buffer_idr, bgid);
>>> +			__xa_erase(&req->ctx->io_buffer, bgid);
>>
>> Umm ... __xa_erase()?  Did you enable all the lockdep infrastructure?
>> This should have tripped some of the debugging code because I don't think
>> you're holding the xa_lock.
> 
> Not run with lockdep - and probably my misunderstanding, do we need xa_lock()
> if we provide our own locking?
> 
>>> @@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
>>>  
>>>  	lockdep_assert_held(&ctx->uring_lock);
>>>  
>>> -	list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
>>> +	list = head = xa_load(&ctx->io_buffer, p->bgid);
>>>  
>>>  	ret = io_add_buffers(p, &head);
>>> -	if (ret < 0)
>>> -		goto out;
>>> +	if (ret >= 0 && !list) {
>>> +		u32 id = -1U;
>>>  
>>> -	if (!list) {
>>> -		ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
>>> -					GFP_KERNEL);
>>> -		if (ret < 0) {
>>> +		ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
>>> +					XA_LIMIT(0, USHRT_MAX),
>>> +					&ctx->io_buffer_next, GFP_KERNEL);
>>
>> I don't understand why this works.  The equivalent transformation here
>> would have been:
>>
>> 		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
>>
>> with various options to handle it differently.
> 
> True, that does look kinda weird (and wrong). I'll fix that up.
> 
>>>  static void io_destroy_buffers(struct io_ring_ctx *ctx)
>>>  {
>>> -	idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
>>> -	idr_destroy(&ctx->io_buffer_idr);
>>> +	struct io_buffer *buf;
>>> +	unsigned long index;
>>> +
>>> +	xa_for_each(&ctx->io_buffer, index, buf)
>>> +		__io_remove_buffers(ctx, buf, index, -1U);
>>> +	xa_destroy(&ctx->io_buffer);
>>
>> Honestly, I'd do BUG_ON(!xa_empty(&ctx->io_buffers)) if anything.  If that
>> loop didn't empty the array, something is terribly wrong and we should
>> know about it somehow instead of making the memory leak harder to find.
> 
> Probably also my misunderstanding - do I not need to call xa_destroy()
> if I prune all the members? Assumed we needed it to free some internal
> state, but maybe that's not the case?

Here's a v2. Verified no leaks with the killed xa_destroy(), and that
lockdep is happy. BTW, much better API, which is evident from the fact
that a conversion like this ends up with the below diffstat:

 io_uring.c |   43 +++++++++++++++----------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)


commit 51c681e3487d091b447175088bcf546f5ce1bf35
Author: Jens Axboe <[email protected]>
Date:   Sat Mar 13 12:29:43 2021 -0700

    io_uring: convert io_buffer_idr to XArray
    
    Like we did for the personality idr, convert the IO buffer idr to use
    XArray. This avoids a use-after-free on removal of entries, since idr
    doesn't like doing so from inside an iterator.
    
    Fixes: 5a2e745d4d43 ("io_uring: buffer registration infrastructure")
    Cc: [email protected]
    Reported-by: Hulk Robot <[email protected]>
    Signed-off-by: Jens Axboe <[email protected]>

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05adc4887ef3..642ad08d8964 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -402,7 +402,7 @@ struct io_ring_ctx {
 	struct socket		*ring_sock;
 #endif
 
-	struct idr		io_buffer_idr;
+	struct xarray		io_buffer;
 
 	struct xarray		personalities;
 	u32			pers_next;
@@ -1135,7 +1135,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	init_completion(&ctx->ref_comp);
-	idr_init(&ctx->io_buffer_idr);
+	xa_init_flags(&ctx->io_buffer, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
@@ -2843,7 +2843,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 
 	lockdep_assert_held(&req->ctx->uring_lock);
 
-	head = idr_find(&req->ctx->io_buffer_idr, bgid);
+	head = xa_load(&req->ctx->io_buffer, bgid);
 	if (head) {
 		if (!list_empty(&head->list)) {
 			kbuf = list_last_entry(&head->list, struct io_buffer,
@@ -2851,7 +2851,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 			list_del(&kbuf->list);
 		} else {
 			kbuf = head;
-			idr_remove(&req->ctx->io_buffer_idr, bgid);
+			xa_erase(&req->ctx->io_buffer, bgid);
 		}
 		if (*len > kbuf->len)
 			*len = kbuf->len;
@@ -3892,7 +3892,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 	}
 	i++;
 	kfree(buf);
-	idr_remove(&ctx->io_buffer_idr, bgid);
+	xa_erase(&ctx->io_buffer, bgid);
 
 	return i;
 }
@@ -3910,7 +3910,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
 
 	ret = -ENOENT;
-	head = idr_find(&ctx->io_buffer_idr, p->bgid);
+	head = xa_load(&ctx->io_buffer, p->bgid);
 	if (head)
 		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
 	if (ret < 0)
@@ -3993,21 +3993,14 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	lockdep_assert_held(&ctx->uring_lock);
 
-	list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+	list = head = xa_load(&ctx->io_buffer, p->bgid);
 
 	ret = io_add_buffers(p, &head);
-	if (ret < 0)
-		goto out;
-
-	if (!list) {
-		ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
-					GFP_KERNEL);
-		if (ret < 0) {
+	if (ret >= 0 && !list) {
+		ret = xa_insert(&ctx->io_buffer, p->bgid, head, GFP_KERNEL);
+		if (ret < 0)
 			__io_remove_buffers(ctx, head, p->bgid, -1U);
-			goto out;
-		}
 	}
-out:
 	if (ret < 0)
 		req_set_fail_links(req);
 
@@ -8333,19 +8326,13 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	return -ENXIO;
 }
 
-static int __io_destroy_buffers(int id, void *p, void *data)
-{
-	struct io_ring_ctx *ctx = data;
-	struct io_buffer *buf = p;
-
-	__io_remove_buffers(ctx, buf, id, -1U);
-	return 0;
-}
-
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-	idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
-	idr_destroy(&ctx->io_buffer_idr);
+	struct io_buffer *buf;
+	unsigned long index;
+
+	xa_for_each(&ctx->io_buffer, index, buf)
+		__io_remove_buffers(ctx, buf, index, -1U);
 }
 
 static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-03-13 20:23 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-03-08 14:16 [PATCH 5.12] io_uring: Convert personality_idr to XArray Pavel Begunkov
2021-03-08 14:22 ` Pavel Begunkov
2021-03-08 16:16   ` Matthew Wilcox
2021-03-09 11:23   ` yangerkun
2021-03-13  8:02     ` yangerkun
2021-03-13 15:34       ` Jens Axboe
2021-03-13 19:01         ` Jens Axboe
2021-03-13 19:30           ` Jens Axboe
2021-03-13 19:54             ` Matthew Wilcox
2021-03-13 20:13               ` Jens Axboe
2021-03-13 20:22                 ` Jens Axboe
2021-03-09 20:53 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox