* [PATCH 5.12] io_uring: Convert personality_idr to XArray
@ 2021-03-08 14:16 Pavel Begunkov
2021-03-08 14:22 ` Pavel Begunkov
2021-03-09 20:53 ` Jens Axboe
0 siblings, 2 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-03-08 14:16 UTC (permalink / raw)
To: Jens Axboe, io-uring
Cc: Matthew Wilcox, yangerkun, asml.silence, Stefan Metzmacher
From: "Matthew Wilcox (Oracle)" <[email protected]>
You can't call idr_remove() from within a idr_for_each() callback,
but you can call xa_erase() from an xa_for_each() loop, so switch the
entire personality_idr from the IDR to the XArray. This manifests as a
use-after-free as idr_for_each() attempts to walk the rest of the node
after removing the last entry from it.
Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
Cc: [email protected] # 5.6+
Reported-by: yangerkun <[email protected]>
Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
[Pavel: rebased (creds load was moved into io_init_req())]
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 23 deletions(-)
p.s. passes liburing tests well
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ef9f836cccc..5505e19f1391 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -407,7 +407,8 @@ struct io_ring_ctx {
struct idr io_buffer_idr;
- struct idr personality_idr;
+ struct xarray personalities;
+ u32 pers_next;
struct {
unsigned cached_cq_tail;
@@ -1138,7 +1139,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_completion(&ctx->ref_comp);
init_completion(&ctx->sq_thread_comp);
idr_init(&ctx->io_buffer_idr);
- idr_init(&ctx->personality_idr);
+ xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
@@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->work.list.next = NULL;
personality = READ_ONCE(sqe->personality);
if (personality) {
- req->work.creds = idr_find(&ctx->personality_idr, personality);
+ req->work.creds = xa_load(&ctx->personalities, personality);
if (!req->work.creds)
return -EINVAL;
get_cred(req->work.creds);
@@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
io_eventfd_unregister(ctx);
io_destroy_buffers(ctx);
- idr_destroy(&ctx->personality_idr);
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
@@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
const struct cred *creds;
- creds = idr_remove(&ctx->personality_idr, id);
+ creds = xa_erase(&ctx->personalities, id);
if (creds) {
put_cred(creds);
return 0;
@@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
return -EINVAL;
}
-static int io_remove_personalities(int id, void *p, void *data)
-{
- struct io_ring_ctx *ctx = data;
-
- io_unregister_personality(ctx, id);
- return 0;
-}
-
static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
{
struct callback_head *work, *next;
@@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct work_struct *work)
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
+ unsigned long index;
+ struct creds *creds;
+
mutex_lock(&ctx->uring_lock);
percpu_ref_kill(&ctx->refs);
/* if force is set, the ring is going away. always drop after that */
ctx->cq_overflow_flushed = 1;
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true, NULL, NULL);
- idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
+ xa_for_each(&ctx->personalities, index, creds)
+ io_unregister_personality(ctx, index);
mutex_unlock(&ctx->uring_lock);
io_kill_timeouts(ctx, NULL, NULL);
@@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
}
#ifdef CONFIG_PROC_FS
-static int io_uring_show_cred(int id, void *p, void *data)
+static int io_uring_show_cred(struct seq_file *m, unsigned int id,
+ const struct cred *cred)
{
- const struct cred *cred = p;
- struct seq_file *m = data;
struct user_namespace *uns = seq_user_ns(m);
struct group_info *gi;
kernel_cap_t cap;
@@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
(unsigned int) buf->len);
}
- if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
+ if (has_lock && !xa_empty(&ctx->personalities)) {
+ unsigned long index;
+ const struct cred *cred;
+
seq_printf(m, "Personalities:\n");
- idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
+ xa_for_each(&ctx->personalities, index, cred)
+ io_uring_show_cred(m, index, cred);
}
seq_printf(m, "PollList:\n");
spin_lock_irq(&ctx->completion_lock);
@@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
static int io_register_personality(struct io_ring_ctx *ctx)
{
const struct cred *creds;
+ u32 id;
int ret;
creds = get_current_cred();
- ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
- USHRT_MAX, GFP_KERNEL);
- if (ret < 0)
- put_cred(creds);
+ ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
+ XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
+ if (!ret)
+ return id;
+ put_cred(creds);
return ret;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-08 14:16 [PATCH 5.12] io_uring: Convert personality_idr to XArray Pavel Begunkov
@ 2021-03-08 14:22 ` Pavel Begunkov
2021-03-08 16:16 ` Matthew Wilcox
2021-03-09 11:23 ` yangerkun
2021-03-09 20:53 ` Jens Axboe
1 sibling, 2 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-03-08 14:22 UTC (permalink / raw)
To: Jens Axboe, io-uring
Cc: Matthew Wilcox, yangerkun, Stefan Metzmacher, yi.zhang
On 08/03/2021 14:16, Pavel Begunkov wrote:
> From: "Matthew Wilcox (Oracle)" <[email protected]>
>
> You can't call idr_remove() from within a idr_for_each() callback,
> but you can call xa_erase() from an xa_for_each() loop, so switch the
> entire personality_idr from the IDR to the XArray. This manifests as a
> use-after-free as idr_for_each() attempts to walk the rest of the node
> after removing the last entry from it.
yangerkun, can you test it and similarly take care of buffer idr?
>
> Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
> Cc: [email protected] # 5.6+
> Reported-by: yangerkun <[email protected]>
> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
> [Pavel: rebased (creds load was moved into io_init_req())]
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
> 1 file changed, 24 insertions(+), 23 deletions(-)
>
> p.s. passes liburing tests well
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5ef9f836cccc..5505e19f1391 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -407,7 +407,8 @@ struct io_ring_ctx {
>
> struct idr io_buffer_idr;
>
> - struct idr personality_idr;
> + struct xarray personalities;
> + u32 pers_next;
>
> struct {
> unsigned cached_cq_tail;
> @@ -1138,7 +1139,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> init_completion(&ctx->ref_comp);
> init_completion(&ctx->sq_thread_comp);
> idr_init(&ctx->io_buffer_idr);
> - idr_init(&ctx->personality_idr);
> + xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
> mutex_init(&ctx->uring_lock);
> init_waitqueue_head(&ctx->wait);
> spin_lock_init(&ctx->completion_lock);
> @@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
> req->work.list.next = NULL;
> personality = READ_ONCE(sqe->personality);
> if (personality) {
> - req->work.creds = idr_find(&ctx->personality_idr, personality);
> + req->work.creds = xa_load(&ctx->personalities, personality);
> if (!req->work.creds)
> return -EINVAL;
> get_cred(req->work.creds);
> @@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
> mutex_unlock(&ctx->uring_lock);
> io_eventfd_unregister(ctx);
> io_destroy_buffers(ctx);
> - idr_destroy(&ctx->personality_idr);
>
> #if defined(CONFIG_UNIX)
> if (ctx->ring_sock) {
> @@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
> {
> const struct cred *creds;
>
> - creds = idr_remove(&ctx->personality_idr, id);
> + creds = xa_erase(&ctx->personalities, id);
> if (creds) {
> put_cred(creds);
> return 0;
> @@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
> return -EINVAL;
> }
>
> -static int io_remove_personalities(int id, void *p, void *data)
> -{
> - struct io_ring_ctx *ctx = data;
> -
> - io_unregister_personality(ctx, id);
> - return 0;
> -}
> -
> static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
> {
> struct callback_head *work, *next;
> @@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct work_struct *work)
>
> static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
> {
> + unsigned long index;
> + struct creds *creds;
> +
> mutex_lock(&ctx->uring_lock);
> percpu_ref_kill(&ctx->refs);
> /* if force is set, the ring is going away. always drop after that */
> ctx->cq_overflow_flushed = 1;
> if (ctx->rings)
> __io_cqring_overflow_flush(ctx, true, NULL, NULL);
> - idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
> + xa_for_each(&ctx->personalities, index, creds)
> + io_unregister_personality(ctx, index);
> mutex_unlock(&ctx->uring_lock);
>
> io_kill_timeouts(ctx, NULL, NULL);
> @@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
> }
>
> #ifdef CONFIG_PROC_FS
> -static int io_uring_show_cred(int id, void *p, void *data)
> +static int io_uring_show_cred(struct seq_file *m, unsigned int id,
> + const struct cred *cred)
> {
> - const struct cred *cred = p;
> - struct seq_file *m = data;
> struct user_namespace *uns = seq_user_ns(m);
> struct group_info *gi;
> kernel_cap_t cap;
> @@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
> seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
> (unsigned int) buf->len);
> }
> - if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
> + if (has_lock && !xa_empty(&ctx->personalities)) {
> + unsigned long index;
> + const struct cred *cred;
> +
> seq_printf(m, "Personalities:\n");
> - idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
> + xa_for_each(&ctx->personalities, index, cred)
> + io_uring_show_cred(m, index, cred);
> }
> seq_printf(m, "PollList:\n");
> spin_lock_irq(&ctx->completion_lock);
> @@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
> static int io_register_personality(struct io_ring_ctx *ctx)
> {
> const struct cred *creds;
> + u32 id;
> int ret;
>
> creds = get_current_cred();
>
> - ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
> - USHRT_MAX, GFP_KERNEL);
> - if (ret < 0)
> - put_cred(creds);
> + ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
> + XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
> + if (!ret)
> + return id;
> + put_cred(creds);
> return ret;
> }
>
>
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-08 14:22 ` Pavel Begunkov
@ 2021-03-08 16:16 ` Matthew Wilcox
2021-03-09 11:23 ` yangerkun
1 sibling, 0 replies; 12+ messages in thread
From: Matthew Wilcox @ 2021-03-08 16:16 UTC (permalink / raw)
To: Pavel Begunkov
Cc: Jens Axboe, io-uring, yangerkun, Stefan Metzmacher, yi.zhang
On Mon, Mar 08, 2021 at 02:22:10PM +0000, Pavel Begunkov wrote:
> On 08/03/2021 14:16, Pavel Begunkov wrote:
> > From: "Matthew Wilcox (Oracle)" <[email protected]>
> >
> > You can't call idr_remove() from within a idr_for_each() callback,
> > but you can call xa_erase() from an xa_for_each() loop, so switch the
> > entire personality_idr from the IDR to the XArray. This manifests as a
> > use-after-free as idr_for_each() attempts to walk the rest of the node
> > after removing the last entry from it.
>
> yangerkun, can you test it and similarly take care of buffer idr?
FWIW, I did a fairly naive conversion of the personalities IDR, because
efficiency really isn't the most important (you don't have a lot of
personalities, generally). the buffer_idr seems like it might see a
lot more action than the personalities, so you might want to consider
something like:
+++ b/fs/io_uring.c
@@ -8543,7 +8543,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true, NULL, NULL);
xa_for_each(&ctx->personalities, index, creds)
- io_unregister_personality(ctx, index);
+ put_cred(creds);
+ xa_destroy(&ctx->personalities);
mutex_unlock(&ctx->uring_lock);
io_kill_timeouts(ctx, NULL, NULL);
to be more efficient.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-08 14:22 ` Pavel Begunkov
2021-03-08 16:16 ` Matthew Wilcox
@ 2021-03-09 11:23 ` yangerkun
2021-03-13 8:02 ` yangerkun
1 sibling, 1 reply; 12+ messages in thread
From: yangerkun @ 2021-03-09 11:23 UTC (permalink / raw)
To: Pavel Begunkov, Jens Axboe, io-uring
Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang
在 2021/3/8 22:22, Pavel Begunkov 写道:
> On 08/03/2021 14:16, Pavel Begunkov wrote:
>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>
>> You can't call idr_remove() from within a idr_for_each() callback,
>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>> entire personality_idr from the IDR to the XArray. This manifests as a
>> use-after-free as idr_for_each() attempts to walk the rest of the node
>> after removing the last entry from it.
>
> yangerkun, can you test it and similarly take care of buffer idr?
Will try it latter :)
>
>
>>
>> Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
>> Cc: [email protected] # 5.6+
>> Reported-by: yangerkun <[email protected]>
>> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
>> [Pavel: rebased (creds load was moved into io_init_req())]
>> Signed-off-by: Pavel Begunkov <[email protected]>
>> ---
>> fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
>> 1 file changed, 24 insertions(+), 23 deletions(-)
>>
>> p.s. passes liburing tests well
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 5ef9f836cccc..5505e19f1391 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -407,7 +407,8 @@ struct io_ring_ctx {
>>
>> struct idr io_buffer_idr;
>>
>> - struct idr personality_idr;
>> + struct xarray personalities;
>> + u32 pers_next;
>>
>> struct {
>> unsigned cached_cq_tail;
>> @@ -1138,7 +1139,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>> init_completion(&ctx->ref_comp);
>> init_completion(&ctx->sq_thread_comp);
>> idr_init(&ctx->io_buffer_idr);
>> - idr_init(&ctx->personality_idr);
>> + xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>> mutex_init(&ctx->uring_lock);
>> init_waitqueue_head(&ctx->wait);
>> spin_lock_init(&ctx->completion_lock);
>> @@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>> req->work.list.next = NULL;
>> personality = READ_ONCE(sqe->personality);
>> if (personality) {
>> - req->work.creds = idr_find(&ctx->personality_idr, personality);
>> + req->work.creds = xa_load(&ctx->personalities, personality);
>> if (!req->work.creds)
>> return -EINVAL;
>> get_cred(req->work.creds);
>> @@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
>> mutex_unlock(&ctx->uring_lock);
>> io_eventfd_unregister(ctx);
>> io_destroy_buffers(ctx);
>> - idr_destroy(&ctx->personality_idr);
>>
>> #if defined(CONFIG_UNIX)
>> if (ctx->ring_sock) {
>> @@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
>> {
>> const struct cred *creds;
>>
>> - creds = idr_remove(&ctx->personality_idr, id);
>> + creds = xa_erase(&ctx->personalities, id);
>> if (creds) {
>> put_cred(creds);
>> return 0;
>> @@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
>> return -EINVAL;
>> }
>>
>> -static int io_remove_personalities(int id, void *p, void *data)
>> -{
>> - struct io_ring_ctx *ctx = data;
>> -
>> - io_unregister_personality(ctx, id);
>> - return 0;
>> -}
>> -
>> static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
>> {
>> struct callback_head *work, *next;
>> @@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct work_struct *work)
>>
>> static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
>> {
>> + unsigned long index;
>> + struct creds *creds;
>> +
>> mutex_lock(&ctx->uring_lock);
>> percpu_ref_kill(&ctx->refs);
>> /* if force is set, the ring is going away. always drop after that */
>> ctx->cq_overflow_flushed = 1;
>> if (ctx->rings)
>> __io_cqring_overflow_flush(ctx, true, NULL, NULL);
>> - idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
>> + xa_for_each(&ctx->personalities, index, creds)
>> + io_unregister_personality(ctx, index);
>> mutex_unlock(&ctx->uring_lock);
>>
>> io_kill_timeouts(ctx, NULL, NULL);
>> @@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
>> }
>>
>> #ifdef CONFIG_PROC_FS
>> -static int io_uring_show_cred(int id, void *p, void *data)
>> +static int io_uring_show_cred(struct seq_file *m, unsigned int id,
>> + const struct cred *cred)
>> {
>> - const struct cred *cred = p;
>> - struct seq_file *m = data;
>> struct user_namespace *uns = seq_user_ns(m);
>> struct group_info *gi;
>> kernel_cap_t cap;
>> @@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
>> seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
>> (unsigned int) buf->len);
>> }
>> - if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
>> + if (has_lock && !xa_empty(&ctx->personalities)) {
>> + unsigned long index;
>> + const struct cred *cred;
>> +
>> seq_printf(m, "Personalities:\n");
>> - idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
>> + xa_for_each(&ctx->personalities, index, cred)
>> + io_uring_show_cred(m, index, cred);
>> }
>> seq_printf(m, "PollList:\n");
>> spin_lock_irq(&ctx->completion_lock);
>> @@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
>> static int io_register_personality(struct io_ring_ctx *ctx)
>> {
>> const struct cred *creds;
>> + u32 id;
>> int ret;
>>
>> creds = get_current_cred();
>>
>> - ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
>> - USHRT_MAX, GFP_KERNEL);
>> - if (ret < 0)
>> - put_cred(creds);
>> + ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
>> + XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
>> + if (!ret)
>> + return id;
>> + put_cred(creds);
>> return ret;
>> }
>>
>>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-08 14:16 [PATCH 5.12] io_uring: Convert personality_idr to XArray Pavel Begunkov
2021-03-08 14:22 ` Pavel Begunkov
@ 2021-03-09 20:53 ` Jens Axboe
1 sibling, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2021-03-09 20:53 UTC (permalink / raw)
To: Pavel Begunkov, io-uring; +Cc: Matthew Wilcox, yangerkun, Stefan Metzmacher
On 3/8/21 7:16 AM, Pavel Begunkov wrote:
> From: "Matthew Wilcox (Oracle)" <[email protected]>
>
> You can't call idr_remove() from within a idr_for_each() callback,
> but you can call xa_erase() from an xa_for_each() loop, so switch the
> entire personality_idr from the IDR to the XArray. This manifests as a
> use-after-free as idr_for_each() attempts to walk the rest of the node
> after removing the last entry from it.
I applied this one yesterday, just forgot to reply here. I agree with
Matthew's optimization, though I suspect we'll have a few creds at
most and it won't make much of a difference in real life. Buffers would
likely be a lot more plentiful, so it's worth keeping in mind for
that one.
--
Jens Axboe
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-09 11:23 ` yangerkun
@ 2021-03-13 8:02 ` yangerkun
2021-03-13 15:34 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: yangerkun @ 2021-03-13 8:02 UTC (permalink / raw)
To: Pavel Begunkov, Jens Axboe, io-uring
Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang
在 2021/3/9 19:23, yangerkun 写道:
>
>
> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>
>>> You can't call idr_remove() from within a idr_for_each() callback,
>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>> entire personality_idr from the IDR to the XArray. This manifests as a
>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>> after removing the last entry from it.
>>
>> yangerkun, can you test it and similarly take care of buffer idr?
>
> Will try it latter :)
Sorry for the latter reply. The patch pass the testcase.
Besides, should we apply this patch first to deal with the same UAF for
io_buffer_idr before convert to XArray?
https://lore.kernel.org/io-uring/[email protected]/T/#u
>
>>
>>
>>>
>>> Fixes: 071698e13ac6 ("io_uring: allow registering credentials")
>>> Cc: [email protected] # 5.6+
>>> Reported-by: yangerkun <[email protected]>
>>> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
>>> [Pavel: rebased (creds load was moved into io_init_req())]
>>> Signed-off-by: Pavel Begunkov <[email protected]>
>>> ---
>>> fs/io_uring.c | 47 ++++++++++++++++++++++++-----------------------
>>> 1 file changed, 24 insertions(+), 23 deletions(-)
>>>
>>> p.s. passes liburing tests well
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index 5ef9f836cccc..5505e19f1391 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -407,7 +407,8 @@ struct io_ring_ctx {
>>> struct idr io_buffer_idr;
>>> - struct idr personality_idr;
>>> + struct xarray personalities;
>>> + u32 pers_next;
>>> struct {
>>> unsigned cached_cq_tail;
>>> @@ -1138,7 +1139,7 @@ static struct io_ring_ctx
>>> *io_ring_ctx_alloc(struct io_uring_params *p)
>>> init_completion(&ctx->ref_comp);
>>> init_completion(&ctx->sq_thread_comp);
>>> idr_init(&ctx->io_buffer_idr);
>>> - idr_init(&ctx->personality_idr);
>>> + xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
>>> mutex_init(&ctx->uring_lock);
>>> init_waitqueue_head(&ctx->wait);
>>> spin_lock_init(&ctx->completion_lock);
>>> @@ -6338,7 +6339,7 @@ static int io_init_req(struct io_ring_ctx *ctx,
>>> struct io_kiocb *req,
>>> req->work.list.next = NULL;
>>> personality = READ_ONCE(sqe->personality);
>>> if (personality) {
>>> - req->work.creds = idr_find(&ctx->personality_idr, personality);
>>> + req->work.creds = xa_load(&ctx->personalities, personality);
>>> if (!req->work.creds)
>>> return -EINVAL;
>>> get_cred(req->work.creds);
>>> @@ -8359,7 +8360,6 @@ static void io_ring_ctx_free(struct io_ring_ctx
>>> *ctx)
>>> mutex_unlock(&ctx->uring_lock);
>>> io_eventfd_unregister(ctx);
>>> io_destroy_buffers(ctx);
>>> - idr_destroy(&ctx->personality_idr);
>>> #if defined(CONFIG_UNIX)
>>> if (ctx->ring_sock) {
>>> @@ -8424,7 +8424,7 @@ static int io_unregister_personality(struct
>>> io_ring_ctx *ctx, unsigned id)
>>> {
>>> const struct cred *creds;
>>> - creds = idr_remove(&ctx->personality_idr, id);
>>> + creds = xa_erase(&ctx->personalities, id);
>>> if (creds) {
>>> put_cred(creds);
>>> return 0;
>>> @@ -8433,14 +8433,6 @@ static int io_unregister_personality(struct
>>> io_ring_ctx *ctx, unsigned id)
>>> return -EINVAL;
>>> }
>>> -static int io_remove_personalities(int id, void *p, void *data)
>>> -{
>>> - struct io_ring_ctx *ctx = data;
>>> -
>>> - io_unregister_personality(ctx, id);
>>> - return 0;
>>> -}
>>> -
>>> static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
>>> {
>>> struct callback_head *work, *next;
>>> @@ -8530,13 +8522,17 @@ static void io_ring_exit_work(struct
>>> work_struct *work)
>>> static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
>>> {
>>> + unsigned long index;
>>> + struct creds *creds;
>>> +
>>> mutex_lock(&ctx->uring_lock);
>>> percpu_ref_kill(&ctx->refs);
>>> /* if force is set, the ring is going away. always drop after
>>> that */
>>> ctx->cq_overflow_flushed = 1;
>>> if (ctx->rings)
>>> __io_cqring_overflow_flush(ctx, true, NULL, NULL);
>>> - idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
>>> + xa_for_each(&ctx->personalities, index, creds)
>>> + io_unregister_personality(ctx, index);
>>> mutex_unlock(&ctx->uring_lock);
>>> io_kill_timeouts(ctx, NULL, NULL);
>>> @@ -9166,10 +9162,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int,
>>> fd, u32, to_submit,
>>> }
>>> #ifdef CONFIG_PROC_FS
>>> -static int io_uring_show_cred(int id, void *p, void *data)
>>> +static int io_uring_show_cred(struct seq_file *m, unsigned int id,
>>> + const struct cred *cred)
>>> {
>>> - const struct cred *cred = p;
>>> - struct seq_file *m = data;
>>> struct user_namespace *uns = seq_user_ns(m);
>>> struct group_info *gi;
>>> kernel_cap_t cap;
>>> @@ -9237,9 +9232,13 @@ static void __io_uring_show_fdinfo(struct
>>> io_ring_ctx *ctx, struct seq_file *m)
>>> seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
>>> (unsigned int) buf->len);
>>> }
>>> - if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
>>> + if (has_lock && !xa_empty(&ctx->personalities)) {
>>> + unsigned long index;
>>> + const struct cred *cred;
>>> +
>>> seq_printf(m, "Personalities:\n");
>>> - idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
>>> + xa_for_each(&ctx->personalities, index, cred)
>>> + io_uring_show_cred(m, index, cred);
>>> }
>>> seq_printf(m, "PollList:\n");
>>> spin_lock_irq(&ctx->completion_lock);
>>> @@ -9568,14 +9567,16 @@ static int io_probe(struct io_ring_ctx *ctx,
>>> void __user *arg, unsigned nr_args)
>>> static int io_register_personality(struct io_ring_ctx *ctx)
>>> {
>>> const struct cred *creds;
>>> + u32 id;
>>> int ret;
>>> creds = get_current_cred();
>>> - ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
>>> - USHRT_MAX, GFP_KERNEL);
>>> - if (ret < 0)
>>> - put_cred(creds);
>>> + ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
>>> + XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
>>> + if (!ret)
>>> + return id;
>>> + put_cred(creds);
>>> return ret;
>>> }
>>>
>>
> .
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-13 8:02 ` yangerkun
@ 2021-03-13 15:34 ` Jens Axboe
2021-03-13 19:01 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 15:34 UTC (permalink / raw)
To: yangerkun, Pavel Begunkov, io-uring
Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang
On 3/13/21 1:02 AM, yangerkun wrote:
>
>
> 在 2021/3/9 19:23, yangerkun 写道:
>>
>>
>> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>>
>>>> You can't call idr_remove() from within a idr_for_each() callback,
>>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>>> entire personality_idr from the IDR to the XArray. This manifests as a
>>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>>> after removing the last entry from it.
>>>
>>> yangerkun, can you test it and similarly take care of buffer idr?
>>
>> Will try it latter :)
>
> Sorry for the latter reply. The patch pass the testcase.
>
> Besides, should we apply this patch first to deal with the same UAF for
> io_buffer_idr before convert to XArray?
>
> https://lore.kernel.org/io-uring/[email protected]/T/#u
Agree, and then defer an xarray conversion to 5.13. I'll take a look at
your patch and get it applied.
--
Jens Axboe
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-13 15:34 ` Jens Axboe
@ 2021-03-13 19:01 ` Jens Axboe
2021-03-13 19:30 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 19:01 UTC (permalink / raw)
To: yangerkun, Pavel Begunkov, io-uring
Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang
On 3/13/21 8:34 AM, Jens Axboe wrote:
> On 3/13/21 1:02 AM, yangerkun wrote:
>>
>>
>> 在 2021/3/9 19:23, yangerkun 写道:
>>>
>>>
>>> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>>>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>>>
>>>>> You can't call idr_remove() from within a idr_for_each() callback,
>>>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>>>> entire personality_idr from the IDR to the XArray. This manifests as a
>>>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>>>> after removing the last entry from it.
>>>>
>>>> yangerkun, can you test it and similarly take care of buffer idr?
>>>
>>> Will try it latter :)
>>
>> Sorry for the latter reply. The patch pass the testcase.
>>
>> Besides, should we apply this patch first to deal with the same UAF for
>> io_buffer_idr before convert to XArray?
>>
>> https://lore.kernel.org/io-uring/[email protected]/T/#u
>
> Agree, and then defer an xarray conversion to 5.13. I'll take a look at
> your patch and get it applied.
That one is very broken, it both fails removal cases and it's got leak
issues too.
I'm going to take a look at just doing xarray instead.
--
Jens Axboe
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-13 19:01 ` Jens Axboe
@ 2021-03-13 19:30 ` Jens Axboe
2021-03-13 19:54 ` Matthew Wilcox
0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 19:30 UTC (permalink / raw)
To: yangerkun, Pavel Begunkov, io-uring
Cc: Matthew Wilcox, Stefan Metzmacher, yi.zhang
On 3/13/21 12:01 PM, Jens Axboe wrote:
> On 3/13/21 8:34 AM, Jens Axboe wrote:
>> On 3/13/21 1:02 AM, yangerkun wrote:
>>>
>>>
>>> 在 2021/3/9 19:23, yangerkun 写道:
>>>>
>>>>
>>>> 在 2021/3/8 22:22, Pavel Begunkov 写道:
>>>>> On 08/03/2021 14:16, Pavel Begunkov wrote:
>>>>>> From: "Matthew Wilcox (Oracle)" <[email protected]>
>>>>>>
>>>>>> You can't call idr_remove() from within a idr_for_each() callback,
>>>>>> but you can call xa_erase() from an xa_for_each() loop, so switch the
>>>>>> entire personality_idr from the IDR to the XArray. This manifests as a
>>>>>> use-after-free as idr_for_each() attempts to walk the rest of the node
>>>>>> after removing the last entry from it.
>>>>>
>>>>> yangerkun, can you test it and similarly take care of buffer idr?
>>>>
>>>> Will try it latter :)
>>>
>>> Sorry for the latter reply. The patch pass the testcase.
>>>
>>> Besides, should we apply this patch first to deal with the same UAF for
>>> io_buffer_idr before convert to XArray?
>>>
>>> https://lore.kernel.org/io-uring/[email protected]/T/#u
>>
>> Agree, and then defer an xarray conversion to 5.13. I'll take a look at
>> your patch and get it applied.
>
> That one is very broken, it both fails removal cases and it's got leak
> issues too.
>
> I'm going to take a look at just doing xarray instead.
Something like the below - tested as working for me, and has no leaks.
From: Jens Axboe <[email protected]>
Subject: [PATCH] io_uring: convert io_buffer_idr to XArray
Like we did for the personality idr, convert the IO buffer idr to use
XArray. This avoids a use-after-free on removal of entries, since idr
doesn't like doing so from inside an iterator.
Fixes: 5a2e745d4d43 ("io_uring: buffer registration infrastructure")
Cc: [email protected]
Reported-by: Hulk Robot <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 49 ++++++++++++++++++++++---------------------------
1 file changed, 22 insertions(+), 27 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05adc4887ef3..3d259a120f0d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -402,7 +402,8 @@ struct io_ring_ctx {
struct socket *ring_sock;
#endif
- struct idr io_buffer_idr;
+ struct xarray io_buffer;
+ u32 io_buffer_next;
struct xarray personalities;
u32 pers_next;
@@ -1135,7 +1136,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->ref_comp);
- idr_init(&ctx->io_buffer_idr);
+ xa_init_flags(&ctx->io_buffer, XA_FLAGS_ALLOC1);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
@@ -2843,7 +2844,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
lockdep_assert_held(&req->ctx->uring_lock);
- head = idr_find(&req->ctx->io_buffer_idr, bgid);
+ head = xa_load(&req->ctx->io_buffer, bgid);
if (head) {
if (!list_empty(&head->list)) {
kbuf = list_last_entry(&head->list, struct io_buffer,
@@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
list_del(&kbuf->list);
} else {
kbuf = head;
- idr_remove(&req->ctx->io_buffer_idr, bgid);
+ __xa_erase(&req->ctx->io_buffer, bgid);
}
if (*len > kbuf->len)
*len = kbuf->len;
@@ -3892,7 +3893,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
}
i++;
kfree(buf);
- idr_remove(&ctx->io_buffer_idr, bgid);
+ __xa_erase(&ctx->io_buffer, bgid);
return i;
}
@@ -3910,7 +3911,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
ret = -ENOENT;
- head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ head = xa_load(&ctx->io_buffer, p->bgid);
if (head)
ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
if (ret < 0)
@@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
- list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ list = head = xa_load(&ctx->io_buffer, p->bgid);
ret = io_add_buffers(p, &head);
- if (ret < 0)
- goto out;
+ if (ret >= 0 && !list) {
+ u32 id = -1U;
- if (!list) {
- ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
- GFP_KERNEL);
- if (ret < 0) {
+ ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
+ XA_LIMIT(0, USHRT_MAX),
+ &ctx->io_buffer_next, GFP_KERNEL);
+ if (ret < 0)
__io_remove_buffers(ctx, head, p->bgid, -1U);
- goto out;
- }
+ else
+ ret = id;
}
-out:
if (ret < 0)
req_set_fail_links(req);
@@ -8333,19 +8333,14 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
}
-static int __io_destroy_buffers(int id, void *p, void *data)
-{
- struct io_ring_ctx *ctx = data;
- struct io_buffer *buf = p;
-
- __io_remove_buffers(ctx, buf, id, -1U);
- return 0;
-}
-
static void io_destroy_buffers(struct io_ring_ctx *ctx)
{
- idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
- idr_destroy(&ctx->io_buffer_idr);
+ struct io_buffer *buf;
+ unsigned long index;
+
+ xa_for_each(&ctx->io_buffer, index, buf)
+ __io_remove_buffers(ctx, buf, index, -1U);
+ xa_destroy(&ctx->io_buffer);
}
static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
--
2.30.2
--
Jens Axboe
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-13 19:30 ` Jens Axboe
@ 2021-03-13 19:54 ` Matthew Wilcox
2021-03-13 20:13 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2021-03-13 19:54 UTC (permalink / raw)
To: Jens Axboe
Cc: yangerkun, Pavel Begunkov, io-uring, Stefan Metzmacher, yi.zhang
On Sat, Mar 13, 2021 at 12:30:14PM -0700, Jens Axboe wrote:
> @@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
> list_del(&kbuf->list);
> } else {
> kbuf = head;
> - idr_remove(&req->ctx->io_buffer_idr, bgid);
> + __xa_erase(&req->ctx->io_buffer, bgid);
Umm ... __xa_erase()? Did you enable all the lockdep infrastructure?
This should have tripped some of the debugging code because I don't think
you're holding the xa_lock.
> @@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
>
> lockdep_assert_held(&ctx->uring_lock);
>
> - list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
> + list = head = xa_load(&ctx->io_buffer, p->bgid);
>
> ret = io_add_buffers(p, &head);
> - if (ret < 0)
> - goto out;
> + if (ret >= 0 && !list) {
> + u32 id = -1U;
>
> - if (!list) {
> - ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
> - GFP_KERNEL);
> - if (ret < 0) {
> + ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
> + XA_LIMIT(0, USHRT_MAX),
> + &ctx->io_buffer_next, GFP_KERNEL);
I don't understand why this works. The equivalent transformation here
would have been:
ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
with various options to handle it differently.
> static void io_destroy_buffers(struct io_ring_ctx *ctx)
> {
> - idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
> - idr_destroy(&ctx->io_buffer_idr);
> + struct io_buffer *buf;
> + unsigned long index;
> +
> + xa_for_each(&ctx->io_buffer, index, buf)
> + __io_remove_buffers(ctx, buf, index, -1U);
> + xa_destroy(&ctx->io_buffer);
Honestly, I'd do BUG_ON(!xa_empty(&ctx->io_buffers)) if anything. If that
loop didn't empty the array, something is terribly wrong and we should
know about it somehow instead of making the memory leak harder to find.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-13 19:54 ` Matthew Wilcox
@ 2021-03-13 20:13 ` Jens Axboe
2021-03-13 20:22 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 20:13 UTC (permalink / raw)
To: Matthew Wilcox
Cc: yangerkun, Pavel Begunkov, io-uring, Stefan Metzmacher, yi.zhang
On 3/13/21 12:54 PM, Matthew Wilcox wrote:
> On Sat, Mar 13, 2021 at 12:30:14PM -0700, Jens Axboe wrote:
>> @@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
>> list_del(&kbuf->list);
>> } else {
>> kbuf = head;
>> - idr_remove(&req->ctx->io_buffer_idr, bgid);
>> + __xa_erase(&req->ctx->io_buffer, bgid);
>
> Umm ... __xa_erase()? Did you enable all the lockdep infrastructure?
> This should have tripped some of the debugging code because I don't think
> you're holding the xa_lock.
Not run with lockdep - and probably my misunderstanding, do we need xa_lock()
if we provide our own locking?
>> @@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
>>
>> lockdep_assert_held(&ctx->uring_lock);
>>
>> - list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
>> + list = head = xa_load(&ctx->io_buffer, p->bgid);
>>
>> ret = io_add_buffers(p, &head);
>> - if (ret < 0)
>> - goto out;
>> + if (ret >= 0 && !list) {
>> + u32 id = -1U;
>>
>> - if (!list) {
>> - ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
>> - GFP_KERNEL);
>> - if (ret < 0) {
>> + ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
>> + XA_LIMIT(0, USHRT_MAX),
>> + &ctx->io_buffer_next, GFP_KERNEL);
>
> I don't understand why this works. The equivalent transformation here
> would have been:
>
> ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
>
> with various options to handle it differently.
True, that does look kinda weird (and wrong). I'll fix that up.
>> static void io_destroy_buffers(struct io_ring_ctx *ctx)
>> {
>> - idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
>> - idr_destroy(&ctx->io_buffer_idr);
>> + struct io_buffer *buf;
>> + unsigned long index;
>> +
>> + xa_for_each(&ctx->io_buffer, index, buf)
>> + __io_remove_buffers(ctx, buf, index, -1U);
>> + xa_destroy(&ctx->io_buffer);
>
> Honestly, I'd do BUG_ON(!xa_empty(&ctx->io_buffers)) if anything. If that
> loop didn't empty the array, something is terribly wrong and we should
> know about it somehow instead of making the memory leak harder to find.
Probably also my misunderstanding - do I not need to call xa_destroy()
if I prune all the members? Assumed we needed it to free some internal
state, but maybe that's not the case?
--
Jens Axboe
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5.12] io_uring: Convert personality_idr to XArray
2021-03-13 20:13 ` Jens Axboe
@ 2021-03-13 20:22 ` Jens Axboe
0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2021-03-13 20:22 UTC (permalink / raw)
To: Matthew Wilcox
Cc: yangerkun, Pavel Begunkov, io-uring, Stefan Metzmacher, yi.zhang
On 3/13/21 1:13 PM, Jens Axboe wrote:
> On 3/13/21 12:54 PM, Matthew Wilcox wrote:
>> On Sat, Mar 13, 2021 at 12:30:14PM -0700, Jens Axboe wrote:
>>> @@ -2851,7 +2852,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
>>> list_del(&kbuf->list);
>>> } else {
>>> kbuf = head;
>>> - idr_remove(&req->ctx->io_buffer_idr, bgid);
>>> + __xa_erase(&req->ctx->io_buffer, bgid);
>>
>> Umm ... __xa_erase()? Did you enable all the lockdep infrastructure?
>> This should have tripped some of the debugging code because I don't think
>> you're holding the xa_lock.
>
> Not run with lockdep - and probably my misunderstanding, do we need xa_lock()
> if we provide our own locking?
>
>>> @@ -3993,21 +3994,20 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
>>>
>>> lockdep_assert_held(&ctx->uring_lock);
>>>
>>> - list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
>>> + list = head = xa_load(&ctx->io_buffer, p->bgid);
>>>
>>> ret = io_add_buffers(p, &head);
>>> - if (ret < 0)
>>> - goto out;
>>> + if (ret >= 0 && !list) {
>>> + u32 id = -1U;
>>>
>>> - if (!list) {
>>> - ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
>>> - GFP_KERNEL);
>>> - if (ret < 0) {
>>> + ret = __xa_alloc_cyclic(&ctx->io_buffer, &id, head,
>>> + XA_LIMIT(0, USHRT_MAX),
>>> + &ctx->io_buffer_next, GFP_KERNEL);
>>
>> I don't understand why this works. The equivalent transformation here
>> would have been:
>>
>> ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
>>
>> with various options to handle it differently.
>
> True, that does look kinda weird (and wrong). I'll fix that up.
>
>>> static void io_destroy_buffers(struct io_ring_ctx *ctx)
>>> {
>>> - idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
>>> - idr_destroy(&ctx->io_buffer_idr);
>>> + struct io_buffer *buf;
>>> + unsigned long index;
>>> +
>>> + xa_for_each(&ctx->io_buffer, index, buf)
>>> + __io_remove_buffers(ctx, buf, index, -1U);
>>> + xa_destroy(&ctx->io_buffer);
>>
>> Honestly, I'd do BUG_ON(!xa_empty(&ctx->io_buffers)) if anything. If that
>> loop didn't empty the array, something is terribly wrong and we should
>> know about it somehow instead of making the memory leak harder to find.
>
> Probably also my misunderstanding - do I not need to call xa_destroy()
> if I prune all the members? Assumed we needed it to free some internal
> state, but maybe that's not the case?
Here's a v2. Verified no leaks with the killed xa_destroy(), and that
lockdep is happy. BTW, much better API, which is evident from the fact
that a conversion like this ends up with the below diffstat:
io_uring.c | 43 +++++++++++++++----------------------------
1 file changed, 15 insertions(+), 28 deletions(-)
commit 51c681e3487d091b447175088bcf546f5ce1bf35
Author: Jens Axboe <[email protected]>
Date: Sat Mar 13 12:29:43 2021 -0700
io_uring: convert io_buffer_idr to XArray
Like we did for the personality idr, convert the IO buffer idr to use
XArray. This avoids a use-after-free on removal of entries, since idr
doesn't like doing so from inside an iterator.
Fixes: 5a2e745d4d43 ("io_uring: buffer registration infrastructure")
Cc: [email protected]
Reported-by: Hulk Robot <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05adc4887ef3..642ad08d8964 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -402,7 +402,7 @@ struct io_ring_ctx {
struct socket *ring_sock;
#endif
- struct idr io_buffer_idr;
+ struct xarray io_buffer;
struct xarray personalities;
u32 pers_next;
@@ -1135,7 +1135,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->ref_comp);
- idr_init(&ctx->io_buffer_idr);
+ xa_init_flags(&ctx->io_buffer, XA_FLAGS_ALLOC1);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
@@ -2843,7 +2843,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
lockdep_assert_held(&req->ctx->uring_lock);
- head = idr_find(&req->ctx->io_buffer_idr, bgid);
+ head = xa_load(&req->ctx->io_buffer, bgid);
if (head) {
if (!list_empty(&head->list)) {
kbuf = list_last_entry(&head->list, struct io_buffer,
@@ -2851,7 +2851,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
list_del(&kbuf->list);
} else {
kbuf = head;
- idr_remove(&req->ctx->io_buffer_idr, bgid);
+ xa_erase(&req->ctx->io_buffer, bgid);
}
if (*len > kbuf->len)
*len = kbuf->len;
@@ -3892,7 +3892,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
}
i++;
kfree(buf);
- idr_remove(&ctx->io_buffer_idr, bgid);
+ xa_erase(&ctx->io_buffer, bgid);
return i;
}
@@ -3910,7 +3910,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
ret = -ENOENT;
- head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ head = xa_load(&ctx->io_buffer, p->bgid);
if (head)
ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
if (ret < 0)
@@ -3993,21 +3993,14 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
- list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ list = head = xa_load(&ctx->io_buffer, p->bgid);
ret = io_add_buffers(p, &head);
- if (ret < 0)
- goto out;
-
- if (!list) {
- ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
- GFP_KERNEL);
- if (ret < 0) {
+ if (ret >= 0 && !list) {
+ ret = xa_insert(&ctx->io_buffer, p->bgid, head, GFP_KERNEL);
+ if (ret < 0)
__io_remove_buffers(ctx, head, p->bgid, -1U);
- goto out;
- }
}
-out:
if (ret < 0)
req_set_fail_links(req);
@@ -8333,19 +8326,13 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
}
-static int __io_destroy_buffers(int id, void *p, void *data)
-{
- struct io_ring_ctx *ctx = data;
- struct io_buffer *buf = p;
-
- __io_remove_buffers(ctx, buf, id, -1U);
- return 0;
-}
-
static void io_destroy_buffers(struct io_ring_ctx *ctx)
{
- idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
- idr_destroy(&ctx->io_buffer_idr);
+ struct io_buffer *buf;
+ unsigned long index;
+
+ xa_for_each(&ctx->io_buffer, index, buf)
+ __io_remove_buffers(ctx, buf, index, -1U);
}
static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
--
Jens Axboe
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2021-03-13 20:23 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-03-08 14:16 [PATCH 5.12] io_uring: Convert personality_idr to XArray Pavel Begunkov
2021-03-08 14:22 ` Pavel Begunkov
2021-03-08 16:16 ` Matthew Wilcox
2021-03-09 11:23 ` yangerkun
2021-03-13 8:02 ` yangerkun
2021-03-13 15:34 ` Jens Axboe
2021-03-13 19:01 ` Jens Axboe
2021-03-13 19:30 ` Jens Axboe
2021-03-13 19:54 ` Matthew Wilcox
2021-03-13 20:13 ` Jens Axboe
2021-03-13 20:22 ` Jens Axboe
2021-03-09 20:53 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox