* [PATCHSET 0/4] Allow allocated direct descriptors
@ 2022-05-08 23:49 Jens Axboe
2022-05-08 23:49 ` [PATCH 1/4] io_uring: track fixed files with a bitmap Jens Axboe
` (4 more replies)
0 siblings, 5 replies; 12+ messages in thread
From: Jens Axboe @ 2022-05-08 23:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux
Hi,
Currently using direct descriptors with open or accept requires the
application to manage the descriptor space, picking which slot to use
for any given file. However, there are cases where it's useful to just
get a direct descriptor and not care about which value it is, instead
just return it like a normal open or accept would.
This will also be useful for multishot accept support, where allocated
direct descriptors are a requirement to make that feature work with
these kinds of files.
This adds support for allocating a new fixed descriptor. This is chosen
by passing in UINT_MAX as the fixed slot, which otherwise has a limit
of INT_MAX like any file descriptor does.
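As a rough userspace illustration of the interface described above (not part of this
patchset; it assumes a ring with a sparse direct-descriptor table already registered,
and uses common liburing helpers for everything except the slot field, whose exact
encoding should be checked against the final UAPI), an accept request could ask the
kernel to pick the slot like this:

/* Sketch: accept a connection into a kernel-allocated direct descriptor. */
#include <liburing.h>
#include <limits.h>

static int accept_direct_alloc(struct io_uring *ring, int listen_fd)
{
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
        struct io_uring_cqe *cqe;
        int ret;

        if (!sqe)
                return -1;              /* submission queue is full */

        io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
        /*
         * UINT_MAX in the fixed slot field is the "allocate one for me"
         * hint from the cover letter; later kernels name this value
         * IORING_FILE_INDEX_ALLOC.
         */
        sqe->file_index = UINT_MAX;

        ret = io_uring_submit(ring);
        if (ret < 0)
                return ret;
        ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0)
                return ret;

        /* On success, cqe->res holds the allocated direct descriptor. */
        ret = cqe->res;
        io_uring_cqe_seen(ring, cqe);
        return ret;
}

A later request can then use the returned value as its fixed file index with
IOSQE_FIXED_FILE, exactly as if the application had picked the slot itself.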
fs/io_uring.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 94 insertions(+), 6 deletions(-)
--
Jens Axboe
* [PATCH 1/4] io_uring: track fixed files with a bitmap
2022-05-08 23:49 [PATCHSET 0/4] Allow allocated direct descriptors Jens Axboe
@ 2022-05-08 23:49 ` Jens Axboe
2022-05-09 13:26 ` Hao Xu
2022-05-08 23:49 ` [PATCH 2/4] io_uring: add basic fixed file allocator Jens Axboe
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2022-05-08 23:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
In preparation for adding a basic allocator for direct descriptors,
add helpers that set/clear whether a file slot is used.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b6d491c9a25f..6eac6629e7d4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -257,6 +257,7 @@ struct io_rsrc_put {
struct io_file_table {
struct io_fixed_file *files;
+ unsigned long *bitmap;
};
struct io_rsrc_node {
@@ -7573,6 +7574,7 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
io_req_set_rsrc_node(req, ctx, 0);
+ WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
out:
io_ring_submit_unlock(ctx, issue_flags);
return file;
@@ -8639,13 +8641,35 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
{
table->files = kvcalloc(nr_files, sizeof(table->files[0]),
GFP_KERNEL_ACCOUNT);
- return !!table->files;
+ if (unlikely(!table->files))
+ return false;
+
+ table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
+ if (unlikely(!table->bitmap)) {
+ kvfree(table->files);
+ return false;
+ }
+
+ return true;
}
static void io_free_file_tables(struct io_file_table *table)
{
kvfree(table->files);
+ bitmap_free(table->bitmap);
table->files = NULL;
+ table->bitmap = NULL;
+}
+
+static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
+{
+ WARN_ON_ONCE(test_bit(bit, table->bitmap));
+ __set_bit(bit, table->bitmap);
+}
+
+static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
+{
+ __clear_bit(bit, table->bitmap);
}
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -8660,6 +8684,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
continue;
if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
continue;
+ io_file_bitmap_clear(&ctx->file_table, i);
fput(file);
}
#endif
@@ -9063,6 +9088,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
}
file_slot = io_fixed_file_slot(&ctx->file_table, i);
io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
}
io_rsrc_node_switch(ctx, NULL);
@@ -9123,6 +9149,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
if (ret)
goto err;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, slot_index);
needs_switch = true;
}
@@ -9130,13 +9157,16 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
if (!ret) {
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, slot_index);
}
err:
if (needs_switch)
io_rsrc_node_switch(ctx, ctx->file_data);
io_ring_submit_unlock(ctx, issue_flags);
- if (ret)
+ if (ret) {
+ io_file_bitmap_clear(&ctx->file_table, slot_index);
fput(file);
+ }
return ret;
}
@@ -9171,6 +9201,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
goto out;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, offset);
io_rsrc_node_switch(ctx, ctx->file_data);
ret = 0;
out:
@@ -9220,6 +9251,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (err)
break;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, i);
needs_switch = true;
}
if (fd != -1) {
@@ -9248,6 +9280,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
}
*io_get_tag_slot(data, i) = tag;
io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
}
}
--
2.35.1
* [PATCH 2/4] io_uring: add basic fixed file allocator
2022-05-08 23:49 [PATCHSET 0/4] Allow allocated direct descriptors Jens Axboe
2022-05-08 23:49 ` [PATCH 1/4] io_uring: track fixed files with a bitmap Jens Axboe
@ 2022-05-08 23:49 ` Jens Axboe
2022-05-08 23:49 ` [PATCH 3/4] io_uring: allow allocated fixed files for openat/openat2 Jens Axboe
` (2 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2022-05-08 23:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
Applications currently always pick where they want fixed files to go.
In preparation for allowing these types of commands with multishot
support, add a basic allocator in the fixed file table.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6eac6629e7d4..6148bd562add 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -258,6 +258,7 @@ struct io_rsrc_put {
struct io_file_table {
struct io_fixed_file *files;
unsigned long *bitmap;
+ unsigned int alloc_hint;
};
struct io_rsrc_node {
@@ -4696,6 +4697,31 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_openat_prep(req, sqe);
}
+static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
+{
+ struct io_file_table *table = &ctx->file_table;
+ unsigned long nr = ctx->nr_user_files;
+ int ret;
+
+ if (table->alloc_hint >= nr)
+ table->alloc_hint = 0;
+
+ do {
+ ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
+ if (ret != nr) {
+ table->alloc_hint = ret + 1;
+ return ret;
+ }
+ if (!table->alloc_hint)
+ break;
+
+ nr = table->alloc_hint;
+ table->alloc_hint = 0;
+ } while (1);
+
+ return -ENFILE;
+}
+
static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
@@ -8665,11 +8691,14 @@ static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
{
WARN_ON_ONCE(test_bit(bit, table->bitmap));
__set_bit(bit, table->bitmap);
+ if (bit == table->alloc_hint)
+ table->alloc_hint++;
}
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
{
__clear_bit(bit, table->bitmap);
+ table->alloc_hint = bit;
}
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
--
2.35.1
* [PATCH 3/4] io_uring: allow allocated fixed files for openat/openat2
2022-05-08 23:49 [PATCHSET 0/4] Allow allocated direct descriptors Jens Axboe
2022-05-08 23:49 ` [PATCH 1/4] io_uring: track fixed files with a bitmap Jens Axboe
2022-05-08 23:49 ` [PATCH 2/4] io_uring: add basic fixed file allocator Jens Axboe
@ 2022-05-08 23:49 ` Jens Axboe
2022-05-08 23:49 ` [PATCH 4/4] io_uring: allow allocated fixed files for accept Jens Axboe
2022-05-09 13:20 ` [PATCHSET 0/4] Allow allocated direct descriptors Hao Xu
4 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2022-05-08 23:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
If the application passes in UINT_MAX as the file_slot, then that's a
hint to allocate a fixed file descriptor rather than have one be passed
in directly.
This can be useful for having io_uring manage the direct descriptor space.
Normal open direct requests will complete with 0 for success, and < 0
in case of error. If io_uring is asked to allocate the direct descriptor,
then the direct descriptor is returned in case of success.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6148bd562add..986a6e82bc09 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_openat_prep(req, sqe);
}
-static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
+static int io_file_bitmap_get(struct io_ring_ctx *ctx)
{
struct io_file_table *table = &ctx->file_table;
unsigned long nr = ctx->nr_user_files;
@@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
return -ENFILE;
}
+static int io_fixed_file_install(struct io_kiocb *req, unsigned int issue_flags,
+ struct file *file, unsigned int file_slot)
+{
+ int alloc_slot = file_slot == UINT_MAX;
+ struct io_ring_ctx *ctx = req->ctx;
+ int ret;
+
+ if (alloc_slot) {
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = io_file_bitmap_get(ctx);
+ if (unlikely(ret < 0)) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ return ret;
+ }
+ file_slot = ret;
+ }
+
+ ret = io_install_fixed_file(req, file, issue_flags, file_slot);
+ if (alloc_slot) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ if (!ret)
+ return file_slot;
+ }
+
+ return ret;
+}
+
static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
@@ -4777,8 +4803,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
if (!fixed)
fd_install(ret, file);
else
- ret = io_install_fixed_file(req, file, issue_flags,
- req->open.file_slot - 1);
+ ret = io_fixed_file_install(req, issue_flags, file,
+ req->open.file_slot);
err:
putname(req->open.filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
--
2.35.1
* [PATCH 4/4] io_uring: allow allocated fixed files for accept
2022-05-08 23:49 [PATCHSET 0/4] Allow allocated direct descriptors Jens Axboe
` (2 preceding siblings ...)
2022-05-08 23:49 ` [PATCH 3/4] io_uring: allow allocated fixed files for openat/openat2 Jens Axboe
@ 2022-05-08 23:49 ` Jens Axboe
2022-05-09 13:20 ` [PATCHSET 0/4] Allow allocated direct descriptors Hao Xu
4 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2022-05-08 23:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
If the application passes in UINT_MAX as the file_slot, then that's a
hint to allocate a fixed file descriptor rather than have one be passed
in directly.
This can be useful for having io_uring manage the direct descriptor space,
and also allows multi-shot support to work with fixed files.
Normal accept direct requests will complete with 0 for success, and < 0
in case of error. If io_uring is asked to allocate the direct descriptor,
then the direct descriptor is returned in case of success.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 986a6e82bc09..9fc38c749492 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5802,8 +5802,8 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
fd_install(fd, file);
ret = fd;
} else {
- ret = io_install_fixed_file(req, file, issue_flags,
- accept->file_slot - 1);
+ ret = io_fixed_file_install(req, issue_flags, file,
+ accept->file_slot);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
--
2.35.1
* Re: [PATCHSET 0/4] Allow allocated direct descriptors
2022-05-08 23:49 [PATCHSET 0/4] Allow allocated direct descriptors Jens Axboe
` (3 preceding siblings ...)
2022-05-08 23:49 ` [PATCH 4/4] io_uring: allow allocated fixed files for accept Jens Axboe
@ 2022-05-09 13:20 ` Hao Xu
2022-05-09 14:49 ` Jens Axboe
4 siblings, 1 reply; 12+ messages in thread
From: Hao Xu @ 2022-05-09 13:20 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
On 2022/5/9 7:49 AM, Jens Axboe wrote:
> Hi,
>
> Currently using direct descriptors with open or accept requires the
> application to manage the descriptor space, picking which slot to use
> for any given file. However, there are cases where it's useful to just
> get a direct descriptor and not care about which value it is, instead
> just return it like a normal open or accept would.
>
> This will also be useful for multishot accept support, where allocated
> direct descriptors are a requirement to make that feature work with
> these kinds of files.
>
> This adds support for allocating a new fixed descriptor. This is chosen
> by passing in UINT_MAX as the fixed slot, which otherwise has a limit
> of INT_MAX like any file descriptor does.
>
> fs/io_uring.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 94 insertions(+), 6 deletions(-)
>
Hi Jens,
I've read this idea of leveraging a bitmap, and it looks great. A small flaw
of it is that when the file_table is very long, the bitmap search
seems to be O({length of table}/BITS_PER_LONG). To make the time
complexity stable, I did a linked list version; could you have a look
when you're available? It's totally untested, just to show my idea. Basically
I use a list to link all the free slots, and when we need a slot, we just take
the head of it.
https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v5
(borrowed some commit message from your patches)
Thanks,
Hao
* Re: [PATCH 1/4] io_uring: track fixed files with a bitmap
2022-05-08 23:49 ` [PATCH 1/4] io_uring: track fixed files with a bitmap Jens Axboe
@ 2022-05-09 13:26 ` Hao Xu
2022-05-09 14:55 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: Hao Xu @ 2022-05-09 13:26 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
On 2022/5/9 7:49 AM, Jens Axboe wrote:
> In preparation for adding a basic allocator for direct descriptors,
> add helpers that set/clear whether a file slot is used.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 37 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index b6d491c9a25f..6eac6629e7d4 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -257,6 +257,7 @@ struct io_rsrc_put {
>
> struct io_file_table {
> struct io_fixed_file *files;
> + unsigned long *bitmap;
> };
>
> struct io_rsrc_node {
> @@ -7573,6 +7574,7 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
> /* mask in overlapping REQ_F and FFS bits */
> req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
> io_req_set_rsrc_node(req, ctx, 0);
> + WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
> out:
> io_ring_submit_unlock(ctx, issue_flags);
> return file;
> @@ -8639,13 +8641,35 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
> {
> table->files = kvcalloc(nr_files, sizeof(table->files[0]),
> GFP_KERNEL_ACCOUNT);
> - return !!table->files;
> + if (unlikely(!table->files))
> + return false;
> +
> + table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
> + if (unlikely(!table->bitmap)) {
> + kvfree(table->files);
> + return false;
> + }
> +
> + return true;
> }
>
> static void io_free_file_tables(struct io_file_table *table)
> {
> kvfree(table->files);
> + bitmap_free(table->bitmap);
> table->files = NULL;
> + table->bitmap = NULL;
> +}
> +
> +static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
> +{
> + WARN_ON_ONCE(test_bit(bit, table->bitmap));
> + __set_bit(bit, table->bitmap);
> +}
> +
> +static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
> +{
> + __clear_bit(bit, table->bitmap);
> }
>
> static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
> @@ -8660,6 +8684,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
> continue;
> if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
> continue;
> + io_file_bitmap_clear(&ctx->file_table, i);
> fput(file);
> }
> #endif
> @@ -9063,6 +9088,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
> }
> file_slot = io_fixed_file_slot(&ctx->file_table, i);
> io_fixed_file_set(file_slot, file);
> + io_file_bitmap_set(&ctx->file_table, i);
> }
>
> io_rsrc_node_switch(ctx, NULL);
> @@ -9123,6 +9149,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
> if (ret)
> goto err;
> file_slot->file_ptr = 0;
> + io_file_bitmap_clear(&ctx->file_table, slot_index);
> needs_switch = true;
> }
>
> @@ -9130,13 +9157,16 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
> if (!ret) {
> *io_get_tag_slot(ctx->file_data, slot_index) = 0;
> io_fixed_file_set(file_slot, file);
> + io_file_bitmap_set(&ctx->file_table, slot_index);
[1]
> }
> err:
> if (needs_switch)
> io_rsrc_node_switch(ctx, ctx->file_data);
> io_ring_submit_unlock(ctx, issue_flags);
> - if (ret)
> + if (ret) {
> + io_file_bitmap_clear(&ctx->file_table, slot_index);
We don't need to clear it, since we get here only when we didn't
run [1], which means we haven't set it.
> fput(file);
> + }
> return ret;
> }
>
* Re: [PATCHSET 0/4] Allow allocated direct descriptors
2022-05-09 13:20 ` [PATCHSET 0/4] Allow allocated direct descriptors Hao Xu
@ 2022-05-09 14:49 ` Jens Axboe
2022-05-10 5:35 ` Hao Xu
0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2022-05-09 14:49 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/9/22 7:20 AM, Hao Xu wrote:
> On 2022/5/9 7:49 AM, Jens Axboe wrote:
>> Hi,
>>
>> Currently using direct descriptors with open or accept requires the
>> application to manage the descriptor space, picking which slot to use
>> for any given file. However, there are cases where it's useful to just
>> get a direct descriptor and not care about which value it is, instead
>> just return it like a normal open or accept would.
>>
>> This will also be useful for multishot accept support, where allocated
>> direct descriptors are a requirement to make that feature work with
>> these kinds of files.
>>
>> This adds support for allocating a new fixed descriptor. This is chosen
>> by passing in UINT_MAX as the fixed slot, which otherwise has a limit
>> of INT_MAX like any file descriptor does.
>>
>> fs/io_uring.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 94 insertions(+), 6 deletions(-)
>>
> Hi Jens,
> I've read this idea of leveraging bitmap, it looks great. a small flaw
> of it is that when the file_table is very long, the bitmap searching
> seems to be O({length of table}/BITS_PER_LONG), to make the time
> complexity stable, I did a linked list version, could you have a look
> when you're available? Totally untested, just to show my idea. Basically
> I use a list to link all the free slots, when we need a slot, just get
> the head of it.
> https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v5
>
> (borrowed some commit message from your patches)
While that's certainly true, I'm skeptical that the list management will
be faster for most cases. It's worth noting that the regular file
allocator is very much the same thing. A full scan is unlikely unless
you already got -ENFILE. Any clear in between will reset the hint and
it'll be O(1) again. So yes, the pathological case of having no
descriptors left and repeatedly trying to get one isn't optimal, but no
application should be running in that mode.
The downside is also that now each fixed file will take up 4 times as
much space (8 bytes -> 32 bytes), and that's a resource we'll
potentially have a lot of.
If the case of finding a new descriptor is slow for a mostly full space,
in the past I've done something like axmap [1] in fio, where each
64-bit entry is represented by a single bit a layer up. That still has
very good space utilization and good cache layout, which the list very
much does not. But given the above, I don't think we need to worry about
that really.
As a side note, I do think we need to just bump the size of the max
direct descriptors we can have. With the file table potentially being
vmalloc backed, there's no reason to limit it to the current 32K.
[1] https://git.kernel.dk/cgit/fio/tree/lib/axmap.c
--
Jens Axboe
* Re: [PATCH 1/4] io_uring: track fixed files with a bitmap
2022-05-09 13:26 ` Hao Xu
@ 2022-05-09 14:55 ` Jens Axboe
0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2022-05-09 14:55 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/9/22 7:26 AM, Hao Xu wrote:
> On 2022/5/9 7:49 AM, Jens Axboe wrote:
>> In preparation for adding a basic allocator for direct descriptors,
>> add helpers that set/clear whether a file slot is used.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>> fs/io_uring.c | 37 +++++++++++++++++++++++++++++++++++--
>> 1 file changed, 35 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index b6d491c9a25f..6eac6629e7d4 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -257,6 +257,7 @@ struct io_rsrc_put {
>> struct io_file_table {
>> struct io_fixed_file *files;
>> + unsigned long *bitmap;
>> };
>> struct io_rsrc_node {
>> @@ -7573,6 +7574,7 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
>> /* mask in overlapping REQ_F and FFS bits */
>> req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
>> io_req_set_rsrc_node(req, ctx, 0);
>> + WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
>> out:
>> io_ring_submit_unlock(ctx, issue_flags);
>> return file;
>> @@ -8639,13 +8641,35 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
>> {
>> table->files = kvcalloc(nr_files, sizeof(table->files[0]),
>> GFP_KERNEL_ACCOUNT);
>> - return !!table->files;
>> + if (unlikely(!table->files))
>> + return false;
>> +
>> + table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
>> + if (unlikely(!table->bitmap)) {
>> + kvfree(table->files);
>> + return false;
>> + }
>> +
>> + return true;
>> }
>> static void io_free_file_tables(struct io_file_table *table)
>> {
>> kvfree(table->files);
>> + bitmap_free(table->bitmap);
>> table->files = NULL;
>> + table->bitmap = NULL;
>> +}
>> +
>> +static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
>> +{
>> + WARN_ON_ONCE(test_bit(bit, table->bitmap));
>> + __set_bit(bit, table->bitmap);
>> +}
>> +
>> +static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
>> +{
>> + __clear_bit(bit, table->bitmap);
>> }
>> static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
>> @@ -8660,6 +8684,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
>> continue;
>> if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
>> continue;
>> + io_file_bitmap_clear(&ctx->file_table, i);
>> fput(file);
>> }
>> #endif
>> @@ -9063,6 +9088,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
>> }
>> file_slot = io_fixed_file_slot(&ctx->file_table, i);
>> io_fixed_file_set(file_slot, file);
>> + io_file_bitmap_set(&ctx->file_table, i);
>> }
>> io_rsrc_node_switch(ctx, NULL);
>> @@ -9123,6 +9149,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
>> if (ret)
>> goto err;
>> file_slot->file_ptr = 0;
>> + io_file_bitmap_clear(&ctx->file_table, slot_index);
>> needs_switch = true;
>> }
>> @@ -9130,13 +9157,16 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
>> if (!ret) {
>> *io_get_tag_slot(ctx->file_data, slot_index) = 0;
>> io_fixed_file_set(file_slot, file);
>> + io_file_bitmap_set(&ctx->file_table, slot_index);
> [1]
>> }
>> err:
>> if (needs_switch)
>> io_rsrc_node_switch(ctx, ctx->file_data);
>> io_ring_submit_unlock(ctx, issue_flags);
>> - if (ret)
>> + if (ret) {
>> + io_file_bitmap_clear(&ctx->file_table, slot_index);
> We don't need to clear it, since we get here only when we didn't
> run [1], which means we haven't set it.
Good point, agree. I'll drop that one.
--
Jens Axboe
* Re: [PATCHSET 0/4] Allow allocated direct descriptors
2022-05-09 14:49 ` Jens Axboe
@ 2022-05-10 5:35 ` Hao Xu
2022-05-10 12:26 ` Jens Axboe
0 siblings, 1 reply; 12+ messages in thread
From: Hao Xu @ 2022-05-10 5:35 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
On 2022/5/9 10:49 PM, Jens Axboe wrote:
> On 5/9/22 7:20 AM, Hao Xu wrote:
>> On 2022/5/9 7:49 AM, Jens Axboe wrote:
>>> Hi,
>>>
>>> Currently using direct descriptors with open or accept requires the
>>> application to manage the descriptor space, picking which slot to use
>>> for any given file. However, there are cases where it's useful to just
>>> get a direct descriptor and not care about which value it is, instead
>>> just return it like a normal open or accept would.
>>>
>>> This will also be useful for multishot accept support, where allocated
>>> direct descriptors are a requirement to make that feature work with
>>> these kinds of files.
>>>
>>> This adds support for allocating a new fixed descriptor. This is chosen
>>> by passing in UINT_MAX as the fixed slot, which otherwise has a limit
>>> of INT_MAX like any file descriptor does.
>>>
>>> fs/io_uring.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++---
>>> 1 file changed, 94 insertions(+), 6 deletions(-)
>>>
>> Hi Jens,
>> I've read this idea of leveraging bitmap, it looks great. a small flaw
>> of it is that when the file_table is very long, the bitmap searching
>> seems to be O({length of table}/BITS_PER_LONG), to make the time
>> complexity stable, I did a linked list version, could you have a look
>> when you're available? Totally untested, just to show my idea. Basically
>> I use a list to link all the free slots, when we need a slot, just get
>> the head of it.
>> https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v5
>>
>> (borrowed some commit message from your patches)
>
> While that's certainly true, I'm skeptical that the list management will
> be faster for most cases. It's worth noting that the regular file
> allocator is very much the same thing. A full scan is unlikely unless
> you already got -ENFILE. Any clear in between will reset the hint and
> it'll be O(1) again. So yes, the pathological case of having no
it's not O(1) actually, and a full bitmap is not the only worst case.
For instance, the bitmap is like:

                           hint
                           |
1111111111111111111111111110000

then a bit is cleared and the hint is updated:

   hint
   |
1110111111111111111111111110000

then the next time the complexity is high.
So in this kind of scenario (first allocate many slots in order, then clear a
low bit and allocation goes on in turn), it would be slow. And I think
these cases are not rare, since people usually allocate many fds and then
free the early-used fds from time to time.
I haven't looked through the regular file allocator carefully, I'll look
into it later if I get some time.
> descriptors left and repeatedly trying to get one isn't optimal, but no
> application should be running in that mode.
>
> The downside is also that now each fixed file will take up 4 times as
> much space (8 bytes -> 32 bytes), and that's a resource we'll
> potentially have a lot of.
Agree, This is definitely a disadvantage of it which I should consider
when coding it..
>
> If the case of finding a new descriptor is slow for a mostly full space,
> in the past I've done something like axmap [1] in fio, where you each
> 64-bit entry is representing by a single bit a layer up. That still has
> very good space utilization and good cache layout, which the list very
> much does not. But given the above, I don't think we need to worry about
> that really.
>
> As a side note, I do think we need to just bump the size of the max
> direct descriptors we can have. With the file table potentially being
> vmalloc backed, there's no reason to limit it to the current 32K.
Agree.
>
> [1] https://git.kernel.dk/cgit/fio/tree/lib/axmap.c
>
Cool, I'll have a look.
Regards,
Hao
* Re: [PATCHSET 0/4] Allow allocated direct descriptors
2022-05-10 5:35 ` Hao Xu
@ 2022-05-10 12:26 ` Jens Axboe
2022-06-10 13:41 ` Victor Stewart
0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2022-05-10 12:26 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/9/22 11:35 PM, Hao Xu wrote:
> On 2022/5/9 10:49 PM, Jens Axboe wrote:
>> On 5/9/22 7:20 AM, Hao Xu wrote:
>>> On 2022/5/9 7:49 AM, Jens Axboe wrote:
>>>> Hi,
>>>>
>>>> Currently using direct descriptors with open or accept requires the
>>>> application to manage the descriptor space, picking which slot to use
>>>> for any given file. However, there are cases where it's useful to just
>>>> get a direct descriptor and not care about which value it is, instead
>>>> just return it like a normal open or accept would.
>>>>
>>>> This will also be useful for multishot accept support, where allocated
>>>> direct descriptors are a requirement to make that feature work with
>>>> these kinds of files.
>>>>
>>>> This adds support for allocating a new fixed descriptor. This is chosen
>>>> by passing in UINT_MAX as the fixed slot, which otherwise has a limit
>>>> of INT_MAX like any file descriptor does.
>>>>
>>>> fs/io_uring.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++---
>>>> 1 file changed, 94 insertions(+), 6 deletions(-)
>>>>
>>> Hi Jens,
>>> I've read this idea of leveraging bitmap, it looks great. a small flaw
>>> of it is that when the file_table is very long, the bitmap searching
>>> seems to be O({length of table}/BITS_PER_LONG), to make the time
>>> complexity stable, I did a linked list version, could you have a look
>>> when you're available? Totally untested, just to show my idea. Basically
>>> I use a list to link all the free slots, when we need a slot, just get
>>> the head of it.
>>> https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v5
>>>
>>> (borrowed some commit message from your patches)
>>
>> While that's certainly true, I'm skeptical that the list management will
>> be faster for most cases. It's worth noting that the regular file
>> allocator is very much the same thing. A full scan is unlikely unless
>> you already got -ENFILE. Any clear in between will reset the hint and
>> it'll be O(1) again. So yes, the pathological case of having no
>
> it's not O(1) actually, and a full bitmap is not the only worst case.
> For instance, the bitmap is like:
> hint
> |
> 1111111111111111111111111110000
>
> then a bit is cleared and hint is updated:
> hint
> |
> 1110111111111111111111111110000
>
> then next time the complexity is high
Next time it's fine, since the hint is that bit. If you do two, then yes,
the second one would be slower.
> So in this kind of scenario(first allocate many in order, then clear
> low bit and allocation goes on in turn), it would be slow. And I think
> these cases are not rare since people usually allocate many fds then
> free the early used fds from time to time.
It's by no means perfect, but if it's good enough for the normal file
allocator, then I don't think it'd be wise to over-engineer this one
until there's a proven need to do so.
A single list tracking free items is most certainly a LOT slower
for the common cases, so I don't think that's a good approach at all.
My suggestion would be to stick with the proposed approach until there's
evidence that the allocator needs improving. I did write a benchmark
that uses a 500K map and does opens and closes, and I don't see anything
to worry about in terms of overhead. The bitmap handling doesn't even
really register, dwarfed by the rest of the open path.
>> If the case of finding a new descriptor is slow for a mostly full space,
>> in the past I've done something like axmap [1] in fio, where each
>> 64-bit entry is represented by a single bit a layer up. That still has
>> very good space utilization and good cache layout, which the list very
>> much does not. But given the above, I don't think we need to worry about
>> that really.
>>
>> As a side note, I do think we need to just bump the size of the max
>> direct descriptors we can have. With the file table potentially being
>> vmalloc backed, there's no reason to limit it to the current 32K.
>
> Agree.
>
>>
>> [1] https://git.kernel.dk/cgit/fio/tree/lib/axmap.c
>>
> Cool, I'll have a look.
It can get boiled down to something a bit simpler as the fio
implementation supports a variety of different use cases. For example, I
think it should be implemented as a single indexed array that holds all
the levels, rather than separately as it's done there. In short, it just
condenses everything down to one qword eventually, and finding a free
bit is always O(log64(N)).
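To make that shape concrete, a minimal two-level sketch (not the fio code; fixed at a
64x64 geometry, i.e. up to 4096 slots in a zero-initialized map, for brevity) could
look like this, with a real axmap-style structure generalizing it to more levels and
arbitrary sizes:

/*
 * Two-level bitmap: 64 level-0 words (4096 slots) summarized by a single
 * level-1 qword, where bit i set means level-0 word i is completely full.
 * Finding a free slot inspects one word per level.
 */
#include <stdint.h>

struct twolevel {
        uint64_t l1;            /* summary: bit i set => l0[i] is full */
        uint64_t l0[64];        /* one bit per slot, set => in use */
};

/* Returns the allocated slot (0..4095), or -1 if every slot is in use. */
static int twolevel_alloc(struct twolevel *m)
{
        unsigned int word, bit;

        if (m->l1 == ~0ULL)
                return -1;                      /* all level-0 words full */

        word = __builtin_ctzll(~m->l1);         /* first not-full word */
        bit = __builtin_ctzll(~m->l0[word]);    /* first free slot in it */

        m->l0[word] |= 1ULL << bit;
        if (m->l0[word] == ~0ULL)
                m->l1 |= 1ULL << word;          /* word just became full */

        return word * 64 + bit;
}

static void twolevel_free(struct twolevel *m, unsigned int slot)
{
        unsigned int word = slot / 64;

        m->l0[word] &= ~(1ULL << (slot % 64));
        m->l1 &= ~(1ULL << word);               /* word has a free slot again */
}

Each additional level multiplies the capacity by 64 while adding a single extra word
lookup, which is where the O(log64(N)) behaviour comes from.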
--
Jens Axboe
* Re: [PATCHSET 0/4] Allow allocated direct descriptors
2022-05-10 12:26 ` Jens Axboe
@ 2022-06-10 13:41 ` Victor Stewart
0 siblings, 0 replies; 12+ messages in thread
From: Victor Stewart @ 2022-06-10 13:41 UTC (permalink / raw)
To: Jens Axboe; +Cc: Hao Xu, io-uring, Pavel Begunkov
On Tue, May 10, 2022 at 1:28 PM Jens Axboe <[email protected]> wrote:
>
> On 5/9/22 11:35 PM, Hao Xu wrote:
> > On 2022/5/9 10:49 PM, Jens Axboe wrote:
> >> On 5/9/22 7:20 AM, Hao Xu wrote:
> >>> On 2022/5/9 7:49 AM, Jens Axboe wrote:
> >>>> Hi,
> >>>>
> >>>> Currently using direct descriptors with open or accept requires the
> >>>> application to manage the descriptor space, picking which slot to use
> >>>> for any given file. However, there are cases where it's useful to just
> >>>> get a direct descriptor and not care about which value it is, instead
> >>>> just return it like a normal open or accept would.
> >>>>
> >>>> This will also be useful for multishot accept support, where allocated
> >>>> direct descriptors are a requirement to make that feature work with
> >>>> these kinds of files.
> >>>>
> >>>> This adds support for allocating a new fixed descriptor. This is chosen
> >>>> by passing in UINT_MAX as the fixed slot, which otherwise has a limit
> >>>> of INT_MAX like any file descriptor does.
> >>>>
> >>>> fs/io_uring.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++---
> >>>> 1 file changed, 94 insertions(+), 6 deletions(-)
> >>>>
> >>> Hi Jens,
> >>> I've read this idea of leveraging bitmap, it looks great. a small flaw
> >>> of it is that when the file_table is very long, the bitmap searching
> >>> seems to be O({length of table}/BITS_PER_LONG), to make the time
> >>> complexity stable, I did a linked list version, could you have a look
> >>> when you're available? Totally untested, just to show my idea. Basically
> >>> I use a list to link all the free slots, when we need a slot, just get
> >>> the head of it.
> >>> https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v5
> >>>
> >>> (borrowed some commit message from your patches)
> >>
> >> While that's certainly true, I'm skeptical that the list management will
> >> be faster for most cases. It's worth noting that the regular file
> >> allocator is very much the same thing. A full scan is unlikely unless
> >> you already got -ENFILE. Any clear in between will reset the hint and
> >> it'll be O(1) again. So yes, the pathological case of having no
> >
> > it's not O(1) actually, and a full bitmap is not the only worst case.
> > For instance, the bitmap is like:
> > hint
> > |
> > 1111111111111111111111111110000
> >
> > then a bit is cleared and hint is updated:
> > hint
> > |
> > 1110111111111111111111111110000
> >
> > then next time the complexity is high
>
> Next time it's fine, since the hint is that bit. If you do two, then yes,
> the second one would be slower.
>
> > So in this kind of scenario(first allocate many in order, then clear
> > low bit and allocation goes on in turn), it would be slow. And I think
> > these cases are not rare since people usually allocate many fds then
> > free the early used fds from time to time.
>
> It's by no means perfect, but if it's good enough for the normal file
> allocator, then I don't think it'd be wise to over-engineer this one
> until there's a proven need to do so.
>
> A single list tracking free items is most certainly a LOT slower
> for the common cases, so I don't think that's a good approach at all.
>
> My suggestion would be to stick with the proposed approach until there's
> evidence that the allocator needs improving. I did write a benchmark
> that uses a 500K map and does opens and closes, and I don't see anything
> to worry about in terms of overhead. The bitmap handling doesn't even
> really register, dwarfed by the rest of the open path.
Another way to look at this is that we're sacrificing performance to
accommodate ambiguity on behalf of the application.
So another option is to make the bookkeeping an array with index == fd, and
require the application to specify a static number of open files it
supports. That way exclusion is de facto provided by the choice of fd, and
we aren't duplicating it.
This is the pattern I use in my application.
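Purely as an illustration of that application-side pattern (a sketch, not from any
posted code), the bookkeeping can be a stack of free indices sized to the static
open-file budget, where whatever index gets popped is what the application passes as
the fixed file slot:

/* Sketch: application-managed slot pool with a fixed capacity. */
#include <stdlib.h>

struct slot_pool {
        unsigned int *free_stack;       /* stack of free slot indices */
        unsigned int nr_free;
        unsigned int capacity;
};

static int slot_pool_init(struct slot_pool *p, unsigned int capacity)
{
        p->free_stack = malloc(capacity * sizeof(*p->free_stack));
        if (!p->free_stack)
                return -1;
        p->capacity = p->nr_free = capacity;
        /* Every slot starts out free: capacity-1 ... 0 on the stack. */
        for (unsigned int i = 0; i < capacity; i++)
                p->free_stack[i] = capacity - 1 - i;
        return 0;
}

/* Returns a slot index, or -1 once the static budget is exhausted. */
static int slot_get(struct slot_pool *p)
{
        if (!p->nr_free)
                return -1;
        return p->free_stack[--p->nr_free];
}

static void slot_put(struct slot_pool *p, unsigned int slot)
{
        p->free_stack[p->nr_free++] = slot;
}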
>
> >> If the case of finding a new descriptor is slow for a mostly full space,
> >> in the past I've done something like axmap [1] in fio, where each
> >> 64-bit entry is represented by a single bit a layer up. That still has
> >> very good space utilization and good cache layout, which the list very
> >> much does not. But given the above, I don't think we need to worry about
> >> that really.
> >>
> >> As a side note, I do think we need to just bump the size of the max
> >> direct descriptors we can have. With the file table potentially being
> >> vmalloc backed, there's no reason to limit it to the current 32K.
> >
> > Agree.
> >
> >>
> >> [1] https://git.kernel.dk/cgit/fio/tree/lib/axmap.c
> >>
> > Cool, I'll have a look.
>
> It can get boiled down to something a bit simpler as the fio
> implementation supports a variety of different use cases. For example, I
> think it should be implemented as a single indexed array that holds all
> the levels, rather than separately as it's done there. In short, it just
> condenses everything down to one qword eventually, and finding a free
> bit is always O(log64(N)).
>
> --
> Jens Axboe
>