* [PATCH 1/6] io_uring: track fixed files with a bitmap
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
@ 2022-05-09 15:50 ` Jens Axboe
2022-05-09 15:50 ` [PATCH 2/6] io_uring: add basic fixed file allocator Jens Axboe
` (6 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-09 15:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
In preparation for adding a basic allocator for direct descriptors,
add helpers that set/clear whether a file slot is used.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 22699cb359e9..f8a685cc0363 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -257,6 +257,7 @@ struct io_rsrc_put {
struct io_file_table {
struct io_fixed_file *files;
+ unsigned long *bitmap;
};
struct io_rsrc_node {
@@ -7572,6 +7573,7 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
io_req_set_rsrc_node(req, ctx, 0);
+ WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
out:
io_ring_submit_unlock(ctx, issue_flags);
return file;
@@ -8638,13 +8640,35 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
{
table->files = kvcalloc(nr_files, sizeof(table->files[0]),
GFP_KERNEL_ACCOUNT);
- return !!table->files;
+ if (unlikely(!table->files))
+ return false;
+
+ table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
+ if (unlikely(!table->bitmap)) {
+ kvfree(table->files);
+ return false;
+ }
+
+ return true;
}
static void io_free_file_tables(struct io_file_table *table)
{
kvfree(table->files);
+ bitmap_free(table->bitmap);
table->files = NULL;
+ table->bitmap = NULL;
+}
+
+static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
+{
+ WARN_ON_ONCE(test_bit(bit, table->bitmap));
+ __set_bit(bit, table->bitmap);
+}
+
+static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
+{
+ __clear_bit(bit, table->bitmap);
}
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -8659,6 +8683,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
continue;
if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
continue;
+ io_file_bitmap_clear(&ctx->file_table, i);
fput(file);
}
#endif
@@ -9062,6 +9087,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
}
file_slot = io_fixed_file_slot(&ctx->file_table, i);
io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
}
io_rsrc_node_switch(ctx, NULL);
@@ -9122,6 +9148,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
if (ret)
goto err;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, slot_index);
needs_switch = true;
}
@@ -9129,6 +9156,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
if (!ret) {
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, slot_index);
}
err:
if (needs_switch)
@@ -9170,6 +9198,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
goto out;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, offset);
io_rsrc_node_switch(ctx, ctx->file_data);
ret = 0;
out:
@@ -9219,6 +9248,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (err)
break;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, i);
needs_switch = true;
}
if (fd != -1) {
@@ -9247,6 +9277,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
}
*io_get_tag_slot(data, i) = tag;
io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
}
}
--
2.35.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 2/6] io_uring: add basic fixed file allocator
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
2022-05-09 15:50 ` [PATCH 1/6] io_uring: track fixed files with a bitmap Jens Axboe
@ 2022-05-09 15:50 ` Jens Axboe
2022-05-09 15:50 ` [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2 Jens Axboe
` (5 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-09 15:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
Applications currently always pick where they want fixed files to go.
In preparation for allowing these types of commands with multishot
support, add a basic allocator in the fixed file table.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f8a685cc0363..8c40411a7e78 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -258,6 +258,7 @@ struct io_rsrc_put {
struct io_file_table {
struct io_fixed_file *files;
unsigned long *bitmap;
+ unsigned int alloc_hint;
};
struct io_rsrc_node {
@@ -4696,6 +4697,31 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_openat_prep(req, sqe);
}
+static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
+{
+ struct io_file_table *table = &ctx->file_table;
+ unsigned long nr = ctx->nr_user_files;
+ int ret;
+
+ if (table->alloc_hint >= nr)
+ table->alloc_hint = 0;
+
+ do {
+ ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
+ if (ret != nr) {
+ table->alloc_hint = ret + 1;
+ return ret;
+ }
+ if (!table->alloc_hint)
+ break;
+
+ nr = table->alloc_hint;
+ table->alloc_hint = 0;
+ } while (1);
+
+ return -ENFILE;
+}
+
static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
@@ -8664,11 +8690,14 @@ static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
{
WARN_ON_ONCE(test_bit(bit, table->bitmap));
__set_bit(bit, table->bitmap);
+ if (bit == table->alloc_hint)
+ table->alloc_hint++;
}
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
{
__clear_bit(bit, table->bitmap);
+ table->alloc_hint = bit;
}
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
--
2.35.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
2022-05-09 15:50 ` [PATCH 1/6] io_uring: track fixed files with a bitmap Jens Axboe
2022-05-09 15:50 ` [PATCH 2/6] io_uring: add basic fixed file allocator Jens Axboe
@ 2022-05-09 15:50 ` Jens Axboe
2022-05-12 8:21 ` Hao Xu
2022-05-13 4:38 ` Hao Xu
2022-05-09 15:50 ` [PATCH 4/6] io_uring: allow allocated fixed files for accept Jens Axboe
` (4 subsequent siblings)
7 siblings, 2 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-09 15:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
then that's a hint to allocate a fixed file descriptor rather than have
one be passed in directly.
This can be useful for having io_uring manage the direct descriptor space.
Normal open direct requests will complete with 0 for success, and < 0
in case of error. If io_uring is asked to allocated the direct descriptor,
then the direct descriptor is returned in case of success.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
include/uapi/linux/io_uring.h | 9 +++++++++
2 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8c40411a7e78..ef999d0e09de 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_openat_prep(req, sqe);
}
-static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
+static int io_file_bitmap_get(struct io_ring_ctx *ctx)
{
struct io_file_table *table = &ctx->file_table;
unsigned long nr = ctx->nr_user_files;
@@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
return -ENFILE;
}
+static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
+ struct file *file, unsigned int file_slot)
+{
+ int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
+ struct io_ring_ctx *ctx = req->ctx;
+ int ret;
+
+ if (alloc_slot) {
+ io_ring_submit_lock(ctx, issue_flags);
+ file_slot = io_file_bitmap_get(ctx);
+ if (unlikely(file_slot < 0)) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ return file_slot;
+ }
+ }
+
+ ret = io_install_fixed_file(req, file, issue_flags, file_slot);
+ if (alloc_slot) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ if (!ret)
+ return file_slot;
+ }
+
+ return ret;
+}
+
static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
@@ -4777,8 +4803,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
if (!fixed)
fd_install(ret, file);
else
- ret = io_install_fixed_file(req, file, issue_flags,
- req->open.file_slot - 1);
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ req->open.file_slot);
err:
putname(req->open.filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 06621a278cb6..b7f02a55032a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -63,6 +63,15 @@ struct io_uring_sqe {
__u64 __pad2[2];
};
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC (~0U)
+
enum {
IOSQE_FIXED_FILE_BIT,
IOSQE_IO_DRAIN_BIT,
--
2.35.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-09 15:50 ` [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2 Jens Axboe
@ 2022-05-12 8:21 ` Hao Xu
2022-05-12 12:23 ` Jens Axboe
2022-05-13 4:38 ` Hao Xu
1 sibling, 1 reply; 21+ messages in thread
From: Hao Xu @ 2022-05-12 8:21 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
在 2022/5/9 下午11:50, Jens Axboe 写道:
> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
> then that's a hint to allocate a fixed file descriptor rather than have
> one be passed in directly.
>
> This can be useful for having io_uring manage the direct descriptor space.
>
> Normal open direct requests will complete with 0 for success, and < 0
> in case of error. If io_uring is asked to allocated the direct descriptor,
> then the direct descriptor is returned in case of success.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
> include/uapi/linux/io_uring.h | 9 +++++++++
> 2 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 8c40411a7e78..ef999d0e09de 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return __io_openat_prep(req, sqe);
> }
>
> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
> {
> struct io_file_table *table = &ctx->file_table;
> unsigned long nr = ctx->nr_user_files;
> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
> return -ENFILE;
> }
>
> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
> + struct file *file, unsigned int file_slot)
> +{
> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
> + struct io_ring_ctx *ctx = req->ctx;
> + int ret;
> +
> + if (alloc_slot) {
> + io_ring_submit_lock(ctx, issue_flags);
> + file_slot = io_file_bitmap_get(ctx);
> + if (unlikely(file_slot < 0)) {
> + io_ring_submit_unlock(ctx, issue_flags);
> + return file_slot;
> + }
> + }
if (alloc_slot) {
...
} else {
file_slot -= 1;
}
Otherwise there is off-by-one error.
Others looks good,
Reviewed-by: Hao Xu <[email protected]>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-12 8:21 ` Hao Xu
@ 2022-05-12 12:23 ` Jens Axboe
2022-05-13 5:28 ` Hao Xu
0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2022-05-12 12:23 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/12/22 2:21 AM, Hao Xu wrote:
> ? 2022/5/9 ??11:50, Jens Axboe ??:
>> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
>> then that's a hint to allocate a fixed file descriptor rather than have
>> one be passed in directly.
>>
>> This can be useful for having io_uring manage the direct descriptor space.
>>
>> Normal open direct requests will complete with 0 for success, and < 0
>> in case of error. If io_uring is asked to allocated the direct descriptor,
>> then the direct descriptor is returned in case of success.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
>> include/uapi/linux/io_uring.h | 9 +++++++++
>> 2 files changed, 38 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 8c40411a7e78..ef999d0e09de 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> return __io_openat_prep(req, sqe);
>> }
>> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
>> {
>> struct io_file_table *table = &ctx->file_table;
>> unsigned long nr = ctx->nr_user_files;
>> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>> return -ENFILE;
>> }
>> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
>> + struct file *file, unsigned int file_slot)
>> +{
>> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
>> + struct io_ring_ctx *ctx = req->ctx;
>> + int ret;
>> +
>> + if (alloc_slot) {
>> + io_ring_submit_lock(ctx, issue_flags);
>> + file_slot = io_file_bitmap_get(ctx);
>> + if (unlikely(file_slot < 0)) {
>> + io_ring_submit_unlock(ctx, issue_flags);
>> + return file_slot;
>> + }
>> + }
>
> if (alloc_slot) {
> ...
> } else {
> file_slot -= 1;
> }
>
> Otherwise there is off-by-one error.
>
> Others looks good,
>
> Reviewed-by: Hao Xu <[email protected]>
Thanks, you are correct, I've folded that in.
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-12 12:23 ` Jens Axboe
@ 2022-05-13 5:28 ` Hao Xu
2022-05-13 12:25 ` Jens Axboe
0 siblings, 1 reply; 21+ messages in thread
From: Hao Xu @ 2022-05-13 5:28 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
在 2022/5/12 下午8:23, Jens Axboe 写道:
> On 5/12/22 2:21 AM, Hao Xu wrote:
>> ? 2022/5/9 ??11:50, Jens Axboe ??:
>>> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
>>> then that's a hint to allocate a fixed file descriptor rather than have
>>> one be passed in directly.
>>>
>>> This can be useful for having io_uring manage the direct descriptor space.
>>>
>>> Normal open direct requests will complete with 0 for success, and < 0
>>> in case of error. If io_uring is asked to allocated the direct descriptor,
>>> then the direct descriptor is returned in case of success.
>>>
>>> Signed-off-by: Jens Axboe <[email protected]>
>>> ---
>>> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
>>> include/uapi/linux/io_uring.h | 9 +++++++++
>>> 2 files changed, 38 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index 8c40411a7e78..ef999d0e09de 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>> return __io_openat_prep(req, sqe);
>>> }
>>> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>>> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
>>> {
>>> struct io_file_table *table = &ctx->file_table;
>>> unsigned long nr = ctx->nr_user_files;
>>> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>>> return -ENFILE;
>>> }
>>> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
>>> + struct file *file, unsigned int file_slot)
>>> +{
>>> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
>>> + struct io_ring_ctx *ctx = req->ctx;
>>> + int ret;
>>> +
>>> + if (alloc_slot) {
>>> + io_ring_submit_lock(ctx, issue_flags);
>>> + file_slot = io_file_bitmap_get(ctx);
>>> + if (unlikely(file_slot < 0)) {
>>> + io_ring_submit_unlock(ctx, issue_flags);
>>> + return file_slot;
>>> + }
>>> + }
>>
>> if (alloc_slot) {
>> ...
>> } else {
>> file_slot -= 1;
>> }
>>
>> Otherwise there is off-by-one error.
>>
>> Others looks good,
>>
>> Reviewed-by: Hao Xu <[email protected]>
>
> Thanks, you are correct, I've folded that in.
>
Hi Jens,
I've rebased multishot accept based on your fixed-alloc branch:
https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v6
Let me know when ixed-alloc is ready, then I'll do the final rebase
for multishot accept and send it to the list, including the liburing
change.
Thanks,
Hao
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-13 5:28 ` Hao Xu
@ 2022-05-13 12:25 ` Jens Axboe
2022-05-13 12:56 ` Jens Axboe
0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2022-05-13 12:25 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/12/22 11:28 PM, Hao Xu wrote:
> ? 2022/5/12 ??8:23, Jens Axboe ??:
>> On 5/12/22 2:21 AM, Hao Xu wrote:
>>> ? 2022/5/9 ??11:50, Jens Axboe ??:
>>>> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
>>>> then that's a hint to allocate a fixed file descriptor rather than have
>>>> one be passed in directly.
>>>>
>>>> This can be useful for having io_uring manage the direct descriptor space.
>>>>
>>>> Normal open direct requests will complete with 0 for success, and < 0
>>>> in case of error. If io_uring is asked to allocated the direct descriptor,
>>>> then the direct descriptor is returned in case of success.
>>>>
>>>> Signed-off-by: Jens Axboe <[email protected]>
>>>> ---
>>>> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
>>>> include/uapi/linux/io_uring.h | 9 +++++++++
>>>> 2 files changed, 38 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>>> index 8c40411a7e78..ef999d0e09de 100644
>>>> --- a/fs/io_uring.c
>>>> +++ b/fs/io_uring.c
>>>> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>>> return __io_openat_prep(req, sqe);
>>>> }
>>>> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>>>> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
>>>> {
>>>> struct io_file_table *table = &ctx->file_table;
>>>> unsigned long nr = ctx->nr_user_files;
>>>> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>>>> return -ENFILE;
>>>> }
>>>> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
>>>> + struct file *file, unsigned int file_slot)
>>>> +{
>>>> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
>>>> + struct io_ring_ctx *ctx = req->ctx;
>>>> + int ret;
>>>> +
>>>> + if (alloc_slot) {
>>>> + io_ring_submit_lock(ctx, issue_flags);
>>>> + file_slot = io_file_bitmap_get(ctx);
>>>> + if (unlikely(file_slot < 0)) {
>>>> + io_ring_submit_unlock(ctx, issue_flags);
>>>> + return file_slot;
>>>> + }
>>>> + }
>>>
>>> if (alloc_slot) {
>>> ...
>>> } else {
>>> file_slot -= 1;
>>> }
>>>
>>> Otherwise there is off-by-one error.
>>>
>>> Others looks good,
>>>
>>> Reviewed-by: Hao Xu <[email protected]>
>>
>> Thanks, you are correct, I've folded that in.
>>
>
> Hi Jens,
> I've rebased multishot accept based on your fixed-alloc branch:
>
> https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v6
>
> Let me know when ixed-alloc is ready, then I'll do the final rebase
> for multishot accept and send it to the list, including the liburing
> change.
Just base it against for-5.19/io_uring, I'm fixing the one-off and
pushing it out.
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-13 12:25 ` Jens Axboe
@ 2022-05-13 12:56 ` Jens Axboe
0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-13 12:56 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/13/22 6:25 AM, Jens Axboe wrote:
> On 5/12/22 11:28 PM, Hao Xu wrote:
>> ? 2022/5/12 ??8:23, Jens Axboe ??:
>>> On 5/12/22 2:21 AM, Hao Xu wrote:
>>>> ? 2022/5/9 ??11:50, Jens Axboe ??:
>>>>> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
>>>>> then that's a hint to allocate a fixed file descriptor rather than have
>>>>> one be passed in directly.
>>>>>
>>>>> This can be useful for having io_uring manage the direct descriptor space.
>>>>>
>>>>> Normal open direct requests will complete with 0 for success, and < 0
>>>>> in case of error. If io_uring is asked to allocated the direct descriptor,
>>>>> then the direct descriptor is returned in case of success.
>>>>>
>>>>> Signed-off-by: Jens Axboe <[email protected]>
>>>>> ---
>>>>> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
>>>>> include/uapi/linux/io_uring.h | 9 +++++++++
>>>>> 2 files changed, 38 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>>>> index 8c40411a7e78..ef999d0e09de 100644
>>>>> --- a/fs/io_uring.c
>>>>> +++ b/fs/io_uring.c
>>>>> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>>>> return __io_openat_prep(req, sqe);
>>>>> }
>>>>> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>>>>> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
>>>>> {
>>>>> struct io_file_table *table = &ctx->file_table;
>>>>> unsigned long nr = ctx->nr_user_files;
>>>>> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>>>>> return -ENFILE;
>>>>> }
>>>>> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
>>>>> + struct file *file, unsigned int file_slot)
>>>>> +{
>>>>> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
>>>>> + struct io_ring_ctx *ctx = req->ctx;
>>>>> + int ret;
>>>>> +
>>>>> + if (alloc_slot) {
>>>>> + io_ring_submit_lock(ctx, issue_flags);
>>>>> + file_slot = io_file_bitmap_get(ctx);
>>>>> + if (unlikely(file_slot < 0)) {
>>>>> + io_ring_submit_unlock(ctx, issue_flags);
>>>>> + return file_slot;
>>>>> + }
>>>>> + }
>>>>
>>>> if (alloc_slot) {
>>>> ...
>>>> } else {
>>>> file_slot -= 1;
>>>> }
>>>>
>>>> Otherwise there is off-by-one error.
>>>>
>>>> Others looks good,
>>>>
>>>> Reviewed-by: Hao Xu <[email protected]>
>>>
>>> Thanks, you are correct, I've folded that in.
>>>
>>
>> Hi Jens,
>> I've rebased multishot accept based on your fixed-alloc branch:
>>
>> https://github.com/HowHsu/linux/commits/for-5.19/io_uring_multishot_accept_v6
>>
>> Let me know when ixed-alloc is ready, then I'll do the final rebase
>> for multishot accept and send it to the list, including the liburing
>> change.
>
> Just base it against for-5.19/io_uring, I'm fixing the one-off and
> pushing it out.
You can send it against now, I don't think there's any further changes
needed.
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-09 15:50 ` [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2 Jens Axboe
2022-05-12 8:21 ` Hao Xu
@ 2022-05-13 4:38 ` Hao Xu
2022-05-13 12:28 ` Jens Axboe
1 sibling, 1 reply; 21+ messages in thread
From: Hao Xu @ 2022-05-13 4:38 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
在 2022/5/9 下午11:50, Jens Axboe 写道:
> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
> then that's a hint to allocate a fixed file descriptor rather than have
> one be passed in directly.
>
> This can be useful for having io_uring manage the direct descriptor space.
>
> Normal open direct requests will complete with 0 for success, and < 0
> in case of error. If io_uring is asked to allocated the direct descriptor,
> then the direct descriptor is returned in case of success.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
> include/uapi/linux/io_uring.h | 9 +++++++++
> 2 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 8c40411a7e78..ef999d0e09de 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return __io_openat_prep(req, sqe);
> }
>
> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
> {
> struct io_file_table *table = &ctx->file_table;
> unsigned long nr = ctx->nr_user_files;
> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
> return -ENFILE;
> }
>
> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
> + struct file *file, unsigned int file_slot)
> +{
> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
> + struct io_ring_ctx *ctx = req->ctx;
> + int ret;
> +
> + if (alloc_slot) {
> + io_ring_submit_lock(ctx, issue_flags);
> + file_slot = io_file_bitmap_get(ctx);
> + if (unlikely(file_slot < 0)) {
> + io_ring_submit_unlock(ctx, issue_flags);
> + return file_slot;
> + }
> + }
> +
> + ret = io_install_fixed_file(req, file, issue_flags, file_slot);
> + if (alloc_slot) {
> + io_ring_submit_unlock(ctx, issue_flags);
> + if (!ret)
> + return file_slot;
Sorry, I missed onething, looks like this should be file_slot+1, as this
is returned to the userspace. I refer to the previous open/accept direct
feature, they see the file_index from userspace as number counted from
one, so it'd better to keep it consistent.
Regards,
Hao
> + }
> +
> + return ret;
> +}
> +
> static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
> {
> struct open_flags op;
> @@ -4777,8 +4803,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
> if (!fixed)
> fd_install(ret, file);
> else
> - ret = io_install_fixed_file(req, file, issue_flags,
> - req->open.file_slot - 1);
> + ret = io_fixed_fd_install(req, issue_flags, file,
> + req->open.file_slot);
> err:
> putname(req->open.filename);
> req->flags &= ~REQ_F_NEED_CLEANUP;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 06621a278cb6..b7f02a55032a 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -63,6 +63,15 @@ struct io_uring_sqe {
> __u64 __pad2[2];
> };
>
> +/*
> + * If sqe->file_index is set to this for opcodes that instantiate a new
> + * direct descriptor (like openat/openat2/accept), then io_uring will allocate
> + * an available direct descriptor instead of having the application pass one
> + * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
> + * if the space is full.
> + */
> +#define IORING_FILE_INDEX_ALLOC (~0U)
> +
> enum {
> IOSQE_FIXED_FILE_BIT,
> IOSQE_IO_DRAIN_BIT,
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2
2022-05-13 4:38 ` Hao Xu
@ 2022-05-13 12:28 ` Jens Axboe
0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-13 12:28 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/12/22 10:38 PM, Hao Xu wrote:
> ? 2022/5/9 ??11:50, Jens Axboe ??:
>> If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
>> then that's a hint to allocate a fixed file descriptor rather than have
>> one be passed in directly.
>>
>> This can be useful for having io_uring manage the direct descriptor space.
>>
>> Normal open direct requests will complete with 0 for success, and < 0
>> in case of error. If io_uring is asked to allocated the direct descriptor,
>> then the direct descriptor is returned in case of success.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>> fs/io_uring.c | 32 +++++++++++++++++++++++++++++---
>> include/uapi/linux/io_uring.h | 9 +++++++++
>> 2 files changed, 38 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 8c40411a7e78..ef999d0e09de 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -4697,7 +4697,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> return __io_openat_prep(req, sqe);
>> }
>> -static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>> +static int io_file_bitmap_get(struct io_ring_ctx *ctx)
>> {
>> struct io_file_table *table = &ctx->file_table;
>> unsigned long nr = ctx->nr_user_files;
>> @@ -4722,6 +4722,32 @@ static int __maybe_unused io_file_bitmap_get(struct io_ring_ctx *ctx)
>> return -ENFILE;
>> }
>> +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
>> + struct file *file, unsigned int file_slot)
>> +{
>> + int alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
>> + struct io_ring_ctx *ctx = req->ctx;
>> + int ret;
>> +
>> + if (alloc_slot) {
>> + io_ring_submit_lock(ctx, issue_flags);
>> + file_slot = io_file_bitmap_get(ctx);
>> + if (unlikely(file_slot < 0)) {
>> + io_ring_submit_unlock(ctx, issue_flags);
>> + return file_slot;
>> + }
>> + }
>> +
>> + ret = io_install_fixed_file(req, file, issue_flags, file_slot);
>> + if (alloc_slot) {
>> + io_ring_submit_unlock(ctx, issue_flags);
>> + if (!ret)
>> + return file_slot;
>
> Sorry, I missed onething, looks like this should be file_slot+1, as this
> is returned to the userspace. I refer to the previous open/accept direct
> feature, they see the file_index from userspace as number counted from
> one, so it'd better to keep it consistent.
We need to return the actual slot, not slot + 1. It's kind of an ugly
API, but what we had left to deal with. The actual slot number is what
most other things would fill into sqe->fd and set IOSQE_FIXED_FILE with,
not slot + 1.
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 4/6] io_uring: allow allocated fixed files for accept
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
` (2 preceding siblings ...)
2022-05-09 15:50 ` [PATCH 3/6] io_uring: allow allocated fixed files for openat/openat2 Jens Axboe
@ 2022-05-09 15:50 ` Jens Axboe
2022-05-09 15:50 ` [PATCH 5/6] io_uring: bump max direct descriptor count to 1M Jens Axboe
` (3 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-09 15:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot,
then that's a hint to allocate a fixed file descriptor rather than have
one be passed in directly.
This can be useful for having io_uring manage the direct descriptor space,
and also allows multi-shot support to work with fixed files.
Normal accept direct requests will complete with 0 for success, and < 0
in case of error. If io_uring is asked to allocated the direct descriptor,
then the direct descriptor is returned in case of success.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ef999d0e09de..7356e80ffdbb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5801,8 +5801,8 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
fd_install(fd, file);
ret = fd;
} else {
- ret = io_install_fixed_file(req, file, issue_flags,
- accept->file_slot - 1);
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ accept->file_slot);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
--
2.35.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 5/6] io_uring: bump max direct descriptor count to 1M
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
` (3 preceding siblings ...)
2022-05-09 15:50 ` [PATCH 4/6] io_uring: allow allocated fixed files for accept Jens Axboe
@ 2022-05-09 15:50 ` Jens Axboe
2022-05-09 15:50 ` [PATCH 6/6] io_uring: add flag for allocating a fully sparse direct descriptor space Jens Axboe
` (2 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-09 15:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
We currently limit these to 32K, but since we're now backing the table
space with vmalloc when needed, there's no reason why we can't make it
bigger. The total space is limited by RLIMIT_NOFILE as well.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7356e80ffdbb..644f57a46c5f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -94,7 +94,7 @@
#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
/* only define max */
-#define IORING_MAX_FIXED_FILES (1U << 15)
+#define IORING_MAX_FIXED_FILES (1U << 20)
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
--
2.35.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 6/6] io_uring: add flag for allocating a fully sparse direct descriptor space
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
` (4 preceding siblings ...)
2022-05-09 15:50 ` [PATCH 5/6] io_uring: bump max direct descriptor count to 1M Jens Axboe
@ 2022-05-09 15:50 ` Jens Axboe
2022-05-10 4:44 ` Hao Xu
2022-05-13 10:56 ` [PATCHSET v2 0/6] Allow allocated direct descriptors Hao Xu
2022-06-09 8:57 ` Stefan Metzmacher
7 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2022-05-09 15:50 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, haoxu.linux, Jens Axboe
Currently to setup a fully sparse descriptor space upfront, the app needs
to alloate an array of the full size and memset it to -1 and then pass
that in. Make this a bit easier by allowing a flag that simply does
this internally rather than needing to copy each slot separately.
This works with IORING_REGISTER_FILES2 as the flag is set in struct
io_uring_rsrc_register, and is only allow when the type is
IORING_RSRC_FILE as this doesn't make sense for registered buffers.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 15 ++++++++++++---
include/uapi/linux/io_uring.h | 8 +++++++-
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 644f57a46c5f..fe67fe939fac 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9107,12 +9107,12 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
struct io_fixed_file *file_slot;
- if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+ if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) {
ret = -EFAULT;
goto fail;
}
/* allow sparse sets */
- if (fd == -1) {
+ if (!fds || fd == -1) {
ret = -EINVAL;
if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
goto fail;
@@ -11755,14 +11755,20 @@ static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
memset(&rr, 0, sizeof(rr));
if (copy_from_user(&rr, arg, size))
return -EFAULT;
- if (!rr.nr || rr.resv || rr.resv2)
+ if (!rr.nr || rr.resv2)
+ return -EINVAL;
+ if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE)
return -EINVAL;
switch (type) {
case IORING_RSRC_FILE:
+ if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+ break;
return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
rr.nr, u64_to_user_ptr(rr.tags));
case IORING_RSRC_BUFFER:
+ if (rr.flags & IORING_RSRC_REGISTER_SPARSE)
+ break;
return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
rr.nr, u64_to_user_ptr(rr.tags));
}
@@ -11931,6 +11937,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = io_sqe_buffers_unregister(ctx);
break;
case IORING_REGISTER_FILES:
+ ret = -EFAULT;
+ if (!arg)
+ break;
ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
break;
case IORING_UNREGISTER_FILES:
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b7f02a55032a..d09cf7c0d1fe 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -396,9 +396,15 @@ struct io_uring_files_update {
__aligned_u64 /* __s32 * */ fds;
};
+/*
+ * Register a fully sparse file sparse, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
+
struct io_uring_rsrc_register {
__u32 nr;
- __u32 resv;
+ __u32 flags;
__u64 resv2;
__aligned_u64 data;
__aligned_u64 tags;
--
2.35.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 6/6] io_uring: add flag for allocating a fully sparse direct descriptor space
2022-05-09 15:50 ` [PATCH 6/6] io_uring: add flag for allocating a fully sparse direct descriptor space Jens Axboe
@ 2022-05-10 4:44 ` Hao Xu
2022-05-10 12:27 ` Jens Axboe
0 siblings, 1 reply; 21+ messages in thread
From: Hao Xu @ 2022-05-10 4:44 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
在 2022/5/9 下午11:50, Jens Axboe 写道:
> Currently to setup a fully sparse descriptor space upfront, the app needs
> to alloate an array of the full size and memset it to -1 and then pass
> that in. Make this a bit easier by allowing a flag that simply does
> this internally rather than needing to copy each slot separately.
>
> This works with IORING_REGISTER_FILES2 as the flag is set in struct
> io_uring_rsrc_register, and is only allow when the type is
> IORING_RSRC_FILE as this doesn't make sense for registered buffers.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 15 ++++++++++++---
> include/uapi/linux/io_uring.h | 8 +++++++-
> 2 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 644f57a46c5f..fe67fe939fac 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -9107,12 +9107,12 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
> for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
> struct io_fixed_file *file_slot;
>
> - if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
> + if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) {
> ret = -EFAULT;
> goto fail;
> }
> /* allow sparse sets */
> - if (fd == -1) {
> + if (!fds || fd == -1) {
> ret = -EINVAL;
> if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
> goto fail;
> @@ -11755,14 +11755,20 @@ static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
> memset(&rr, 0, sizeof(rr));
> if (copy_from_user(&rr, arg, size))
> return -EFAULT;
> - if (!rr.nr || rr.resv || rr.resv2)
> + if (!rr.nr || rr.resv2)
> + return -EINVAL;
> + if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE)
> return -EINVAL;
>
> switch (type) {
> case IORING_RSRC_FILE:
> + if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
> + break;
> return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
> rr.nr, u64_to_user_ptr(rr.tags));
> case IORING_RSRC_BUFFER:
> + if (rr.flags & IORING_RSRC_REGISTER_SPARSE)
> + break;
> return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
> rr.nr, u64_to_user_ptr(rr.tags));
> }
> @@ -11931,6 +11937,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
> ret = io_sqe_buffers_unregister(ctx);
> break;
> case IORING_REGISTER_FILES:
> + ret = -EFAULT;
> + if (!arg)
> + break;
> ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
> break;
> case IORING_UNREGISTER_FILES:
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index b7f02a55032a..d09cf7c0d1fe 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -396,9 +396,15 @@ struct io_uring_files_update {
> __aligned_u64 /* __s32 * */ fds;
> };
>
> +/*
> + * Register a fully sparse file sparse, rather than pass in an array of all
^space
> + * -1 file descriptors.
> + */
> +#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
> +
> struct io_uring_rsrc_register {
> __u32 nr;
> - __u32 resv;
> + __u32 flags;
> __u64 resv2;
> __aligned_u64 data;
> __aligned_u64 tags;
This looks promising, we may eliminate cqes for open/accept_direct reqs.
feel free to add,
Reviewed-by: Hao Xu <[email protected]>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 6/6] io_uring: add flag for allocating a fully sparse direct descriptor space
2022-05-10 4:44 ` Hao Xu
@ 2022-05-10 12:27 ` Jens Axboe
0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2022-05-10 12:27 UTC (permalink / raw)
To: Hao Xu, io-uring; +Cc: asml.silence
On 5/9/22 10:44 PM, Hao Xu wrote:
>> @@ -11931,6 +11937,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
>> ret = io_sqe_buffers_unregister(ctx);
>> break;
>> case IORING_REGISTER_FILES:
>> + ret = -EFAULT;
>> + if (!arg)
>> + break;
>> ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
>> break;
>> case IORING_UNREGISTER_FILES:
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index b7f02a55032a..d09cf7c0d1fe 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -396,9 +396,15 @@ struct io_uring_files_update {
>> __aligned_u64 /* __s32 * */ fds;
>> };
>> +/*
>> + * Register a fully sparse file sparse, rather than pass in an array of all
>
> ^space
Thanks, fixed.
>> + * -1 file descriptors.
>> + */
>> +#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
>> +
>> struct io_uring_rsrc_register {
>> __u32 nr;
>> - __u32 resv;
>> + __u32 flags;
>> __u64 resv2;
>> __aligned_u64 data;
>> __aligned_u64 tags;
>
> This looks promising, we may eliminate cqes for open/accept_direct reqs.
>
> feel free to add,
> Reviewed-by: Hao Xu <[email protected]>
Added, thanks. Did you review the other ones too?
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCHSET v2 0/6] Allow allocated direct descriptors
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
` (5 preceding siblings ...)
2022-05-09 15:50 ` [PATCH 6/6] io_uring: add flag for allocating a fully sparse direct descriptor space Jens Axboe
@ 2022-05-13 10:56 ` Hao Xu
2022-06-09 8:57 ` Stefan Metzmacher
7 siblings, 0 replies; 21+ messages in thread
From: Hao Xu @ 2022-05-13 10:56 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence
在 2022/5/9 下午11:50, Jens Axboe 写道:
> Hi,
>
> Currently using direct descriptors with open or accept requires the
> application to manage the descriptor space, picking which slot to use
> for any given file. However, there are cases where it's useful to just
> get a direct descriptor and not care about which value it is, instead
> just return it like a normal open or accept would.
>
> This will also be useful for multishot accept support, where allocated
> direct descriptors are a requirement to make that feature work with
> these kinds of files.
>
> This adds support for allocating a new fixed descriptor. This is chosen
> by passing in IORING_FILE_INDEX_ALLOC as the fixed slot, which is beyond
> any valid value for the file_index.
>
> Can also be found here:
>
> https://git.kernel.dk/cgit/linux-block/log/?h=for-5.19/io_uring-fixed-alloc
>
> fs/io_uring.c | 113 +++++++++++++++++++++++++++++++---
> include/uapi/linux/io_uring.h | 17 ++++-
> 2 files changed, 120 insertions(+), 10 deletions(-)
>
> v2: - Fix unnecessary clear (Hao)
> - Add define for allocating a descriptor rather than use
> UINT_MAX.
> - Add patch bumping max file table size to 1M (from 32K)
> - Add ability to register a sparse file table upfront rather
> then have the application pass in a big array of -1.
>
Reviewed-by: Hao Xu <[email protected]>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCHSET v2 0/6] Allow allocated direct descriptors
2022-05-09 15:50 [PATCHSET v2 0/6] Allow allocated direct descriptors Jens Axboe
` (6 preceding siblings ...)
2022-05-13 10:56 ` [PATCHSET v2 0/6] Allow allocated direct descriptors Hao Xu
@ 2022-06-09 8:57 ` Stefan Metzmacher
2022-06-10 11:06 ` Hao Xu
7 siblings, 1 reply; 21+ messages in thread
From: Stefan Metzmacher @ 2022-06-09 8:57 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: asml.silence, haoxu.linux, Ralph Böhme, vl
Hi Jens,
this looks very useful, thanks!
I have an additional feature request to make this even more useful...
IO_OP_ACCEPT allows a fixed descriptor for the listen socket
and then can generate a fixed descriptor for the accepted connection,
correct?
It would be extremely useful to also allow that pattern
for IO_OP_OPENAT[2], which currently is not able to get
a fixed descriptor for the dirfd argument (this also applies to
IO_OP_STATX, IO_OP_UNLINK and all others taking a dirfd).
Being able use such a sequence:
OPENTAT2(AT_FDCWD, "directory") => 1 (fixed)
STATX(1 (fixed))
FGETXATTR(1 (fixed)
OPENAT2(1 (fixed), "file") => 2 (fixed)
STATX(2 (fixed))
FGETXATTR(2 (fixed))
CLOSE(1 (fixed)
DUP( 2 (fixed)) => per-process fd for ("file")
I looked briefly how to implement that.
But set_nameidata() takes 'int dfd' to store the value
and it's used later somewhere deep down the stack.
And makes it too complex for me to create patches :-(
It would great if someone could have a look how to make this work.
> Currently using direct descriptors with open or accept requires the
> application to manage the descriptor space, picking which slot to use
> for any given file. However, there are cases where it's useful to just
> get a direct descriptor and not care about which value it is, instead
> just return it like a normal open or accept would.
>
> This will also be useful for multishot accept support, where allocated
> direct descriptors are a requirement to make that feature work with
> these kinds of files.
>
> This adds support for allocating a new fixed descriptor. This is chosen
> by passing in IORING_FILE_INDEX_ALLOC as the fixed slot, which is beyond
> any valid value for the file_index.
I guess that would not work for linked request, as we don't know the
actual id before submitting the requests.
Thanks!
metze
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCHSET v2 0/6] Allow allocated direct descriptors
2022-06-09 8:57 ` Stefan Metzmacher
@ 2022-06-10 11:06 ` Hao Xu
2022-06-10 11:28 ` Stefan Metzmacher
0 siblings, 1 reply; 21+ messages in thread
From: Hao Xu @ 2022-06-10 11:06 UTC (permalink / raw)
To: Stefan Metzmacher, Jens Axboe, io-uring
Cc: asml.silence, haoxu.linux, Ralph Böhme, vl
Hi Stefan,
On 6/9/22 16:57, Stefan Metzmacher wrote:
>
> Hi Jens,
>
> this looks very useful, thanks!
>
> I have an additional feature request to make this even more useful...
>
> IO_OP_ACCEPT allows a fixed descriptor for the listen socket
> and then can generate a fixed descriptor for the accepted connection,
> correct?
Yes.
>
> It would be extremely useful to also allow that pattern
> for IO_OP_OPENAT[2], which currently is not able to get
> a fixed descriptor for the dirfd argument (this also applies to
> IO_OP_STATX, IO_OP_UNLINK and all others taking a dirfd).
>
> Being able use such a sequence:
>
> OPENTAT2(AT_FDCWD, "directory") => 1 (fixed)
> STATX(1 (fixed))
> FGETXATTR(1 (fixed)
> OPENAT2(1 (fixed), "file") => 2 (fixed)
> STATX(2 (fixed))
> FGETXATTR(2 (fixed))
> CLOSE(1 (fixed)
> DUP( 2 (fixed)) => per-process fd for ("file")
>
> I looked briefly how to implement that.
> But set_nameidata() takes 'int dfd' to store the value
> and it's used later somewhere deep down the stack.
> And makes it too complex for me to create patches :-(
>
Indeed.. dirfd is used in path_init() etc. For me, no idea how to tackle
it for now.We surely can register a fixed descriptor to the process
fdtable but that is against the purpose of fixed file..
> It would great if someone could have a look how to make this work.
>
>
>> Currently using direct descriptors with open or accept requires the
>> application to manage the descriptor space, picking which slot to use
>> for any given file. However, there are cases where it's useful to just
>> get a direct descriptor and not care about which value it is, instead
>> just return it like a normal open or accept would.
>>
>> This will also be useful for multishot accept support, where allocated
>> direct descriptors are a requirement to make that feature work with
>> these kinds of files.
>>
>> This adds support for allocating a new fixed descriptor. This is chosen
>> by passing in IORING_FILE_INDEX_ALLOC as the fixed slot, which is beyond
>> any valid value for the file_index.
>
> I guess that would not work for linked request, as we don't know the
> actual id before submitting the requests.
>
> Thanks!
> metze
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCHSET v2 0/6] Allow allocated direct descriptors
2022-06-10 11:06 ` Hao Xu
@ 2022-06-10 11:28 ` Stefan Metzmacher
2022-06-10 13:04 ` Hao Xu
0 siblings, 1 reply; 21+ messages in thread
From: Stefan Metzmacher @ 2022-06-10 11:28 UTC (permalink / raw)
To: Hao Xu, Jens Axboe, io-uring
Cc: asml.silence, haoxu.linux, Ralph Böhme, vl
Am 10.06.22 um 13:06 schrieb Hao Xu:
> Hi Stefan,
> On 6/9/22 16:57, Stefan Metzmacher wrote:
>>
>> Hi Jens,
>>
>> this looks very useful, thanks!
>>
>> I have an additional feature request to make this even more useful...
>>
>> IO_OP_ACCEPT allows a fixed descriptor for the listen socket
>> and then can generate a fixed descriptor for the accepted connection,
>> correct?
>
> Yes.
>
>>
>> It would be extremely useful to also allow that pattern
>> for IO_OP_OPENAT[2], which currently is not able to get
>> a fixed descriptor for the dirfd argument (this also applies to
>> IO_OP_STATX, IO_OP_UNLINK and all others taking a dirfd).
>>
>> Being able use such a sequence:
>>
>> OPENTAT2(AT_FDCWD, "directory") => 1 (fixed)
>> STATX(1 (fixed))
>> FGETXATTR(1 (fixed)
>> OPENAT2(1 (fixed), "file") => 2 (fixed)
>> STATX(2 (fixed))
>> FGETXATTR(2 (fixed))
>> CLOSE(1 (fixed)
>> DUP( 2 (fixed)) => per-process fd for ("file")
>>
>> I looked briefly how to implement that.
>> But set_nameidata() takes 'int dfd' to store the value
>> and it's used later somewhere deep down the stack.
>> And makes it too complex for me to create patches :-(
>>
>
> Indeed.. dirfd is used in path_init() etc. For me, no idea how to tackle
> it for now.We surely can register a fixed descriptor to the process
> fdtable but that is against the purpose of fixed file..
I looked at it a bit more and the good thing is that
'struct nameidata' is private to namei.c, which simplifies
getting an overview.
path_init() is the actual only user of nd.dfd
and it's used to fill nd.path, either from get_fs_pwd()
for AT_FDCWD and f.file->f_path otherwise.
So might be able to have a function that translated
the fd to struct path early and let the callers pass 'struct path'
instead of 'int dfd'...
metze
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCHSET v2 0/6] Allow allocated direct descriptors
2022-06-10 11:28 ` Stefan Metzmacher
@ 2022-06-10 13:04 ` Hao Xu
0 siblings, 0 replies; 21+ messages in thread
From: Hao Xu @ 2022-06-10 13:04 UTC (permalink / raw)
To: Stefan Metzmacher, Jens Axboe, io-uring
Cc: asml.silence, haoxu.linux, Ralph Böhme, vl
On 6/10/22 19:28, Stefan Metzmacher wrote:
>
> Am 10.06.22 um 13:06 schrieb Hao Xu:
>> Hi Stefan,
>> On 6/9/22 16:57, Stefan Metzmacher wrote:
>>>
>>> Hi Jens,
>>>
>>> this looks very useful, thanks!
>>>
>>> I have an additional feature request to make this even more useful...
>>>
>>> IO_OP_ACCEPT allows a fixed descriptor for the listen socket
>>> and then can generate a fixed descriptor for the accepted connection,
>>> correct?
>>
>> Yes.
>>
>>>
>>> It would be extremely useful to also allow that pattern
>>> for IO_OP_OPENAT[2], which currently is not able to get
>>> a fixed descriptor for the dirfd argument (this also applies to
>>> IO_OP_STATX, IO_OP_UNLINK and all others taking a dirfd).
>>>
>>> Being able use such a sequence:
>>>
>>> OPENTAT2(AT_FDCWD, "directory") => 1 (fixed)
>>> STATX(1 (fixed))
>>> FGETXATTR(1 (fixed)
>>> OPENAT2(1 (fixed), "file") => 2 (fixed)
>>> STATX(2 (fixed))
>>> FGETXATTR(2 (fixed))
>>> CLOSE(1 (fixed)
>>> DUP( 2 (fixed)) => per-process fd for ("file")
>>>
>>> I looked briefly how to implement that.
>>> But set_nameidata() takes 'int dfd' to store the value
>>> and it's used later somewhere deep down the stack.
>>> And makes it too complex for me to create patches :-(
>>>
>>
>> Indeed.. dirfd is used in path_init() etc. For me, no idea how to tackle
>> it for now.We surely can register a fixed descriptor to the process
>> fdtable but that is against the purpose of fixed file..
>
> I looked at it a bit more and the good thing is that
> 'struct nameidata' is private to namei.c, which simplifies
> getting an overview.
>
> path_init() is the actual only user of nd.dfd
^[1]
> and it's used to fill nd.path, either from get_fs_pwd()
> for AT_FDCWD and f.file->f_path otherwise.
>
> So might be able to have a function that translated
> the fd to struct path early and let the callers pass 'struct path'
> instead of 'int dfd'...
Yea, if [1] is true. I wrote something for your reference:
(totally unpolished and untested, just to show an idea)
diff --git a/fs/namei.c b/fs/namei.c
index 1f28d3f463c3..18e11717005c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2423,21 +2423,30 @@ static const char *path_init(struct nameidata
*nd, unsigned flags)
nd->inode = nd->path.dentry->d_inode;
}
} else {
- /* Caller must check execute permissions on the starting
path component */
- struct fd f = fdget_raw(nd->dfd);
struct dentry *dentry;
- if (!f.file)
- return ERR_PTR(-EBADF);
+ if (nd->dfd != -1) {
+ /* Caller must check execute permissions on the
starting path component */
+ struct fd f = fdget_raw(nd->dfd);
- dentry = f.file->f_path.dentry;
+ if (!f.file)
+ return ERR_PTR(-EBADF);
- if (*s && unlikely(!d_can_lookup(dentry))) {
- fdput(f);
- return ERR_PTR(-ENOTDIR);
+ dentry = f.file->f_path.dentry;
+
+ if (*s && unlikely(!d_can_lookup(dentry))) {
+ fdput(f);
+ return ERR_PTR(-ENOTDIR);
+ }
+
+ nd->path = f.file->f_path;
+ } else {
+ dentry = nd->path.dentry;
+
+ if (*s && unlikely(!d_can_lookup(dentry)))
+ return ERR_PTR(-ENOTDIR);
}
- nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
nd->inode = nd->path.dentry->d_inode;
nd->seq =
read_seqcount_begin(&nd->path.dentry->d_seq);
@@ -2445,7 +2454,9 @@ static const char *path_init(struct nameidata *nd,
unsigned flags)
path_get(&nd->path);
nd->inode = nd->path.dentry->d_inode;
}
- fdput(f);
+ if (dfd != -1)
+ fdput(f);
+
}
/* For scoped-lookups we need to set the root to the dirfd as
well. */
@@ -3686,6 +3697,48 @@ struct file *do_filp_open(int dfd, struct
filename *pathname,
return filp;
}
+static void __set_nameidata2(struct nameidata *p, struct path *path,
+ struct filename *name)
+{
+ struct nameidata *old = current->nameidata;
+ p->stack = p->internal;
+ p->depth = 0;
+ p->dfd = -1;
+ p->name = name;
+ p->path = *path;
+ p->total_link_count = old ? old->total_link_count : 0;
+ p->saved = old;
+ current->nameidata = p;
+}
+
+static inline void set_nameidata2(struct nameidata *p, struct path *path,
+ struct filename *name, const struct
path *root)
+{
+ __set_nameidata2(p, path, name);
+ p->state = 0;
+ if (unlikely(root)) {
+ p->state = ND_ROOT_PRESET;
+ p->root = *root;
+ }
+}
+
+struct file *do_filp_open_path(struct *path, struct filename *pathname,
+ const struct open_flags *op)
+{
+ struct nameidata nd;
+ int flags = op->lookup_flags;
+ struct file *filp;
+
+ set_nameidata2(&nd, path, pathname, NULL);
+ filp = path_openat(&nd, op, flags | LOOKUP_RCU);
+ if (unlikely(filp == ERR_PTR(-ECHILD)))
+ filp = path_openat(&nd, op, flags);
+ if (unlikely(filp == ERR_PTR(-ESTALE)))
+ filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
+ restore_nameidata();
+ return filp;
+}
+
struct file *do_file_open_root(const struct path *root,
const char *name, const struct open_flags *op)
{
^ permalink raw reply related [flat|nested] 21+ messages in thread