* [PATCH v4 01/26] io_uring: name rsrc bits consistently
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 02/26] io_uring: simplify io_rsrc_node_ref_zero Pavel Begunkov
` (25 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Keep resource related structs' and functions' naming consistent, in
particular use "io_rsrc" prefix for everything.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 150 ++++++++++++++++++++++++--------------------------
1 file changed, 71 insertions(+), 79 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8ad6ab708083..5dfd33753471 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -219,22 +219,22 @@ struct fixed_rsrc_table {
struct file **files;
};
-struct fixed_rsrc_ref_node {
+struct io_rsrc_node {
struct percpu_ref refs;
struct list_head node;
struct list_head rsrc_list;
- struct fixed_rsrc_data *rsrc_data;
+ struct io_rsrc_data *rsrc_data;
void (*rsrc_put)(struct io_ring_ctx *ctx,
struct io_rsrc_put *prsrc);
struct llist_node llist;
bool done;
};
-struct fixed_rsrc_data {
+struct io_rsrc_data {
struct fixed_rsrc_table *table;
struct io_ring_ctx *ctx;
- struct fixed_rsrc_ref_node *node;
+ struct io_rsrc_node *node;
struct percpu_ref refs;
struct completion done;
bool quiesce;
@@ -393,7 +393,7 @@ struct io_ring_ctx {
* readers must ensure that ->refs is alive as long as the file* is
* used. Only updated through io_uring_register(2).
*/
- struct fixed_rsrc_data *file_data;
+ struct io_rsrc_data *file_data;
unsigned nr_user_files;
/* if used, fixed mapped user buffers */
@@ -447,7 +447,7 @@ struct io_ring_ctx {
struct llist_head rsrc_put_llist;
struct list_head rsrc_ref_list;
spinlock_t rsrc_ref_lock;
- struct fixed_rsrc_ref_node *rsrc_backup_node;
+ struct io_rsrc_node *rsrc_backup_node;
struct io_restriction restrictions;
@@ -1024,9 +1024,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files);
static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
-static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node);
-static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
- struct io_ring_ctx *ctx);
+static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);
+static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -1072,7 +1071,7 @@ EXPORT_SYMBOL(io_uring_get_socket);
#define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link)
-static inline void io_set_resource_node(struct io_kiocb *req)
+static inline void io_req_set_rsrc_node(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -6272,7 +6271,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
#endif
#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
-static inline struct file **io_fixed_file_slot(struct fixed_rsrc_data *file_data,
+static inline struct file **io_fixed_file_slot(struct io_rsrc_data *file_data,
unsigned i)
{
struct fixed_rsrc_table *table;
@@ -6306,7 +6305,7 @@ static struct file *io_file_get(struct io_submit_state *state,
file_ptr &= ~FFS_MASK;
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_ASYNC_READ_BIT);
- io_set_resource_node(req);
+ io_req_set_rsrc_node(req);
} else {
trace_io_uring_file_get(ctx, fd);
file = __io_file_get(state, fd);
@@ -7060,9 +7059,8 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_rsrc_data_ref_zero(struct percpu_ref *ref)
{
- struct fixed_rsrc_data *data;
+ struct io_rsrc_data *data = container_of(ref, struct io_rsrc_data, refs);
- data = container_of(ref, struct fixed_rsrc_data, refs);
complete(&data->done);
}
@@ -7076,20 +7074,20 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
spin_unlock_bh(&ctx->rsrc_ref_lock);
}
-static void io_sqe_rsrc_set_node(struct io_ring_ctx *ctx,
- struct fixed_rsrc_data *rsrc_data,
- struct fixed_rsrc_ref_node *ref_node)
+static void io_rsrc_node_set(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *rsrc_data,
+ struct io_rsrc_node *rsrc_node)
{
io_rsrc_ref_lock(ctx);
- rsrc_data->node = ref_node;
- list_add_tail(&ref_node->node, &ctx->rsrc_ref_list);
+ rsrc_data->node = rsrc_node;
+ list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
io_rsrc_ref_unlock(ctx);
percpu_ref_get(&rsrc_data->refs);
}
-static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_data *data)
+static void io_rsrc_node_kill(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
{
- struct fixed_rsrc_ref_node *ref_node = NULL;
+ struct io_rsrc_node *ref_node = NULL;
io_rsrc_ref_lock(ctx);
ref_node = data->node;
@@ -7099,21 +7097,21 @@ static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_dat
percpu_ref_kill(&ref_node->refs);
}
-static int io_rsrc_refnode_prealloc(struct io_ring_ctx *ctx)
+static int io_rsrc_node_prealloc(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
- ctx->rsrc_backup_node = alloc_fixed_rsrc_ref_node(ctx);
+ ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}
-static struct fixed_rsrc_ref_node *
-io_rsrc_refnode_get(struct io_ring_ctx *ctx,
- struct fixed_rsrc_data *rsrc_data,
- void (*rsrc_put)(struct io_ring_ctx *ctx,
- struct io_rsrc_put *prsrc))
+static struct io_rsrc_node *
+io_rsrc_node_get(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *rsrc_data,
+ void (*rsrc_put)(struct io_ring_ctx *ctx,
+ struct io_rsrc_put *prsrc))
{
- struct fixed_rsrc_ref_node *node = ctx->rsrc_backup_node;
+ struct io_rsrc_node *node = ctx->rsrc_backup_node;
WARN_ON_ONCE(!node);
@@ -7123,12 +7121,12 @@ io_rsrc_refnode_get(struct io_ring_ctx *ctx,
return node;
}
-static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
+static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
struct io_ring_ctx *ctx,
void (*rsrc_put)(struct io_ring_ctx *ctx,
struct io_rsrc_put *prsrc))
{
- struct fixed_rsrc_ref_node *node;
+ struct io_rsrc_node *node;
int ret;
if (data->quiesce)
@@ -7136,10 +7134,10 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
data->quiesce = true;
do {
- ret = io_rsrc_refnode_prealloc(ctx);
+ ret = io_rsrc_node_prealloc(ctx);
if (ret)
break;
- io_sqe_rsrc_kill_node(ctx, data);
+ io_rsrc_node_kill(ctx, data);
percpu_ref_kill(&data->refs);
flush_delayed_work(&ctx->rsrc_put_work);
@@ -7148,8 +7146,8 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
break;
percpu_ref_resurrect(&data->refs);
- node = io_rsrc_refnode_get(ctx, data, rsrc_put);
- io_sqe_rsrc_set_node(ctx, data, node);
+ node = io_rsrc_node_get(ctx, data, rsrc_put);
+ io_rsrc_node_set(ctx, data, node);
reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
@@ -7161,9 +7159,9 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
return ret;
}
-static struct fixed_rsrc_data *alloc_fixed_rsrc_data(struct io_ring_ctx *ctx)
+static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx)
{
- struct fixed_rsrc_data *data;
+ struct io_rsrc_data *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
@@ -7179,7 +7177,7 @@ static struct fixed_rsrc_data *alloc_fixed_rsrc_data(struct io_ring_ctx *ctx)
return data;
}
-static void free_fixed_rsrc_data(struct fixed_rsrc_data *data)
+static void io_rsrc_data_free(struct io_rsrc_data *data)
{
percpu_ref_exit(&data->refs);
kfree(data->table);
@@ -7188,7 +7186,7 @@ static void free_fixed_rsrc_data(struct fixed_rsrc_data *data)
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
- struct fixed_rsrc_data *data = ctx->file_data;
+ struct io_rsrc_data *data = ctx->file_data;
unsigned nr_tables, i;
int ret;
@@ -7207,7 +7205,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
for (i = 0; i < nr_tables; i++)
kfree(data->table[i].files);
- free_fixed_rsrc_data(data);
+ io_rsrc_data_free(data);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
return 0;
@@ -7436,7 +7434,7 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
}
#endif
-static int io_sqe_alloc_file_tables(struct fixed_rsrc_data *file_data,
+static int io_sqe_alloc_file_tables(struct io_rsrc_data *file_data,
unsigned nr_tables, unsigned nr_files)
{
int i;
@@ -7526,9 +7524,9 @@ static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
#endif
}
-static void __io_rsrc_put_work(struct fixed_rsrc_ref_node *ref_node)
+static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
{
- struct fixed_rsrc_data *rsrc_data = ref_node->rsrc_data;
+ struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
struct io_ring_ctx *ctx = rsrc_data->ctx;
struct io_rsrc_put *prsrc, *tmp;
@@ -7552,10 +7550,10 @@ static void io_rsrc_put_work(struct work_struct *work)
node = llist_del_all(&ctx->rsrc_put_llist);
while (node) {
- struct fixed_rsrc_ref_node *ref_node;
+ struct io_rsrc_node *ref_node;
struct llist_node *next = node->next;
- ref_node = llist_entry(node, struct fixed_rsrc_ref_node, llist);
+ ref_node = llist_entry(node, struct io_rsrc_node, llist);
__io_rsrc_put_work(ref_node);
node = next;
}
@@ -7563,27 +7561,23 @@ static void io_rsrc_put_work(struct work_struct *work)
static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
{
- struct fixed_rsrc_ref_node *ref_node;
- struct fixed_rsrc_data *data;
- struct io_ring_ctx *ctx;
+ struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
+ struct io_rsrc_data *data = node->rsrc_data;
+ struct io_ring_ctx *ctx = data->ctx;
bool first_add = false;
int delay = HZ;
- ref_node = container_of(ref, struct fixed_rsrc_ref_node, refs);
- data = ref_node->rsrc_data;
- ctx = data->ctx;
-
io_rsrc_ref_lock(ctx);
- ref_node->done = true;
+ node->done = true;
while (!list_empty(&ctx->rsrc_ref_list)) {
- ref_node = list_first_entry(&ctx->rsrc_ref_list,
- struct fixed_rsrc_ref_node, node);
+ node = list_first_entry(&ctx->rsrc_ref_list,
+ struct io_rsrc_node, node);
/* recycle ref nodes in order */
- if (!ref_node->done)
+ if (!node->done)
break;
- list_del(&ref_node->node);
- first_add |= llist_add(&ref_node->llist, &ctx->rsrc_put_llist);
+ list_del(&node->node);
+ first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
}
io_rsrc_ref_unlock(ctx);
@@ -7596,10 +7590,9 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
queue_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}
-static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
- struct io_ring_ctx *ctx)
+static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
{
- struct fixed_rsrc_ref_node *ref_node;
+ struct io_rsrc_node *ref_node;
ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
if (!ref_node)
@@ -7617,19 +7610,18 @@ static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
}
static void init_fixed_file_ref_node(struct io_ring_ctx *ctx,
- struct fixed_rsrc_ref_node *ref_node)
+ struct io_rsrc_node *ref_node)
{
ref_node->rsrc_data = ctx->file_data;
ref_node->rsrc_put = io_ring_file_put;
}
-static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node)
+static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
{
percpu_ref_exit(&ref_node->refs);
kfree(ref_node);
}
-
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
@@ -7637,8 +7629,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_tables, i;
struct file *file;
int fd, ret = -ENOMEM;
- struct fixed_rsrc_ref_node *ref_node;
- struct fixed_rsrc_data *file_data;
+ struct io_rsrc_node *ref_node;
+ struct io_rsrc_data *file_data;
if (ctx->file_data)
return -EBUSY;
@@ -7647,7 +7639,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
- file_data = alloc_fixed_rsrc_data(ctx);
+ file_data = io_rsrc_data_alloc(ctx);
if (!file_data)
return -ENOMEM;
ctx->file_data = file_data;
@@ -7704,14 +7696,14 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
- ref_node = alloc_fixed_rsrc_ref_node(ctx);
+ ref_node = io_rsrc_node_alloc(ctx);
if (!ref_node) {
io_sqe_files_unregister(ctx);
return -ENOMEM;
}
init_fixed_file_ref_node(ctx, ref_node);
- io_sqe_rsrc_set_node(ctx, file_data, ref_node);
+ io_rsrc_node_set(ctx, file_data, ref_node);
return ret;
out_fput:
for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7723,7 +7715,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
kfree(file_data->table[i].files);
ctx->nr_user_files = 0;
out_free:
- free_fixed_rsrc_data(ctx->file_data);
+ io_rsrc_data_free(ctx->file_data);
ctx->file_data = NULL;
return ret;
}
@@ -7771,10 +7763,10 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
#endif
}
-static int io_queue_rsrc_removal(struct fixed_rsrc_data *data, void *rsrc)
+static int io_queue_rsrc_removal(struct io_rsrc_data *data, void *rsrc)
{
struct io_rsrc_put *prsrc;
- struct fixed_rsrc_ref_node *ref_node = data->node;
+ struct io_rsrc_node *ref_node = data->node;
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
if (!prsrc)
@@ -7786,7 +7778,7 @@ static int io_queue_rsrc_removal(struct fixed_rsrc_data *data, void *rsrc)
return 0;
}
-static inline int io_queue_file_removal(struct fixed_rsrc_data *data,
+static inline int io_queue_file_removal(struct io_rsrc_data *data,
struct file *file)
{
return io_queue_rsrc_removal(data, (void *)file);
@@ -7796,8 +7788,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update *up,
unsigned nr_args)
{
- struct fixed_rsrc_data *data = ctx->file_data;
- struct fixed_rsrc_ref_node *ref_node;
+ struct io_rsrc_data *data = ctx->file_data;
+ struct io_rsrc_node *ref_node;
struct file *file, **file_slot;
__s32 __user *fds;
int fd, i, err;
@@ -7808,7 +7800,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
return -EOVERFLOW;
if (done > ctx->nr_user_files)
return -EINVAL;
- err = io_rsrc_refnode_prealloc(ctx);
+ err = io_rsrc_node_prealloc(ctx);
if (err)
return err;
@@ -7864,8 +7856,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) {
percpu_ref_kill(&data->node->refs);
- ref_node = io_rsrc_refnode_get(ctx, data, io_ring_file_put);
- io_sqe_rsrc_set_node(ctx, data, ref_node);
+ ref_node = io_rsrc_node_get(ctx, data, io_ring_file_put);
+ io_rsrc_node_set(ctx, data, ref_node);
}
return done ? done : err;
}
@@ -8541,7 +8533,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_destroy_buffers(ctx);
if (ctx->rsrc_backup_node)
- destroy_fixed_rsrc_ref_node(ctx->rsrc_backup_node);
+ io_rsrc_node_destroy(ctx->rsrc_backup_node);
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 02/26] io_uring: simplify io_rsrc_node_ref_zero
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 01/26] io_uring: name rsrc bits consistently Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 03/26] io_uring: use rsrc prealloc infra for files reg Pavel Begunkov
` (24 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Replace queue_delayed_work() with mod_delayed_work() in
io_rsrc_node_ref_zero(), as the latter can also schedule new work, and
clean it up further for better readability.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5dfd33753471..f1a96988c3f5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7565,7 +7565,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
struct io_rsrc_data *data = node->rsrc_data;
struct io_ring_ctx *ctx = data->ctx;
bool first_add = false;
- int delay = HZ;
+ int delay;
io_rsrc_ref_lock(ctx);
node->done = true;
@@ -7581,13 +7581,9 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
}
io_rsrc_ref_unlock(ctx);
- if (percpu_ref_is_dying(&data->refs))
- delay = 0;
-
- if (!delay)
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, 0);
- else if (first_add)
- queue_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
+ delay = percpu_ref_is_dying(&data->refs) ? 0 : HZ;
+ if (first_add || !delay)
+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}
static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 03/26] io_uring: use rsrc prealloc infra for files reg
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 01/26] io_uring: name rsrc bits consistently Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 02/26] io_uring: simplify io_rsrc_node_ref_zero Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 04/26] io_uring: encapsulate rsrc node manipulations Pavel Begunkov
` (23 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Keep it consistent with the update path and use io_rsrc_node_prealloc() +
io_rsrc_node_get() in io_sqe_files_register() as well. This will be used
in future patches, is less error prone, and allows deduplicating the
rsrc_node init.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 21 ++++++---------------
1 file changed, 6 insertions(+), 15 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f1a96988c3f5..b53ccac47440 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7605,13 +7605,6 @@ static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
return ref_node;
}
-static void init_fixed_file_ref_node(struct io_ring_ctx *ctx,
- struct io_rsrc_node *ref_node)
-{
- ref_node->rsrc_data = ctx->file_data;
- ref_node->rsrc_put = io_ring_file_put;
-}
-
static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
{
percpu_ref_exit(&ref_node->refs);
@@ -7624,7 +7617,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
__s32 __user *fds = (__s32 __user *) arg;
unsigned nr_tables, i;
struct file *file;
- int fd, ret = -ENOMEM;
+ int fd, ret;
struct io_rsrc_node *ref_node;
struct io_rsrc_data *file_data;
@@ -7634,12 +7627,16 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
+ ret = io_rsrc_node_prealloc(ctx);
+ if (ret)
+ return ret;
file_data = io_rsrc_data_alloc(ctx);
if (!file_data)
return -ENOMEM;
ctx->file_data = file_data;
+ ret = -ENOMEM;
nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
GFP_KERNEL);
@@ -7692,13 +7689,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
- ref_node = io_rsrc_node_alloc(ctx);
- if (!ref_node) {
- io_sqe_files_unregister(ctx);
- return -ENOMEM;
- }
- init_fixed_file_ref_node(ctx, ref_node);
-
+ ref_node = io_rsrc_node_get(ctx, ctx->file_data, io_ring_file_put);
io_rsrc_node_set(ctx, file_data, ref_node);
return ret;
out_fput:
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 04/26] io_uring: encapsulate rsrc node manipulations
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (2 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 03/26] io_uring: use rsrc prealloc infra for files reg Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 05/26] io_uring: move rsrc_put callback into io_rsrc_data Pavel Begunkov
` (22 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_rsrc_node_get() and io_rsrc_node_set() are always used together,
merge them into one so most users don't even see io_rsrc_node and don't
need to care about it.
It helped to catch io_sqe_files_register() inferring the rsrc data argument
for get and set differently; not a problem, but a good sign.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 39 +++++++++++++--------------------------
1 file changed, 13 insertions(+), 26 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b53ccac47440..3d9b58d8eb90 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7076,8 +7076,17 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
static void io_rsrc_node_set(struct io_ring_ctx *ctx,
struct io_rsrc_data *rsrc_data,
- struct io_rsrc_node *rsrc_node)
+ void (*rsrc_put)(struct io_ring_ctx *ctx,
+ struct io_rsrc_put *prsrc))
{
+ struct io_rsrc_node *rsrc_node = ctx->rsrc_backup_node;
+
+ WARN_ON_ONCE(!rsrc_node);
+
+ ctx->rsrc_backup_node = NULL;
+ rsrc_node->rsrc_data = rsrc_data;
+ rsrc_node->rsrc_put = rsrc_put;
+
io_rsrc_ref_lock(ctx);
rsrc_data->node = rsrc_node;
list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
@@ -7105,28 +7114,11 @@ static int io_rsrc_node_prealloc(struct io_ring_ctx *ctx)
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}
-static struct io_rsrc_node *
-io_rsrc_node_get(struct io_ring_ctx *ctx,
- struct io_rsrc_data *rsrc_data,
- void (*rsrc_put)(struct io_ring_ctx *ctx,
- struct io_rsrc_put *prsrc))
-{
- struct io_rsrc_node *node = ctx->rsrc_backup_node;
-
- WARN_ON_ONCE(!node);
-
- ctx->rsrc_backup_node = NULL;
- node->rsrc_data = rsrc_data;
- node->rsrc_put = rsrc_put;
- return node;
-}
-
static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
struct io_ring_ctx *ctx,
void (*rsrc_put)(struct io_ring_ctx *ctx,
struct io_rsrc_put *prsrc))
{
- struct io_rsrc_node *node;
int ret;
if (data->quiesce)
@@ -7146,8 +7138,7 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
break;
percpu_ref_resurrect(&data->refs);
- node = io_rsrc_node_get(ctx, data, rsrc_put);
- io_rsrc_node_set(ctx, data, node);
+ io_rsrc_node_set(ctx, data, rsrc_put);
reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
@@ -7618,7 +7609,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_tables, i;
struct file *file;
int fd, ret;
- struct io_rsrc_node *ref_node;
struct io_rsrc_data *file_data;
if (ctx->file_data)
@@ -7689,8 +7679,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
- ref_node = io_rsrc_node_get(ctx, ctx->file_data, io_ring_file_put);
- io_rsrc_node_set(ctx, file_data, ref_node);
+ io_rsrc_node_set(ctx, file_data, io_ring_file_put);
return ret;
out_fput:
for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7776,7 +7765,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
unsigned nr_args)
{
struct io_rsrc_data *data = ctx->file_data;
- struct io_rsrc_node *ref_node;
struct file *file, **file_slot;
__s32 __user *fds;
int fd, i, err;
@@ -7843,8 +7831,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) {
percpu_ref_kill(&data->node->refs);
- ref_node = io_rsrc_node_get(ctx, data, io_ring_file_put);
- io_rsrc_node_set(ctx, data, ref_node);
+ io_rsrc_node_set(ctx, data, io_ring_file_put);
}
return done ? done : err;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 05/26] io_uring: move rsrc_put callback into io_rsrc_data
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (3 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 04/26] io_uring: encapsulate rsrc node manipulations Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 06/26] io_uring: refactor io_queue_rsrc_removal() Pavel Begunkov
` (21 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_rsrc_node's callback operates only on a single io_rsrc_data and only
with its resources, so the rsrc_put() callback is actually a property of
io_rsrc_data. Move it there; it makes the code much nicer.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 31 ++++++++++++++-----------------
1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3d9b58d8eb90..42c9ef85800e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -224,16 +224,17 @@ struct io_rsrc_node {
struct list_head node;
struct list_head rsrc_list;
struct io_rsrc_data *rsrc_data;
- void (*rsrc_put)(struct io_ring_ctx *ctx,
- struct io_rsrc_put *prsrc);
struct llist_node llist;
bool done;
};
+typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
+
struct io_rsrc_data {
struct fixed_rsrc_table *table;
struct io_ring_ctx *ctx;
+ rsrc_put_fn *do_put;
struct io_rsrc_node *node;
struct percpu_ref refs;
struct completion done;
@@ -7075,9 +7076,7 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
}
static void io_rsrc_node_set(struct io_ring_ctx *ctx,
- struct io_rsrc_data *rsrc_data,
- void (*rsrc_put)(struct io_ring_ctx *ctx,
- struct io_rsrc_put *prsrc))
+ struct io_rsrc_data *rsrc_data)
{
struct io_rsrc_node *rsrc_node = ctx->rsrc_backup_node;
@@ -7085,7 +7084,6 @@ static void io_rsrc_node_set(struct io_ring_ctx *ctx,
ctx->rsrc_backup_node = NULL;
rsrc_node->rsrc_data = rsrc_data;
- rsrc_node->rsrc_put = rsrc_put;
io_rsrc_ref_lock(ctx);
rsrc_data->node = rsrc_node;
@@ -7114,10 +7112,7 @@ static int io_rsrc_node_prealloc(struct io_ring_ctx *ctx)
return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}
-static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
- struct io_ring_ctx *ctx,
- void (*rsrc_put)(struct io_ring_ctx *ctx,
- struct io_rsrc_put *prsrc))
+static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx)
{
int ret;
@@ -7138,7 +7133,7 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
break;
percpu_ref_resurrect(&data->refs);
- io_rsrc_node_set(ctx, data, rsrc_put);
+ io_rsrc_node_set(ctx, data);
reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
@@ -7150,7 +7145,8 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
return ret;
}
-static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx)
+static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
+ rsrc_put_fn *do_put)
{
struct io_rsrc_data *data;
@@ -7164,6 +7160,7 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx)
return NULL;
}
data->ctx = ctx;
+ data->do_put = do_put;
init_completion(&data->done);
return data;
}
@@ -7188,7 +7185,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
*/
if (!data || percpu_ref_is_dying(&data->refs))
return -ENXIO;
- ret = io_rsrc_ref_quiesce(data, ctx, io_ring_file_put);
+ ret = io_rsrc_ref_quiesce(data, ctx);
if (ret)
return ret;
@@ -7523,7 +7520,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
list_del(&prsrc->list);
- ref_node->rsrc_put(ctx, prsrc);
+ rsrc_data->do_put(ctx, prsrc);
kfree(prsrc);
}
@@ -7621,7 +7618,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
if (ret)
return ret;
- file_data = io_rsrc_data_alloc(ctx);
+ file_data = io_rsrc_data_alloc(ctx, io_ring_file_put);
if (!file_data)
return -ENOMEM;
ctx->file_data = file_data;
@@ -7679,7 +7676,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
- io_rsrc_node_set(ctx, file_data, io_ring_file_put);
+ io_rsrc_node_set(ctx, file_data);
return ret;
out_fput:
for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7831,7 +7828,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) {
percpu_ref_kill(&data->node->refs);
- io_rsrc_node_set(ctx, data, io_ring_file_put);
+ io_rsrc_node_set(ctx, data);
}
return done ? done : err;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 06/26] io_uring: refactor io_queue_rsrc_removal()
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (4 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 05/26] io_uring: move rsrc_put callback into io_rsrc_data Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 07/26] io_uring: ctx-wide rsrc nodes Pavel Begunkov
` (20 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Pass rsrc_node into io_queue_rsrc_removal() explicitly. Just a
simple preparation patch, makes following changes nicer.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 42c9ef85800e..5dc4f6bb643a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7736,27 +7736,20 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
#endif
}
-static int io_queue_rsrc_removal(struct io_rsrc_data *data, void *rsrc)
+static int io_queue_rsrc_removal(struct io_rsrc_data *data,
+ struct io_rsrc_node *node, void *rsrc)
{
struct io_rsrc_put *prsrc;
- struct io_rsrc_node *ref_node = data->node;
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
if (!prsrc)
return -ENOMEM;
prsrc->rsrc = rsrc;
- list_add(&prsrc->list, &ref_node->rsrc_list);
-
+ list_add(&prsrc->list, &node->rsrc_list);
return 0;
}
-static inline int io_queue_file_removal(struct io_rsrc_data *data,
- struct file *file)
-{
- return io_queue_rsrc_removal(data, (void *)file);
-}
-
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update *up,
unsigned nr_args)
@@ -7791,7 +7784,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (*file_slot) {
file = (struct file *) ((unsigned long) *file_slot & FFS_MASK);
- err = io_queue_file_removal(data, file);
+ err = io_queue_rsrc_removal(data, data->node, file);
if (err)
break;
*file_slot = NULL;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 07/26] io_uring: ctx-wide rsrc nodes
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (5 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 06/26] io_uring: refactor io_queue_rsrc_removal() Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 08/26] io_uring: reuse io_rsrc_node_destroy() Pavel Begunkov
` (19 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
If we're ever going to support multiple types of resources, we need
shared rsrc nodes so as not to bloat requests; that is implemented in this
patch. It also gives a nicer API and saves one pointer dereference
in io_req_set_rsrc_node().
We may say that all requests bound to a resource belong to one and only
one rsrc node, and considering that nodes are removed and recycled
strictly in-order, this separates requests into generations, where the
generation changes on each node switch (i.e. io_rsrc_node_switch()).
The API is simple: io_rsrc_node_switch() switches to a new generation if
needed, and also optionally kills a passed in io_rsrc_data. Each call to
io_rsrc_node_switch() has to be preceded by
io_rsrc_node_switch_start(). The start function is idempotent and need
not necessarily be followed by a switch.
One difference is that once a node has been set, the ctx will always retain
a valid rsrc node, even on unregister. It may be a nuisance at the moment, but
makes much sense for multiple types of resources. Another thing changed
is that nodes are bound to/associated with an io_rsrc_data later, just
before killing (i.e. switching).
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 73 ++++++++++++++++++++++++++-------------------------
1 file changed, 37 insertions(+), 36 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5dc4f6bb643a..47c76ec422ba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -235,7 +235,6 @@ struct io_rsrc_data {
struct io_ring_ctx *ctx;
rsrc_put_fn *do_put;
- struct io_rsrc_node *node;
struct percpu_ref refs;
struct completion done;
bool quiesce;
@@ -448,6 +447,7 @@ struct io_ring_ctx {
struct llist_head rsrc_put_llist;
struct list_head rsrc_ref_list;
spinlock_t rsrc_ref_lock;
+ struct io_rsrc_node *rsrc_node;
struct io_rsrc_node *rsrc_backup_node;
struct io_restriction restrictions;
@@ -1077,7 +1077,7 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
if (!req->fixed_rsrc_refs) {
- req->fixed_rsrc_refs = &ctx->file_data->node->refs;
+ req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
percpu_ref_get(req->fixed_rsrc_refs);
}
}
@@ -7075,36 +7075,32 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
spin_unlock_bh(&ctx->rsrc_ref_lock);
}
-static void io_rsrc_node_set(struct io_ring_ctx *ctx,
- struct io_rsrc_data *rsrc_data)
+static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *data_to_kill)
{
- struct io_rsrc_node *rsrc_node = ctx->rsrc_backup_node;
+ WARN_ON_ONCE(!ctx->rsrc_backup_node);
+ WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
- WARN_ON_ONCE(!rsrc_node);
+ if (data_to_kill) {
+ struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
- ctx->rsrc_backup_node = NULL;
- rsrc_node->rsrc_data = rsrc_data;
+ rsrc_node->rsrc_data = data_to_kill;
+ io_rsrc_ref_lock(ctx);
+ list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
+ io_rsrc_ref_unlock(ctx);
- io_rsrc_ref_lock(ctx);
- rsrc_data->node = rsrc_node;
- list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
- io_rsrc_ref_unlock(ctx);
- percpu_ref_get(&rsrc_data->refs);
-}
-
-static void io_rsrc_node_kill(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
-{
- struct io_rsrc_node *ref_node = NULL;
+ percpu_ref_get(&data_to_kill->refs);
+ percpu_ref_kill(&rsrc_node->refs);
+ ctx->rsrc_node = NULL;
+ }
- io_rsrc_ref_lock(ctx);
- ref_node = data->node;
- data->node = NULL;
- io_rsrc_ref_unlock(ctx);
- if (ref_node)
- percpu_ref_kill(&ref_node->refs);
+ if (!ctx->rsrc_node) {
+ ctx->rsrc_node = ctx->rsrc_backup_node;
+ ctx->rsrc_backup_node = NULL;
+ }
}
-static int io_rsrc_node_prealloc(struct io_ring_ctx *ctx)
+static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
@@ -7121,10 +7117,11 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
data->quiesce = true;
do {
- ret = io_rsrc_node_prealloc(ctx);
+ ret = io_rsrc_node_switch_start(ctx);
if (ret)
break;
- io_rsrc_node_kill(ctx, data);
+ io_rsrc_node_switch(ctx, data);
+
percpu_ref_kill(&data->refs);
flush_delayed_work(&ctx->rsrc_put_work);
@@ -7133,7 +7130,6 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
break;
percpu_ref_resurrect(&data->refs);
- io_rsrc_node_set(ctx, data);
reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
@@ -7614,7 +7610,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
- ret = io_rsrc_node_prealloc(ctx);
+ ret = io_rsrc_node_switch_start(ctx);
if (ret)
return ret;
@@ -7676,7 +7672,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
- io_rsrc_node_set(ctx, file_data);
+ io_rsrc_node_switch(ctx, NULL);
return ret;
out_fput:
for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7765,7 +7761,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
return -EOVERFLOW;
if (done > ctx->nr_user_files)
return -EINVAL;
- err = io_rsrc_node_prealloc(ctx);
+ err = io_rsrc_node_switch_start(ctx);
if (err)
return err;
@@ -7784,7 +7780,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (*file_slot) {
file = (struct file *) ((unsigned long) *file_slot & FFS_MASK);
- err = io_queue_rsrc_removal(data, data->node, file);
+ err = io_queue_rsrc_removal(data, ctx->rsrc_node, file);
if (err)
break;
*file_slot = NULL;
@@ -7819,10 +7815,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
}
}
- if (needs_switch) {
- percpu_ref_kill(&data->node->refs);
- io_rsrc_node_set(ctx, data);
- }
+ if (needs_switch)
+ io_rsrc_node_switch(ctx, data);
return done ? done : err;
}
@@ -8496,8 +8490,15 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_eventfd_unregister(ctx);
io_destroy_buffers(ctx);
+ /* there are no registered resources left, nobody uses it */
+ if (ctx->rsrc_node)
+ io_rsrc_node_destroy(ctx->rsrc_node);
if (ctx->rsrc_backup_node)
io_rsrc_node_destroy(ctx->rsrc_backup_node);
+ flush_delayed_work(&ctx->rsrc_put_work);
+
+ WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
+ WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 08/26] io_uring: reuse io_rsrc_node_destroy()
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (6 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 07/26] io_uring: ctx-wide rsrc nodes Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 09/26] io_uring: remove useless is_dying check on quiesce Pavel Begunkov
` (18 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Reuse io_rsrc_node_destroy() in __io_rsrc_put_work(). Also move it to a
more appropriate place -- to the other node routines, and remove forward
declaration.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 47c76ec422ba..17e7bed2e945 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1025,7 +1025,6 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files);
static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
-static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);
static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
@@ -7075,6 +7074,12 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
spin_unlock_bh(&ctx->rsrc_ref_lock);
}
+static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
+{
+ percpu_ref_exit(&ref_node->refs);
+ kfree(ref_node);
+}
+
static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
struct io_rsrc_data *data_to_kill)
{
@@ -7520,8 +7525,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
kfree(prsrc);
}
- percpu_ref_exit(&ref_node->refs);
- kfree(ref_node);
+ io_rsrc_node_destroy(ref_node);
percpu_ref_put(&rsrc_data->refs);
}
@@ -7589,12 +7593,6 @@ static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
return ref_node;
}
-static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
-{
- percpu_ref_exit(&ref_node->refs);
- kfree(ref_node);
-}
-
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 09/26] io_uring: remove useless is_dying check on quiesce
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (7 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 08/26] io_uring: reuse io_rsrc_node_destroy() Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 10/26] io_uring: refactor rw reissue Pavel Begunkov
` (17 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
rsrc_data refs should always be valid for potential submitters,
io_rsrc_ref_quiesce() restores it before unlocking, so
percpu_ref_is_dying() check in io_sqe_files_unregister() does nothing
and is misleading. Concurrent quiesce is prevented with
struct io_rsrc_data::quiesce.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 17e7bed2e945..e8d95feed75f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7117,6 +7117,7 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
{
int ret;
+ /* As we may drop ->uring_lock, other task may have started quiesce */
if (data->quiesce)
return -ENXIO;
@@ -7179,12 +7180,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
unsigned nr_tables, i;
int ret;
- /*
- * percpu_ref_is_dying() is to stop parallel files unregister
- * Since we possibly drop uring lock later in this function to
- * run task work.
- */
- if (!data || percpu_ref_is_dying(&data->refs))
+ if (!data)
return -ENXIO;
ret = io_rsrc_ref_quiesce(data, ctx);
if (ret)
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 10/26] io_uring: refactor rw reissue
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (8 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 09/26] io_uring: remove useless is_dying check on quiesce Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 11/26] io_uring: combine lock/unlock sections on exit Pavel Begunkov
` (16 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Move the io_rw_should_reissue() check into io_resubmit_prep(), so we
can remove it from io_rw_reissue() and
io_complete_rw_iopoll().
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 37 ++++++++++++++++---------------------
1 file changed, 16 insertions(+), 21 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e8d95feed75f..e9bfe137270c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2441,17 +2441,6 @@ static void kiocb_end_write(struct io_kiocb *req)
}
#ifdef CONFIG_BLOCK
-static bool io_resubmit_prep(struct io_kiocb *req)
-{
- struct io_async_rw *rw = req->async_data;
-
- if (!rw)
- return !io_req_prep_async(req);
- /* may have left rw->iter inconsistent on -EIOCBQUEUED */
- iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
- return true;
-}
-
static bool io_rw_should_reissue(struct io_kiocb *req)
{
umode_t mode = file_inode(req->file)->i_mode;
@@ -2467,26 +2456,34 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
* Don't attempt to reissue from that path, just let it fail with
* -EAGAIN.
*/
- if (percpu_ref_is_dying(&ctx->refs))
- return false;
- return true;
+ return !percpu_ref_is_dying(&ctx->refs);
}
-#endif
-static bool io_rw_reissue(struct io_kiocb *req)
+static bool io_resubmit_prep(struct io_kiocb *req)
{
-#ifdef CONFIG_BLOCK
+ struct io_async_rw *rw = req->async_data;
+
if (!io_rw_should_reissue(req))
return false;
lockdep_assert_held(&req->ctx->uring_lock);
+ if (!rw)
+ return !io_req_prep_async(req);
+ /* may have left rw->iter inconsistent on -EIOCBQUEUED */
+ iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
+ return true;
+}
+#endif
+
+static bool io_rw_reissue(struct io_kiocb *req)
+{
+#ifdef CONFIG_BLOCK
if (io_resubmit_prep(req)) {
req_ref_get(req);
io_queue_async_work(req);
return true;
}
- req_set_fail_links(req);
#endif
return false;
}
@@ -2525,9 +2522,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
bool fail = true;
#ifdef CONFIG_BLOCK
- if (res == -EAGAIN && io_rw_should_reissue(req) &&
- io_resubmit_prep(req))
- fail = false;
+ fail = res != -EAGAIN || !io_resubmit_prep(req);
#endif
if (fail) {
req_set_fail_links(req);
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 11/26] io_uring: combine lock/unlock sections on exit
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (9 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 10/26] io_uring: refactor rw reissue Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 12/26] io_uring: better ref handling in poll_remove_one Pavel Begunkov
` (15 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_ring_exit_work() already does uring_lock lock/unlock, no need to
repeat it for lock waiting trick in io_ring_ctx_free(). Move the waiting
with comments and spinlocking into io_ring_exit_work.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e9bfe137270c..9ebdd288653f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8455,16 +8455,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
- /*
- * Some may use context even when all refs and requests have been put,
- * and they are free to do so while still holding uring_lock or
- * completion_lock, see __io_req_task_submit(). Wait for them to finish.
- */
- mutex_lock(&ctx->uring_lock);
- mutex_unlock(&ctx->uring_lock);
- spin_lock_irq(&ctx->completion_lock);
- spin_unlock_irq(&ctx->completion_lock);
-
io_sq_thread_finish(ctx);
io_sqe_buffers_unregister(ctx);
@@ -8615,6 +8605,12 @@ static void io_ring_exit_work(struct work_struct *work)
WARN_ON_ONCE(time_after(jiffies, timeout));
} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+ /*
+ * Some may use context even when all refs and requests have been put,
+ * and they are free to do so while still holding uring_lock or
+ * completion_lock, see __io_req_task_submit(). Apart from other work,
+ * this lock/unlock section also waits them to finish.
+ */
mutex_lock(&ctx->uring_lock);
while (!list_empty(&ctx->tctx_list)) {
WARN_ON_ONCE(time_after(jiffies, timeout));
@@ -8635,6 +8631,8 @@ static void io_ring_exit_work(struct work_struct *work)
mutex_lock(&ctx->uring_lock);
}
mutex_unlock(&ctx->uring_lock);
+ spin_lock_irq(&ctx->completion_lock);
+ spin_unlock_irq(&ctx->completion_lock);
io_ring_ctx_free(ctx);
}
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 12/26] io_uring: better ref handling in poll_remove_one
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (10 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 11/26] io_uring: combine lock/unlock sections on exit Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 13/26] io_uring: remove unused hash_wait Pavel Begunkov
` (14 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Instead of using io_put_req() to drop a non-final ref, use req_ref_put(),
which is slimmer and will also check the invariant.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ebdd288653f..bf3eeabda71d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5224,7 +5224,7 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
/* non-poll requests have submit ref still */
do_complete = __io_poll_remove_one(req, &apoll->poll, true);
if (do_complete) {
- io_put_req(req);
+ req_ref_put(req);
kfree(apoll->double_poll);
kfree(apoll);
}
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 13/26] io_uring: remove unused hash_wait
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (11 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 12/26] io_uring: better ref handling in poll_remove_one Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 14/26] io_uring: refactor io_async_cancel() Pavel Begunkov
` (13 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
No users of io_ring_ctx::hash_wait are left, kill it.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bf3eeabda71d..4314e738c2ad 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -455,8 +455,6 @@ struct io_ring_ctx {
/* exit task_work */
struct callback_head *exit_task_work;
- struct wait_queue_head hash_wait;
-
/* Keep this last, we don't need it for the fast path */
struct work_struct exit_work;
struct list_head tctx_list;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 14/26] io_uring: refactor io_async_cancel()
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (12 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 13/26] io_uring: remove unused hash_wait Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 15/26] io_uring: improve import_fixed overflow checks Pavel Begunkov
` (12 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Remove extra tctx==NULL checks that are already done by
io_async_cancel_one().
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4314e738c2ad..c3cbc3dfa7f3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5804,8 +5804,6 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
struct io_uring_task *tctx = node->task->io_uring;
- if (!tctx || !tctx->io_wq)
- continue;
ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
if (ret != -ENOENT)
break;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 15/26] io_uring: improve import_fixed overflow checks
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (13 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 14/26] io_uring: refactor io_async_cancel() Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 16/26] io_uring: store reg buffer end instead of length Pavel Begunkov
` (11 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Replace a hand-coded overflow check with a specialised function. Even
though compilers are smart enough to generate an identical binary (i.e.
check the carry bit), it's more foolproof and conveys the intention
better.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c3cbc3dfa7f3..053baa4ca02e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2771,8 +2771,8 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
size_t len = req->rw.len;
struct io_mapped_ubuf *imu;
u16 index, buf_index = req->buf_index;
+ u64 buf_end, buf_addr = req->rw.addr;
size_t offset;
- u64 buf_addr;
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
@@ -2780,11 +2780,10 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
imu = &ctx->user_bufs[index];
buf_addr = req->rw.addr;
- /* overflow */
- if (buf_addr + len < buf_addr)
+ if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
return -EFAULT;
/* not inside the mapped region */
- if (buf_addr < imu->ubuf || buf_addr + len > imu->ubuf + imu->len)
+ if (buf_addr < imu->ubuf || buf_end > imu->ubuf + imu->len)
return -EFAULT;
/*
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 16/26] io_uring: store reg buffer end instead of length
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (14 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 15/26] io_uring: improve import_fixed overflow checks Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 17/26] io_uring: kill unused forward decls Pavel Begunkov
` (10 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
It's a bit more convenient for us to store a registered buffer end
address instead of length, see struct io_mapped_ubuf, as it allows us to
avoid recomputing it every time.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 053baa4ca02e..bafe84ad5b32 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -194,7 +194,7 @@ enum io_uring_cmd_flags {
struct io_mapped_ubuf {
u64 ubuf;
- size_t len;
+ u64 ubuf_end;
struct bio_vec *bvec;
unsigned int nr_bvecs;
unsigned long acct_pages;
@@ -2783,7 +2783,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
return -EFAULT;
/* not inside the mapped region */
- if (buf_addr < imu->ubuf || buf_end > imu->ubuf + imu->len)
+ if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end))
return -EFAULT;
/*
@@ -8296,7 +8296,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
}
/* store original address for later verification */
imu->ubuf = ubuf;
- imu->len = iov->iov_len;
+ imu->ubuf_end = ubuf + iov->iov_len;
imu->nr_bvecs = nr_pages;
ret = 0;
done:
@@ -9353,9 +9353,9 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
+ unsigned int len = buf->ubuf_end - buf->ubuf;
- seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
- (unsigned int) buf->len);
+ seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len);
}
if (has_lock && !xa_empty(&ctx->personalities)) {
unsigned long index;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 17/26] io_uring: kill unused forward decls
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (15 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 16/26] io_uring: store reg buffer end instead of length Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 18/26] io_uring: lock annotate timeouts and poll Pavel Begunkov
` (9 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Kill unused forward declarations for io_ring_file_put() and
io_queue_next(). Also btw rename the first one.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bafe84ad5b32..352c231571dd 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1024,14 +1024,12 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct files_struct *files);
static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
-static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void io_put_req(struct io_kiocb *req);
static void io_put_req_deferred(struct io_kiocb *req, int nr);
static void io_dismantle_req(struct io_kiocb *req);
static void io_put_task(struct task_struct *task, int nr);
-static void io_queue_next(struct io_kiocb *req);
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
@@ -7436,7 +7434,7 @@ static int io_sqe_alloc_file_tables(struct io_rsrc_data *file_data,
return 1;
}
-static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
{
struct file *file = prsrc->file;
#if defined(CONFIG_UNIX)
@@ -7598,7 +7596,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
if (ret)
return ret;
- file_data = io_rsrc_data_alloc(ctx, io_ring_file_put);
+ file_data = io_rsrc_data_alloc(ctx, io_rsrc_file_put);
if (!file_data)
return -ENOMEM;
ctx->file_data = file_data;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 18/26] io_uring: lock annotate timeouts and poll
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (16 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 17/26] io_uring: kill unused forward decls Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 19/26] io_uring: simplify overflow handling Pavel Begunkov
` (8 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Add timeout and poll ->completion_lock annotations for Sparse; it makes life
easier while looking at the functions.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 352c231571dd..683db49a766e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4865,6 +4865,7 @@ static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
}
static void io_poll_remove_double(struct io_kiocb *req)
+ __must_hold(&req->ctx->completion_lock)
{
struct io_poll_iocb *poll = io_poll_get_double(req);
@@ -4883,6 +4884,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
}
static bool io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
+ __must_hold(&req->ctx->completion_lock)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned flags = IORING_CQE_F_MORE;
@@ -5188,6 +5190,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
static bool __io_poll_remove_one(struct io_kiocb *req,
struct io_poll_iocb *poll, bool do_cancel)
+ __must_hold(&req->ctx->completion_lock)
{
bool do_complete = false;
@@ -5206,6 +5209,7 @@ static bool __io_poll_remove_one(struct io_kiocb *req,
}
static bool io_poll_remove_waitqs(struct io_kiocb *req)
+ __must_hold(&req->ctx->completion_lock)
{
bool do_complete;
@@ -5229,6 +5233,7 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
}
static bool io_poll_remove_one(struct io_kiocb *req)
+ __must_hold(&req->ctx->completion_lock)
{
bool do_complete;
@@ -5272,6 +5277,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
}
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr)
+ __must_hold(&ctx->completion_lock)
{
struct hlist_head *list;
struct io_kiocb *req;
@@ -5287,6 +5293,7 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr)
}
static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
+ __must_hold(&ctx->completion_lock)
{
struct io_kiocb *req;
@@ -5493,6 +5500,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
__u64 user_data)
+ __must_hold(&ctx->completion_lock)
{
struct io_timeout_data *io;
struct io_kiocb *req;
@@ -5517,6 +5525,7 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
}
static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
+ __must_hold(&ctx->completion_lock)
{
struct io_kiocb *req = io_timeout_extract(ctx, user_data);
@@ -5531,6 +5540,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
struct timespec64 *ts, enum hrtimer_mode mode)
+ __must_hold(&ctx->completion_lock)
{
struct io_kiocb *req = io_timeout_extract(ctx, user_data);
struct io_timeout_data *data;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 19/26] io_uring: simplify overflow handling
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (17 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 18/26] io_uring: lock annotate timeouts and poll Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:43 ` [PATCH v4 20/26] io_uring: put link timeout req consistently Pavel Begunkov
` (7 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Overflowed CQEs don't lock requests anymore, so we don't care so much
about cancelling them, so kill cq_overflow_flushed and simplify the
code.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 683db49a766e..a621582a2f11 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -335,7 +335,6 @@ struct io_ring_ctx {
struct {
unsigned int flags;
unsigned int compat: 1;
- unsigned int cq_overflow_flushed: 1;
unsigned int drain_next: 1;
unsigned int eventfd_async: 1;
unsigned int restricted: 1;
@@ -1522,8 +1521,7 @@ static bool __io_cqring_fill_event(struct io_kiocb *req, long res,
WRITE_ONCE(cqe->flags, cflags);
return true;
}
- if (!ctx->cq_overflow_flushed &&
- !atomic_read(&req->task->io_uring->in_idle)) {
+ if (!atomic_read(&req->task->io_uring->in_idle)) {
struct io_overflow_cqe *ocqe;
ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
@@ -8468,6 +8466,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
mutex_lock(&ctx->uring_lock);
io_sqe_files_unregister(ctx);
+ if (ctx->rings)
+ __io_cqring_overflow_flush(ctx, true);
mutex_unlock(&ctx->uring_lock);
io_eventfd_unregister(ctx);
io_destroy_buffers(ctx);
@@ -8669,8 +8669,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
mutex_lock(&ctx->uring_lock);
percpu_ref_kill(&ctx->refs);
- /* if force is set, the ring is going away. always drop after that */
- ctx->cq_overflow_flushed = 1;
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true);
xa_for_each(&ctx->personalities, index, creds)
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 20/26] io_uring: put link timeout req consistently
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (18 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 19/26] io_uring: simplify overflow handling Pavel Begunkov
@ 2021-04-01 14:43 ` Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 21/26] io_uring: deduplicate NOSIGNAL setting Pavel Begunkov
` (6 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:43 UTC (permalink / raw)
To: Jens Axboe, io-uring
Don't put linked timeout req in io_async_find_and_cancel() but do it in
io_link_timeout_fn(), so we have only one point for that and won't have
to do it differently as it is now (put vs put_deferred). Btw, improve
io_async_find_and_cancel()'s locking a bit.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a621582a2f11..dcd2f206e058 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5746,12 +5746,9 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
int ret;
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT) {
- spin_lock_irqsave(&ctx->completion_lock, flags);
- goto done;
- }
-
spin_lock_irqsave(&ctx->completion_lock, flags);
+ if (ret != -ENOENT)
+ goto done;
ret = io_timeout_cancel(ctx, sqe_addr);
if (ret != -ENOENT)
goto done;
@@ -5766,7 +5763,6 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
if (ret < 0)
req_set_fail_links(req);
- io_put_req(req);
}
static int io_async_cancel_prep(struct io_kiocb *req,
@@ -6341,8 +6337,8 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
io_put_req_deferred(prev, 1);
} else {
io_req_complete_post(req, -ETIME, 0);
- io_put_req_deferred(req, 1);
}
+ io_put_req_deferred(req, 1);
return HRTIMER_NORESTART;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 21/26] io_uring: deduplicate NOSIGNAL setting
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (19 preceding siblings ...)
2021-04-01 14:43 ` [PATCH v4 20/26] io_uring: put link timeout req consistently Pavel Begunkov
@ 2021-04-01 14:44 ` Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 22/26] io_uring: set proper FFS* flags on reg file update Pavel Begunkov
` (5 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:44 UTC (permalink / raw)
To: Jens Axboe, io-uring
Set MSG_NOSIGNAL and REQ_F_NOWAIT in send/recv prep routines and don't
duplicate it in all four send/recv handlers.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 36 ++++++++++++++----------------------
1 file changed, 14 insertions(+), 22 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index dcd2f206e058..421e9d7d02fd 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4303,9 +4303,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+ if (sr->msg_flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
@@ -4334,12 +4336,9 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg = &iomsg;
}
- flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (issue_flags & IO_URING_F_NONBLOCK)
+ flags = req->sr_msg.msg_flags;
+ if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
-
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
@@ -4382,12 +4381,9 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_controllen = 0;
msg.msg_namelen = 0;
- flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (issue_flags & IO_URING_F_NONBLOCK)
+ flags = req->sr_msg.msg_flags;
+ if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
-
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
@@ -4530,10 +4526,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
sr->bgid = READ_ONCE(sqe->buf_group);
+ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+ if (sr->msg_flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
@@ -4574,12 +4572,9 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
1, req->sr_msg.len);
}
- flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
+ flags = req->sr_msg.msg_flags;
+ if (force_nonblock)
flags |= MSG_DONTWAIT;
-
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
@@ -4637,12 +4632,9 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_iocb = NULL;
msg.msg_flags = 0;
- flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
+ flags = req->sr_msg.msg_flags;
+ if (force_nonblock)
flags |= MSG_DONTWAIT;
-
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 22/26] io_uring: set proper FFS* flags on reg file update
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (20 preceding siblings ...)
2021-04-01 14:44 ` [PATCH v4 21/26] io_uring: deduplicate NOSIGNAL setting Pavel Begunkov
@ 2021-04-01 14:44 ` Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 23/26] io_uring: don't quiesce intial files register Pavel Begunkov
` (4 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:44 UTC (permalink / raw)
To: Jens Axboe, io-uring
Set FFS_* flags (e.g. FFS_ASYNC_READ) not only in initial registration
but also on registered files update. Not a bug, but without it updated
files miss out on the benefits of the feature.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 421e9d7d02fd..c5dd00babf59 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6272,6 +6272,19 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return (struct file *) ((unsigned long) *file_slot & FFS_MASK);
}
+static void io_fixed_file_set(struct file **file_slot, struct file *file)
+{
+ unsigned long file_ptr = (unsigned long) file;
+
+ if (__io_file_supports_async(file, READ))
+ file_ptr |= FFS_ASYNC_READ;
+ if (__io_file_supports_async(file, WRITE))
+ file_ptr |= FFS_ASYNC_WRITE;
+ if (S_ISREG(file_inode(file)->i_mode))
+ file_ptr |= FFS_ISREG;
+ *file_slot = (struct file *)file_ptr;
+}
+
static struct file *io_file_get(struct io_submit_state *state,
struct io_kiocb *req, int fd, bool fixed)
{
@@ -7608,8 +7621,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto out_free;
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
- unsigned long file_ptr;
-
if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
ret = -EFAULT;
goto out_fput;
@@ -7634,14 +7645,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
fput(file);
goto out_fput;
}
- file_ptr = (unsigned long) file;
- if (__io_file_supports_async(file, READ))
- file_ptr |= FFS_ASYNC_READ;
- if (__io_file_supports_async(file, WRITE))
- file_ptr |= FFS_ASYNC_WRITE;
- if (S_ISREG(file_inode(file)->i_mode))
- file_ptr |= FFS_ISREG;
- *io_fixed_file_slot(file_data, i) = (struct file *) file_ptr;
+ io_fixed_file_set(io_fixed_file_slot(file_data, i), file);
}
ret = io_sqe_files_scm(ctx);
@@ -7783,7 +7787,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- *file_slot = file;
+ io_fixed_file_set(file_slot, file);
err = io_sqe_file_register(ctx, file, i);
if (err) {
*file_slot = NULL;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 23/26] io_uring: don't quiesce intial files register
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (21 preceding siblings ...)
2021-04-01 14:44 ` [PATCH v4 22/26] io_uring: set proper FFS* flags on reg file update Pavel Begunkov
@ 2021-04-01 14:44 ` Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 24/26] io_uring: refactor file tables alloc/free Pavel Begunkov
` (3 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:44 UTC (permalink / raw)
To: Jens Axboe, io-uring
There is no reason why we would want to fully quiesce the ring on
IORING_REGISTER_FILES: if files are already registered, we fail.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c5dd00babf59..2b8496f76baa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9790,6 +9790,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
static bool io_register_op_must_quiesce(int op)
{
switch (op) {
+ case IORING_REGISTER_FILES:
case IORING_UNREGISTER_FILES:
case IORING_REGISTER_FILES_UPDATE:
case IORING_REGISTER_PROBE:
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 24/26] io_uring: refactor file tables alloc/free
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (22 preceding siblings ...)
2021-04-01 14:44 ` [PATCH v4 23/26] io_uring: don't quiesce intial files register Pavel Begunkov
@ 2021-04-01 14:44 ` Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 25/26] io_uring: encapsulate fixed files into struct Pavel Begunkov
` (2 subsequent siblings)
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:44 UTC (permalink / raw)
To: Jens Axboe, io-uring
Introduce a helper io_free_file_tables() doing all the cleaning, there
are several places where it's hand coded. Also move all allocations into
io_sqe_alloc_file_tables() and rename it, so all of it is in one place.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 52 +++++++++++++++++++++++++--------------------------
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2b8496f76baa..a9984ca025ba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7031,6 +7031,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
}
+static void io_free_file_tables(struct io_rsrc_data *data, unsigned nr_files)
+{
+ unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
+
+ for (i = 0; i < nr_tables; i++)
+ kfree(data->table[i].files);
+ kfree(data->table);
+ data->table = NULL;
+}
+
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
#if defined(CONFIG_UNIX)
@@ -7167,14 +7177,12 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
static void io_rsrc_data_free(struct io_rsrc_data *data)
{
percpu_ref_exit(&data->refs);
- kfree(data->table);
kfree(data);
}
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
struct io_rsrc_data *data = ctx->file_data;
- unsigned nr_tables, i;
int ret;
if (!data)
@@ -7184,9 +7192,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
return ret;
__io_sqe_files_unregister(ctx);
- nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
- for (i = 0; i < nr_tables; i++)
- kfree(data->table[i].files);
+ io_free_file_tables(data, ctx->nr_user_files);
io_rsrc_data_free(data);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
@@ -7416,16 +7422,20 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
}
#endif
-static int io_sqe_alloc_file_tables(struct io_rsrc_data *file_data,
- unsigned nr_tables, unsigned nr_files)
+static bool io_alloc_file_tables(struct io_rsrc_data *file_data,
+ unsigned nr_files)
{
- int i;
+ unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
+
+ file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
+ GFP_KERNEL);
+ if (!file_data->table)
+ return false;
for (i = 0; i < nr_tables; i++) {
struct fixed_rsrc_table *table = &file_data->table[i];
- unsigned this_files;
+ unsigned int this_files = min(nr_files, IORING_MAX_FILES_TABLE);
- this_files = min(nr_files, IORING_MAX_FILES_TABLE);
table->files = kcalloc(this_files, sizeof(struct file *),
GFP_KERNEL);
if (!table->files)
@@ -7434,13 +7444,10 @@ static int io_sqe_alloc_file_tables(struct io_rsrc_data *file_data,
}
if (i == nr_tables)
- return 0;
+ return true;
- for (i = 0; i < nr_tables; i++) {
- struct fixed_rsrc_table *table = &file_data->table[i];
- kfree(table->files);
- }
- return 1;
+ io_free_file_tables(file_data, nr_tables * IORING_MAX_FILES_TABLE);
+ return false;
}
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
@@ -7590,9 +7597,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
__s32 __user *fds = (__s32 __user *) arg;
- unsigned nr_tables, i;
struct file *file;
int fd, ret;
+ unsigned i;
struct io_rsrc_data *file_data;
if (ctx->file_data)
@@ -7611,13 +7618,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
ctx->file_data = file_data;
ret = -ENOMEM;
- nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
- file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
- GFP_KERNEL);
- if (!file_data->table)
- goto out_free;
-
- if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args))
+ if (!io_alloc_file_tables(file_data, nr_args))
goto out_free;
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
@@ -7662,8 +7663,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
if (file)
fput(file);
}
- for (i = 0; i < nr_tables; i++)
- kfree(file_data->table[i].files);
+ io_free_file_tables(file_data, nr_args);
ctx->nr_user_files = 0;
out_free:
io_rsrc_data_free(ctx->file_data);
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 25/26] io_uring: encapsulate fixed files into struct
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (23 preceding siblings ...)
2021-04-01 14:44 ` [PATCH v4 24/26] io_uring: refactor file tables alloc/free Pavel Begunkov
@ 2021-04-01 14:44 ` Pavel Begunkov
2021-04-01 14:44 ` [PATCH v4 26/26] io_uring: kill outdated comment about splice punt Pavel Begunkov
2021-04-04 19:16 ` [PATCH v4 00/26] ctx wide rsrc nodes + Jens Axboe
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:44 UTC (permalink / raw)
To: Jens Axboe, io-uring
Add struct io_fixed_file representing a single registered file, first to
hide ugly struct file **, which may be misleading, and secondly to
retype it to unsigned long as conversions to it and back to file * for
handling and masking FFS_* flags are getting nasty.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a9984ca025ba..c1d9fface7f4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -207,6 +207,11 @@ struct io_overflow_cqe {
struct list_head list;
};
+struct io_fixed_file {
+ /* file * with additional FFS_* flags */
+ unsigned long file_ptr;
+};
+
struct io_rsrc_put {
struct list_head list;
union {
@@ -216,7 +221,7 @@ struct io_rsrc_put {
};
struct fixed_rsrc_table {
- struct file **files;
+ struct io_fixed_file *files;
};
struct io_rsrc_node {
@@ -6255,8 +6260,8 @@ static void io_wq_submit_work(struct io_wq_work *work)
#endif
#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
-static inline struct file **io_fixed_file_slot(struct io_rsrc_data *file_data,
- unsigned i)
+static inline struct io_fixed_file *io_fixed_file_slot(struct io_rsrc_data *file_data,
+ unsigned i)
{
struct fixed_rsrc_table *table;
@@ -6267,12 +6272,12 @@ static inline struct file **io_fixed_file_slot(struct io_rsrc_data *file_data,
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
int index)
{
- struct file **file_slot = io_fixed_file_slot(ctx->file_data, index);
+ struct io_fixed_file *slot = io_fixed_file_slot(ctx->file_data, index);
- return (struct file *) ((unsigned long) *file_slot & FFS_MASK);
+ return (struct file *) (slot->file_ptr & FFS_MASK);
}
-static void io_fixed_file_set(struct file **file_slot, struct file *file)
+static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file)
{
unsigned long file_ptr = (unsigned long) file;
@@ -6282,7 +6287,7 @@ static void io_fixed_file_set(struct file **file_slot, struct file *file)
file_ptr |= FFS_ASYNC_WRITE;
if (S_ISREG(file_inode(file)->i_mode))
file_ptr |= FFS_ISREG;
- *file_slot = (struct file *)file_ptr;
+ file_slot->file_ptr = file_ptr;
}
static struct file *io_file_get(struct io_submit_state *state,
@@ -6297,7 +6302,7 @@ static struct file *io_file_get(struct io_submit_state *state,
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
return NULL;
fd = array_index_nospec(fd, ctx->nr_user_files);
- file_ptr = (unsigned long) *io_fixed_file_slot(ctx->file_data, fd);
+ file_ptr = io_fixed_file_slot(ctx->file_data, fd)->file_ptr;
file = (struct file *) (file_ptr & FFS_MASK);
file_ptr &= ~FFS_MASK;
/* mask in overlapping REQ_F and FFS bits */
@@ -7733,7 +7738,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
unsigned nr_args)
{
struct io_rsrc_data *data = ctx->file_data;
- struct file *file, **file_slot;
+ struct io_fixed_file *file_slot;
+ struct file *file;
__s32 __user *fds;
int fd, i, err;
__u32 done;
@@ -7760,12 +7766,12 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
i = array_index_nospec(up->offset + done, ctx->nr_user_files);
file_slot = io_fixed_file_slot(ctx->file_data, i);
- if (*file_slot) {
- file = (struct file *) ((unsigned long) *file_slot & FFS_MASK);
+ if (file_slot->file_ptr) {
+ file = (struct file *)(file_slot->file_ptr & FFS_MASK);
err = io_queue_rsrc_removal(data, ctx->rsrc_node, file);
if (err)
break;
- *file_slot = NULL;
+ file_slot->file_ptr = 0;
needs_switch = true;
}
if (fd != -1) {
@@ -7790,7 +7796,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
io_fixed_file_set(file_slot, file);
err = io_sqe_file_register(ctx, file, i);
if (err) {
- *file_slot = NULL;
+ file_slot->file_ptr = 0;
fput(file);
break;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v4 26/26] io_uring: kill outdated comment about splice punt
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (24 preceding siblings ...)
2021-04-01 14:44 ` [PATCH v4 25/26] io_uring: encapsulate fixed files into struct Pavel Begunkov
@ 2021-04-01 14:44 ` Pavel Begunkov
2021-04-04 19:16 ` [PATCH v4 00/26] ctx wide rsrc nodes + Jens Axboe
26 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-01 14:44 UTC (permalink / raw)
To: Jens Axboe, io-uring
The splice/tee comment in io_prep_async_work() isn't relevant since the
section was moved, delete it.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c1d9fface7f4..b20fec2e2be6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1224,10 +1224,6 @@ static void io_prep_async_work(struct io_kiocb *req)
switch (req->opcode) {
case IORING_OP_SPLICE:
case IORING_OP_TEE:
- /*
- * Splice operation will be punted aync, and here need to
- * modify io_wq_work.flags, so initialize io_wq_work firstly.
- */
if (!S_ISREG(file_inode(req->splice.file_in)->i_mode))
req->work.flags |= IO_WQ_WORK_UNBOUND;
break;
--
2.24.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH v4 00/26] ctx wide rsrc nodes +
2021-04-01 14:43 [PATCH v4 00/26] ctx wide rsrc nodes + Pavel Begunkov
` (25 preceding siblings ...)
2021-04-01 14:44 ` [PATCH v4 26/26] io_uring: kill outdated comment about splice punt Pavel Begunkov
@ 2021-04-04 19:16 ` Jens Axboe
2021-04-04 19:22 ` Pavel Begunkov
26 siblings, 1 reply; 29+ messages in thread
From: Jens Axboe @ 2021-04-04 19:16 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 4/1/21 8:43 AM, Pavel Begunkov wrote:
> 1-7 implement ctx wide rsrc nodes. The main idea here is to make make
> rsrc nodes (aka ref nodes) to be per ctx rather than per rsrc_data, that
> is a requirement for having multiple resource types. All the meat to it
> in 7/7. Btw improve rsrc API, because it was too easy to misuse.
>
> Others are further cleanups
Applied 1-9, and 10-26 - #10 needs some love with the recent changes.
--
Jens Axboe
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v4 00/26] ctx wide rsrc nodes +
2021-04-04 19:16 ` [PATCH v4 00/26] ctx wide rsrc nodes + Jens Axboe
@ 2021-04-04 19:22 ` Pavel Begunkov
0 siblings, 0 replies; 29+ messages in thread
From: Pavel Begunkov @ 2021-04-04 19:22 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 04/04/2021 20:16, Jens Axboe wrote:
> On 4/1/21 8:43 AM, Pavel Begunkov wrote:
>> 1-7 implement ctx wide rsrc nodes. The main idea here is to make make
>> rsrc nodes (aka ref nodes) to be per ctx rather than per rsrc_data, that
>> is a requirement for having multiple resource types. All the meat to it
>> in 7/7. Btw improve rsrc API, because it was too easy to misuse.
>>
>> Others are further cleanups
>
> Applied 1-9, and 10-26 - #10 needs some love with the recent changes.
Perfect, thanks. Was rebasing and testing myself.
#10 is not needed at all now, so let's keep it dropped
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 29+ messages in thread