From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH 1/2] io_uring: allocate req->work dynamically
Date: Sat, 25 Jul 2020 11:31:22 +0300
Message-ID: <e42b99fa6acc36ea5880f611182768fee9eb583b.1595664743.git.asml.silence@gmail.com>
In-Reply-To: <[email protected]>
req->work takes a lot of space and is not needed in the hot path, so
don't embed it into struct io_kiocb; allocate it dynamically instead.
The changes are pretty straightforward, the only noticeable one being a
new ->private field in io_wq_work, growing it to 64B, which stores a
reference back to the owning request.

That shrinks io_kiocb to 200 bytes.
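For illustration only, a minimal userspace sketch of the pattern (calloc
standing in for kzalloc, both structs trimmed to the couple of fields
that matter here); the real helpers are io_req_init_async() and
io_prep_async_work() in the diff below:

#include <stdlib.h>
#include <stdbool.h>

struct io_wq_work {
	void *private;			/* back-reference to the owning request */
	/* ... mm, creds, fs, flags, fsize ... */
};

struct io_kiocb {
	/* ... hot-path fields ... */
	struct io_wq_work *work;	/* NULL until async state is needed */
};

/* Mirrors io_req_init_async(): allocate ->work lazily on first use. */
static bool req_init_async(struct io_kiocb *req)
{
	if (req->work)
		return true;
	req->work = calloc(1, sizeof(*req->work));
	return req->work != NULL;
}

/* Mirrors io_prep_async_work(): set the back-pointer once allocated. */
static bool req_prep_async_work(struct io_kiocb *req)
{
	if (!req_init_async(req))
		return false;		/* caller fails the request */
	req->work->private = req;
	return true;
}

int main(void)
{
	struct io_kiocb req = { .work = NULL };

	if (!req_prep_async_work(&req))
		return 1;
	free(req.work);
	return 0;
}

On allocation failure the callers now fail the request with -ENOMEM
instead of carrying on, which is where most of the churn in the diff
below comes from.
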
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io-wq.h | 1 +
fs/io_uring.c | 203 +++++++++++++++++++++++++-------------------------
2 files changed, 104 insertions(+), 100 deletions(-)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index ddaf9614cf9b..1358e022ed4b 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -85,6 +85,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work {
struct io_wq_work_node list;
+ void *private;
struct files_struct *files;
struct mm_struct *mm;
const struct cred *creds;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c7e8e9a1b27b..ef4c6e50aa4f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -544,7 +544,6 @@ enum {
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
- REQ_F_WORK_INITIALIZED_BIT,
REQ_F_TASK_PINNED_BIT,
/* not a real bit, just to check we're not overflowing the space */
@@ -591,8 +590,6 @@ enum {
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
/* doesn't need file table for this request */
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
- /* io_wq_work is initialized */
- REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
/* req->task is refcounted */
REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT),
};
@@ -600,7 +597,6 @@ enum {
struct async_poll {
struct io_poll_iocb poll;
struct io_poll_iocb *double_poll;
- struct io_wq_work work;
};
/*
@@ -657,19 +653,13 @@ struct io_kiocb {
struct percpu_ref *fixed_file_refs;
- union {
- /*
- * Only commands that never go async can use the below fields,
- * obviously. Right now only IORING_OP_POLL_ADD uses them, and
- * async armed poll handlers for regular commands. The latter
- * restore the work, if needed.
- */
- struct {
- struct hlist_node hash_node;
- struct async_poll *apoll;
- };
- struct io_wq_work work;
- };
+ /*
+ * Right now only IORING_OP_POLL_ADD uses it, and
+ * async armed poll handlers for regular commands.
+ */
+ struct hlist_node hash_node;
+ struct async_poll *apoll;
+ struct io_wq_work *work;
struct callback_head task_work;
};
@@ -902,6 +892,7 @@ enum io_mem_account {
ACCT_PINNED,
};
+static void io_req_complete(struct io_kiocb *req, long res);
static bool io_rw_reissue(struct io_kiocb *req, long res);
static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void io_put_req(struct io_kiocb *req);
@@ -1008,13 +999,12 @@ static inline void req_set_fail_links(struct io_kiocb *req)
* Note: must call io_req_init_async() for the first time you
* touch any members of io_wq_work.
*/
-static inline void io_req_init_async(struct io_kiocb *req)
+static inline bool io_req_init_async(struct io_kiocb *req)
{
- if (req->flags & REQ_F_WORK_INITIALIZED)
- return;
-
- memset(&req->work, 0, sizeof(req->work));
- req->flags |= REQ_F_WORK_INITIALIZED;
+ if (req->work)
+ return true;
+ req->work = kzalloc(sizeof(*req->work), GFP_KERNEL);
+ return req->work != NULL;
}
static inline bool io_async_submit(struct io_ring_ctx *ctx)
@@ -1121,72 +1111,85 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
static void io_req_clean_work(struct io_kiocb *req)
{
- if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ struct io_wq_work *work = req->work;
+
+ if (!work)
return;
- if (req->work.mm) {
- mmdrop(req->work.mm);
- req->work.mm = NULL;
+ if (work->mm) {
+ mmdrop(work->mm);
+ work->mm = NULL;
}
- if (req->work.creds) {
- put_cred(req->work.creds);
- req->work.creds = NULL;
+ if (work->creds) {
+ put_cred(work->creds);
+ work->creds = NULL;
}
- if (req->work.fs) {
- struct fs_struct *fs = req->work.fs;
+ if (work->fs) {
+ struct fs_struct *fs = work->fs;
- spin_lock(&req->work.fs->lock);
+ spin_lock(&work->fs->lock);
if (--fs->users)
fs = NULL;
- spin_unlock(&req->work.fs->lock);
+ spin_unlock(&work->fs->lock);
if (fs)
free_fs_struct(fs);
}
+ kfree(work);
}
-static void io_prep_async_work(struct io_kiocb *req)
+static bool io_prep_async_work(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
+ struct io_wq_work *work;
- io_req_init_async(req);
+ if (!io_req_init_async(req))
+ return false;
+ work = req->work;
+ work->private = req;
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file)
- io_wq_hash_work(&req->work, file_inode(req->file));
+ io_wq_hash_work(work, file_inode(req->file));
} else {
if (def->unbound_nonreg_file)
- req->work.flags |= IO_WQ_WORK_UNBOUND;
+ work->flags |= IO_WQ_WORK_UNBOUND;
}
- if (!req->work.mm && def->needs_mm) {
+ if (!work->mm && def->needs_mm) {
mmgrab(current->mm);
- req->work.mm = current->mm;
+ work->mm = current->mm;
}
- if (!req->work.creds)
- req->work.creds = get_current_cred();
- if (!req->work.fs && def->needs_fs) {
+ if (!work->creds)
+ work->creds = get_current_cred();
+ if (!work->fs && def->needs_fs) {
spin_lock(&current->fs->lock);
if (!current->fs->in_exec) {
- req->work.fs = current->fs;
- req->work.fs->users++;
+ work->fs = current->fs;
+ work->fs->users++;
} else {
- req->work.flags |= IO_WQ_WORK_CANCEL;
+ work->flags |= IO_WQ_WORK_CANCEL;
}
spin_unlock(&current->fs->lock);
}
if (def->needs_fsize)
- req->work.fsize = rlimit(RLIMIT_FSIZE);
+ work->fsize = rlimit(RLIMIT_FSIZE);
else
- req->work.fsize = RLIM_INFINITY;
+ work->fsize = RLIM_INFINITY;
+ return true;
}
-static void io_prep_async_link(struct io_kiocb *req)
+static bool io_prep_async_link(struct io_kiocb *req)
{
struct io_kiocb *cur;
- io_prep_async_work(req);
- if (req->flags & REQ_F_LINK_HEAD)
- list_for_each_entry(cur, &req->link_list, link_list)
- io_prep_async_work(cur);
+ if (!io_prep_async_work(req))
+ return false;
+ if (!(req->flags & REQ_F_LINK_HEAD))
+ return true;
+
+ list_for_each_entry(cur, &req->link_list, link_list)
+ if (!io_prep_async_work(cur))
+ return false;
+ return true;
}
static void __io_queue_async_work(struct io_kiocb *req)
@@ -1194,9 +1197,9 @@ static void __io_queue_async_work(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link = io_prep_linked_timeout(req);
- trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
- &req->work, req->flags);
- io_wq_enqueue(ctx->io_wq, &req->work);
+ trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(req->work), req,
+ req->work, req->flags);
+ io_wq_enqueue(ctx->io_wq, req->work);
if (link)
io_queue_linked_timeout(link);
@@ -1205,7 +1208,12 @@ static void __io_queue_async_work(struct io_kiocb *req)
static void io_queue_async_work(struct io_kiocb *req)
{
/* init ->work of the whole link before punting */
- io_prep_async_link(req);
+ if (!io_prep_async_link(req)) {
+ req_set_fail_links(req);
+ io_put_req(req);
+ io_req_complete(req, -ENOMEM);
+ return;
+ }
__io_queue_async_work(req);
}
@@ -1898,7 +1906,7 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
return NULL;
nxt = io_req_find_next(req);
- return nxt ? &nxt->work : NULL;
+ return nxt ? nxt->work : NULL;
}
/*
@@ -3226,8 +3234,9 @@ static int __io_splice_prep(struct io_kiocb *req,
* Splice operation will be punted aync, and here need to
* modify io_wq_work.flags, so initialize io_wq_work firstly.
*/
- io_req_init_async(req);
- req->work.flags |= IO_WQ_WORK_UNBOUND;
+ if (!io_req_init_async(req))
+ return -ENOMEM;
+ req->work->flags |= IO_WQ_WORK_UNBOUND;
}
return 0;
@@ -3804,8 +3813,9 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
* leave the 'file' in an undeterminate state, and here need to modify
* io_wq_work.flags, so initialize io_wq_work firstly.
*/
- io_req_init_async(req);
- req->work.flags |= IO_WQ_WORK_NO_CANCEL;
+ if (!io_req_init_async(req))
+ return -ENOMEM;
+ req->work->flags |= IO_WQ_WORK_NO_CANCEL;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
@@ -3847,7 +3857,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock,
}
/* No ->flush() or already async, safely close from here */
- ret = filp_close(close->put_file, req->work.files);
+ ret = filp_close(close->put_file, req->work->files);
if (ret < 0)
req_set_fail_links(req);
fput(close->put_file);
@@ -4666,10 +4676,6 @@ static void io_async_task_func(struct callback_head *cb)
io_poll_remove_double(req, apoll->double_poll);
spin_unlock_irq(&ctx->completion_lock);
- /* restore ->work in case we need to retry again */
- if (req->flags & REQ_F_WORK_INITIALIZED)
- memcpy(&req->work, &apoll->work, sizeof(req->work));
-
if (!READ_ONCE(apoll->poll.canceled))
__io_req_task_submit(req);
else
@@ -4761,9 +4767,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
apoll->double_poll = NULL;
req->flags |= REQ_F_POLLED;
- if (req->flags & REQ_F_WORK_INITIALIZED)
- memcpy(&apoll->work, &req->work, sizeof(req->work));
-
io_get_req_task(req);
req->apoll = apoll;
INIT_HLIST_NODE(&req->hash_node);
@@ -4782,8 +4785,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
if (ret) {
io_poll_remove_double(req, apoll->double_poll);
spin_unlock_irq(&ctx->completion_lock);
- if (req->flags & REQ_F_WORK_INITIALIZED)
- memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll->double_poll);
kfree(apoll);
return false;
@@ -4826,14 +4827,6 @@ static bool io_poll_remove_one(struct io_kiocb *req)
do_complete = __io_poll_remove_one(req, &apoll->poll);
if (do_complete) {
io_put_req(req);
- /*
- * restore ->work because we will call
- * io_req_clean_work below when dropping the
- * final reference.
- */
- if (req->flags & REQ_F_WORK_INITIALIZED)
- memcpy(&req->work, &apoll->work,
- sizeof(req->work));
kfree(apoll->double_poll);
kfree(apoll);
}
@@ -5166,7 +5159,7 @@ static int io_timeout(struct io_kiocb *req)
static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_kiocb *req = work->private;
return req->user_data == (unsigned long) data;
}
@@ -5434,7 +5427,9 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (ret)
return ret;
}
- io_prep_async_link(req);
+ if (!io_prep_async_link(req))
+ return -ENOMEM;
+
de = kmalloc(sizeof(*de), GFP_KERNEL);
if (!de)
return -ENOMEM;
@@ -5757,7 +5752,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_kiocb *req = work->private;
struct io_kiocb *timeout;
int ret = 0;
@@ -5847,9 +5842,10 @@ static int io_grab_files(struct io_kiocb *req)
int ret = -EBADF;
struct io_ring_ctx *ctx = req->ctx;
- io_req_init_async(req);
+ if (!io_req_init_async(req))
+ return -ENOMEM;
- if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE))
+ if (req->work->files || (req->flags & REQ_F_NO_FILE_TABLE))
return 0;
if (!ctx->ring_file)
return -EBADF;
@@ -5865,7 +5861,7 @@ static int io_grab_files(struct io_kiocb *req)
if (fcheck(ctx->ring_fd) == ctx->ring_file) {
list_add(&req->inflight_entry, &ctx->inflight_list);
req->flags |= REQ_F_INFLIGHT;
- req->work.files = current->files;
+ req->work->files = current->files;
ret = 0;
}
spin_unlock_irq(&ctx->inflight_lock);
@@ -5964,19 +5960,20 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_kiocb *linked_timeout;
struct io_kiocb *nxt;
const struct cred *old_creds = NULL;
+ struct io_wq_work *work;
int ret;
again:
linked_timeout = io_prep_linked_timeout(req);
- if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
- req->work.creds != current_cred()) {
+ work = req->work;
+ if (work && work->creds && work->creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
- if (old_creds == req->work.creds)
+ if (old_creds == work->creds)
old_creds = NULL; /* restored original creds */
else
- old_creds = override_creds(req->work.creds);
+ old_creds = override_creds(work->creds);
}
ret = io_issue_sqe(req, sqe, true, cs);
@@ -6050,12 +6047,16 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
goto fail_req;
}
+ if (!io_req_init_async(req)) {
+ ret = -ENOMEM;
+ goto fail_req;
+ }
+
/*
* Never try inline submit of IOSQE_ASYNC is set, go straight
* to async execution.
*/
- io_req_init_async(req);
- req->work.flags |= IO_WQ_WORK_CONCURRENT;
+ req->work->flags |= IO_WQ_WORK_CONCURRENT;
io_queue_async_work(req);
} else {
__io_queue_sqe(req, sqe, cs);
@@ -6231,6 +6232,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->file = NULL;
req->ctx = ctx;
req->flags = 0;
+ req->work = NULL;
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
req->task = current;
@@ -6253,11 +6255,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality);
if (id) {
- io_req_init_async(req);
- req->work.creds = idr_find(&ctx->personality_idr, id);
- if (unlikely(!req->work.creds))
+ if (!io_req_init_async(req))
+ return -ENOMEM;
+ req->work->creds = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!req->work->creds))
return -EINVAL;
- get_cred(req->work.creds);
+ get_cred(req->work->creds);
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
@@ -7237,7 +7240,7 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
static void io_free_work(struct io_wq_work *work)
{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_kiocb *req = work->private;
/* Consider that io_steal_work() relies on this ref */
io_put_req(req);
@@ -7853,7 +7856,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
- if (req->work.files != files)
+ if (req->work->files != files)
continue;
/* req is being completed, ignore */
if (!refcount_inc_not_zero(&req->refs))
@@ -7894,7 +7897,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
continue;
}
} else {
- io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
+ io_wq_cancel_work(ctx->io_wq, cancel_req->work);
io_put_req(cancel_req);
}
@@ -7905,7 +7908,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_kiocb *req = work->private;
struct task_struct *task = data;
return req->task == task;
--
2.24.0