From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, Zhang Qiang <[email protected]>
Subject: [PATCH 2/4] io-wq: re-set NUMA node affinities if CPUs come online
Date: Thu, 22 Oct 2020 16:24:45 -0600 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
We correctly set io-wq NUMA node affinities when the io-wq context is
setup, but if an entire node CPU set is offlined and then brought back
online, the per node affinities are broken. Ensure that we set them
again whenever a CPU comes online. This ensures that we always track
the right node affinity. The usual cpuhp notifiers are used to drive it.
Reported-by: Zhang Qiang <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 54 insertions(+), 4 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 4012ff541b7b..d3165ce339c2 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -19,6 +19,7 @@
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
+#include <linux/cpu.h>
#include "io-wq.h"
@@ -123,9 +124,13 @@ struct io_wq {
refcount_t refs;
struct completion done;
+ struct hlist_node cpuhp_node;
+
refcount_t use_refs;
};
+static enum cpuhp_state io_wq_online;
+
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@@ -1091,10 +1096,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM);
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
- if (!wq->wqes) {
- kfree(wq);
- return ERR_PTR(-ENOMEM);
- }
+ if (!wq->wqes)
+ goto err_wq;
+
+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+ if (ret)
+ goto err_wqes;
wq->free_work = data->free_work;
wq->do_work = data->do_work;
@@ -1102,6 +1109,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
/* caller must already hold a reference to this */
wq->user = data->user;
+ ret = -ENOMEM;
for_each_node(node) {
struct io_wqe *wqe;
int alloc_node = node;
@@ -1145,9 +1153,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
ret = PTR_ERR(wq->manager);
complete(&wq->done);
err:
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
+err_wqes:
kfree(wq->wqes);
+err_wq:
kfree(wq);
return ERR_PTR(ret);
}
@@ -1164,6 +1175,8 @@ static void __io_wq_destroy(struct io_wq *wq)
{
int node;
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+
set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager)
kthread_stop(wq->manager);
@@ -1191,3 +1204,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
{
return wq->manager;
}
+
+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
+{
+ struct task_struct *task = worker->task;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
+ task->flags |= PF_NO_SETAFFINITY;
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return false;
+}
+
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+ int i;
+
+ rcu_read_lock();
+ for_each_node(i)
+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+ rcu_read_unlock();
+ return 0;
+}
+
+static __init int io_wq_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
+ io_wq_cpu_online, NULL);
+ if (ret < 0)
+ return ret;
+ io_wq_online = ret;
+ return 0;
+}
+subsys_initcall(io_wq_init);
--
2.29.0
next prev parent reply other threads:[~2020-10-22 22:24 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-22 22:24 Fixes for 5.10 v2 Jens Axboe
2020-10-22 22:24 ` [PATCH 1/4] io_uring: unify fsize with def->work_flags Jens Axboe
2020-10-22 22:24 ` Jens Axboe [this message]
2020-10-22 22:24 ` [PATCH 3/4] io_uring: make loop_rw_iter() use original user supplied pointers Jens Axboe
2020-10-22 22:24 ` [PATCH 4/4] splice: change exported internal do_splice() helper to take kernel offset Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox