* [PATCH 1/2] io-wq: use private CPU mask
2021-06-17 16:29 [PATCHSET 0/2] Allow io-wq user configurable CPU masks Jens Axboe
@ 2021-06-17 16:29 ` Jens Axboe
2021-06-17 16:29 ` [PATCH 2/2] io_uring: allow user configurable IO thread CPU affinity Jens Axboe
1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2021-06-17 16:29 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
In preparation for allowing user specific CPU masks for IO thread
creation, switch to using a mask embedded in the per-node wqe
structure.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 50 +++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 43 insertions(+), 7 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 897b94530b57..2af8e1df4646 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -94,6 +94,8 @@ struct io_wqe {
struct io_wq *wq;
struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+
+ cpumask_var_t cpu_mask;
};
/*
@@ -638,7 +640,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
tsk->pf_io_worker = worker;
worker->task = tsk;
- set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+ set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
raw_spin_lock_irq(&wqe->lock);
@@ -922,6 +924,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
if (!wqe)
goto err;
+ if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
+ goto err;
+ cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
wq->wqes[node] = wqe;
wqe->node = alloc_node;
wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
@@ -947,8 +952,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
err:
io_wq_put_hash(data->hash);
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
- for_each_node(node)
+ for_each_node(node) {
+ if (!wq->wqes[node])
+ continue;
+ free_cpumask_var(wq->wqes[node]->cpu_mask);
kfree(wq->wqes[node]);
+ }
err_wq:
kfree(wq);
return ERR_PTR(ret);
@@ -1018,6 +1027,7 @@ static void io_wq_destroy(struct io_wq *wq)
.cancel_all = true,
};
io_wqe_cancel_pending_work(wqe, &match);
+ free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
io_wq_put_hash(wq->hash);
@@ -1032,31 +1042,57 @@ void io_wq_put_and_exit(struct io_wq *wq)
io_wq_destroy(wq);
}
+struct online_data {
+ unsigned int cpu;
+ bool online;
+};
+
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
{
- set_cpus_allowed_ptr(worker->task, cpumask_of_node(worker->wqe->node));
+ struct online_data *od = data;
+ if (od->online)
+ cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask);
+ else
+ cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask);
return false;
}
-static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
{
- struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+ struct online_data od = {
+ .cpu = cpu,
+ .online = online
+ };
int i;
rcu_read_lock();
for_each_node(i)
- io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od);
rcu_read_unlock();
return 0;
}
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+ return __io_wq_cpu_online(wq, cpu, true);
+}
+
+static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+ return __io_wq_cpu_online(wq, cpu, false);
+}
+
static __init int io_wq_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
- io_wq_cpu_online, NULL);
+ io_wq_cpu_online, io_wq_cpu_offline);
if (ret < 0)
return ret;
io_wq_online = ret;
--
2.32.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] io_uring: allow user configurable IO thread CPU affinity
2021-06-17 16:29 [PATCHSET 0/2] Allow io-wq user configurable CPU masks Jens Axboe
2021-06-17 16:29 ` [PATCH 1/2] io-wq: use private CPU mask Jens Axboe
@ 2021-06-17 16:29 ` Jens Axboe
1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2021-06-17 16:29 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
io-wq defaults to per-node masks for IO workers. This works fine by
default, but isn't particularly handy for workloads that prefer more
specific affinities, for either performance or isolation reasons.
This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU
mask that is then applied to IO thread workers, and an
IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the
default of per-node.
Note that no care is given to existing IO threads, they will need to go
through a reschedule before the affinity is correct if they are already
running or sleeping.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 17 ++++++++++++
fs/io-wq.h | 2 ++
fs/io_uring.c | 51 +++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 4 +++
4 files changed, 74 insertions(+)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 2af8e1df4646..bb4d3ee9592e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
return __io_wq_cpu_online(wq, cpu, false);
}
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+{
+ int i;
+
+ rcu_read_lock();
+ for_each_node(i) {
+ struct io_wqe *wqe = wq->wqes[i];
+
+ if (mask)
+ cpumask_copy(wqe->cpu_mask, mask);
+ else
+ cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
static __init int io_wq_init(void)
{
int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index af2df0680ee2..02299cdcf55c 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
return work->flags & IO_WQ_WORK_HASHED;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d916eb2cef09..46a25a7cb70a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
}
+static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned len)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ cpumask_var_t new_mask;
+ int ret;
+
+ if (!tctx || !tctx->io_wq)
+ return -EINVAL;
+
+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_clear(new_mask);
+ if (len > cpumask_size())
+ len = cpumask_size();
+
+ if (copy_from_user(new_mask, arg, len)) {
+ free_cpumask_var(new_mask);
+ return -EFAULT;
+ }
+
+ ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
+ free_cpumask_var(new_mask);
+ return ret;
+}
+
+static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx || !tctx->io_wq)
+ return -EINVAL;
+
+ return io_wq_cpu_affinity(tctx->io_wq, NULL);
+}
+
static bool io_register_op_must_quiesce(int op)
{
switch (op) {
@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
case IORING_REGISTER_FILES_UPDATE2:
case IORING_REGISTER_BUFFERS2:
case IORING_REGISTER_BUFFERS_UPDATE:
+ case IORING_REGISTER_IOWQ_AFF:
+ case IORING_UNREGISTER_IOWQ_AFF:
return false;
default:
return true;
@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = io_register_rsrc_update(ctx, arg, nr_args,
IORING_RSRC_BUFFER);
break;
+ case IORING_REGISTER_IOWQ_AFF:
+ ret = -EINVAL;
+ if (!arg || !nr_args)
+ break;
+ ret = io_register_iowq_aff(ctx, arg, nr_args);
+ break;
+ case IORING_UNREGISTER_IOWQ_AFF:
+ ret = -EINVAL;
+ if (arg || nr_args)
+ break;
+ ret = io_unregister_iowq_aff(ctx);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 162ff99ed2cb..f1f9ac114b51 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -306,6 +306,10 @@ enum {
IORING_REGISTER_BUFFERS2 = 15,
IORING_REGISTER_BUFFERS_UPDATE = 16,
+ /* set/clear io-wq thread affinities */
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,
+
/* this goes last */
IORING_REGISTER_LAST
};
--
2.32.0
^ permalink raw reply related [flat|nested] 3+ messages in thread