public inbox for [email protected]
* [PATCHSET 0/2] Allow io-wq user configurable CPU masks
@ 2021-06-17 16:29 Jens Axboe
  2021-06-17 16:29 ` [PATCH 1/2] io-wq: use private CPU mask Jens Axboe
  2021-06-17 16:29 ` [PATCH 2/2] io_uring: allow user configurable IO thread CPU affinity Jens Axboe
  0 siblings, 2 replies; 3+ messages in thread
From: Jens Axboe @ 2021-06-17 16:29 UTC (permalink / raw)
  To: io-uring

Hi,

This adds IORING_REGISTER_IOWQ_AFF, which allows an application to
specify CPU affinities for any IO threads that may get created to
service requests. The full explanation is in patch 2; patch 1 is just a
prep patch that makes this easier to accomplish.

-- 
Jens Axboe




* [PATCH 1/2] io-wq: use private CPU mask
  2021-06-17 16:29 [PATCHSET 0/2] Allow io-wq user configurable CPU masks Jens Axboe
@ 2021-06-17 16:29 ` Jens Axboe
  2021-06-17 16:29 ` [PATCH 2/2] io_uring: allow user configurable IO thread CPU affinity Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2021-06-17 16:29 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

In preparation for allowing user-specific CPU masks for IO thread
creation, switch to using a mask embedded in the per-node wqe
structure.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io-wq.c | 50 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 897b94530b57..2af8e1df4646 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -94,6 +94,8 @@ struct io_wqe {
 
 	struct io_wq *wq;
 	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+
+	cpumask_var_t cpu_mask;
 };
 
 /*
@@ -638,7 +640,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 
 	tsk->pf_io_worker = worker;
 	worker->task = tsk;
-	set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+	set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
 	tsk->flags |= PF_NO_SETAFFINITY;
 
 	raw_spin_lock_irq(&wqe->lock);
@@ -922,6 +924,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
 		if (!wqe)
 			goto err;
+		if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
+			goto err;
+		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
 		wq->wqes[node] = wqe;
 		wqe->node = alloc_node;
 		wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
@@ -947,8 +952,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 err:
 	io_wq_put_hash(data->hash);
 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
-	for_each_node(node)
+	for_each_node(node) {
+		if (!wq->wqes[node])
+			continue;
+		free_cpumask_var(wq->wqes[node]->cpu_mask);
 		kfree(wq->wqes[node]);
+	}
 err_wq:
 	kfree(wq);
 	return ERR_PTR(ret);
@@ -1018,6 +1027,7 @@ static void io_wq_destroy(struct io_wq *wq)
 			.cancel_all	= true,
 		};
 		io_wqe_cancel_pending_work(wqe, &match);
+		free_cpumask_var(wqe->cpu_mask);
 		kfree(wqe);
 	}
 	io_wq_put_hash(wq->hash);
@@ -1032,31 +1042,57 @@ void io_wq_put_and_exit(struct io_wq *wq)
 	io_wq_destroy(wq);
 }
 
+struct online_data {
+	unsigned int cpu;
+	bool online;
+};
+
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
 {
-	set_cpus_allowed_ptr(worker->task, cpumask_of_node(worker->wqe->node));
+	struct online_data *od = data;
 
+	if (od->online)
+		cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask);
+	else
+		cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask);
 	return false;
 }
 
-static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
 {
-	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+	struct online_data od = {
+		.cpu = cpu,
+		.online = online
+	};
 	int i;
 
 	rcu_read_lock();
 	for_each_node(i)
-		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od);
 	rcu_read_unlock();
 	return 0;
 }
 
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+	return __io_wq_cpu_online(wq, cpu, true);
+}
+
+static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+	return __io_wq_cpu_online(wq, cpu, false);
+}
+
 static __init int io_wq_init(void)
 {
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
-					io_wq_cpu_online, NULL);
+					io_wq_cpu_online, io_wq_cpu_offline);
 	if (ret < 0)
 		return ret;
 	io_wq_online = ret;
-- 
2.32.0



* [PATCH 2/2] io_uring: allow user configurable IO thread CPU affinity
  2021-06-17 16:29 [PATCHSET 0/2] Allow io-wq user configurable CPU masks Jens Axboe
  2021-06-17 16:29 ` [PATCH 1/2] io-wq: use private CPU mask Jens Axboe
@ 2021-06-17 16:29 ` Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2021-06-17 16:29 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

io-wq defaults to per-node CPU masks for IO workers. This works fine for
most workloads, but isn't particularly handy for those that prefer more
specific affinities, for either performance or isolation reasons.

This adds IORING_REGISTER_IOWQ_AFF, which allows the user to pass in a CPU
mask that is then applied to IO thread workers, and
IORING_UNREGISTER_IOWQ_AFF, which simply resets the masks back to the
per-node default.

Note that no care is given to existing IO threads: if they are already
running or sleeping, they will need to go through a reschedule before the
new affinity takes effect.
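
For reference, usage from an application could look something like the
sketch below. This is only an illustrative sketch, not part of the patch:
the ring setup, error handling, helper names, and CPU numbers are assumed,
and the raw io_uring_register() syscall is used directly rather than any
library wrapper. The mask is passed as a plain bitmap (a cpu_set_t works),
with nr_args being its size in bytes:

  #define _GNU_SOURCE
  #include <sched.h>		/* cpu_set_t, CPU_ZERO, CPU_SET */
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <linux/io_uring.h>

  /* Illustrative helper: pin io-wq workers of this ring to CPUs 0 and 2 */
  static int set_iowq_affinity(int ring_fd)
  {
  	cpu_set_t mask;

  	CPU_ZERO(&mask);
  	CPU_SET(0, &mask);
  	CPU_SET(2, &mask);
  	/* arg is the mask, nr_args is its size in bytes */
  	return syscall(__NR_io_uring_register, ring_fd,
  			IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
  }

  /* Illustrative helper: revert to the default per-node masks */
  static int clear_iowq_affinity(int ring_fd)
  {
  	/* the unregister opcode takes no argument */
  	return syscall(__NR_io_uring_register, ring_fd,
  			IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
  }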

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io-wq.c                    | 17 ++++++++++++
 fs/io-wq.h                    |  2 ++
 fs/io_uring.c                 | 51 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  4 +++
 4 files changed, 74 insertions(+)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 2af8e1df4646..bb4d3ee9592e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return __io_wq_cpu_online(wq, cpu, false);
 }
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+{
+	int i;
+
+	rcu_read_lock();
+	for_each_node(i) {
+		struct io_wqe *wqe = wq->wqes[i];
+
+		if (mask)
+			cpumask_copy(wqe->cpu_mask, mask);
+		else
+			cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 static __init int io_wq_init(void)
 {
 	int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index af2df0680ee2..02299cdcf55c 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d916eb2cef09..46a25a7cb70a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	return -EINVAL;
 }
 
+static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
+				unsigned len)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	cpumask_var_t new_mask;
+	int ret;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_clear(new_mask);
+	if (len > cpumask_size())
+		len = cpumask_size();
+
+	if (copy_from_user(new_mask, arg, len)) {
+		free_cpumask_var(new_mask);
+		return -EFAULT;
+	}
+
+	ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
+	free_cpumask_var(new_mask);
+	return ret;
+}
+
+static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
+{
+	struct io_uring_task *tctx = current->io_uring;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	return io_wq_cpu_affinity(tctx->io_wq, NULL);
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_FILES_UPDATE2:
 	case IORING_REGISTER_BUFFERS2:
 	case IORING_REGISTER_BUFFERS_UPDATE:
+	case IORING_REGISTER_IOWQ_AFF:
+	case IORING_UNREGISTER_IOWQ_AFF:
 		return false;
 	default:
 		return true;
@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_register_rsrc_update(ctx, arg, nr_args,
 					      IORING_RSRC_BUFFER);
 		break;
+	case IORING_REGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (!arg || !nr_args)
+			break;
+		ret = io_register_iowq_aff(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_unregister_iowq_aff(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 162ff99ed2cb..f1f9ac114b51 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -306,6 +306,10 @@ enum {
 	IORING_REGISTER_BUFFERS2		= 15,
 	IORING_REGISTER_BUFFERS_UPDATE		= 16,
 
+	/* set/clear io-wq thread affinities */
+	IORING_REGISTER_IOWQ_AFF		= 17,
+	IORING_UNREGISTER_IOWQ_AFF		= 18,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
-- 
2.32.0


