public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 6/8] io_uring: split SQPOLL data into separate structure
Date: Wed,  2 Sep 2020 20:20:51 -0600	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

Move all the necessary state out of io_ring_ctx, and into a new
structure, io_sq_data. The latter now deals with any state or
variables associated with the SQPOLL thread itself.

This is in preparation for supporting more than one io_ring_ctx per
SQPOLL thread.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 104 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 31 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8ce1b4247120..35ea69aad9c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -229,6 +229,12 @@ struct io_restriction {
 	bool registered;
 };
 
+struct io_sq_data {
+	refcount_t		refs;
+	struct task_struct	*thread;
+	struct wait_queue_head	wait;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -275,13 +281,7 @@ struct io_ring_ctx {
 
 	/* IO offload */
 	struct io_wq		*io_wq;
-	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
-	struct wait_queue_head	*sqo_wait;
-	struct wait_queue_head	__sqo_wait;
-	struct wait_queue_entry	sqo_wait_entry;
-
-
 	/*
 	 * For SQPOLL usage - no reference is held to this file table, we
 	 * rely on fops->flush() and our callback there waiting for the users
@@ -289,6 +289,10 @@ struct io_ring_ctx {
 	 */
 	struct files_struct	*sqo_files;
 
+	struct wait_queue_entry	sqo_wait_entry;
+
+	struct io_sq_data	*sq_data;	/* if using sq thread polling */
+
 	/*
 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
 	 * readers must ensure that ->refs is alive as long as the file* is
@@ -1086,8 +1090,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
-	init_waitqueue_head(&ctx->__sqo_wait);
-	ctx->sqo_wait = &ctx->__sqo_wait;
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	init_completion(&ctx->ref_comp);
@@ -1350,8 +1352,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
-	if (waitqueue_active(ctx->sqo_wait))
-		wake_up(ctx->sqo_wait);
+	if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
+		wake_up(&ctx->sq_data->wait);
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
 }
@@ -2415,8 +2417,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	else
 		list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
 
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && wq_has_sleeper(ctx->sqo_wait))
-		wake_up(ctx->sqo_wait);
+	if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+	    wq_has_sleeper(&ctx->sq_data->wait))
+		wake_up(&ctx->sq_data->wait);
 }
 
 static void __io_state_file_put(struct io_submit_state *state)
@@ -6635,6 +6638,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 				  unsigned long start_jiffies)
 {
 	unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
+	struct io_sq_data *sqd = ctx->sq_data;
 	unsigned int to_submit;
 	int ret = 0;
 
@@ -6675,7 +6679,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 		    !percpu_ref_is_dying(&ctx->refs)))
 			return SQT_SPIN;
 
-		prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
+		prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry,
 					TASK_INTERRUPTIBLE);
 
 		/*
@@ -6687,7 +6691,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 		 */
 		if ((ctx->flags & IORING_SETUP_IOPOLL) &&
 		    !list_empty_careful(&ctx->iopoll_list)) {
-			finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+			finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
 			goto again;
 		}
 
@@ -6697,7 +6701,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 		if (!to_submit || ret == -EBUSY)
 			return SQT_IDLE;
 
-		finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+		finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
 		io_ring_clear_wakeup_flag(ctx);
 	}
 
@@ -6925,18 +6929,46 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	return 0;
 }
 
-static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+static void io_put_sq_data(struct io_sq_data *sqd)
 {
-	if (ctx->sqo_thread) {
-		wait_for_completion(&ctx->sq_thread_comp);
+	if (refcount_dec_and_test(&sqd->refs)) {
 		/*
 		 * The park is a bit of a work-around, without it we get
 		 * warning spews on shutdown with SQPOLL set and affinity
 		 * set to a single CPU.
 		 */
-		kthread_park(ctx->sqo_thread);
-		kthread_stop(ctx->sqo_thread);
-		ctx->sqo_thread = NULL;
+		if (sqd->thread) {
+			kthread_park(sqd->thread);
+			kthread_stop(sqd->thread);
+		}
+
+		kfree(sqd);
+	}
+}
+
+static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
+{
+	struct io_sq_data *sqd;
+
+	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
+	if (!sqd)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&sqd->refs, 1);
+	init_waitqueue_head(&sqd->wait);
+	return sqd;
+}
+
+static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+{
+	struct io_sq_data *sqd = ctx->sq_data;
+
+	if (sqd) {
+		if (sqd->thread)
+			wait_for_completion(&ctx->sq_thread_comp);
+
+		io_put_sq_data(sqd);
+		ctx->sq_data = NULL;
 	}
 }
 
@@ -7576,10 +7608,19 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 	int ret;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		struct io_sq_data *sqd;
+
 		ret = -EPERM;
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
 			goto err;
 
+		sqd = io_get_sq_data(p);
+		if (IS_ERR(sqd)) {
+			ret = PTR_ERR(sqd);
+			goto err;
+		}
+		ctx->sq_data = sqd;
+
 		/*
 		 * We will exit the sqthread before current exits, so we can
 		 * avoid taking a reference here and introducing weird
@@ -7600,16 +7641,15 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 			if (!cpu_online(cpu))
 				goto err;
 
-			ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
-							ctx, cpu,
-							"io_uring-sq");
+			sqd->thread = kthread_create_on_cpu(io_sq_thread, ctx,
+							cpu, "io_uring-sq");
 		} else {
-			ctx->sqo_thread = kthread_create(io_sq_thread, ctx,
+			sqd->thread = kthread_create(io_sq_thread, ctx,
 							"io_uring-sq");
 		}
-		if (IS_ERR(ctx->sqo_thread)) {
-			ret = PTR_ERR(ctx->sqo_thread);
-			ctx->sqo_thread = NULL;
+		if (IS_ERR(sqd->thread)) {
+			ret = PTR_ERR(sqd->thread);
+			sqd->thread = NULL;
 			goto err;
 		}
 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -7631,8 +7671,10 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 
 static void io_sq_offload_start(struct io_ring_ctx *ctx)
 {
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread)
-		wake_up_process(ctx->sqo_thread);
+	struct io_sq_data *sqd = ctx->sq_data;
+
+	if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
+		wake_up_process(sqd->thread);
 }
 
 static inline void __io_unaccount_mem(struct user_struct *user,
@@ -8396,7 +8438,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		if (!list_empty_careful(&ctx->cq_overflow_list))
 			io_cqring_overflow_flush(ctx, false);
 		if (flags & IORING_ENTER_SQ_WAKEUP)
-			wake_up(ctx->sqo_wait);
+			wake_up(&ctx->sq_data->wait);
 		submitted = to_submit;
 	} else if (to_submit) {
 		mutex_lock(&ctx->uring_lock);
-- 
2.28.0


  parent reply	other threads:[~2020-09-03  2:21 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-03  2:20 [PATCHSET for-next 0/8] io_uring SQPOLL improvements Jens Axboe
2020-09-03  2:20 ` [PATCH 1/8] io_uring: io_sq_thread() doesn't need to flush signals Jens Axboe
2020-09-03  2:20 ` [PATCH 2/8] io_uring: allow SQPOLL with CAP_SYS_NICE privileges Jens Axboe
2020-09-03  2:20 ` [PATCH 3/8] io_uring: use private ctx wait queue entries for SQPOLL Jens Axboe
2020-09-03  2:20 ` [PATCH 4/8] io_uring: move SQPOLL post-wakeup ring need wakeup flag into wake handler Jens Axboe
2020-09-03  2:20 ` [PATCH 5/8] io_uring: split work handling part of SQPOLL into helper Jens Axboe
2020-09-03  2:20 ` Jens Axboe [this message]
2020-09-03  2:20 ` [PATCH 7/8] io_uring: base SQPOLL handling off io_sq_data Jens Axboe
2020-09-03  2:20 ` [PATCH 8/8] io_uring: enable IORING_SETUP_ATTACH_WQ to attach to SQPOLL thread too Jens Axboe
2020-09-07  8:56   ` Xiaoguang Wang
2020-09-07 14:00     ` Pavel Begunkov
2020-09-07 16:11       ` Jens Axboe
2020-09-07 16:14     ` Jens Axboe
2020-09-07 16:18       ` Jens Axboe
2020-09-08  2:28         ` Xiaoguang Wang
2020-09-08  2:53       ` Xiaoguang Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.