public inbox for [email protected]
 help / color / mirror / Atom feed
From: Xiaoguang Wang <[email protected]>
To: [email protected]
Cc: [email protected], [email protected], [email protected]
Subject: Re: [RFC PATCH for-next v2] io_uring: support multiple rings to share same poll thread by specifying same cpu
Date: Sun, 27 Sep 2020 10:25:45 +0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

hi,

> We have already supported multiple rings to share one same poll thread
> by passing IORING_SETUP_ATTACH_WQ, but it's not that convenient to use.
> IORING_SETUP_ATTACH_WQ needs users to ensure that a parent ring instance
> already exists, which means it requires the app to regulate the
> creation order between uring instances.
> 
> Currently we can make this a bit simpler, for those rings which will
> have SQPOLL enabled and are willing to be bound to one same cpu, add a
> capability that these rings can share one poll thread by specifying
> a new IORING_SETUP_SQPOLL_PERCPU flag, then we have 3 cases
>    1, IORING_SETUP_ATTACH_WQ: if user specifies this flag, we'll always
> try to attach this ring to an existing ring's corresponding poll thread,
> no matter whether IORING_SETUP_SQ_AFF or IORING_SETUP_SQPOLL_PERCPU is
> set.
>    2, IORING_SETUP_SQ_AFF and IORING_SETUP_SQPOLL_PERCPU are both enabled,
> for this case, we'll create a single poll thread to be shared by these
> rings, and this poll thread is bound to a fixed cpu.
>    3, for any other cases, we'll just create one new poll thread for the
> corresponding ring.
> 
> And for case 2, there is no need to regulate the creation order of multiple
> uring instances; we use a mutex to synchronize creation. For example, say
> five rings all have IORING_SETUP_SQ_AFF & IORING_SETUP_SQPOLL_PERCPU
> enabled and are willing to be bound to the same cpu: the one ring that gets
> the mutex lock will create one poll thread, and the other four rings will
> just attach themselves to the previously created poll thread.
> 
> To implement the above function, add one global hlist_head hash table; only
> an sqd that is created for IORING_SETUP_SQ_AFF & IORING_SETUP_SQPOLL_PERCPU
> will be added to this global list, and its search keys are current->files
> and the cpu number.
This is a gentle ping.
I just want to know whether you like this method; if so, I can do the rebase
work accordingly and resend it.

Regards,
Xiaoguang Wang

> 
> Signed-off-by: Xiaoguang Wang <[email protected]>
> 
> ---
> V2:
>    1, rebase to for-5.10/io_uring branch.
> ---
>   fs/io_uring.c                 | 129 +++++++++++++++++++++++++++++-----
>   include/uapi/linux/io_uring.h |   1 +
>   2 files changed, 111 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5ddb2cddc695..ae83d887c24d 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -233,6 +233,10 @@ struct io_sq_data {
>   	refcount_t		refs;
>   	struct mutex		lock;
>   
> +	struct hlist_node	hash;
> +	struct files_struct	*files;
> +	int			sq_thread_cpu;
> +
>   	/* ctx's that are using this sqd */
>   	struct list_head	ctx_list;
>   	struct list_head	ctx_new_list;
> @@ -242,6 +246,12 @@ struct io_sq_data {
>   	struct wait_queue_head	wait;
>   };
>   
> +static DEFINE_MUTEX(sqd_lock);
> +
> +#define	IORING_SQD_HASHTABLE_SIZE	256
> +#define	IORING_SQD_HASHTABLE_BITS	8
> +static struct hlist_head *sqd_hashtable;
> +
>   struct io_ring_ctx {
>   	struct {
>   		struct percpu_ref	refs;
> @@ -7045,6 +7055,11 @@ static void io_put_sq_data(struct io_sq_data *sqd)
>   			kthread_stop(sqd->thread);
>   		}
>   
> +		if (!hlist_unhashed(&sqd->hash)) {
> +			mutex_lock(&sqd_lock);
> +			hlist_del(&sqd->hash);
> +			mutex_unlock(&sqd_lock);
> +		}
>   		kfree(sqd);
>   	}
>   }
> @@ -7075,13 +7090,10 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
>   	return sqd;
>   }
>   
> -static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
> +static struct io_sq_data *io_alloc_sq_data(void)
>   {
>   	struct io_sq_data *sqd;
>   
> -	if (p->flags & IORING_SETUP_ATTACH_WQ)
> -		return io_attach_sq_data(p);
> -
>   	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
>   	if (!sqd)
>   		return ERR_PTR(-ENOMEM);
> @@ -7089,6 +7101,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
>   	refcount_set(&sqd->refs, 1);
>   	INIT_LIST_HEAD(&sqd->ctx_list);
>   	INIT_LIST_HEAD(&sqd->ctx_new_list);
> +	INIT_HLIST_NODE(&sqd->hash);
>   	mutex_init(&sqd->ctx_lock);
>   	mutex_init(&sqd->lock);
>   	init_waitqueue_head(&sqd->wait);
> @@ -7114,6 +7127,64 @@ static void io_sq_thread_park(struct io_sq_data *sqd)
>   	kthread_park(sqd->thread);
>   }
>   
> +static inline void io_attach_ctx_to_sqd(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
> +{
> +	io_sq_thread_park(sqd);
> +	ctx->sq_data = sqd;
> +	mutex_lock(&sqd->ctx_lock);
> +	list_add(&ctx->sqd_list, &sqd->ctx_new_list);
> +	mutex_unlock(&sqd->ctx_lock);
> +	io_sq_thread_unpark(sqd);
> +}
> +
> +/*
> + * Only if IORING_SETUP_ATTACH_WQ is not specified and IORING_SETUP_SQ_AFF &
> + * IORING_SETUP_SQPOLL_PERCPU are both enabled, can this function be called.
> + *
> + * This function finds the corresponding sqd in the global hash list, keyed
> + * by current->files and the cpu number; if none is found, it creates a new
> + * sqd and inserts it into the global hash list.
> + */
> +static struct io_sq_data *io_find_or_create_sq_data(struct io_ring_ctx *ctx,
> +				struct io_uring_params *p)
> +{
> +	struct io_sq_data *sqd;
> +	struct hlist_head *head;
> +	int cpu = p->sq_thread_cpu;
> +	struct task_struct *tsk;
> +	struct files_struct *files = current->files;
> +
> +	mutex_lock(&sqd_lock);
> +	head = sqd_hashtable + hash_ptr(files, IORING_SQD_HASHTABLE_BITS);
> +	hlist_for_each_entry(sqd, head, hash) {
> +		if ((sqd->files == files) && (sqd->sq_thread_cpu == cpu)) {
> +			refcount_inc(&sqd->refs);
> +			mutex_unlock(&sqd_lock);
> +			return sqd;
> +		}
> +	}
> +
> +	sqd = io_alloc_sq_data();
> +	if (IS_ERR(sqd))
> +		goto out;
> +
> +	tsk = kthread_create_on_cpu(io_sq_thread, sqd, cpu, "io_uring-sq");
> +	if (IS_ERR(tsk)) {
> +		kfree(sqd);
> +		sqd = ERR_PTR(PTR_ERR(tsk));
> +		goto out;
> +	}
> +
> +	sqd->thread = tsk;
> +	sqd->files = files;
> +	sqd->sq_thread_cpu = cpu;
> +	hlist_add_head(&sqd->hash, head);
> +
> +out:
> +	mutex_unlock(&sqd_lock);
> +	return sqd;
> +}
> +
>   static void io_sq_thread_stop(struct io_ring_ctx *ctx)
>   {
>   	struct io_sq_data *sqd = ctx->sq_data;
> @@ -7790,19 +7861,18 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
>   		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
>   			goto err;
>   
> -		sqd = io_get_sq_data(p);
> -		if (IS_ERR(sqd)) {
> -			ret = PTR_ERR(sqd);
> -			goto err;
> +		if (p->flags & IORING_SETUP_ATTACH_WQ) {
> +			sqd = io_attach_sq_data(p);
> +			if (IS_ERR(sqd)) {
> +				ret = PTR_ERR(sqd);
> +				goto err;
> +			}
> +			io_attach_ctx_to_sqd(sqd, ctx);
> +			WARN_ON(!sqd->thread);
> +			if (sqd->thread)
> +				goto done;
>   		}
>   
> -		io_sq_thread_park(sqd);
> -		ctx->sq_data = sqd;
> -		mutex_lock(&sqd->ctx_lock);
> -		list_add(&ctx->sqd_list, &sqd->ctx_new_list);
> -		mutex_unlock(&sqd->ctx_lock);
> -		io_sq_thread_unpark(sqd);
> -
>   		/*
>   		 * We will exit the sqthread before current exits, so we can
>   		 * avoid taking a reference here and introducing weird
> @@ -7814,8 +7884,14 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
>   		if (!ctx->sq_thread_idle)
>   			ctx->sq_thread_idle = HZ;
>   
> -		if (sqd->thread)
> -			goto done;
> +		if (!(p->flags & IORING_SETUP_SQ_AFF) ||
> +		    !(p->flags & IORING_SETUP_SQPOLL_PERCPU)) {
> +			sqd = io_alloc_sq_data();
> +			if (IS_ERR(sqd)) {
> +				ret = PTR_ERR(sqd);
> +				goto err;
> +			}
> +		}
>   
>   		if (p->flags & IORING_SETUP_SQ_AFF) {
>   			int cpu = p->sq_thread_cpu;
> @@ -7826,7 +7902,14 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
>   			if (!cpu_online(cpu))
>   				goto err;
>   
> -			sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
> +			if (p->flags & IORING_SETUP_SQPOLL_PERCPU) {
> +				sqd = io_find_or_create_sq_data(ctx, p);
> +				if (IS_ERR(sqd)) {
> +					ret = PTR_ERR(sqd);
> +					goto err;
> +				}
> +			} else
> +				sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
>   							cpu, "io_uring-sq");
>   		} else {
>   			sqd->thread = kthread_create(io_sq_thread, sqd,
> @@ -7837,6 +7920,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
>   			sqd->thread = NULL;
>   			goto err;
>   		}
> +		io_attach_ctx_to_sqd(sqd, ctx);
>   	} else if (p->flags & IORING_SETUP_SQ_AFF) {
>   		/* Can't have SQ_AFF without SQPOLL */
>   		ret = -EINVAL;
> @@ -9119,7 +9203,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
>   	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
>   			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
>   			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
> -			IORING_SETUP_R_DISABLED))
> +			IORING_SETUP_R_DISABLED | IORING_SETUP_SQPOLL_PERCPU))
>   		return -EINVAL;
>   
>   	return  io_uring_create(entries, &p, params);
> @@ -9454,6 +9538,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
>   
>   static int __init io_uring_init(void)
>   {
> +	int i;
> +
>   #define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
>   	BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
>   	BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
> @@ -9494,6 +9580,11 @@ static int __init io_uring_init(void)
>   	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
>   	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
>   	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
> +
> +	sqd_hashtable = kvmalloc_array(IORING_SQD_HASHTABLE_SIZE,
> +				sizeof(sqd_hashtable[0]), GFP_KERNEL);
> +	for (i = 0; i < IORING_SQD_HASHTABLE_SIZE; i++)
> +		INIT_HLIST_HEAD(&sqd_hashtable[i]);
>   	return 0;
>   };
>   __initcall(io_uring_init);
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 2301c37e86cb..4147e5a5f752 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -96,6 +96,7 @@ enum {
>   #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
>   #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
>   #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
> +#define IORING_SETUP_SQPOLL_PERCPU	(1U << 7)	/* use percpu SQ poll thread */
>   
>   enum {
>   	IORING_OP_NOP,
> 

      reply	other threads:[~2020-09-27  2:26 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-13 13:05 [RFC PATCH for-next v2] io_uring: support multiple rings to share same poll thread by specifying same cpu Xiaoguang Wang
2020-09-27  2:25 ` Xiaoguang Wang [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=612daaf4-a4a0-7c18-d919-bbc227ec7a79@linux.alibaba.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox