From: Xiaoguang Wang <[email protected]>
To: Pavel Begunkov <[email protected]>, [email protected]
Cc: [email protected], [email protected]
Subject: Re: [PATCH v3 1/2] io_uring: avoid whole io_wq_work copy for requests completed inline
Date: Sat, 30 May 2020 21:36:03 +0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

hi,

> On 28/05/2020 12:15, Xiaoguang Wang wrote:
>> If requests can be submitted and completed inline, we don't need to
>> initialize whole io_wq_work in io_init_req(), which is an expensive
>> operation, add a new 'REQ_F_WORK_INITIALIZED' to control whether
>> io_wq_work is initialized.
> 
> It looks nicer. Especially if you'd add a helper as Jens supposed.
Sure, I'll add a helper in V4, thanks.
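Something like this is what I have in mind (untested sketch, the helper name
io_req_init_async is just a placeholder):

	static inline void io_req_init_async(struct io_kiocb *req)
	{
		if (req->flags & REQ_F_WORK_INITIALIZED)
			return;

		memset(&req->work, 0, sizeof(req->work));
		req->flags |= REQ_F_WORK_INITIALIZED;
	}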

> 
> The other thing, even though I hate treating a part of the fields differently
> from others, I don't like ->creds tossing either.
> 
> Did you consider trying using only ->work.creds without adding req->creds? like
> in the untested incremental below. init_io_work() there is misleading, should be
> somehow played around better.
But without adding a new req->creds, I think there are some potential risks.
In the current io_uring mainline code, look at io_kiocb's memory layout:
crash> struct -o io_kiocb
struct io_kiocb {
         union {
     [0]     struct file *file;
     [0]     struct io_rw rw;
     [0]     struct io_poll_iocb poll;
     [0]     struct io_accept accept;
     [0]     struct io_sync sync;
     [0]     struct io_cancel cancel;
     [0]     struct io_timeout timeout;
     [0]     struct io_connect connect;
     [0]     struct io_sr_msg sr_msg;
     [0]     struct io_open open;
     [0]     struct io_close close;
     [0]     struct io_files_update files_update;
     [0]     struct io_fadvise fadvise;
     [0]     struct io_madvise madvise;
     [0]     struct io_epoll epoll;
     [0]     struct io_splice splice;
     [0]     struct io_provide_buf pbuf;
         };
    [64] struct io_async_ctx *io;
    [72] int cflags;
    [76] u8 opcode;
    [78] u16 buf_index;
    [80] struct io_ring_ctx *ctx;
    [88] struct list_head list;
   [104] unsigned int flags;
   [108] refcount_t refs;
   [112] struct task_struct *task;
   [120] unsigned long fsize;
   [128] u64 user_data;
   [136] u32 result;
   [140] u32 sequence;
   [144] struct list_head link_list;
   [160] struct list_head inflight_entry;
   [176] struct percpu_ref *fixed_file_refs;
         union {
             struct {
   [184]         struct callback_head task_work;
   [200]         struct hlist_node hash_node;
   [216]         struct async_poll *apoll;
             };
   [184]     struct io_wq_work work;
         };
}
SIZE: 240

struct io_wq_work {
    [0] struct io_wq_work_node list;
    [8] void (*func)(struct io_wq_work **);
   [16] struct files_struct *files;
   [24] struct mm_struct *mm;
   [32] const struct cred *creds;
   [40] struct fs_struct *fs;
   [48] unsigned int flags;
   [52] pid_t task_pid;
}
SIZE: 56

The risk mainly comes from the union:
union {
	/*
	 * Only commands that never go async can use the below fields,
	 * obviously. Right now only IORING_OP_POLL_ADD uses them, and
	 * async armed poll handlers for regular commands. The latter
	 * restore the work, if needed.
	 */
	struct {
		struct callback_head	task_work;
		struct hlist_node	hash_node;
		struct async_poll	*apoll;
	};
	struct io_wq_work	work;
};
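With the offsets shown above, apoll (184 + 16 + 16 = 216) lands exactly on
work.creds (184 + 32 = 216). Just to illustrate the aliasing, a compile-time
check (dropped into any init function) could read:

	/* apoll aliases work.creds: both live at offset 216 of io_kiocb */
	BUILD_BUG_ON(offsetof(struct io_kiocb, apoll) !=
		     offsetof(struct io_kiocb, work) +
		     offsetof(struct io_wq_work, creds));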

1. apoll and creds sit at the same memory offset; in the 'async armed poll handlers' case,
apoll will be used, which means creds will be overwritten. In patch "io_uring: avoid
unnecessary io_wq_work copy for fast poll feature", I use REQ_F_WORK_INITIALIZED to
control whether to restore io_wq_work, so your code below will break:

static inline void io_req_work_drop_env(struct io_kiocb *req)
{
	/* always init'ed, put before REQ_F_WORK_INITIALIZED check */
	if (req->work.creds) {
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here req->work.creds will be invalid; otherwise I would still need some space
to record the original req->work.creds and restore it afterwards.

		put_cred(req->work.creds);
		req->work.creds = NULL;
	}
	if (!(req->flags & REQ_F_WORK_INITIALIZED))
  		return;
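
To spell out the sequence I am worried about (my simplified reading of the
mainline fast-poll path):

	/*
	 * io_arm_poll_handler():
	 *	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
	 *	memcpy(&apoll->work, &req->work, sizeof(req->work));
	 *	req->apoll = apoll;	// overwrites work.creds (offset 216)
	 *
	 * With patch 2/2 the memcpy() restoring req->work is skipped when
	 * REQ_F_WORK_INITIALIZED is not set, so io_req_work_drop_env() would
	 * call put_cred() on whatever req->apoll left behind.
	 */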

2. In the IORING_OP_POLL_ADD case, the current mainline code uses task_work and hash_node,
32 bytes in total, which means io_wq_work's members list, func, files and mm would be
overwritten, but creds is not touched, so it is safe for now. But if a new member is added to
struct {
	struct callback_head	task_work;
	struct hlist_node	hash_node;
	struct async_poll	*apoll;
};
say callback_head gains a new member, then creds would be clobbered too and this check
would break again.
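
A build-time guard along these lines (again only a sketch) would at least
catch such a change:

	/* the poll-only fields must end before work.creds */
	BUILD_BUG_ON(sizeof(struct callback_head) + sizeof(struct hlist_node) >
		     offsetof(struct io_wq_work, creds));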

3. IMO, io_wq_work is only meant to describe the execution environment needed by reqs
that will be punted to io-wq; from a software-design point of view, reqs that are submitted
and completed inline should not touch this struct at all. And the current io_kiocb is
240 bytes; adding a new pointer makes it 248 bytes, still 4 cache lines with 64-byte cache lines.
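(Arithmetic: 240 / 64 = 3.75 and 248 / 64 = 3.875, so both sizes round up to
the same 4 cache lines.)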


Regards,
Xiaoguang Wang

> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 4dd3295d74f6..4086561ce444 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -643,7 +643,6 @@ struct io_kiocb {
>   	unsigned int		flags;
>   	refcount_t		refs;
>   	struct task_struct	*task;
> -	const struct cred	*creds;
>   	unsigned long		fsize;
>   	u64			user_data;
>   	u32			result;
> @@ -894,8 +893,16 @@ static const struct file_operations io_uring_fops;
>   static inline void init_io_work(struct io_kiocb *req,
>   			void (*func)(struct io_wq_work **))
>   {
> -	req->work = (struct io_wq_work){ .func = func };
> -	req->flags |= REQ_F_WORK_INITIALIZED;
> +	struct io_wq_work *work = &req->work;
> +
> +	/* work->creds are already initialised by a user */
> +	work->list.next = NULL;
> +	work->func = func;
> +	work->files = NULL;
> +	work->mm = NULL;
> +	work->fs = NULL;
> +	work->flags = REQ_F_WORK_INITIALIZED;
> +	work->task_pid = 0;
>   }
>   struct sock *io_uring_get_socket(struct file *file)
>   {
> @@ -1019,15 +1026,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
>   		mmgrab(current->mm);
>   		req->work.mm = current->mm;
>   	}
> +	if (!req->work.creds)
> +		req->work.creds = get_current_cred();
> 
> -	if (!req->work.creds) {
> -		if (!req->creds)
> -			req->work.creds = get_current_cred();
> -		else {
> -			req->work.creds = req->creds;
> -			req->creds = NULL;
> -		}
> -	}
>   	if (!req->work.fs && def->needs_fs) {
>   		spin_lock(&current->fs->lock);
>   		if (!current->fs->in_exec) {
> @@ -1044,6 +1045,12 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
> 
>   static inline void io_req_work_drop_env(struct io_kiocb *req)
>   {
> +	/* always init'ed, put before REQ_F_WORK_INITIALIZED check */
> +	if (req->work.creds) {
> +		put_cred(req->work.creds);
> +		req->work.creds = NULL;
> +	}
> +
>   	if (!(req->flags & REQ_F_WORK_INITIALIZED))
>   		return;
> 
> @@ -1051,10 +1058,6 @@ static inline void io_req_work_drop_env(struct io_kiocb *req)
>   		mmdrop(req->work.mm);
>   		req->work.mm = NULL;
>   	}
> -	if (req->work.creds) {
> -		put_cred(req->work.creds);
> -		req->work.creds = NULL;
> -	}
>   	if (req->work.fs) {
>   		struct fs_struct *fs = req->work.fs;
> 
> @@ -5901,12 +5904,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct
> io_kiocb *req,
> 
>   	id = READ_ONCE(sqe->personality);
>   	if (id) {
> -		req->creds = idr_find(&ctx->personality_idr, id);
> -		if (unlikely(!req->creds))
> +		req->work.creds = idr_find(&ctx->personality_idr, id);
> +		if (unlikely(!req->work.creds))
>   			return -EINVAL;
> -		get_cred(req->creds);
> +		get_cred(req->work.creds);
>   	} else
> -		req->creds = NULL;
> +		req->work.creds = NULL;
> 
>   	/* same numerical values with corresponding REQ_F_*, safe to copy */
>   	req->flags |= sqe_flags;
> 

