public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jens Axboe <[email protected]>
To: Liu Yong <[email protected]>
Cc: "Pavel Begunkov" <[email protected]>,
	"、 [email protected]" <[email protected]>,
	、io-uring <[email protected]>
Subject: Re: [PATCH] fs/io_uring.c: fix null ptr deference in io_send_recvmsg()
Date: Tue, 4 Aug 2020 22:10:55 -0600	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <20200805034044.GB24925@ubuntu>

On 8/4/20 9:40 PM, Liu Yong wrote:
> On Tue, Aug 04, 2020 at 03:55:16PM -0600, Jens Axboe wrote:
>> On 8/4/20 11:15 AM, Jens Axboe wrote:
>>> On 8/4/20 11:02 AM, xiao lin wrote:
>>>> 在 2020年8月4日星期二,Jens Axboe <[email protected] <mailto:[email protected]>> 写道:
>>>>
>>>>     On 8/4/20 7:18 AM, Pavel Begunkov wrote:
>>>>     > On 04/08/2020 15:56, Liu Yong wrote:
>>>>     >> In io_send_recvmsg(), there is no check for the req->file.
>>>>     >> User can change the opcode from IORING_OP_NOP to IORING_OP_SENDMSG
>>>>     >> through competition after the io_req_set_file().
>>>>     >
>>>>     > After sqe->opcode is read and copied in io_init_req(), it only uses
>>>>     > in-kernel req->opcode. Also, io_init_req() should check for req->file
>>>>     > NULL, so shouldn't happen after.
>>>>     >
>>>>     > Do you have a reproducer? What kernel version did you use?
>>>>
>>>>     Was looking at this too, and I'm guessing this is some 5.4 based kernel.
>>>>     Unfortunately the oops doesn't include that information.
>>>
>>>> Sorry, I forgot to mention that the kernel version I am using is 5.4.55.
>>>
>>> I think there are two options here:
>>>
>>> 1) Backport the series that ensured we only read those important bits once
>>> 2) Make s->sqe a full sqe, and memcpy it in
>>
>> Something like this should close the ->opcode re-read for 5.4-stable.
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index e0200406765c..8bb5e19b7c3c 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -279,6 +279,7 @@ struct sqe_submit {
>>  	bool				has_user;
>>  	bool				needs_lock;
>>  	bool				needs_fixed_file;
>> +	u8				opcode;
>>  };
>>  
>>  /*
>> @@ -505,7 +506,7 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
>>  	int rw = 0;
>>  
>>  	if (req->submit.sqe) {
>> -		switch (req->submit.sqe->opcode) {
>> +		switch (req->submit.opcode) {
>>  		case IORING_OP_WRITEV:
>>  		case IORING_OP_WRITE_FIXED:
>>  			rw = !(req->rw.ki_flags & IOCB_DIRECT);
>> @@ -1254,23 +1255,15 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
>>  }
>>  
>>  static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
>> -			       const struct sqe_submit *s, struct iovec **iovec,
>> +			       struct io_kiocb *req, struct iovec **iovec,
>>  			       struct iov_iter *iter)
>>  {
>> -	const struct io_uring_sqe *sqe = s->sqe;
>> +	const struct io_uring_sqe *sqe = req->submit.sqe;
>>  	void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
>>  	size_t sqe_len = READ_ONCE(sqe->len);
>>  	u8 opcode;
>>  
>> -	/*
>> -	 * We're reading ->opcode for the second time, but the first read
>> -	 * doesn't care whether it's _FIXED or not, so it doesn't matter
>> -	 * whether ->opcode changes concurrently. The first read does care
>> -	 * about whether it is a READ or a WRITE, so we don't trust this read
>> -	 * for that purpose and instead let the caller pass in the read/write
>> -	 * flag.
>> -	 */
>> -	opcode = READ_ONCE(sqe->opcode);
>> +	opcode = req->submit.opcode;
>>  	if (opcode == IORING_OP_READ_FIXED ||
>>  	    opcode == IORING_OP_WRITE_FIXED) {
>>  		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
>> @@ -1278,7 +1271,7 @@ static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
>>  		return ret;
>>  	}
>>  
>> -	if (!s->has_user)
>> +	if (!req->submit.has_user)
>>  		return -EFAULT;
>>  
>>  #ifdef CONFIG_COMPAT
>> @@ -1425,7 +1418,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
>>  	if (unlikely(!(file->f_mode & FMODE_READ)))
>>  		return -EBADF;
>>  
>> -	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
>> +	ret = io_import_iovec(req->ctx, READ, req, &iovec, &iter);
>>  	if (ret < 0)
>>  		return ret;
>>  
>> @@ -1490,7 +1483,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
>>  	if (unlikely(!(file->f_mode & FMODE_WRITE)))
>>  		return -EBADF;
>>  
>> -	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
>> +	ret = io_import_iovec(req->ctx, WRITE, req, &iovec, &iter);
>>  	if (ret < 0)
>>  		return ret;
>>  
>> @@ -2109,15 +2102,14 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>  static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>  			   const struct sqe_submit *s, bool force_nonblock)
>>  {
>> -	int ret, opcode;
>> +	int ret;
>>  
>>  	req->user_data = READ_ONCE(s->sqe->user_data);
>>  
>>  	if (unlikely(s->index >= ctx->sq_entries))
>>  		return -EINVAL;
>>  
>> -	opcode = READ_ONCE(s->sqe->opcode);
>> -	switch (opcode) {
>> +	switch (req->submit.opcode) {
>>  	case IORING_OP_NOP:
>>  		ret = io_nop(req, req->user_data);
>>  		break;
>> @@ -2181,10 +2173,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>  	return 0;
>>  }
>>  
>> -static struct async_list *io_async_list_from_sqe(struct io_ring_ctx *ctx,
>> -						 const struct io_uring_sqe *sqe)
>> +static struct async_list *io_async_list_from_req(struct io_ring_ctx *ctx,
>> +						 struct io_kiocb *req)
>>  {
>> -	switch (sqe->opcode) {
>> +	switch (req->submit.opcode) {
>>  	case IORING_OP_READV:
>>  	case IORING_OP_READ_FIXED:
>>  		return &ctx->pending_async[READ];
>> @@ -2196,12 +2188,10 @@ static struct async_list *io_async_list_from_sqe(struct io_ring_ctx *ctx,
>>  	}
>>  }
>>  
>> -static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
>> +static inline bool io_req_needs_user(struct io_kiocb *req)
>>  {
>> -	u8 opcode = READ_ONCE(sqe->opcode);
>> -
>> -	return !(opcode == IORING_OP_READ_FIXED ||
>> -		 opcode == IORING_OP_WRITE_FIXED);
>> +	return !(req->submit.opcode == IORING_OP_READ_FIXED ||
>> +		req->submit.opcode == IORING_OP_WRITE_FIXED);
>>  }
>>  
>>  static void io_sq_wq_submit_work(struct work_struct *work)
>> @@ -2217,7 +2207,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
>>  	int ret;
>>  
>>  	old_cred = override_creds(ctx->creds);
>> -	async_list = io_async_list_from_sqe(ctx, req->submit.sqe);
>> +	async_list = io_async_list_from_req(ctx, req);
>>  
>>  	allow_kernel_signal(SIGINT);
>>  restart:
>> @@ -2239,7 +2229,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
>>  		}
>>  
>>  		ret = 0;
>> -		if (io_sqe_needs_user(sqe) && !cur_mm) {
>> +		if (io_req_needs_user(req) && !cur_mm) {
>>  			if (!mmget_not_zero(ctx->sqo_mm)) {
>>  				ret = -EFAULT;
>>  			} else {
>> @@ -2387,11 +2377,9 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
>>  	return ret;
>>  }
>>  
>> -static bool io_op_needs_file(const struct io_uring_sqe *sqe)
>> +static bool io_op_needs_file(struct io_kiocb *req)
>>  {
>> -	int op = READ_ONCE(sqe->opcode);
>> -
>> -	switch (op) {
>> +	switch (req->submit.opcode) {
>>  	case IORING_OP_NOP:
>>  	case IORING_OP_POLL_REMOVE:
>>  	case IORING_OP_TIMEOUT:
>> @@ -2419,7 +2407,7 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
>>  	 */
>>  	req->sequence = s->sequence;
>>  
>> -	if (!io_op_needs_file(s->sqe))
>> +	if (!io_op_needs_file(req))
>>  		return 0;
>>  
>>  	if (flags & IOSQE_FIXED_FILE) {
>> @@ -2460,7 +2448,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>  
>>  			s->sqe = sqe_copy;
>>  			memcpy(&req->submit, s, sizeof(*s));
>> -			list = io_async_list_from_sqe(ctx, s->sqe);
>> +			list = io_async_list_from_req(ctx, req);
>>  			if (!io_add_to_prev_work(list, req)) {
>>  				if (list)
>>  					atomic_inc(&list->cnt);
>> @@ -2582,7 +2570,7 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
>>  	req->user_data = s->sqe->user_data;
>>  
>>  #if defined(CONFIG_NET)
>> -	switch (READ_ONCE(s->sqe->opcode)) {
>> +	switch (req->submit.opcode) {
>>  	case IORING_OP_SENDMSG:
>>  	case IORING_OP_RECVMSG:
>>  		spin_lock(&current->fs->lock);
>> @@ -2697,6 +2685,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
>>  	if (head < ctx->sq_entries) {
>>  		s->index = head;
>>  		s->sqe = &ctx->sq_sqes[head];
>> +		s->opcode = READ_ONCE(s->sqe->opcode);
>>  		s->sequence = ctx->cached_sq_head;
>>  		ctx->cached_sq_head++;
>>  		return true;
>>
>> -- 
>> Jens Axboe
>>
> 
> I think this patch solves similar problems from the root cause.
> So, Should I submit this commit, or you?

Thanks for testing, I'll add your tested-by. Probably best if I do it,
since it's going to 5.4-stable only and it's not from upstream.


-- 
Jens Axboe


      reply	other threads:[~2020-08-05  4:11 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20200804125637.GA22088@ubuntu>
2020-08-04 13:18 ` [PATCH] fs/io_uring.c: fix null ptr deference in io_send_recvmsg() Pavel Begunkov
2020-08-04 13:27   ` Jens Axboe
     [not found]     ` <CAGAoTxzadSphnE2aLsFKS04TjTKYVq2uLFgH9dvLPwWiyqEGEQ@mail.gmail.com>
2020-08-04 17:15       ` Jens Axboe
2020-08-04 21:55         ` Jens Axboe
2020-08-05  3:40           ` Liu Yong
2020-08-05  4:10             ` Jens Axboe [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox