public inbox for [email protected]
 help / color / mirror / Atom feed
From: Bijan Mottahedeh <[email protected]>
To: [email protected], Pavel Begunkov <[email protected]>
Cc: [email protected]
Subject: Re: [PATCH 6/8] io_uring: support buffer registration updates
Date: Tue, 8 Dec 2020 16:42:19 -0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 11/18/2020 12:17 PM, Pavel Begunkov wrote:
> On 12/11/2020 23:00, Bijan Mottahedeh wrote:
>> Introduce IORING_REGISTER_BUFFERS_UPDATE and IORING_OP_BUFFERS_UPDATE,
>> consistent with file registration update.
> 
> I'd prefer to not add a new opcode for each new resource. Can we have
> only IORING_OP_RESOURCE_UPDATE and multiplex inside? Even better if you
> could fit all into IORING_OP_FILES_UPDATE and then
> 
> #define IORING_OP_RESOURCE_UPDATE IORING_OP_FILES_UPDATE
> 
> Jens, what do you think?

Hi Jens,

What do you think the right approach is here?

Thanks.

--bijan

> 
>>
>> Signed-off-by: Bijan Mottahedeh <[email protected]>
>> ---
>>   fs/io_uring.c                 | 139 +++++++++++++++++++++++++++++++++++++++++-
>>   include/uapi/linux/io_uring.h |   8 +--
>>   2 files changed, 140 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 71f6d5c..6020fd2 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1006,6 +1006,9 @@ struct io_op_def {
>>   		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
>>   						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
>>   	},
>> +	[IORING_OP_BUFFERS_UPDATE] = {
>> +		.work_flags		= IO_WQ_WORK_MM,
>> +	},
>>   };
>>   
>>   enum io_mem_account {
>> @@ -1025,6 +1028,9 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
>>   static int __io_sqe_files_update(struct io_ring_ctx *ctx,
>>   				 struct io_uring_rsrc_update *ip,
>>   				 unsigned nr_args);
>> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>> +				   struct io_uring_rsrc_update *up,
>> +				   unsigned nr_args);
>>   static void __io_clean_op(struct io_kiocb *req);
>>   static struct file *io_file_get(struct io_submit_state *state,
>>   				struct io_kiocb *req, int fd, bool fixed);
>> @@ -5939,6 +5945,19 @@ static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node)
>>   	percpu_ref_exit(&ref_node->refs);
>>   	kfree(ref_node);
>>   }
>> +
>> +static int io_buffers_update_prep(struct io_kiocb *req,
>> +				  const struct io_uring_sqe *sqe)
>> +{
>> +	return io_rsrc_update_prep(req, sqe);
>> +}
>> +
>> +static int io_buffers_update(struct io_kiocb *req, bool force_nonblock,
>> +			     struct io_comp_state *cs)
>> +{
>> +	return io_rsrc_update(req, force_nonblock, cs, __io_sqe_buffers_update);
>> +}
>> +
>>   static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>   {
>>   	switch (req->opcode) {
>> @@ -6010,11 +6029,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>   		return io_renameat_prep(req, sqe);
>>   	case IORING_OP_UNLINKAT:
>>   		return io_unlinkat_prep(req, sqe);
>> +	case IORING_OP_BUFFERS_UPDATE:
>> +		return io_buffers_update_prep(req, sqe);
>>   	}
>>   
>>   	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
>>   			req->opcode);
>> -	return-EINVAL;
>> +	return -EINVAL;
>>   }
>>   
>>   static int io_req_defer_prep(struct io_kiocb *req,
>> @@ -6268,6 +6289,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
>>   	case IORING_OP_UNLINKAT:
>>   		ret = io_unlinkat(req, force_nonblock);
>>   		break;
>> +	case IORING_OP_BUFFERS_UPDATE:
>> +		ret = io_buffers_update(req, force_nonblock, cs);
>> +		break;
>>   	default:
>>   		ret = -EINVAL;
>>   		break;
>> @@ -8224,6 +8248,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
>>   	if (imu->acct_pages)
>>   		io_unaccount_mem(ctx, imu->nr_bvecs, ACCT_PINNED);
>>   	kvfree(imu->bvec);
>> +	imu->bvec = NULL;
>>   	imu->nr_bvecs = 0;
>>   }
>>   
>> @@ -8441,6 +8466,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
>>   		if (pret > 0)
>>   			unpin_user_pages(pages, pret);
>>   		kvfree(imu->bvec);
>> +		imu->bvec = NULL;
>>   		goto done;
>>   	}
>>   
>> @@ -8602,6 +8628,8 @@ static void io_buf_data_ref_zero(struct percpu_ref *ref)
>>   static void io_ring_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
>>   {
>>   	io_buffer_unmap(ctx, prsrc->buf);
>> +	kvfree(prsrc->buf);
>> +	prsrc->buf = NULL;
>>   }
>>   
>>   static struct fixed_rsrc_ref_node *alloc_fixed_buf_ref_node(
>> @@ -8684,6 +8712,111 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
>>   	return 0;
>>   }
>>   
>> +static inline int io_queue_buffer_removal(struct fixed_rsrc_data *data,
>> +					  struct io_mapped_ubuf *imu)
>> +{
>> +	return io_queue_rsrc_removal(data, (void *)imu);
>> +}
>> +
>> +static void destroy_fixed_buf_ref_node(struct fixed_rsrc_ref_node *ref_node)
>> +{
>> +	destroy_fixed_rsrc_ref_node(ref_node);
>> +}
>> +
>> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
>> +				   struct io_uring_rsrc_update *up,
>> +				   unsigned nr_args)
>> +{
>> +	struct fixed_rsrc_data *data = ctx->buf_data;
>> +	struct fixed_rsrc_ref_node *ref_node;
>> +	struct io_mapped_ubuf *imu;
>> +	struct iovec iov;
>> +	struct iovec __user *iovs;
>> +	struct page *last_hpage = NULL;
>> +	__u32 done;
>> +	int i, err;
>> +	bool needs_switch = false;
>> +
>> +	if (check_add_overflow(up->offset, nr_args, &done))
>> +		return -EOVERFLOW;
>> +	if (done > ctx->nr_user_bufs)
>> +		return -EINVAL;
>> +
>> +	ref_node = alloc_fixed_buf_ref_node(ctx);
>> +	if (IS_ERR(ref_node))
>> +		return PTR_ERR(ref_node);
>> +
>> +	done = 0;
>> +	iovs = u64_to_user_ptr(up->iovs);
>> +	while (nr_args) {
>> +		struct fixed_rsrc_table *table;
>> +		unsigned index;
>> +
>> +		err = 0;
>> +		if (copy_from_user(&iov, &iovs[done], sizeof(iov))) {
>> +			err = -EFAULT;
>> +			break;
>> +		}
>> +		i = array_index_nospec(up->offset, ctx->nr_user_bufs);
>> +		table = &ctx->buf_data->table[i >> IORING_BUF_TABLE_SHIFT];
>> +		index = i & IORING_BUF_TABLE_MASK;
>> +		imu = &table->bufs[index];
>> +		if (table->bufs[index].ubuf) {
>> +			struct io_mapped_ubuf *dup;
>> +			dup = kmemdup(imu, sizeof(*imu), GFP_KERNEL);
>> +			if (!dup) {
>> +				err = -ENOMEM;
>> +				break;
>> +			}
>> +			err = io_queue_buffer_removal(data, dup);
>> +			if (err)
>> +				break;
>> +			memset(imu, 0, sizeof(*imu));
>> +			needs_switch = true;
>> +		}
>> +		if (!io_buffer_validate(&iov)) {
>> +			err = io_sqe_buffer_register(ctx, &iov, imu,
>> +						     &last_hpage);
>> +			if (err) {
>> +				memset(imu, 0, sizeof(*imu));
>> +				break;
>> +			}
>> +		}
>> +		nr_args--;
>> +		done++;
>> +		up->offset++;
>> +	}
>> +
>> +	if (needs_switch) {
>> +		percpu_ref_kill(&data->node->refs);
>> +		spin_lock(&data->lock);
>> +		list_add(&ref_node->node, &data->ref_list);
>> +		data->node = ref_node;
>> +		spin_unlock(&data->lock);
>> +		percpu_ref_get(&ctx->buf_data->refs);
>> +	} else
>> +		destroy_fixed_buf_ref_node(ref_node);
>> +
>> +	return done ? done : err;
>> +}
>> +
>> +static int io_sqe_buffers_update(struct io_ring_ctx *ctx, void __user *arg,
>> +				 unsigned nr_args)
>> +{
>> +	struct io_uring_rsrc_update up;
>> +
>> +	if (!ctx->buf_data)
>> +		return -ENXIO;
>> +	if (!nr_args)
>> +		return -EINVAL;
>> +	if (copy_from_user(&up, arg, sizeof(up)))
>> +		return -EFAULT;
>> +	if (up.resv)
>> +		return -EINVAL;
>> +
>> +	return __io_sqe_buffers_update(ctx, &up, nr_args);
>> +}
>> +
>>   static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
>>   {
>>   	__s32 __user *fds = arg;
>> @@ -9961,6 +10094,7 @@ static bool io_register_op_must_quiesce(int op)
>>   	switch (op) {
>>   	case IORING_UNREGISTER_FILES:
>>   	case IORING_REGISTER_FILES_UPDATE:
>> +	case IORING_REGISTER_BUFFERS_UPDATE:
>>   	case IORING_REGISTER_PROBE:
>>   	case IORING_REGISTER_PERSONALITY:
>>   	case IORING_UNREGISTER_PERSONALITY:
>> @@ -10036,6 +10170,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
>>   			break;
>>   		ret = io_sqe_buffers_unregister(ctx);
>>   		break;
>> +	case IORING_REGISTER_BUFFERS_UPDATE:
>> +		ret = io_sqe_buffers_update(ctx, arg, nr_args);
>> +		break;
>>   	case IORING_REGISTER_FILES:
>>   		ret = io_sqe_files_register(ctx, arg, nr_args);
>>   		break;
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 87f0f56..17682b5 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -137,6 +137,7 @@ enum {
>>   	IORING_OP_SHUTDOWN,
>>   	IORING_OP_RENAMEAT,
>>   	IORING_OP_UNLINKAT,
>> +	IORING_OP_BUFFERS_UPDATE,
>>   
>>   	/* this goes last, obviously */
>>   	IORING_OP_LAST,
>> @@ -279,17 +280,12 @@ enum {
>>   	IORING_UNREGISTER_PERSONALITY		= 10,
>>   	IORING_REGISTER_RESTRICTIONS		= 11,
>>   	IORING_REGISTER_ENABLE_RINGS		= 12,
>> +	IORING_REGISTER_BUFFERS_UPDATE		= 13,
>>   
>>   	/* this goes last */
>>   	IORING_REGISTER_LAST
>>   };
>>   
>> -struct io_uring_files_update {
>> -	__u32 offset;
>> -	__u32 resv;
>> -	__aligned_u64 /* __s32 * */ fds;
>> -};
>> -
>>   struct io_uring_rsrc_update {
>>   	__u32 offset;
>>   	__u32 resv;
>>
> 


  reply	other threads:[~2020-12-09  0:45 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-12 23:00 [PATCH 0/8] io_uring: buffer registration enhancements Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 1/8] io_uring: modularize io_sqe_buffer_register Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 2/8] io_uring: modularize io_sqe_buffers_register Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 3/8] io_uring: generalize fixed file functionality Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 4/8] io_uring: implement fixed buffers registration similar to fixed files Bijan Mottahedeh
2020-11-15 13:33   ` Pavel Begunkov
2020-11-16 21:24     ` Bijan Mottahedeh
2020-11-16 23:09       ` Pavel Begunkov
2020-11-17  0:41         ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 5/8] io_uring: generalize files_update functionality to rsrc_update Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 6/8] io_uring: support buffer registration updates Bijan Mottahedeh
2020-11-18 20:17   ` Pavel Begunkov
2020-12-09  0:42     ` Bijan Mottahedeh [this message]
2020-11-12 23:00 ` [PATCH 7/8] io_uring: support readv/writev with fixed buffers Bijan Mottahedeh
2020-11-17 11:04   ` Pavel Begunkov
2020-11-17 22:59     ` Bijan Mottahedeh
2020-11-18  9:14       ` Pavel Begunkov
2020-11-18 20:12       ` Pavel Begunkov
     [not found]         ` <[email protected]>
     [not found]           ` <[email protected]>
2020-11-19 19:27             ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 8/8] io_uring: support buffer registration sharing Bijan Mottahedeh
2020-11-16 23:28 ` [PATCH 0/8] io_uring: buffer registration enhancements Pavel Begunkov
2020-11-17  0:21   ` Bijan Mottahedeh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox