From: Pavel Begunkov <[email protected]>
To: Bijan Mottahedeh <[email protected]>, [email protected]
Cc: [email protected]
Subject: Re: [PATCH 6/8] io_uring: support buffer registration updates
Date: Wed, 18 Nov 2020 20:17:28 +0000 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 12/11/2020 23:00, Bijan Mottahedeh wrote:
> Introduce IORING_REGISTER_BUFFERS_UPDATE and IORING_OP_BUFFERS_UPDATE,
> consistent with file registration update.
I'd prefer to not add a new opcode for each new resource. Can we have
only IORING_OP_RESOURCE_UPDATE and multiplex inside? Even better if you
could fit all into IORING_OP_FILES_UPDATE and then
#define IORING_OP_RESOURCE_UPDATE IORING_OP_FILES_UPDATE
Jens, what do you think?
>
> Signed-off-by: Bijan Mottahedeh <[email protected]>
> ---
> fs/io_uring.c | 139 +++++++++++++++++++++++++++++++++++++++++-
> include/uapi/linux/io_uring.h | 8 +--
> 2 files changed, 140 insertions(+), 7 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 71f6d5c..6020fd2 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1006,6 +1006,9 @@ struct io_op_def {
> .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
> IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
> },
> + [IORING_OP_BUFFERS_UPDATE] = {
> + .work_flags = IO_WQ_WORK_MM,
> + },
> };
>
> enum io_mem_account {
> @@ -1025,6 +1028,9 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
> static int __io_sqe_files_update(struct io_ring_ctx *ctx,
> struct io_uring_rsrc_update *ip,
> unsigned nr_args);
> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
> + struct io_uring_rsrc_update *up,
> + unsigned nr_args);
> static void __io_clean_op(struct io_kiocb *req);
> static struct file *io_file_get(struct io_submit_state *state,
> struct io_kiocb *req, int fd, bool fixed);
> @@ -5939,6 +5945,19 @@ static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node)
> percpu_ref_exit(&ref_node->refs);
> kfree(ref_node);
> }
> +
> +static int io_buffers_update_prep(struct io_kiocb *req,
> + const struct io_uring_sqe *sqe)
> +{
> + return io_rsrc_update_prep(req, sqe);
> +}
> +
> +static int io_buffers_update(struct io_kiocb *req, bool force_nonblock,
> + struct io_comp_state *cs)
> +{
> + return io_rsrc_update(req, force_nonblock, cs, __io_sqe_buffers_update);
> +}
> +
> static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> {
> switch (req->opcode) {
> @@ -6010,11 +6029,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return io_renameat_prep(req, sqe);
> case IORING_OP_UNLINKAT:
> return io_unlinkat_prep(req, sqe);
> + case IORING_OP_BUFFERS_UPDATE:
> + return io_buffers_update_prep(req, sqe);
> }
>
> printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
> req->opcode);
> - return-EINVAL;
> + return -EINVAL;
> }
>
> static int io_req_defer_prep(struct io_kiocb *req,
> @@ -6268,6 +6289,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
> case IORING_OP_UNLINKAT:
> ret = io_unlinkat(req, force_nonblock);
> break;
> + case IORING_OP_BUFFERS_UPDATE:
> + ret = io_buffers_update(req, force_nonblock, cs);
> + break;
> default:
> ret = -EINVAL;
> break;
> @@ -8224,6 +8248,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
> if (imu->acct_pages)
> io_unaccount_mem(ctx, imu->nr_bvecs, ACCT_PINNED);
> kvfree(imu->bvec);
> + imu->bvec = NULL;
> imu->nr_bvecs = 0;
> }
>
> @@ -8441,6 +8466,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
> if (pret > 0)
> unpin_user_pages(pages, pret);
> kvfree(imu->bvec);
> + imu->bvec = NULL;
> goto done;
> }
>
> @@ -8602,6 +8628,8 @@ static void io_buf_data_ref_zero(struct percpu_ref *ref)
> static void io_ring_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
> {
> io_buffer_unmap(ctx, prsrc->buf);
> + kvfree(prsrc->buf);
> + prsrc->buf = NULL;
> }
>
> static struct fixed_rsrc_ref_node *alloc_fixed_buf_ref_node(
> @@ -8684,6 +8712,111 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
> return 0;
> }
>
> +static inline int io_queue_buffer_removal(struct fixed_rsrc_data *data,
> + struct io_mapped_ubuf *imu)
> +{
> + return io_queue_rsrc_removal(data, (void *)imu);
> +}
> +
> +static void destroy_fixed_buf_ref_node(struct fixed_rsrc_ref_node *ref_node)
> +{
> + destroy_fixed_rsrc_ref_node(ref_node);
> +}
> +
> +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
> + struct io_uring_rsrc_update *up,
> + unsigned nr_args)
> +{
> + struct fixed_rsrc_data *data = ctx->buf_data;
> + struct fixed_rsrc_ref_node *ref_node;
> + struct io_mapped_ubuf *imu;
> + struct iovec iov;
> + struct iovec __user *iovs;
> + struct page *last_hpage = NULL;
> + __u32 done;
> + int i, err;
> + bool needs_switch = false;
> +
> + if (check_add_overflow(up->offset, nr_args, &done))
> + return -EOVERFLOW;
> + if (done > ctx->nr_user_bufs)
> + return -EINVAL;
> +
> + ref_node = alloc_fixed_buf_ref_node(ctx);
> + if (IS_ERR(ref_node))
> + return PTR_ERR(ref_node);
> +
> + done = 0;
> + iovs = u64_to_user_ptr(up->iovs);
> + while (nr_args) {
> + struct fixed_rsrc_table *table;
> + unsigned index;
> +
> + err = 0;
> + if (copy_from_user(&iov, &iovs[done], sizeof(iov))) {
> + err = -EFAULT;
> + break;
> + }
> + i = array_index_nospec(up->offset, ctx->nr_user_bufs);
> + table = &ctx->buf_data->table[i >> IORING_BUF_TABLE_SHIFT];
> + index = i & IORING_BUF_TABLE_MASK;
> + imu = &table->bufs[index];
> + if (table->bufs[index].ubuf) {
> + struct io_mapped_ubuf *dup;
> + dup = kmemdup(imu, sizeof(*imu), GFP_KERNEL);
> + if (!dup) {
> + err = -ENOMEM;
> + break;
> + }
> + err = io_queue_buffer_removal(data, dup);
> + if (err)
> + break;
> + memset(imu, 0, sizeof(*imu));
> + needs_switch = true;
> + }
> + if (!io_buffer_validate(&iov)) {
> + err = io_sqe_buffer_register(ctx, &iov, imu,
> + &last_hpage);
> + if (err) {
> + memset(imu, 0, sizeof(*imu));
> + break;
> + }
> + }
> + nr_args--;
> + done++;
> + up->offset++;
> + }
> +
> + if (needs_switch) {
> + percpu_ref_kill(&data->node->refs);
> + spin_lock(&data->lock);
> + list_add(&ref_node->node, &data->ref_list);
> + data->node = ref_node;
> + spin_unlock(&data->lock);
> + percpu_ref_get(&ctx->buf_data->refs);
> + } else
> + destroy_fixed_buf_ref_node(ref_node);
> +
> + return done ? done : err;
> +}
> +
> +static int io_sqe_buffers_update(struct io_ring_ctx *ctx, void __user *arg,
> + unsigned nr_args)
> +{
> + struct io_uring_rsrc_update up;
> +
> + if (!ctx->buf_data)
> + return -ENXIO;
> + if (!nr_args)
> + return -EINVAL;
> + if (copy_from_user(&up, arg, sizeof(up)))
> + return -EFAULT;
> + if (up.resv)
> + return -EINVAL;
> +
> + return __io_sqe_buffers_update(ctx, &up, nr_args);
> +}
> +
> static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
> {
> __s32 __user *fds = arg;
> @@ -9961,6 +10094,7 @@ static bool io_register_op_must_quiesce(int op)
> switch (op) {
> case IORING_UNREGISTER_FILES:
> case IORING_REGISTER_FILES_UPDATE:
> + case IORING_REGISTER_BUFFERS_UPDATE:
> case IORING_REGISTER_PROBE:
> case IORING_REGISTER_PERSONALITY:
> case IORING_UNREGISTER_PERSONALITY:
> @@ -10036,6 +10170,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
> break;
> ret = io_sqe_buffers_unregister(ctx);
> break;
> + case IORING_REGISTER_BUFFERS_UPDATE:
> + ret = io_sqe_buffers_update(ctx, arg, nr_args);
> + break;
> case IORING_REGISTER_FILES:
> ret = io_sqe_files_register(ctx, arg, nr_args);
> break;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 87f0f56..17682b5 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -137,6 +137,7 @@ enum {
> IORING_OP_SHUTDOWN,
> IORING_OP_RENAMEAT,
> IORING_OP_UNLINKAT,
> + IORING_OP_BUFFERS_UPDATE,
>
> /* this goes last, obviously */
> IORING_OP_LAST,
> @@ -279,17 +280,12 @@ enum {
> IORING_UNREGISTER_PERSONALITY = 10,
> IORING_REGISTER_RESTRICTIONS = 11,
> IORING_REGISTER_ENABLE_RINGS = 12,
> + IORING_REGISTER_BUFFERS_UPDATE = 13,
>
> /* this goes last */
> IORING_REGISTER_LAST
> };
>
> -struct io_uring_files_update {
> - __u32 offset;
> - __u32 resv;
> - __aligned_u64 /* __s32 * */ fds;
> -};
> -
> struct io_uring_rsrc_update {
> __u32 offset;
> __u32 resv;
>
--
Pavel Begunkov
next prev parent reply other threads:[~2020-11-18 20:20 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-12 23:00 [PATCH 0/8] io_uring: buffer registration enhancements Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 1/8] io_uring: modularize io_sqe_buffer_register Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 2/8] io_uring: modularize io_sqe_buffers_register Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 3/8] io_uring: generalize fixed file functionality Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 4/8] io_uring: implement fixed buffers registration similar to fixed files Bijan Mottahedeh
2020-11-15 13:33 ` Pavel Begunkov
2020-11-16 21:24 ` Bijan Mottahedeh
2020-11-16 23:09 ` Pavel Begunkov
2020-11-17 0:41 ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 5/8] io_uring: generalize files_update functionality to rsrc_update Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 6/8] io_uring: support buffer registration updates Bijan Mottahedeh
2020-11-18 20:17 ` Pavel Begunkov [this message]
2020-12-09 0:42 ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 7/8] io_uring: support readv/writev with fixed buffers Bijan Mottahedeh
2020-11-17 11:04 ` Pavel Begunkov
2020-11-17 22:59 ` Bijan Mottahedeh
2020-11-18 9:14 ` Pavel Begunkov
2020-11-18 20:12 ` Pavel Begunkov
[not found] ` <[email protected]>
[not found] ` <[email protected]>
2020-11-19 19:27 ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 8/8] io_uring: support buffer registration sharing Bijan Mottahedeh
2020-11-16 23:28 ` [PATCH 0/8] io_uring: buffer registration enhancements Pavel Begunkov
2020-11-17 0:21 ` Bijan Mottahedeh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.