From: Jens Axboe <[email protected]>
To: [email protected]
Cc: [email protected], Jens Axboe <[email protected]>
Subject: [PATCH 2/9] io_uring: add IORING_OP_PROVIDE_BUFFERS
Date: Tue, 10 Mar 2020 09:04:19 -0600
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
IORING_OP_PROVIDE_BUFFERS uses the buffer registration infrastructure to
support passing in an addr/len pair that is associated with a buffer ID and
buffer group ID. The group ID is used to index and look up the buffers,
while the buffer ID can be used to notify the application which buffer
in the group was used. The addr passed in is the starting buffer address,
and len is the length of each buffer. A number of buffers to add can be
specified, in which case addr is incremented by len for each addition,
and the buffer ID is incremented by one for each buffer.
No validation is done of the buffer ID. If the application provides
buffers within the same group with identical buffer IDs, then it'll have
a hard time telling which buffer was used. The only restriction is that
the buffer ID is at most 16 bits in size, so USHRT_MAX is the maximum ID
that can be used.
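
As a rough userspace sketch (not part of this patch; it assumes liburing's
io_uring_get_sqe() for SQE acquisition, an initialized ring, and a
hypothetical bufs pointer to an 8 * 4096 byte region), providing eight 4KB
buffers to group 1 with buffer IDs starting at 0 maps to the SQE fields
like this, followed by the usual io_uring_submit():

	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_PROVIDE_BUFFERS;
	sqe->fd = 8;                      /* number of buffers to add (nbufs) */
	sqe->addr = (unsigned long) bufs; /* start of the buffer region */
	sqe->len = 4096;                  /* length of each buffer */
	sqe->buf_group = 1;               /* buffer group ID (bgid) */
	sqe->off = 0;                     /* first buffer ID (bid) */

On completion, the buffers [bufs, bufs + 4096), [bufs + 4096, bufs + 8192),
and so on carry IDs 0 through 7 in group 1.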
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 128 +++++++++++++++++++++++++++++++++-
include/uapi/linux/io_uring.h | 10 ++-
2 files changed, 135 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1f3ae208f6a6..675b33cf855a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -447,6 +447,15 @@ struct io_splice {
unsigned int flags;
};
+struct io_provide_buf {
+ struct file *file;
+ __u64 addr;
+ __s32 len;
+ __u32 bgid;
+ __u16 nbufs;
+ __u16 bid;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -572,6 +581,7 @@ struct io_kiocb {
struct io_madvise madvise;
struct io_epoll epoll;
struct io_splice splice;
+ struct io_provide_buf pbuf;
};
struct io_async_ctx *io;
@@ -799,7 +809,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
- }
+ },
+ [IORING_OP_PROVIDE_BUFFERS] = {},
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2785,6 +2796,110 @@ static int io_openat(struct io_kiocb *req, bool force_nonblock)
return io_openat2(req, force_nonblock);
}
+static int io_provide_buffers_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ u64 tmp;
+
+ if (sqe->ioprio || sqe->rw_flags)
+ return -EINVAL;
+
+ tmp = READ_ONCE(sqe->fd);
+ if (!tmp || tmp > USHRT_MAX)
+ return -E2BIG;
+ p->nbufs = tmp;
+ p->addr = READ_ONCE(sqe->addr);
+ p->len = READ_ONCE(sqe->len);
+
+ if (!access_ok(u64_to_user_ptr(p->addr), p->len))
+ return -EFAULT;
+
+ p->bgid = READ_ONCE(sqe->buf_group);
+ tmp = READ_ONCE(sqe->off);
+ if (tmp > USHRT_MAX)
+ return -E2BIG;
+ p->bid = tmp;
+ return 0;
+}
+
+static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+{
+ struct io_buffer *buf;
+ u64 addr = pbuf->addr;
+ int i, bid = pbuf->bid;
+
+ for (i = 0; i < pbuf->nbufs; i++) {
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ break;
+
+ buf->addr = addr;
+ buf->len = pbuf->len;
+ buf->bid = bid;
+ addr += pbuf->len;
+ bid++;
+ if (!*head) {
+ INIT_LIST_HEAD(&buf->list);
+ *head = buf;
+ } else {
+ list_add_tail(&buf->list, &(*head)->list);
+ }
+ }
+
+ return i ? i : -ENOMEM;
+}
+
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer *head, *list;
+ int ret = 0;
+
+ /*
+ * "Normal" inline submissions always hold the uring_lock, since we
+ * grab it from the system call. Same is true for the SQPOLL offload.
+ * The only exception is when we've detached the request and issue it
+ * from an async worker thread, grab the lock for that case.
+ */
+ if (!force_nonblock)
+ mutex_lock(&ctx->uring_lock);
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+
+ ret = io_add_buffers(p, &head);
+ if (ret < 0)
+ goto out;
+
+ if (!list) {
+ ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
+ GFP_KERNEL);
+ if (ret < 0) {
+ while (!list_empty(&head->list)) {
+ struct io_buffer *buf;
+
+ buf = list_first_entry(&head->list,
+ struct io_buffer, list);
+ list_del(&buf->list);
+ kfree(buf);
+ }
+ kfree(head);
+ goto out;
+ }
+ }
+out:
+ if (!force_nonblock)
+ mutex_unlock(&ctx->uring_lock);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req(req);
+ return 0;
+}
+
static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -4392,6 +4507,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_SPLICE:
ret = io_splice_prep(req, sqe);
break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ ret = io_provide_buffers_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4669,6 +4787,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_splice(req, force_nonblock);
break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ if (sqe) {
+ ret = io_provide_buffers_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_provide_buffers(req, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 53b36311cdac..bc34a57a660b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -45,8 +45,13 @@ struct io_uring_sqe {
__u64 user_data; /* data to be passed back at completion time */
union {
struct {
- /* index into fixed buffers, if used */
- __u16 buf_index;
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
/* personality to use, if used */
__u16 personality;
__s32 splice_fd_in;
@@ -119,6 +124,7 @@ enum {
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
/* this goes last, obviously */
IORING_OP_LAST,
--
2.25.1
Thread overview: 11+ messages
2020-03-10 15:04 [PATCHSET v4] Support for automatic buffer selection Jens Axboe
2020-03-10 15:04 ` [PATCH 1/9] io_uring: buffer registration infrastructure Jens Axboe
2020-03-10 15:04 ` Jens Axboe [this message]
2020-03-10 15:19 ` [PATCH 2/9] io_uring: add IORING_OP_PROVIDE_BUFFERS Jens Axboe
2020-03-10 15:04 ` [PATCH 3/9] io_uring: support buffer selection for OP_READ and OP_RECV Jens Axboe
2020-03-10 15:04 ` [PATCH 4/9] io_uring: add IOSQE_BUFFER_SELECT support for IORING_OP_READV Jens Axboe
2020-03-10 15:04 ` [PATCH 5/9] net: abstract out normal and compat msghdr import Jens Axboe
2020-03-10 15:04 ` [PATCH 6/9] io_uring: add IOSQE_BUFFER_SELECT support for IORING_OP_RECVMSG Jens Axboe
2020-03-10 15:04 ` [PATCH 7/9] io_uring: provide means of removing buffers Jens Axboe
2020-03-10 15:04 ` [PATCH 8/9] io_uring: add end-of-bits marker and build time verify it Jens Axboe
2020-03-10 15:04 ` [PATCH 9/9] io_uring: Fix unused function warnings Jens Axboe