From: Pavel Begunkov <[email protected]>
To: Ming Lei <[email protected]>, Jens Axboe <[email protected]>
Cc: [email protected], [email protected],
Uday Shankar <[email protected]>,
Akilesh Kailash <[email protected]>
Subject: Re: (subset) [PATCH V10 0/12] io_uring: support group buffer & ublk zc
Date: Wed, 13 Nov 2024 13:43:23 +0000 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <ZzKm_lN_1U_u6St7@fedora>
[-- Attachment #1: Type: text/plain, Size: 1097 bytes --]
On 11/12/24 00:53, Ming Lei wrote:
> On Thu, Nov 07, 2024 at 03:25:59PM -0700, Jens Axboe wrote:
>> On 11/7/24 3:25 PM, Jens Axboe wrote:
...
> Hi Jens,
>
> Any comment on the rest of the series?
Ming, it's dragging on because it's over complicated. I very much want
it to get to some conclusion, get it merged and move on, and I strongly
believe Jens shares the sentiment on getting the thing done.
Please, take the patches attached, adjust them to your needs and put
ublk on top. Or tell if there is a strong reason why it doesn't work.
The implementation is very simple and doesn't need almost anything
from io_uring, it's low risk and we can merge in no time.
If you can't cache the allocation in ublk, io_uring can add a cache.
If ublk needs more space and cannot embed the structure, we can add
a "private" pointer into io_mapped_ubuf. If it needs to check the IO
direction, we can add that as well (though I have doubts you really need
it, read-only might makes sense, write-only not so much). We'll also
merge Jens' patch allowing to remove a buffer with a request.
--
Pavel Begunkov
[-- Attachment #2: io_uring-leased-buffers.patch --]
[-- Type: text/x-patch, Size: 6484 bytes --]
From 78a9c8a3b9d59e7465d6c158283a531a221fa3b2 Mon Sep 17 00:00:00 2001
Date: Tue, 12 Nov 2024 22:58:18 +0000
Subject: [PATCH 1/4] io_uring: export io_mapped_ubuf definition
---
include/linux/io_uring/kbuf.h | 19 +++++++++++++++++++
io_uring/rsrc.h | 12 ++----------
2 files changed, 21 insertions(+), 10 deletions(-)
create mode 100644 include/linux/io_uring/kbuf.h
diff --git a/include/linux/io_uring/kbuf.h b/include/linux/io_uring/kbuf.h
new file mode 100644
index 000000000000..a32578df3d8e
--- /dev/null
+++ b/include/linux/io_uring/kbuf.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_IO_URING_KBUF_H
+#define _LINUX_IO_URING_KBUF_H
+
+#include <uapi/linux/io_uring.h>
+#include <linux/io_uring_types.h>
+#include <linux/bvec.h>
+
+struct io_mapped_ubuf {
+ u64 ubuf;
+ unsigned int len;
+ unsigned int nr_bvecs;
+ unsigned int folio_shift;
+ refcount_t refs;
+ unsigned long acct_pages;
+ struct bio_vec bvec[] __counted_by(nr_bvecs);
+};
+
+#endif
\ No newline at end of file
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 7a4668deaa1a..885ccecade08 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -2,6 +2,8 @@
#ifndef IOU_RSRC_H
#define IOU_RSRC_H
+#include <linux/io_uring/kbuf.h>
+
#define IO_NODE_ALLOC_CACHE_MAX 32
#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
@@ -24,16 +26,6 @@ struct io_rsrc_node {
};
};
-struct io_mapped_ubuf {
- u64 ubuf;
- unsigned int len;
- unsigned int nr_bvecs;
- unsigned int folio_shift;
- refcount_t refs;
- unsigned long acct_pages;
- struct bio_vec bvec[] __counted_by(nr_bvecs);
-};
-
struct io_imu_folio_data {
/* Head folio can be partially included in the fixed buf */
unsigned int nr_pages_head;
--
2.46.0
From 6839ca1ca94a89ec11362f32af22e2c0cfdfaa81 Mon Sep 17 00:00:00 2001
Date: Tue, 12 Nov 2024 23:12:15 +0000
Subject: [PATCH 2/4] io_uring: add io_mapped_ubuf release callback
---
include/linux/io_uring/kbuf.h | 10 ++++++++++
io_uring/rsrc.c | 6 +++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/include/linux/io_uring/kbuf.h b/include/linux/io_uring/kbuf.h
index a32578df3d8e..aa3eeaa1ac25 100644
--- a/include/linux/io_uring/kbuf.h
+++ b/include/linux/io_uring/kbuf.h
@@ -13,7 +13,17 @@ struct io_mapped_ubuf {
unsigned int folio_shift;
refcount_t refs;
unsigned long acct_pages;
+ void (*release)(struct io_mapped_ubuf *);
struct bio_vec bvec[] __counted_by(nr_bvecs);
};
+static inline void iou_init_kbuf(struct io_mapped_ubuf *buf,
+ void (*release)(struct io_mapped_ubuf *))
+{
+ refcount_set(&buf->refs, 1);
+ buf->acct_pages = 0;
+ buf->ubuf = 0;
+ buf->release = release;
+}
+
#endif
\ No newline at end of file
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index adaae8630932..84ea5a480058 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -110,6 +110,10 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
if (!refcount_dec_and_test(&imu->refs))
return;
+ if (imu->release) {
+ imu->release(imu);
+ return;
+ }
for (i = 0; i < imu->nr_bvecs; i++)
unpin_user_page(imu->bvec[i].bv_page);
if (imu->acct_pages)
@@ -762,6 +766,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
}
size = iov->iov_len;
+ iou_init_kbuf(imu, NULL);
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->len = iov->iov_len;
@@ -769,7 +774,6 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
imu->folio_shift = PAGE_SHIFT;
if (coalesced)
imu->folio_shift = data.folio_shift;
- refcount_set(&imu->refs, 1);
off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
node->buf = imu;
ret = 0;
--
2.46.0
From 55b56ed8c5cb58727c7daabd1e56e6f194749e7f Mon Sep 17 00:00:00 2001
Date: Tue, 12 Nov 2024 23:33:24 +0000
Subject: [PATCH 3/4] io_uring: add a helper for leasing a buffer
---
include/linux/io_uring/kbuf.h | 23 +++++++++++++++++++++++
io_uring/rsrc.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/include/linux/io_uring/kbuf.h b/include/linux/io_uring/kbuf.h
index aa3eeaa1ac25..91cfcdc685cc 100644
--- a/include/linux/io_uring/kbuf.h
+++ b/include/linux/io_uring/kbuf.h
@@ -4,6 +4,7 @@
#include <uapi/linux/io_uring.h>
#include <linux/io_uring_types.h>
+#include <linux/io_uring/cmd.h>
#include <linux/bvec.h>
struct io_mapped_ubuf {
@@ -26,4 +27,26 @@ static inline void iou_init_kbuf(struct io_mapped_ubuf *buf,
buf->release = release;
}
+#if defined(CONFIG_IO_URING)
+int iou_export_kbuf(struct io_ring_ctx *ctx, unsigned issue_flags,
+ struct io_mapped_ubuf *buf, unsigned index);
+#else
+static inline int iou_export_kbuf(struct io_ring_ctx *ctx,
+ unsigned issue_flags,
+ struct io_mapped_ubuf *buf, unsigned index)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+static inline int io_uring_cmd_export_kbuf(struct io_uring_cmd *cmd,
+ unsigned issue_flags,
+ struct io_mapped_ubuf *buf,
+ unsigned index)
+{
+ struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
+
+ return iou_export_kbuf(ctx, issue_flags, buf, index);
+}
+
#endif
\ No newline at end of file
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 84ea5a480058..07842a6a8020 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -797,6 +797,38 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
return node;
}
+static int __iou_export_kbuf(struct io_ring_ctx *ctx, unsigned issue_flags,
+ struct io_mapped_ubuf *buf, unsigned idx)
+{
+ struct io_rsrc_node *node;
+
+ if (unlikely(idx >= ctx->buf_table.nr)) {
+ if (!ctx->buf_table.nr)
+ return -ENXIO;
+ return -EINVAL;
+ }
+ idx = array_index_nospec(idx, ctx->buf_table.nr);
+
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
+ if (!node)
+ return -ENOMEM;
+ node->buf = buf;
+ io_reset_rsrc_node(ctx, &ctx->buf_table, idx);
+ ctx->buf_table.nodes[idx] = node;
+ return 0;
+}
+
+int iou_export_kbuf(struct io_ring_ctx *ctx, unsigned issue_flags,
+ struct io_mapped_ubuf *buf, unsigned idx)
+{
+ int ret;
+
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = __iou_export_kbuf(ctx, issue_flags, buf, idx);
+ io_ring_submit_unlock(ctx, issue_flags);
+ return ret;
+}
+
int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned int nr_args, u64 __user *tags)
{
--
2.46.0
[-- Attachment #3: leased-buffer-test.c --]
[-- Type: text/x-csrc, Size: 2124 bytes --]
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>
#include "liburing.h"
#include "test.h"
#define BUF_SIZE 4096
static char buf[BUF_SIZE];
static char buf_tmp[BUF_SIZE];
static void io_uring_prep_my_cmd_lease(struct io_uring_sqe *sqe)
{
io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, 0, 0, 0);
/* TODO */
...
}
static void do_submit_wait1(struct io_uring *ring)
{
struct io_uring_cqe *cqe;
int ret;
ret = io_uring_submit(ring);
assert(ret == 1);
ret = io_uring_wait_cqe(ring, &cqe);
assert(ret >= 0);
printf("cqe data %i res %i\n", (int)cqe->user_data, cqe->res);
io_uring_cqe_seen(ring, cqe);
}
int main(int argc, char *argv[])
{
unsigned long buf_offset = 0;
unsigned slot = 0; /* reg buffer table index */
struct io_uring_sqe *sqe;
int pipe1[2], pipe2[2];
struct io_uring ring;
int ret, i;
for (i = 0; i < BUF_SIZE; i++)
buf[i] = (char)i;
ret = pipe(pipe1);
assert(ret == 0);
ret = pipe(pipe2);
assert(ret == 0);
ret = io_uring_queue_init(8, &ring, 0);
assert(ret == 0);
ret = io_uring_register_buffers_sparse(&ring, 16);
assert(ret >= 0);
ret = write(pipe1[1], buf, BUF_SIZE);
assert(ret == BUF_SIZE);
sqe = io_uring_get_sqe(&ring);
io_uring_prep_my_cmd_lease(sqe, bdev_fd, slot);
sqe->user_data = 1;
do_submit_wait1(&ring);
// read data into the leased buffer
sqe = io_uring_get_sqe(&ring);
io_uring_prep_read_fixed(sqe, pipe1[0], (void *)buf_offset, BUF_SIZE, 0, slot);
sqe->user_data = 2;
do_submit_wait1(&ring);
// write from the leased buffer into a pipe
sqe = io_uring_get_sqe(&ring);
io_uring_prep_write_fixed(sqe, pipe2[1], (void *)buf_offset, BUF_SIZE, 0, slot);
sqe->user_data = 3;
do_submit_wait1(&ring);
// check the right data is in the pipe
ret = read(pipe2[0], buf_tmp, BUF_SIZE);
assert(ret == BUF_SIZE);
for (i = 0; i < BUF_SIZE; i++) {
assert(buf[i] == buf_tmp[i]);
}
struct iovec iovec = {};
ret = io_uring_register_buffers_update_tag(&ring, 0, &iovec, NULL, 1);
assert(ret >= 0);
io_uring_queue_exit(&ring);
return 0;
}
next prev parent reply other threads:[~2024-11-13 13:42 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-07 11:01 [PATCH V10 0/12] io_uring: support group buffer & ublk zc Ming Lei
2024-11-07 11:01 ` [PATCH V10 01/12] io_uring/rsrc: pass 'struct io_ring_ctx' reference to rsrc helpers Ming Lei
2024-11-07 11:01 ` [PATCH V10 02/12] io_uring/rsrc: remove '->ctx_ptr' of 'struct io_rsrc_node' Ming Lei
2024-11-07 11:01 ` [PATCH V10 03/12] io_uring/rsrc: add & apply io_req_assign_buf_node() Ming Lei
2024-11-07 11:01 ` [PATCH V10 04/12] io_uring/rsrc: prepare for supporting external 'io_rsrc_node' Ming Lei
2024-11-07 11:01 ` [PATCH V10 05/12] io_uring: rename io_mapped_ubuf as io_mapped_buf Ming Lei
2024-11-07 11:01 ` [PATCH V10 06/12] io_uring: rename io_mapped_buf->ubuf as io_mapped_buf->addr Ming Lei
2024-11-07 11:01 ` [PATCH V10 07/12] io_uring: shrink io_mapped_buf Ming Lei
2024-11-07 11:01 ` [PATCH V10 08/12] io_uring: reuse io_mapped_buf for kernel buffer Ming Lei
2024-11-07 11:01 ` [PATCH V10 09/12] io_uring: add callback to 'io_mapped_buffer' for giving back " Ming Lei
2024-11-07 11:01 ` [PATCH V10 10/12] io_uring: support leased group buffer with REQ_F_GROUP_BUF Ming Lei
2024-11-07 11:01 ` [PATCH V10 11/12] io_uring/uring_cmd: support leasing device kernel buffer to io_uring Ming Lei
2024-11-07 18:27 ` kernel test robot
2024-11-07 19:30 ` kernel test robot
2024-11-08 0:59 ` Ming Lei
2024-11-07 11:01 ` [PATCH V10 12/12] ublk: support leasing io " Ming Lei
2024-11-07 22:25 ` (subset) [PATCH V10 0/12] io_uring: support group buffer & ublk zc Jens Axboe
2024-11-07 22:25 ` Jens Axboe
2024-11-12 0:53 ` Ming Lei
2024-11-13 13:43 ` Pavel Begunkov [this message]
2024-11-13 14:56 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox