From: Kanchan Joshi <[email protected]>
To: [email protected], [email protected]
Cc: [email protected], [email protected],
[email protected], [email protected], [email protected],
[email protected], [email protected], [email protected],
[email protected]
Subject: [RFC 4/5] io_uring: add support for big-cqe
Date: Fri, 1 Apr 2022 16:33:09 +0530
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
Add the IORING_SETUP_CQE32 flag to allow setting up a ring with big CQEs,
which are 32 bytes in size. Also modify the uring-cmd completion
infrastructure to accept an additional result and fill it into the big CQE.
Signed-off-by: Kanchan Joshi <[email protected]>
Signed-off-by: Anuj Gupta <[email protected]>
---
fs/io_uring.c | 82 +++++++++++++++++++++++++++++------
include/linux/io_uring.h | 10 +++--
include/uapi/linux/io_uring.h | 11 +++++
3 files changed, 87 insertions(+), 16 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bd0e6b102a7b..b819c0ad47fc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -211,8 +211,8 @@ struct io_mapped_ubuf {
struct io_ring_ctx;
struct io_overflow_cqe {
- struct io_uring_cqe cqe;
struct list_head list;
+ struct io_uring_cqe cqe; /* must be kept at the end */
};
struct io_fixed_file {
@@ -1713,6 +1713,13 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
return NULL;
tail = ctx->cached_cq_tail++;
+
+ /* double index for large CQE */
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ mask = 2 * ctx->cq_entries - 1;
+ tail <<= 1;
+ }
+
return &rings->cqes[tail & mask];
}
@@ -1792,13 +1799,16 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
while (!list_empty(&ctx->cq_overflow_list)) {
struct io_uring_cqe *cqe = io_get_cqe(ctx);
struct io_overflow_cqe *ocqe;
+ int cqeshift = 0;
if (!cqe && !force)
break;
+ /* copy more for big-cqe */
+ cqeshift = ctx->flags & IORING_SETUP_CQE32 ? 1 : 0;
ocqe = list_first_entry(&ctx->cq_overflow_list,
struct io_overflow_cqe, list);
if (cqe)
- memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+ memcpy(cqe, &ocqe->cqe, sizeof(*cqe) << cqeshift);
else
io_account_cq_overflow(ctx);
@@ -1884,11 +1894,17 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
}
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+ s32 res, u32 cflags, u64 res2,
+ int bigcqe)
{
struct io_overflow_cqe *ocqe;
+ int size = sizeof(*ocqe);
+
+ /* allocate more for big-cqe */
+ if (bigcqe)
+ size += sizeof(struct io_uring_cqe);
- ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+ ocqe = kmalloc(size, GFP_ATOMIC | __GFP_ACCOUNT);
if (!ocqe) {
/*
* If we're in ring overflow flush mode, or in task cancel mode,
@@ -1907,6 +1923,11 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
ocqe->cqe.user_data = user_data;
ocqe->cqe.res = res;
ocqe->cqe.flags = cflags;
+ if (bigcqe) {
+ struct io_uring_cqe32 *bcqe = (struct io_uring_cqe32 *)&ocqe->cqe;
+
+ bcqe->res2 = res2;
+ }
list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
return true;
}
@@ -1928,13 +1949,38 @@ static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
WRITE_ONCE(cqe->flags, cflags);
return true;
}
- return io_cqring_event_overflow(ctx, user_data, res, cflags);
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, false);
}
+static inline bool __fill_big_cqe(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags, u64 res2)
+{
+ struct io_uring_cqe32 *bcqe;
+
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ bcqe = (struct io_uring_cqe32 *) io_get_cqe(ctx);
+ if (likely(bcqe)) {
+ WRITE_ONCE(bcqe->cqe.user_data, user_data);
+ WRITE_ONCE(bcqe->cqe.res, res);
+ WRITE_ONCE(bcqe->cqe.flags, cflags);
+ WRITE_ONCE(bcqe->res2, res2);
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, res2,
+ true);
+}
static inline bool __io_fill_cqe(struct io_kiocb *req, s32 res, u32 cflags)
{
trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
- return __fill_cqe(req->ctx, req->user_data, res, cflags);
+ if (!(req->ctx->flags & IORING_SETUP_CQE32))
+ return __fill_cqe(req->ctx, req->user_data, res, cflags);
+ else
+ return __fill_big_cqe(req->ctx, req->user_data, res, cflags,
+ req->uring_cmd.res2);
}
static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
@@ -4126,10 +4172,12 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
*/
-void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret)
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
{
struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+ /* store secondary result in res2 */
+ req->uring_cmd.res2 = res2;
if (ret < 0)
req_set_fail(req);
io_req_complete(req, ret);
@@ -4163,7 +4211,7 @@ static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
/* queued async, consumer will call io_uring_cmd_done() when complete */
if (ret == -EIOCBQUEUED)
return 0;
- io_uring_cmd_done(ioucmd, ret);
+ io_uring_cmd_done(ioucmd, ret, 0);
return 0;
}
@@ -9026,13 +9074,20 @@ static void *io_mem_alloc(size_t size)
return (void *) __get_free_pages(gfp_flags, get_order(size));
}
-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
- size_t *sq_offset)
+static unsigned long rings_size(struct io_uring_params *p,
+ size_t *sq_offset)
{
+ unsigned sq_entries, cq_entries;
struct io_rings *rings;
size_t off, sq_array_size;
- off = struct_size(rings, cqes, cq_entries);
+ sq_entries = p->sq_entries;
+ cq_entries = p->cq_entries;
+
+ if (p->flags & IORING_SETUP_CQE32)
+ off = struct_size(rings, cqes, 2 * cq_entries);
+ else
+ off = struct_size(rings, cqes, cq_entries);
if (off == SIZE_MAX)
return SIZE_MAX;
@@ -10483,7 +10538,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
ctx->sq_entries = p->sq_entries;
ctx->cq_entries = p->cq_entries;
- size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+ size = rings_size(p, &sq_array_offset);
if (size == SIZE_MAX)
return -EOVERFLOW;
@@ -10713,7 +10768,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
- IORING_SETUP_R_DISABLED | IORING_SETUP_SQE128))
+ IORING_SETUP_R_DISABLED | IORING_SETUP_SQE128 |
+ IORING_SETUP_CQE32))
return -EINVAL;
return io_uring_create(entries, &p, params);
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index cedc68201469..0aba7b50cde6 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -14,7 +14,10 @@ enum io_uring_cmd_flags {
struct io_uring_cmd {
struct file *file;
- void *cmd;
+ union {
+ void *cmd; /* used on submission */
+ u64 res2; /* used on completion */
+ };
/* for irq-completion - if driver requires doing stuff in task-context*/
void (*driver_cb)(struct io_uring_cmd *cmd);
u32 flags;
@@ -25,7 +28,7 @@ struct io_uring_cmd {
};
#if defined(CONFIG_IO_URING)
-void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret);
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*driver_cb)(struct io_uring_cmd *));
struct sock *io_uring_get_socket(struct file *file);
@@ -48,7 +51,8 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk);
}
#else
-static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret)
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+ ssize_t ret2)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d7a4bdb9bf3b..85b8ff046496 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -113,6 +113,7 @@ enum {
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
#define IORING_SETUP_SQE128 (1U << 7) /* SQEs are 128b */
+#define IORING_SETUP_CQE32 (1U << 8) /* CQEs are 32b */
enum {
IORING_OP_NOP,
@@ -207,6 +208,16 @@ struct io_uring_cqe {
__u32 flags;
};
+/*
+ * If the ring is initialized with IORING_SETUP_CQE32, we set up large CQEs.
+ * A large CQE is created by combining two adjacent regular CQEs.
+ */
+struct io_uring_cqe32 {
+ struct io_uring_cqe cqe;
+ __u64 res2;
+ __u64 unused;
+};
+
/*
* cqe->flags
*
--
2.25.1