From: Kanchan Joshi <[email protected]>
To: [email protected], [email protected]
Cc: [email protected], [email protected],
	[email protected], [email protected], [email protected],
	[email protected], [email protected], [email protected],
	[email protected]
Subject: [RFC 4/5] io_uring: add support for big-cqe
Date: Fri,  1 Apr 2022 16:33:09 +0530
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

Add the IORING_SETUP_CQE32 flag to allow setting up a ring with big-cqes,
which are 32 bytes in size. Also modify the uring-cmd completion
infrastructure to accept an additional result and fill it in the big-cqe.
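
To illustrate the uapi side, here is a minimal userspace sketch against
the patched headers. setup_cqe32_ring() and get_big_cqe() are
illustrative helpers, not part of this series; ring mmap and memory
barrier handling are elided, and cq_entries is assumed to be the usual
power of two:

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int setup_cqe32_ring(unsigned int entries, struct io_uring_params *p)
{
	memset(p, 0, sizeof(*p));
	p->flags = IORING_SETUP_CQE32;	/* new flag added by this patch */
	return syscall(__NR_io_uring_setup, entries, p);
}

/*
 * cqes points at the mmap'ed CQ array. With IORING_SETUP_CQE32 each
 * logical CQE occupies two regular slots, so the slot index is doubled
 * and masked with 2 * cq_entries - 1, mirroring io_get_cqe() below.
 */
static struct io_uring_cqe32 *get_big_cqe(struct io_uring_cqe *cqes,
					  unsigned int head,
					  unsigned int cq_entries)
{
	unsigned int idx = (head << 1) & (2 * cq_entries - 1);

	return (struct io_uring_cqe32 *)&cqes[idx];
}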

Signed-off-by: Kanchan Joshi <[email protected]>
Signed-off-by: Anuj Gupta <[email protected]>
---
 fs/io_uring.c                 | 82 +++++++++++++++++++++++++++++------
 include/linux/io_uring.h      | 10 +++--
 include/uapi/linux/io_uring.h | 11 +++++
 3 files changed, 87 insertions(+), 16 deletions(-)
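
A note for consumers of io_uring_cmd: io_uring_cmd_done() now takes the
secondary result explicitly. A hypothetical completion path might look
like the sketch below; the actual nvme wiring is in patch 5, and the
foo_* names are illustrative only:

/* hypothetical driver completion helper; foo_* names are illustrative */
static void foo_uring_cmd_end_io(struct io_uring_cmd *ioucmd,
				 ssize_t status, ssize_t result)
{
	/*
	 * status lands in cqe->res as before; 'result' is stashed in
	 * req->uring_cmd.res2 and, on a ring set up with
	 * IORING_SETUP_CQE32, written to the big-cqe's res2 field.
	 */
	io_uring_cmd_done(ioucmd, status, result);
}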

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bd0e6b102a7b..b819c0ad47fc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -211,8 +211,8 @@ struct io_mapped_ubuf {
 struct io_ring_ctx;
 
 struct io_overflow_cqe {
-	struct io_uring_cqe cqe;
 	struct list_head list;
+	struct io_uring_cqe cqe; /* this must be kept at end */
 };
 
 struct io_fixed_file {
@@ -1713,6 +1713,13 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
 		return NULL;
 
 	tail = ctx->cached_cq_tail++;
+
+	/* double index for large CQE */
+	if (ctx->flags & IORING_SETUP_CQE32) {
+		mask = 2 * ctx->cq_entries - 1;
+		tail <<= 1;
+	}
+
 	return &rings->cqes[tail & mask];
 }
 
@@ -1792,13 +1799,16 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	while (!list_empty(&ctx->cq_overflow_list)) {
 		struct io_uring_cqe *cqe = io_get_cqe(ctx);
 		struct io_overflow_cqe *ocqe;
+		int cqeshift = 0;
 
 		if (!cqe && !force)
 			break;
+		/* copy more for big-cqe */
+		cqeshift = ctx->flags & IORING_SETUP_CQE32 ? 1 : 0;
 		ocqe = list_first_entry(&ctx->cq_overflow_list,
 					struct io_overflow_cqe, list);
 		if (cqe)
-			memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+			memcpy(cqe, &ocqe->cqe, sizeof(*cqe) << cqeshift);
 		else
 			io_account_cq_overflow(ctx);
 
@@ -1884,11 +1894,17 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
 }
 
 static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
-				     s32 res, u32 cflags)
+				     s32 res, u32 cflags, u64 res2,
+				     int bigcqe)
 {
 	struct io_overflow_cqe *ocqe;
+	int size = sizeof(*ocqe);
+
+	/* allocate more for big-cqe */
+	if (bigcqe)
+		size += sizeof(struct io_uring_cqe);
 
-	ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+	ocqe = kmalloc(size, GFP_ATOMIC | __GFP_ACCOUNT);
 	if (!ocqe) {
 		/*
 		 * If we're in ring overflow flush mode, or in task cancel mode,
@@ -1907,6 +1923,11 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	ocqe->cqe.user_data = user_data;
 	ocqe->cqe.res = res;
 	ocqe->cqe.flags = cflags;
+	if (bigcqe) {
+		struct io_uring_cqe32 *bcqe = (struct io_uring_cqe32 *)&ocqe->cqe;
+
+		bcqe->res2 = res2;
+	}
 	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
 	return true;
 }
@@ -1928,13 +1949,39 @@ static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 		WRITE_ONCE(cqe->flags, cflags);
 		return true;
 	}
-	return io_cqring_event_overflow(ctx, user_data, res, cflags);
+	return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, false);
 }
 
+static inline bool __fill_big_cqe(struct io_ring_ctx *ctx, u64 user_data,
+				 s32 res, u32 cflags, u64 res2)
+{
+	struct io_uring_cqe32 *bcqe;
+
+	/*
+	 * If we can't get a cq entry, userspace overflowed the
+	 * submission (by quite a lot). Increment the overflow count in
+	 * the ring.
+	 */
+	bcqe = (struct io_uring_cqe32 *) io_get_cqe(ctx);
+	if (likely(bcqe)) {
+		WRITE_ONCE(bcqe->cqe.user_data, user_data);
+		WRITE_ONCE(bcqe->cqe.res, res);
+		WRITE_ONCE(bcqe->cqe.flags, cflags);
+		WRITE_ONCE(bcqe->res2, res2);
+		return true;
+	}
+	return io_cqring_event_overflow(ctx, user_data, res, cflags, res2,
+		       true);
+}
+
 static inline bool __io_fill_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
-	return __fill_cqe(req->ctx, req->user_data, res, cflags);
+	if (!(req->ctx->flags & IORING_SETUP_CQE32))
+		return __fill_cqe(req->ctx, req->user_data, res, cflags);
+	else
+		return __fill_big_cqe(req->ctx, req->user_data, res, cflags,
+				req->uring_cmd.res2);
 }
 
 static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
@@ -4126,10 +4172,12 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
  * Called by consumers of io_uring_cmd, if they originally returned
  * -EIOCBQUEUED upon receiving the command.
  */
-void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret)
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
 {
 	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
 
+	/* store secondary result in res2 */
+	req->uring_cmd.res2 = res2;
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_complete(req, ret);
@@ -4163,7 +4211,7 @@ static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 	/* queued async, consumer will call io_uring_cmd_done() when complete */
 	if (ret == -EIOCBQUEUED)
 		return 0;
-	io_uring_cmd_done(ioucmd, ret);
+	io_uring_cmd_done(ioucmd, ret, 0);
 	return 0;
 }
 
@@ -9026,13 +9074,20 @@ static void *io_mem_alloc(size_t size)
 	return (void *) __get_free_pages(gfp_flags, get_order(size));
 }
 
-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
-				size_t *sq_offset)
+static unsigned long rings_size(struct io_uring_params *p,
+		size_t *sq_offset)
 {
+	unsigned sq_entries, cq_entries;
 	struct io_rings *rings;
 	size_t off, sq_array_size;
 
-	off = struct_size(rings, cqes, cq_entries);
+	sq_entries = p->sq_entries;
+	cq_entries = p->cq_entries;
+
+	if (p->flags & IORING_SETUP_CQE32)
+		off = struct_size(rings, cqes, 2 * cq_entries);
+	else
+		off = struct_size(rings, cqes, cq_entries);
 	if (off == SIZE_MAX)
 		return SIZE_MAX;
 
@@ -10483,7 +10538,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	ctx->sq_entries = p->sq_entries;
 	ctx->cq_entries = p->cq_entries;
 
-	size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+	size = rings_size(p, &sq_array_offset);
 	if (size == SIZE_MAX)
 		return -EOVERFLOW;
 
@@ -10713,7 +10768,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
 			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
-			IORING_SETUP_R_DISABLED | IORING_SETUP_SQE128))
+			IORING_SETUP_R_DISABLED | IORING_SETUP_SQE128 |
+			IORING_SETUP_CQE32))
 		return -EINVAL;
 
 	return  io_uring_create(entries, &p, params);
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index cedc68201469..0aba7b50cde6 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -14,7 +14,10 @@ enum io_uring_cmd_flags {
 
 struct io_uring_cmd {
 	struct file     *file;
-	void            *cmd;
+	union {
+		void            *cmd; /* used on submission */
+		u64		res2; /* used on completion */
+	};
 	/* for irq-completion - if driver requires doing stuff in task-context*/
 	void (*driver_cb)(struct io_uring_cmd *cmd);
 	u32             flags;
@@ -25,7 +28,7 @@ struct io_uring_cmd {
 };
 
 #if defined(CONFIG_IO_URING)
-void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret);
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
 void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
 			void (*driver_cb)(struct io_uring_cmd *));
 struct sock *io_uring_get_socket(struct file *file);
@@ -48,7 +51,8 @@ static inline void io_uring_free(struct task_struct *tsk)
 		__io_uring_free(tsk);
 }
 #else
-static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret)
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+		ssize_t ret2)
 {
 }
 static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d7a4bdb9bf3b..85b8ff046496 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -113,6 +113,7 @@ enum {
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
 #define IORING_SETUP_SQE128	(1U << 7)	/* SQEs are 128b */
+#define IORING_SETUP_CQE32	(1U << 8)	/* CQEs are 32b */
 
 enum {
 	IORING_OP_NOP,
@@ -207,6 +208,16 @@ struct io_uring_cqe {
 	__u32	flags;
 };
 
+/*
+ * If the ring is initialized with IORING_SETUP_CQE32, we set up large CQEs.
+ * A large CQE is formed by combining two adjacent regular CQEs.
+ */
+struct io_uring_cqe32 {
+	struct io_uring_cqe	cqe;
+	__u64	res2;
+	__u64	unused;
+};
+
 /*
  * cqe->flags
  *
-- 
2.25.1

