public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCHv5 2/4] Add support IORING_SETUP_SQE_MIXED
  2025-10-13 18:00 [PATCHv5 0/4] liburing: support for mix sized sqe's Keith Busch
@ 2025-10-13 18:00 ` Keith Busch
  0 siblings, 0 replies; 3+ messages in thread
From: Keith Busch @ 2025-10-13 18:00 UTC (permalink / raw)
  To: io-uring, axboe, csander; +Cc: Keith Busch

From: Keith Busch <kbusch@kernel.org>

This adds core support for mixed sized SQEs in the same SQ ring. Before
this, SQEs were either 64b in size (the normal size), or 128b if
IORING_SETUP_SQE128 was set in the ring initialization. With the mixed
support, an SQE may be either 64b or 128b on the same SQ ring. If the
SQE is 128b in size, then a 128b opcode will be set in the sqe op. When
acquiring a large sqe at the end of the sq, the client may post a NOP
SQE with IOSQE_CQE_SKIP_SUCCESS set that the kernel will process and
skip posting a CQE.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 src/include/liburing.h          | 71 +++++++++++++++++++++++++++++++--
 src/include/liburing/io_uring.h |  8 ++++
 2 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/src/include/liburing.h b/src/include/liburing.h
index f7af20aa..d6a45cbb 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -800,6 +800,12 @@ IOURINGINLINE void io_uring_prep_nop(struct io_uring_sqe *sqe)
 	io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
 }
 
+IOURINGINLINE void io_uring_prep_nop128(struct io_uring_sqe *sqe)
+	LIBURING_NOEXCEPT
+{
+	io_uring_prep_rw(IORING_OP_NOP128, sqe, -1, NULL, 0, 0);
+}
+
 IOURINGINLINE void io_uring_prep_timeout(struct io_uring_sqe *sqe,
 					 const struct __kernel_timespec *ts,
 					 unsigned count, unsigned flags)
@@ -1517,12 +1523,13 @@ IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
 }
 
-IOURINGINLINE void io_uring_prep_uring_cmd(struct io_uring_sqe *sqe,
-					   int cmd_op,
-					   int fd)
+IOURINGINLINE void __io_uring_prep_uring_cmd(struct io_uring_sqe *sqe,
+					     int op,
+					     int cmd_op,
+					     int fd)
 	LIBURING_NOEXCEPT
 {
-	sqe->opcode = (__u8) IORING_OP_URING_CMD;
+	sqe->opcode = (__u8) op;
 	sqe->fd = fd;
 	sqe->cmd_op = cmd_op;
 	sqe->__pad1 = 0;
@@ -1530,6 +1537,22 @@ IOURINGINLINE void io_uring_prep_uring_cmd(struct io_uring_sqe *sqe,
 	sqe->len = 0;
 }
 
+IOURINGINLINE void io_uring_prep_uring_cmd(struct io_uring_sqe *sqe,
+					   int cmd_op,
+					   int fd)
+	LIBURING_NOEXCEPT
+{
+	__io_uring_prep_uring_cmd(sqe, IORING_OP_URING_CMD, cmd_op, fd);
+}
+
+IOURINGINLINE void io_uring_prep_uring_cmd128(struct io_uring_sqe *sqe,
+					      int cmd_op,
+					      int fd)
+	LIBURING_NOEXCEPT
+{
+	__io_uring_prep_uring_cmd(sqe, IORING_OP_URING_CMD128, cmd_op, fd);
+}
+
 /*
  * Prepare commands for sockets
  */
@@ -1894,6 +1917,46 @@ IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
 	return sqe;
 }
 
+/*
+ * Return a 128B sqe to fill. Applications must later call io_uring_submit()
+ * when it's ready to tell the kernel about it. The caller may call this
+ * function multiple times before calling io_uring_submit().
+ *
+ * Returns a vacant 128B sqe, or NULL if we're full. If the current tail is the
+ * last entry in the ring, this function will insert a nop + skip complete such
+ * that the 128b entry wraps back to the beginning of the queue for a
+ * contiguous big sq entry. It's up to the caller to use a 128b opcode in order
+ * for the kernel to know how to advance its sq head pointer.
+ */
+IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe128_mixed(struct io_uring *ring)
+	LIBURING_NOEXCEPT
+{
+	struct io_uring_sq *sq = &ring->sq;
+	unsigned head = io_uring_load_sq_head(ring), tail = sq->sqe_tail;
+	struct io_uring_sqe *sqe;
+
+	if (!(ring->flags & IORING_SETUP_SQE_MIXED))
+		return NULL;
+
+	if (((tail + 1) & sq->ring_mask) == 0) {
+		if ((tail + 2) - head >= sq->ring_entries)
+			return NULL;
+
+		sqe = _io_uring_get_sqe(ring);
+		io_uring_prep_nop(sqe);
+		sqe->flags |= IOSQE_CQE_SKIP_SUCCESS;
+		tail = sq->sqe_tail;
+	} else if ((tail + 1) - head >= sq->ring_entries) {
+		return NULL;
+	}
+
+	sqe = &sq->sqes[tail & sq->ring_mask];
+	sq->sqe_tail = tail + 2;
+	io_uring_initialize_sqe(sqe);
+
+	return sqe;
+}
+
 /*
  * Return the appropriate mask for a buffer ring of size 'ring_entries'
  */
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 31396057..f2388645 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -211,6 +211,12 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_CQE_MIXED		(1U << 18)
 
+/*
+ *  Allow both 64b and 128b SQEs. If a 128b SQE is posted, it will have
+ *  IOSQE_SQE_128B set in sqe->flags.
+ */
+#define IORING_SETUP_SQE_MIXED		(1U << 19)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
@@ -275,6 +281,8 @@ enum io_uring_op {
 	IORING_OP_READV_FIXED,
 	IORING_OP_WRITEV_FIXED,
 	IORING_OP_PIPE,
+	IORING_OP_NOP128,
+	IORING_OP_URING_CMD128,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCHv5 2/4] Add support IORING_SETUP_SQE_MIXED
       [not found] ` <20251021213329.784558-3-kbusch@meta.com>
@ 2025-10-21 22:08   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2025-10-21 22:08 UTC (permalink / raw)
  To: Keith Busch, csander, io-uring; +Cc: Keith Busch

(Adding right list)

On 10/21/25 3:33 PM, Keith Busch wrote:
> +/*
> + * Return a 128B sqe to fill. Applications must later call io_uring_submit()
> + * when it's ready to tell the kernel about it. The caller may call this
> + * function multiple times before calling io_uring_submit().
> + *
> + * Returns a vacant 128B sqe, or NULL if we're full. If the current tail is the
> + * last entry in the ring, this function will insert a nop + skip complete such
> + * that the 128b entry wraps back to the beginning of the queue for a
> + * contiguous big sq entry. It's up to the caller to use a 128b opcode in order
> + * for the kernel to know how to advance its sq head pointer.
> + */
> +IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe128_mixed(struct io_uring *ring)
> +	LIBURING_NOEXCEPT
> +{

I would probably just name this io_uring_get_sqe128() and have it work
for both MIXED and SQE128. That would make for a cleaner API for the
application.

> +	struct io_uring_sq *sq = &ring->sq;
> +	unsigned head = io_uring_load_sq_head(ring), tail = sq->sqe_tail;
> +	struct io_uring_sqe *sqe;
> +
> +	if (!(ring->flags & IORING_SETUP_SQE_MIXED))
> +		return NULL;
> +
> +	if (((tail + 1) & sq->ring_mask) == 0) {
> +		if ((tail + 2) - head >= sq->ring_entries)
> +			return NULL;
> +
> +		sqe = _io_uring_get_sqe(ring);
> +		io_uring_prep_nop(sqe);
> +		sqe->flags |= IOSQE_CQE_SKIP_SUCCESS;
> +		tail = sq->sqe_tail;
> +	} else if ((tail + 1) - head >= sq->ring_entries) {
> +		return NULL;
> +	}
> +
> +	sqe = &sq->sqes[tail & sq->ring_mask];
> +	sq->sqe_tail = tail + 2;
> +	io_uring_initialize_sqe(sqe);
> +
> +	return sqe;
> +}

Spurious newline before the return, just group them.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCHv5 4/4] Add mixed sqe test for uring commands
       [not found] ` <20251021213329.784558-5-kbusch@meta.com>
@ 2025-10-21 22:09   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2025-10-21 22:09 UTC (permalink / raw)
  To: Keith Busch, csander, io-uring; +Cc: Keith Busch

> +	ret = io_uring_wait_cqe(ring, &cqe);
> +	if (ret < 0)
> +		fprintf(stderr, "wait completion %d\n", ret);
> +	else if (cqe->user_data != seq)
> +		fprintf(stderr, "Unexpected user_data: %ld\n", (long) cqe->user_data);
> +	else {
> +		io_uring_cqe_seen(ring, cqe);
> +		return T_EXIT_PASS;
> +	}
> +	return T_EXIT_FAIL;

All braces if one has braces. In a few different spots.

Outside of those little nits and the previous comment, I think this is
looking fine.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-10-21 22:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20251021213329.784558-1-kbusch@meta.com>
     [not found] ` <20251021213329.784558-3-kbusch@meta.com>
2025-10-21 22:08   ` [PATCHv5 2/4] Add support IORING_SETUP_SQE_MIXED Jens Axboe
     [not found] ` <20251021213329.784558-5-kbusch@meta.com>
2025-10-21 22:09   ` [PATCHv5 4/4] Add mixed sqe test for uring commands Jens Axboe
2025-10-13 18:00 [PATCHv5 0/4] liburing: support for mix sized sqe's Keith Busch
2025-10-13 18:00 ` [PATCHv5 2/4] Add support IORING_SETUP_SQE_MIXED Keith Busch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox