* [PATCH] io_uring: add support for IORING_OP_MSG_RING command
@ 2022-03-10 13:24 Jens Axboe
0 siblings, 0 replies; only message in thread
From: Jens Axboe @ 2022-03-10 13:24 UTC (permalink / raw)
To: io-uring; +Cc: Artyom Pavlov
This adds support for IORING_OP_MSG_RING, which allows an SQE to signal
another ring. That allows either waking up someone waiting on the ring,
or even passing a 64-bit value via the user_data field in the CQE.
sqe->fd must point to the fd of a ring that should receive the CQE.
sqe->off will be propagated to the cqe->user_data on the target ring,
and the CQE will have IORING_CQE_F_MSG set in its flags to indicate that
this CQE was generated from a messaging request rather than an SQE issued
locally on that ring. cqe->res will contain the pid/tid of the
application that sent the request.
This request type has the following request specific error cases:
- -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is
of the io_uring type.
- -EOVERFLOW. Set if the target ring's CQ ring was in an overflow state
and we could not post the message.
Signed-off-by: Jens Axboe <[email protected]>
---
There's a test case in the liburing wakeup-ring branch:
https://git.kernel.dk/cgit/liburing/log/?h=wakeup-ring
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ea5356599cb..941d513f50cc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -707,6 +707,11 @@ struct io_hardlink {
int flags;
};
+/*
+ * Per-request state for IORING_OP_MSG_RING, stored in the io_kiocb union
+ * as req->msg.
+ */
+struct io_msg {
+ /*
+  * NOTE(review): never referenced by name in the MSG_RING handlers, which
+  * go through req->file instead; presumably this member overlays req->file
+  * inside the io_kiocb union -- confirm against the full io_kiocb layout.
+  */
+ struct file *file;
+ /* read from sqe->off at prep time; becomes user_data of the posted CQE */
+ u64 user_data;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -872,6 +877,7 @@ struct io_kiocb {
struct io_mkdir mkdir;
struct io_symlink symlink;
struct io_hardlink hardlink;
+ struct io_msg msg;
};
u8 opcode;
@@ -1122,6 +1128,9 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_MKDIRAT] = {},
[IORING_OP_SYMLINKAT] = {},
[IORING_OP_LINKAT] = {},
+ [IORING_OP_MSG_RING] = {
+ .needs_file = 1,
+ },
};
/* requests with any of those set should undergo io_disarm_next() */
@@ -4356,6 +4365,46 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+/*
+ * Prepare an IORING_OP_MSG_RING request.
+ *
+ * Returns -EINVAL if any SQE field this opcode does not consume is non-zero,
+ * and -EBADFD if the request's file is not an io_uring instance (its f_op is
+ * not io_uring_fops). On success, sqe->off is stashed in req->msg.user_data
+ * for the issue handler and 0 is returned.
+ */
+static int io_msg_ring_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	/* every field not used by this opcode must be zero */
+	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || sqe->len ||
+		     sqe->rw_flags || sqe->splice_fd_in || sqe->personality))
+		return -EINVAL;
+
+	/* the target must itself be an io_uring fd */
+	if (req->file->f_op != &io_uring_fops)
+		return -EBADFD;
+
+	/* sqe->off carries the value to hand over as the target's user_data */
+	req->msg.user_data = READ_ONCE(sqe->off);
+	return 0;
+}
+
+/*
+ * Issue an IORING_OP_MSG_RING request: post a CQE on the ring behind
+ * req->file.
+ *
+ * With the target ring's completion_lock held, a CQE slot is requested from
+ * the target. If one is available it is filled with:
+ *   user_data = req->msg.user_data (taken from sqe->off at prep time)
+ *   res       = current->pid
+ *   flags     = IORING_CQE_F_MSG
+ * If no slot is available, nothing is posted and the request completes with
+ * -EOVERFLOW.
+ *
+ * The outcome is delivered through the completion of @req; the function
+ * itself always returns 0.
+ */
+static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ring_ctx *target_ctx = req->file->private_data;
+	const u64 user_data = req->msg.user_data;
+	/*
+	 * NOTE(review): current is whichever task executes this issue path;
+	 * confirm that it is the submitting application in every issue
+	 * context, and that the raw pid is the value meant to reach userspace.
+	 */
+	const int res = current->pid;
+	struct io_uring_cqe *cqe;
+	int ret = -EOVERFLOW;
+
+	spin_lock(&target_ctx->completion_lock);
+	cqe = io_get_cqe(target_ctx);
+	if (cqe) {
+		ret = 0;
+		WRITE_ONCE(cqe->user_data, user_data);
+		WRITE_ONCE(cqe->res, res);
+		WRITE_ONCE(cqe->flags, IORING_CQE_F_MSG);
+		/*
+		 * Trace the values that were stored instead of re-reading the
+		 * CQE: the entry is written with WRITE_ONCE, i.e. treated as
+		 * concurrently accessible, so a plain read-back is not
+		 * guaranteed to return what was just written.
+		 */
+		trace_io_uring_complete(target_ctx, NULL, user_data, res,
+					IORING_CQE_F_MSG);
+	}
+	io_commit_cqring(target_ctx);
+	spin_unlock(&target_ctx->completion_lock);
+
+	/* only notify the target's waiters when a CQE was actually posted */
+	if (!ret)
+		io_cqring_ev_posted(target_ctx);
+
+	__io_req_complete(req, issue_flags, ret, 0);
+	return 0;
+}
+
static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -6734,6 +6783,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_symlinkat_prep(req, sqe);
case IORING_OP_LINKAT:
return io_linkat_prep(req, sqe);
+ case IORING_OP_MSG_RING:
+ return io_msg_ring_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7017,6 +7068,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_LINKAT:
ret = io_linkat(req, issue_flags);
break;
+ case IORING_OP_MSG_RING:
+ ret = io_msg_ring(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 42b2fe84dbcd..8bd4bfdd9a89 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -143,6 +143,7 @@ enum {
IORING_OP_MKDIRAT,
IORING_OP_SYMLINKAT,
IORING_OP_LINKAT,
+ IORING_OP_MSG_RING,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -199,9 +200,11 @@ struct io_uring_cqe {
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_MSG (1U << 2)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
--
Jens Axboe
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2022-03-10 13:24 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-10 13:24 [PATCH] io_uring: add support for IORING_OP_MSG_RING command Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox