* [PATCH 1/2] io_uring: split out fixed file installation and removal
From: Jens Axboe @ 2022-06-17 13:45 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
Put it with the rest of the filetable code, which is where it belongs.
While doing so, have the helpers take a ctx rather than an io_kiocb. It
doesn't make sense to pass in a request, as these aren't operations on
the request itself; they apply to the ring.
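
For illustration only (not part of the patch): once the helpers take a
ctx, a caller that has no io_kiocb for the target ring can still install
or remove a fixed file, which is what the next patch relies on:

    /* hypothetical caller, already holding target_ctx->uring_lock */
    ret = __io_fixed_fd_install(target_ctx, file, slot + 1);
    /* file_slot is 1-based here, or IORING_FILE_INDEX_ALLOC */
    ...
    ret = io_fixed_fd_remove(target_ctx, slot);
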
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/filetable.c | 72 +++++++++++++++++++++++++++++++++-----------
io_uring/filetable.h | 3 ++
io_uring/openclose.c | 35 +++------------------
io_uring/openclose.h | 2 +-
io_uring/rsrc.c | 2 +-
5 files changed, 63 insertions(+), 51 deletions(-)
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index 534e1a3c625d..abaa5ba7f655 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -58,11 +58,10 @@ void io_free_file_tables(struct io_file_table *table)
table->bitmap = NULL;
}
-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
- unsigned int issue_flags, u32 slot_index)
+static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
+ u32 slot_index)
- __must_hold(&req->ctx->uring_lock)
+ __must_hold(&ctx->uring_lock)
{
- struct io_ring_ctx *ctx = req->ctx;
bool needs_switch = false;
struct io_fixed_file *file_slot;
int ret;
@@ -108,6 +107,26 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
return ret;
}
+int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
+ unsigned int file_slot)
+{
+ bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
+ int ret;
+
+ if (alloc_slot) {
+ ret = io_file_bitmap_get(ctx);
+ if (unlikely(ret < 0))
+ return ret;
+ file_slot = ret;
+ } else {
+ file_slot--;
+ }
+
+ ret = io_install_fixed_file(ctx, file, file_slot);
+ if (!ret && alloc_slot)
+ ret = file_slot;
+ return ret;
+}
/*
* Note when io_fixed_fd_install() returns error value, it will ensure
* fput() is called correspondingly.
@@ -115,27 +134,44 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct file *file, unsigned int file_slot)
{
- bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
struct io_ring_ctx *ctx = req->ctx;
int ret;
io_ring_submit_lock(ctx, issue_flags);
-
- if (alloc_slot) {
- ret = io_file_bitmap_get(ctx);
- if (unlikely(ret < 0))
- goto err;
- file_slot = ret;
- } else {
- file_slot--;
- }
-
- ret = io_install_fixed_file(req, file, issue_flags, file_slot);
- if (!ret && alloc_slot)
- ret = file_slot;
-err:
+ ret = __io_fixed_fd_install(ctx, file, file_slot);
io_ring_submit_unlock(ctx, issue_flags);
+
if (unlikely(ret < 0))
fput(file);
return ret;
}
+
+int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
+{
+ struct io_fixed_file *file_slot;
+ struct file *file;
+ int ret;
+
+ if (unlikely(!ctx->file_data))
+ return -ENXIO;
+ if (offset >= ctx->nr_user_files)
+ return -EINVAL;
+ ret = io_rsrc_node_switch_start(ctx);
+ if (ret)
+ return ret;
+
+ offset = array_index_nospec(offset, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, offset);
+ if (!file_slot->file_ptr)
+ return -EBADF;
+
+ file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+ ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+ if (ret)
+ return ret;
+
+ file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, offset);
+ io_rsrc_node_switch(ctx, ctx->file_data);
+ return 0;
+}
diff --git a/io_uring/filetable.h b/io_uring/filetable.h
index 6b58aa48bc45..0b050f3c331e 100644
--- a/io_uring/filetable.h
+++ b/io_uring/filetable.h
@@ -27,6 +27,9 @@ void io_free_file_tables(struct io_file_table *table);
int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct file *file, unsigned int file_slot);
+int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
+ unsigned int file_slot);
+int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset);
unsigned int io_file_get_flags(struct file *file);
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 099a5ec84dfd..d1818ec9169b 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -173,42 +173,15 @@ void io_open_cleanup(struct io_kiocb *req)
putname(open->filename);
}
-int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+int __io_close_fixed(struct io_ring_ctx *ctx, unsigned int issue_flags,
unsigned int offset)
{
- struct io_ring_ctx *ctx = req->ctx;
- struct io_fixed_file *file_slot;
- struct file *file;
int ret;
io_ring_submit_lock(ctx, issue_flags);
- ret = -ENXIO;
- if (unlikely(!ctx->file_data))
- goto out;
- ret = -EINVAL;
- if (offset >= ctx->nr_user_files)
- goto out;
- ret = io_rsrc_node_switch_start(ctx);
- if (ret)
- goto out;
-
- offset = array_index_nospec(offset, ctx->nr_user_files);
- file_slot = io_fixed_file_slot(&ctx->file_table, offset);
- ret = -EBADF;
- if (!file_slot->file_ptr)
- goto out;
-
- file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
- if (ret)
- goto out;
-
- file_slot->file_ptr = 0;
- io_file_bitmap_clear(&ctx->file_table, offset);
- io_rsrc_node_switch(ctx, ctx->file_data);
- ret = 0;
-out:
+ ret = io_fixed_fd_remove(ctx, offset);
io_ring_submit_unlock(ctx, issue_flags);
+
return ret;
}
@@ -216,7 +189,7 @@ static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_close *close = io_kiocb_to_cmd(req);
- return __io_close_fixed(req, issue_flags, close->file_slot - 1);
+ return __io_close_fixed(req->ctx, issue_flags, close->file_slot - 1);
}
int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/io_uring/openclose.h b/io_uring/openclose.h
index 9f578f3fad87..4b1c28d3a66c 100644
--- a/io_uring/openclose.h
+++ b/io_uring/openclose.h
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+int __io_close_fixed(struct io_ring_ctx *ctx, unsigned int issue_flags,
unsigned int offset);
int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index c10c512aa71b..3340aa9e3fab 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -700,7 +700,7 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
if (ret < 0)
break;
if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
- __io_close_fixed(req, issue_flags, ret);
+ __io_close_fixed(req->ctx, issue_flags, ret);
ret = -EFAULT;
break;
}
--
2.35.1
* [PATCH 2/2] io_uring: add support for passing fixed file descriptors
From: Jens Axboe @ 2022-06-17 13:45 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
With IORING_OP_MSG_RING, one ring can send a message to another ring.
Extend that support to also allow sending a fixed file descriptor to
that ring, enabling one ring to pass a registered descriptor to another
one.
Arguments are extended to pass in:

	sqe->addr3	fixed file slot in source ring
	sqe->file_index	fixed file slot in destination ring

IORING_OP_MSG_RING is extended to take a command argument in sqe->addr.
If set to zero (or IORING_MSG_DATA), it just sends a data message, as
before. If set to IORING_MSG_SEND_FD, a fixed file descriptor is sent
to the target ring according to the above arguments.
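
As an illustration (not part of this patch), a sender might fill out the
SQE by hand. The ring variables and slot numbers below are made up, and
liburing is assumed only for setup and submit. As with other fixed file
users, file_index is offset by one, and IORING_FILE_INDEX_ALLOC asks the
target ring to pick a free slot instead:

    struct io_uring_sqe *sqe = io_uring_get_sqe(&src);

    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode = IORING_OP_MSG_RING;
    sqe->fd = dst.ring_fd;          /* fd of the target ring */
    sqe->addr = IORING_MSG_SEND_FD; /* command, see enum below */
    sqe->addr3 = 3;                 /* fixed file slot in source ring */
    sqe->file_index = 7 + 1;        /* slot 7 in the target ring */
    sqe->off = 0x1234;              /* posted as user_data in target CQE */

    io_uring_submit(&src);
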
Undecided:

- Should we post a cqe with the send, or require that the sender just
  link a separate IORING_OP_MSG_RING? The latter makes error handling
  easier, as we cannot easily retract the installed file descriptor if
  the target CQ ring is full. Right now we do fill a CQE. If the request
  completes with -EOVERFLOW, the sender must issue another
  IORING_OP_MSG_RING if the target still needs to be notified (see the
  sketch below).

- Add an IORING_MSG_MOVE_FD which moves the descriptor, removing it
  from the source ring once it is installed in the target? Again, error
  handling is difficult.
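
A sketch of the recovery path mentioned above, with the same made-up
ring variables: if the SEND_FD completion in the source ring carries
-EOVERFLOW, the descriptor was installed but the target was not
notified, so the sender follows up with a plain IORING_MSG_DATA:

    struct io_uring_cqe *cqe;

    io_uring_wait_cqe(&src, &cqe);
    if (cqe->res == -EOVERFLOW) {
        struct io_uring_sqe *sqe = io_uring_get_sqe(&src);

        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = IORING_OP_MSG_RING;
        sqe->fd = dst.ring_fd;
        sqe->addr = IORING_MSG_DATA;
        sqe->off = 0x1234;          /* user_data the target expects */
        io_uring_submit(&src);
    }
    io_uring_cqe_seen(&src, cqe);
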
Signed-off-by: Jens Axboe <[email protected]>
---
include/uapi/linux/io_uring.h | 8 +++
io_uring/msg_ring.c | 122 ++++++++++++++++++++++++++++++++--
2 files changed, 123 insertions(+), 7 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8715f0942ec2..dbdaeef3ea89 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -264,6 +264,14 @@ enum io_uring_op {
*/
#define IORING_ACCEPT_MULTISHOT (1U << 0)
+/*
+ * IORING_OP_MSG_RING command types, stored in sqe->addr
+ */
+enum {
+ IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */
+ IORING_MSG_SEND_FD, /* send a registered fd to another ring */
+};
+
/*
* IO completion data structure (Completion Queue Entry)
*/
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index b02be2349652..e9d6fb25d141 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -3,46 +3,154 @@
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
+#include "rsrc.h"
+#include "filetable.h"
#include "msg_ring.h"
struct io_msg {
struct file *file;
u64 user_data;
u32 len;
+ u32 cmd;
+ u32 src_fd;
+ u32 dst_fd;
};
+static int io_msg_ring_data(struct io_kiocb *req)
+{
+ struct io_ring_ctx *target_ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req);
+
+ if (msg->src_fd || msg->dst_fd)
+ return -EINVAL;
+
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ return 0;
+
+ return -EOVERFLOW;
+}
+
+static void io_double_unlock_ctx(struct io_ring_ctx *ctx,
+ struct io_ring_ctx *octx,
+ unsigned int issue_flags)
+{
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_unlock(&ctx->uring_lock);
+ mutex_unlock(&octx->uring_lock);
+}
+
+static int io_double_lock_ctx(struct io_ring_ctx *ctx,
+ struct io_ring_ctx *octx,
+ unsigned int issue_flags)
+{
+ /*
+ * To ensure proper ordering between the two ctxs, we can only
+ * attempt a trylock on the target. If that fails and we already have
+ * the source ctx lock, punt to io-wq.
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ if (!mutex_trylock(&octx->uring_lock))
+ return -EAGAIN;
+ return 0;
+ }
+
+ /* Always grab smallest value ctx first. */
+ if (ctx < octx) {
+ mutex_lock(&ctx->uring_lock);
+ mutex_lock(&octx->uring_lock);
+ } else if (ctx > octx) {
+ mutex_lock(&octx->uring_lock);
+ mutex_lock(&ctx->uring_lock);
+ }
+
+ return 0;
+}
+
+static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_ring_ctx *target_ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req);
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned long file_ptr;
+ struct file *src_file;
+ int ret;
+
+ if (target_ctx == ctx)
+ return -EINVAL;
+
+ ret = io_double_lock_ctx(ctx, target_ctx, issue_flags);
+ if (unlikely(ret))
+ return ret;
+
+ ret = -EBADF;
+ if (unlikely(msg->src_fd >= ctx->nr_user_files))
+ goto err_unlock;
+
+ msg->src_fd = array_index_nospec(msg->src_fd, ctx->nr_user_files);
+ file_ptr = io_fixed_file_slot(&ctx->file_table, msg->src_fd)->file_ptr;
+ src_file = (struct file *) (file_ptr & FFS_MASK);
+ get_file(src_file);
+
+ ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
+ if (ret < 0) {
+ fput(src_file);
+ goto err_unlock;
+ }
+
+ /*
+ * If this fails, the target still received the file descriptor but
+ * wasn't notified of the fact. This means that if this request
+ * completes with -EOVERFLOW, then the sender must ensure that a
+ * later IORING_OP_MSG_RING delivers the message.
+ */
+ if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = -EOVERFLOW;
+err_unlock:
+ io_double_unlock_ctx(ctx, target_ctx, issue_flags);
+ return ret;
+}
+
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_msg *msg = io_kiocb_to_cmd(req);
- if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
- sqe->buf_index || sqe->personality))
+ if (unlikely(sqe->rw_flags || sqe->buf_index || sqe->personality))
return -EINVAL;
msg->user_data = READ_ONCE(sqe->off);
msg->len = READ_ONCE(sqe->len);
+ msg->cmd = READ_ONCE(sqe->addr);
+ msg->src_fd = READ_ONCE(sqe->addr3);
+ msg->dst_fd = READ_ONCE(sqe->file_index);
return 0;
}
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_msg *msg = io_kiocb_to_cmd(req);
- struct io_ring_ctx *target_ctx;
int ret;
ret = -EBADFD;
if (!io_is_uring_fops(req->file))
goto done;
- ret = -EOVERFLOW;
- target_ctx = req->file->private_data;
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- ret = 0;
+ switch (msg->cmd) {
+ case IORING_MSG_DATA:
+ ret = io_msg_ring_data(req);
+ break;
+ case IORING_MSG_SEND_FD:
+ ret = io_msg_send_fd(req, issue_flags);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
done:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
return IOU_OK;
}
--
2.35.1