public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] New IORING_OP_DUP
@ 2026-03-20 18:23 Daniele Di Proietto
  2026-03-20 18:23 ` [PATCH v2 1/2] io_uring: Extract io_file_get_fixed_node helper Daniele Di Proietto
  2026-03-20 18:23 ` [PATCH v2 2/2] io_uring: Add IORING_OP_DUP Daniele Di Proietto
  0 siblings, 2 replies; 4+ messages in thread
From: Daniele Di Proietto @ 2026-03-20 18:23 UTC (permalink / raw)
  To: io-uring
  Cc: Jens Axboe, Keith Busch, Pavel Begunkov, linux-fsdevel,
	Alexander Viro, Christian Brauner, Jan Kara, Daniele Di Proietto

The new operation is like dup3(). The source file can be a regular file
descriptor or a direct descriptor. The destination is a regular file
descriptor.

The direct descriptor variant is useful to move a descriptor to an fd
and close the existing fd with a single acquisition of the `struct
files_struct` `file_lock`. Combined with IORING_OP_ACCEPT or
IORING_OP_OPENAT2 with direct descriptors, it can reduce lock contention
for multithreaded applications.

Changes since v1:
* Implemented dup to direct descriptors as well
* dup from fd to fd is now atomic
* Punt to io-wq if the operation might sleep
* Removed prep() check on fd
* Avoided use of IOSQE_FIXED_FILE flag

v1: https://lore.kernel.org/io-uring/086190ca-1c34-448f-a565-aa41f671971f@gmail.com/T/#t

Daniele Di Proietto (2):
  io_uring: Extract io_file_get_fixed_node helper
  io_uring: Add IORING_OP_DUP

 fs/file.c                     | 102 ++++++++++++-------
 fs/internal.h                 |   5 +
 include/uapi/linux/io_uring.h |  17 ++++
 io_uring/io_uring.c           |  20 +++-
 io_uring/io_uring.h           |   2 +
 io_uring/opdef.c              |   8 ++
 io_uring/openclose.c          | 180 ++++++++++++++++++++++++++++++++++
 io_uring/openclose.h          |   4 +
 io_uring/splice.c             |   6 +-
 9 files changed, 298 insertions(+), 46 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 1/2] io_uring: Extract io_file_get_fixed_node helper
  2026-03-20 18:23 [PATCH v2 0/2] New IORING_OP_DUP Daniele Di Proietto
@ 2026-03-20 18:23 ` Daniele Di Proietto
  2026-03-20 18:23 ` [PATCH v2 2/2] io_uring: Add IORING_OP_DUP Daniele Di Proietto
  1 sibling, 0 replies; 4+ messages in thread
From: Daniele Di Proietto @ 2026-03-20 18:23 UTC (permalink / raw)
  To: io-uring
  Cc: Jens Axboe, Keith Busch, Pavel Begunkov, linux-fsdevel,
	Alexander Viro, Christian Brauner, Jan Kara, Daniele Di Proietto

This has two users and it's about to have a third in a future commit.

Reading io_slot_flags() and io_slot_file() outside of
io_ring_submit_lock() should be fine after a reference has been
acquired, as those are immutable.

Signed-off-by: Daniele Di Proietto <daniele.di.proietto@gmail.com>
---
 io_uring/io_uring.c | 20 ++++++++++++++++----
 io_uring/io_uring.h |  2 ++
 io_uring/splice.c   |  6 +-----
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9a37035e76c0..726245a28b87 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1553,22 +1553,34 @@ void io_wq_submit_work(struct io_wq_work *work)
 		io_req_task_queue_fail(req, ret);
 }
 
-inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
-				      unsigned int issue_flags)
+inline struct io_rsrc_node *io_file_get_fixed_node(struct io_kiocb *req, int fd,
+						   unsigned int issue_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_rsrc_node *node;
-	struct file *file = NULL;
 
 	io_ring_submit_lock(ctx, issue_flags);
 	node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
 	if (node) {
 		node->refs++;
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	return node;
+}
+
+inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
+				      unsigned int issue_flags)
+{
+	struct io_rsrc_node *node;
+	struct file *file = NULL;
+
+	node = io_file_get_fixed_node(req, fd, issue_flags);
+	if (node) {
 		req->file_node = node;
 		req->flags |= io_slot_flags(node);
 		file = io_slot_file(node);
 	}
-	io_ring_submit_unlock(ctx, issue_flags);
 	return file;
 }
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 0fa844faf287..1ed44201fa77 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -170,6 +170,8 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
 
 unsigned io_linked_nr(struct io_kiocb *req);
 void io_req_track_inflight(struct io_kiocb *req);
+struct io_rsrc_node *io_file_get_fixed_node(struct io_kiocb *req, int fd,
+					    unsigned int issue_flags);
 struct file *io_file_get_normal(struct io_kiocb *req, int fd);
 struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
 			       unsigned issue_flags);
diff --git a/io_uring/splice.c b/io_uring/splice.c
index e81ebbb91925..3c5021a46e79 100644
--- a/io_uring/splice.c
+++ b/io_uring/splice.c
@@ -60,22 +60,18 @@ static struct file *io_splice_get_file(struct io_kiocb *req,
 				       unsigned int issue_flags)
 {
 	struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice);
-	struct io_ring_ctx *ctx = req->ctx;
 	struct io_rsrc_node *node;
 	struct file *file = NULL;
 
 	if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
 		return io_file_get_normal(req, sp->splice_fd_in);
 
-	io_ring_submit_lock(ctx, issue_flags);
-	node = io_rsrc_node_lookup(&ctx->file_table.data, sp->splice_fd_in);
+	node = io_file_get_fixed_node(req, sp->splice_fd_in, issue_flags);
 	if (node) {
-		node->refs++;
 		sp->rsrc_node = node;
 		file = io_slot_file(node);
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
-	io_ring_submit_unlock(ctx, issue_flags);
 	return file;
 }
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 2/2] io_uring: Add IORING_OP_DUP
  2026-03-20 18:23 [PATCH v2 0/2] New IORING_OP_DUP Daniele Di Proietto
  2026-03-20 18:23 ` [PATCH v2 1/2] io_uring: Extract io_file_get_fixed_node helper Daniele Di Proietto
@ 2026-03-20 18:23 ` Daniele Di Proietto
  2026-03-20 19:34   ` Jens Axboe
  1 sibling, 1 reply; 4+ messages in thread
From: Daniele Di Proietto @ 2026-03-20 18:23 UTC (permalink / raw)
  To: io-uring
  Cc: Jens Axboe, Keith Busch, Pavel Begunkov, linux-fsdevel,
	Alexander Viro, Christian Brauner, Jan Kara, Daniele Di Proietto

The new operation is like dup3(). The source file can be a regular file
descriptor or a direct descriptor. The destination is a regular file
descriptor.

The direct descriptor variant is useful to move a descriptor to an fd
and close the existing fd with a single acquisition of the `struct
files_struct` `file_lock`. Combined with IORING_OP_ACCEPT or
IORING_OP_OPENAT2 with direct descriptors, it can reduce lock contention
for multithreaded applications.

Signed-off-by: Daniele Di Proietto <daniele.di.proietto@gmail.com>
---
 fs/file.c                     | 102 ++++++++++++-------
 fs/internal.h                 |   5 +
 include/uapi/linux/io_uring.h |  17 ++++
 io_uring/opdef.c              |   8 ++
 io_uring/openclose.c          | 180 ++++++++++++++++++++++++++++++++++
 io_uring/openclose.h          |   4 +
 6 files changed, 279 insertions(+), 37 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 384c83ce768d..64d712ef89b5 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -285,9 +285,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
  * Return <0 error code on error; 0 on success.
  * The files->file_lock should be held on entry, and will be held on exit.
  */
-static int expand_files(struct files_struct *files, unsigned int nr)
-	__releases(files->file_lock)
-	__acquires(files->file_lock)
+int expand_files(struct files_struct *files, unsigned int nr)
+	__releases(files->file_lock) __acquires(files->file_lock)
 {
 	struct fdtable *fdt;
 	int error;
@@ -1291,13 +1290,33 @@ bool get_close_on_exec(unsigned int fd)
 	return res;
 }
 
-static int do_dup2(struct files_struct *files,
-	struct file *file, unsigned fd, unsigned flags)
-__releases(&files->file_lock)
+/**
+ * do_replace_fd_locked() - Installs a file on a specific fd number.
+ * @files: struct files_struct to install the file on.
+ * @file: struct file to be installed.
+ * @fd: number in the files table to replace
+ * @flags: the O_* flags to apply to the new fd entry
+ *
+ * Installs a @file on the specific @fd number on the @files table.
+ *
+ * The caller must makes sure that @fd fits inside the @files table, likely via
+ * expand_files().
+ *
+ * Requires holding @files->file_lock.
+ *
+ * This helper handles its own reference counting of the incoming
+ * struct file.
+ *
+ * Returns a preexisting file in @fd, if any, NULL, if none or an error.
+ */
+struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
+				  unsigned int fd, unsigned int flags)
 {
 	struct file *tofree;
 	struct fdtable *fdt;
 
+	lockdep_assert_held(&files->file_lock);
+
 	/*
 	 * dup2() is expected to close the file installed in the target fd slot
 	 * (if any). However, userspace hand-picking a fd may be racing against
@@ -1328,26 +1347,19 @@ __releases(&files->file_lock)
 	fd = array_index_nospec(fd, fdt->max_fds);
 	tofree = rcu_dereference_raw(fdt->fd[fd]);
 	if (!tofree && fd_is_open(fd, fdt))
-		goto Ebusy;
+		return ERR_PTR(-EBUSY);
 	get_file(file);
 	rcu_assign_pointer(fdt->fd[fd], file);
 	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
-	spin_unlock(&files->file_lock);
-
-	if (tofree)
-		filp_close(tofree, files);
-
-	return fd;
 
-Ebusy:
-	spin_unlock(&files->file_lock);
-	return -EBUSY;
+	return tofree;
 }
 
 int replace_fd(unsigned fd, struct file *file, unsigned flags)
 {
-	int err;
 	struct files_struct *files = current->files;
+	struct file *tofree;
+	int err;
 
 	if (!file)
 		return close_fd(fd);
@@ -1359,9 +1371,14 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
 	err = expand_files(files, fd);
 	if (unlikely(err < 0))
 		goto out_unlock;
-	err = do_dup2(files, file, fd, flags);
-	if (err < 0)
-		return err;
+	tofree = do_replace_fd_locked(files, file, fd, flags);
+	spin_unlock(&files->file_lock);
+
+	if (IS_ERR(tofree))
+		return PTR_ERR(tofree);
+
+	if (tofree)
+		filp_close(tofree, files);
 	return 0;
 
 out_unlock:
@@ -1422,11 +1439,29 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
 	return new_fd;
 }
 
-static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
+static struct file *do_dup3(struct files_struct *files, unsigned int oldfd,
+			    unsigned int newfd, int flags)
+	__releases(files->file_lock) __acquires(files->file_lock)
 {
-	int err = -EBADF;
 	struct file *file;
+	int err = 0;
+
+	err = expand_files(files, newfd);
+	file = files_lookup_fd_locked(files, oldfd);
+	if (unlikely(!file))
+		return ERR_PTR(-EBADF);
+	if (err < 0) {
+		if (err == -EMFILE)
+			err = -EBADF;
+		return ERR_PTR(err);
+	}
+	return do_replace_fd_locked(files, file, newfd, flags);
+}
+
+static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
+{
 	struct files_struct *files = current->files;
+	struct file *to_free;
 
 	if ((flags & ~O_CLOEXEC) != 0)
 		return -EINVAL;
@@ -1438,22 +1473,15 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 		return -EBADF;
 
 	spin_lock(&files->file_lock);
-	err = expand_files(files, newfd);
-	file = files_lookup_fd_locked(files, oldfd);
-	if (unlikely(!file))
-		goto Ebadf;
-	if (unlikely(err < 0)) {
-		if (err == -EMFILE)
-			goto Ebadf;
-		goto out_unlock;
-	}
-	return do_dup2(files, file, newfd, flags);
-
-Ebadf:
-	err = -EBADF;
-out_unlock:
+	to_free = do_dup3(files, oldfd, newfd, flags);
 	spin_unlock(&files->file_lock);
-	return err;
+
+	if (IS_ERR(to_free))
+		return PTR_ERR(to_free);
+	if (to_free)
+		filp_close(to_free, files);
+
+	return newfd;
 }
 
 SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
diff --git a/fs/internal.h b/fs/internal.h
index cbc384a1aa09..c3d1eaf65328 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -197,6 +197,11 @@ extern struct file *do_file_open_root(const struct path *,
 extern struct open_how build_open_how(int flags, umode_t mode);
 extern int build_open_flags(const struct open_how *how, struct open_flags *op);
 struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
+struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
+				  unsigned int fd, unsigned int flags)
+	__must_hold(files->file_lock);
+int expand_files(struct files_struct *files, unsigned int nr)
+	__releases(files->file_lock) __acquires(files->file_lock);
 
 int do_ftruncate(struct file *file, loff_t length, int small);
 int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1ff16141c8a5..1612aa2db846 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -74,6 +74,7 @@ struct io_uring_sqe {
 		__u32		install_fd_flags;
 		__u32		nop_flags;
 		__u32		pipe_flags;
+		__u32		dup_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -90,6 +91,7 @@ struct io_uring_sqe {
 		__u32	file_index;
 		__u32	zcrx_ifq_idx;
 		__u32	optlen;
+		__s32	dup_new_fd;
 		struct {
 			__u16	addr_len;
 			__u16	__pad3[1];
@@ -316,6 +318,7 @@ enum io_uring_op {
 	IORING_OP_PIPE,
 	IORING_OP_NOP128,
 	IORING_OP_URING_CMD128,
+	IORING_OP_DUP,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -475,6 +478,20 @@ enum io_uring_msg_ring_flags {
  */
 #define IORING_FIXED_FD_NO_CLOEXEC	(1U << 0)
 
+/*
+ * IORING_OP_DUP flags (sqe->dup_flags)
+ *
+ * IORING_DUP_NO_CLOEXEC	Don't mark the new fd as O_CLOEXEC. Only valid
+ *				if IORING_DUP_NEW_FIXED is not set.
+ * IORING_DUP_OLD_FIXED		sqe->fd (the source) is a fixed descriptor.
+ *				Otherwise it's a regular fd.
+ * IORING_DUP_NEW_FIXED		sqe->dup_new_fd (the destination) is a fixed
+ *				descriptor. Otherwise is a regular fd.
+ */
+#define IORING_DUP_NO_CLOEXEC	(1U << 0)
+#define IORING_DUP_OLD_FIXED	(1U << 1)
+#define IORING_DUP_NEW_FIXED	(1U << 2)
+
 /*
  * IORING_OP_NOP flags (sqe->nop_flags)
  *
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 91a23baf415e..62fe566d2cad 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -599,6 +599,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_uring_cmd_prep,
 		.issue			= io_uring_cmd,
 	},
+	[IORING_OP_DUP] = {
+		.prep			= io_dup_prep,
+		.issue			= io_dup,
+	},
 };
 
 const struct io_cold_def io_cold_defs[] = {
@@ -857,6 +861,10 @@ const struct io_cold_def io_cold_defs[] = {
 		.sqe_copy		= io_uring_cmd_sqe_copy,
 		.cleanup		= io_uring_cmd_cleanup,
 	},
+	[IORING_OP_DUP] = {
+		.name			= "DUP",
+		.cleanup		= io_dup_cleanup,
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index c71242915dad..2658adbfd17a 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -39,6 +39,14 @@ struct io_fixed_install {
 	unsigned int			o_flags;
 };
 
+struct io_dup {
+	struct file *file;
+	int old_fd;
+	int new_fd;
+	unsigned int flags;
+	struct io_rsrc_node *rsrc_node;
+};
+
 static bool io_openat_force_async(struct io_open *open)
 {
 	/*
@@ -446,3 +454,175 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
 		fput(files[1]);
 	return ret;
 }
+
+void io_dup_cleanup(struct io_kiocb *req)
+{
+	struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
+
+	if (id->rsrc_node)
+		io_put_rsrc_node(req->ctx, id->rsrc_node);
+}
+
+int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_dup *id;
+
+	if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
+		return -EINVAL;
+
+	id = io_kiocb_to_cmd(req, struct io_dup);
+	id->flags = READ_ONCE(sqe->dup_flags);
+	if (id->flags & ~(IORING_DUP_NO_CLOEXEC | IORING_DUP_OLD_FIXED |
+			  IORING_DUP_NEW_FIXED))
+		return -EINVAL;
+
+	if ((id->flags & IORING_DUP_NO_CLOEXEC) &&
+	    (id->flags & IORING_DUP_NEW_FIXED))
+		return -EINVAL;
+
+	id->old_fd = READ_ONCE(sqe->fd);
+	id->new_fd = READ_ONCE(sqe->dup_new_fd);
+
+	if (((id->flags & IORING_DUP_NEW_FIXED) == 0) ==
+		    ((id->flags & IORING_DUP_OLD_FIXED) == 0) &&
+	    id->old_fd == id->new_fd)
+		return -EINVAL;
+
+	id->rsrc_node = NULL;
+
+	/* ensure the task's creds are used when installing/receiving fds */
+	if (req->flags & REQ_F_CREDS)
+		return -EPERM;
+
+	return 0;
+}
+
+static struct file *io_dup_get_old_file_fixed(struct io_kiocb *req,
+					      unsigned int issue_flags,
+					      unsigned int file_slot)
+{
+	struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
+	struct file *file = NULL;
+
+	if (!id->rsrc_node)
+		id->rsrc_node =
+			io_file_get_fixed_node(req, file_slot, issue_flags);
+
+	if (id->rsrc_node) {
+		file = io_slot_file(id->rsrc_node);
+		req->flags |= REQ_F_NEED_CLEANUP;
+	}
+	return file;
+}
+
+static int io_dup_to_fixed(struct io_kiocb *req, unsigned int issue_flags,
+			   bool old_fixed, int old_fd, unsigned int file_slot)
+{
+	struct file *old_file = NULL;
+	int ret;
+
+	if (!old_fixed) {
+		old_file = io_file_get_normal(req, old_fd);
+		if (old_file && io_is_uring_fops(old_file)) {
+			fput(old_file);
+			old_file = NULL;
+		}
+	} else {
+		old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
+		if (old_file)
+			get_file(old_file);
+	}
+	if (!old_file)
+		return -EBADF;
+
+	if (file_slot != IORING_FILE_INDEX_ALLOC)
+		file_slot++;
+
+	ret = io_fixed_fd_install(req, issue_flags, old_file, file_slot);
+	if (file_slot == IORING_FILE_INDEX_ALLOC || ret < 0)
+		return ret;
+	return file_slot - 1;
+}
+
+static int io_dup_to_fd(struct io_kiocb *req, unsigned int issue_flags,
+			bool old_fixed, int old_fd, int new_fd, int o_flags)
+{
+	struct file *old_file, *to_replace, *to_close = NULL;
+	bool non_block = issue_flags & IO_URING_F_NONBLOCK;
+	struct files_struct *files = current->files;
+	int ret;
+
+	if (new_fd >= rlimit(RLIMIT_NOFILE))
+		return -EBADF;
+
+	if (old_fixed)
+		old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
+
+	spin_lock(&files->file_lock);
+
+	/* Do we need to expand? If so, be safe and punt to async */
+	if (new_fd >= files_fdtable(files)->max_fds && non_block)
+		goto out_again;
+	ret = expand_files(files, new_fd);
+	if (ret < 0)
+		goto out_unlock;
+
+	if (!old_fixed)
+		old_file = files_lookup_fd_locked(files, old_fd);
+
+	ret = -EBADF;
+	if (!old_file)
+		goto out_unlock;
+
+	to_replace = files_lookup_fd_locked(files, new_fd);
+	if (to_replace) {
+		if (io_is_uring_fops(to_replace))
+			goto out_unlock;
+
+		/* if the file has a flush method, be safe and punt to async */
+		if (to_replace->f_op->flush && non_block)
+			goto out_again;
+	}
+	to_close = do_replace_fd_locked(files, old_file, new_fd, o_flags);
+	ret = new_fd;
+
+out_unlock:
+	spin_unlock(&files->file_lock);
+
+	if (IS_ERR(to_close))
+		ret = PTR_ERR(to_close);
+	else if (to_close)
+		filp_close(to_close, files);
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_COMPLETE;
+
+out_again:
+	spin_unlock(&files->file_lock);
+	return -EAGAIN;
+}
+
+int io_dup(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
+	bool old_fixed = id->flags & IORING_DUP_OLD_FIXED;
+	bool new_fixed = id->flags & IORING_DUP_NEW_FIXED;
+	int ret, o_flags;
+
+	if (new_fixed) {
+		ret = io_dup_to_fixed(req, issue_flags, old_fixed, id->old_fd,
+				      id->new_fd);
+		if (ret < 0)
+			req_set_fail(req);
+		io_req_set_res(req, ret, 0);
+		return IOU_COMPLETE;
+	}
+
+	o_flags = O_CLOEXEC;
+	if (id->flags & IORING_DUP_NO_CLOEXEC)
+		o_flags = 0;
+	return io_dup_to_fd(req, issue_flags, old_fixed, id->old_fd, id->new_fd,
+			    o_flags);
+}
diff --git a/io_uring/openclose.h b/io_uring/openclose.h
index 566739920658..95d6a338ac66 100644
--- a/io_uring/openclose.h
+++ b/io_uring/openclose.h
@@ -21,3 +21,7 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags);
 
 int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
+
+void io_dup_cleanup(struct io_kiocb *req);
+int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_dup(struct io_kiocb *req, unsigned int issue_flags);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 2/2] io_uring: Add IORING_OP_DUP
  2026-03-20 18:23 ` [PATCH v2 2/2] io_uring: Add IORING_OP_DUP Daniele Di Proietto
@ 2026-03-20 19:34   ` Jens Axboe
  0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2026-03-20 19:34 UTC (permalink / raw)
  To: Daniele Di Proietto, io-uring
  Cc: Keith Busch, Pavel Begunkov, linux-fsdevel, Alexander Viro,
	Christian Brauner, Jan Kara

On 3/20/26 12:23 PM, Daniele Di Proietto wrote:
> diff --git a/fs/file.c b/fs/file.c
> index 384c83ce768d..64d712ef89b5 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -285,9 +285,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
>   * Return <0 error code on error; 0 on success.
>   * The files->file_lock should be held on entry, and will be held on exit.
>   */
> -static int expand_files(struct files_struct *files, unsigned int nr)
> -	__releases(files->file_lock)
> -	__acquires(files->file_lock)
> +int expand_files(struct files_struct *files, unsigned int nr)
> +	__releases(files->file_lock) __acquires(files->file_lock)
>  {
>  	struct fdtable *fdt;
>  	int error;
> @@ -1291,13 +1290,33 @@ bool get_close_on_exec(unsigned int fd)
>  	return res;
>  }
>  
> -static int do_dup2(struct files_struct *files,
> -	struct file *file, unsigned fd, unsigned flags)
> -__releases(&files->file_lock)
> +/**
> + * do_replace_fd_locked() - Installs a file on a specific fd number.
> + * @files: struct files_struct to install the file on.
> + * @file: struct file to be installed.
> + * @fd: number in the files table to replace
> + * @flags: the O_* flags to apply to the new fd entry
> + *
> + * Installs a @file on the specific @fd number on the @files table.
> + *
> + * The caller must makes sure that @fd fits inside the @files table, likely via
> + * expand_files().
> + *
> + * Requires holding @files->file_lock.
> + *
> + * This helper handles its own reference counting of the incoming
> + * struct file.
> + *
> + * Returns a preexisting file in @fd, if any, NULL, if none or an error.
> + */
> +struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
> +				  unsigned int fd, unsigned int flags)
>  {
>  	struct file *tofree;
>  	struct fdtable *fdt;
>  
> +	lockdep_assert_held(&files->file_lock);
> +
>  	/*
>  	 * dup2() is expected to close the file installed in the target fd slot
>  	 * (if any). However, userspace hand-picking a fd may be racing against
> @@ -1328,26 +1347,19 @@ __releases(&files->file_lock)
>  	fd = array_index_nospec(fd, fdt->max_fds);
>  	tofree = rcu_dereference_raw(fdt->fd[fd]);
>  	if (!tofree && fd_is_open(fd, fdt))
> -		goto Ebusy;
> +		return ERR_PTR(-EBUSY);
>  	get_file(file);
>  	rcu_assign_pointer(fdt->fd[fd], file);
>  	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
> -	spin_unlock(&files->file_lock);
> -
> -	if (tofree)
> -		filp_close(tofree, files);
> -
> -	return fd;
>  
> -Ebusy:
> -	spin_unlock(&files->file_lock);
> -	return -EBUSY;
> +	return tofree;
>  }
>  
>  int replace_fd(unsigned fd, struct file *file, unsigned flags)
>  {
> -	int err;
>  	struct files_struct *files = current->files;
> +	struct file *tofree;
> +	int err;
>  
>  	if (!file)
>  		return close_fd(fd);
> @@ -1359,9 +1371,14 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
>  	err = expand_files(files, fd);
>  	if (unlikely(err < 0))
>  		goto out_unlock;
> -	err = do_dup2(files, file, fd, flags);
> -	if (err < 0)
> -		return err;
> +	tofree = do_replace_fd_locked(files, file, fd, flags);
> +	spin_unlock(&files->file_lock);
> +
> +	if (IS_ERR(tofree))
> +		return PTR_ERR(tofree);
> +
> +	if (tofree)
> +		filp_close(tofree, files);
>  	return 0;
>  
>  out_unlock:
> @@ -1422,11 +1439,29 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
>  	return new_fd;
>  }
>  
> -static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> +static struct file *do_dup3(struct files_struct *files, unsigned int oldfd,
> +			    unsigned int newfd, int flags)
> +	__releases(files->file_lock) __acquires(files->file_lock)
>  {
> -	int err = -EBADF;
>  	struct file *file;
> +	int err = 0;
> +
> +	err = expand_files(files, newfd);
> +	file = files_lookup_fd_locked(files, oldfd);
> +	if (unlikely(!file))
> +		return ERR_PTR(-EBADF);
> +	if (err < 0) {
> +		if (err == -EMFILE)
> +			err = -EBADF;
> +		return ERR_PTR(err);
> +	}
> +	return do_replace_fd_locked(files, file, newfd, flags);
> +}
> +
> +static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
> +{
>  	struct files_struct *files = current->files;
> +	struct file *to_free;
>  
>  	if ((flags & ~O_CLOEXEC) != 0)
>  		return -EINVAL;
> @@ -1438,22 +1473,15 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
>  		return -EBADF;
>  
>  	spin_lock(&files->file_lock);
> -	err = expand_files(files, newfd);
> -	file = files_lookup_fd_locked(files, oldfd);
> -	if (unlikely(!file))
> -		goto Ebadf;
> -	if (unlikely(err < 0)) {
> -		if (err == -EMFILE)
> -			goto Ebadf;
> -		goto out_unlock;
> -	}
> -	return do_dup2(files, file, newfd, flags);
> -
> -Ebadf:
> -	err = -EBADF;
> -out_unlock:
> +	to_free = do_dup3(files, oldfd, newfd, flags);
>  	spin_unlock(&files->file_lock);
> -	return err;
> +
> +	if (IS_ERR(to_free))
> +		return PTR_ERR(to_free);
> +	if (to_free)
> +		filp_close(to_free, files);
> +
> +	return newfd;
>  }
>  
>  SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
> diff --git a/fs/internal.h b/fs/internal.h
> index cbc384a1aa09..c3d1eaf65328 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -197,6 +197,11 @@ extern struct file *do_file_open_root(const struct path *,
>  extern struct open_how build_open_how(int flags, umode_t mode);
>  extern int build_open_flags(const struct open_how *how, struct open_flags *op);
>  struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
> +struct file *do_replace_fd_locked(struct files_struct *files, struct file *file,
> +				  unsigned int fd, unsigned int flags)
> +	__must_hold(files->file_lock);
> +int expand_files(struct files_struct *files, unsigned int nr)
> +	__releases(files->file_lock) __acquires(files->file_lock);
>  
>  int do_ftruncate(struct file *file, loff_t length, int small);
>  int do_sys_ftruncate(unsigned int fd, loff_t length, int small);

All of the above should be a separate prep patch, not part of the
io_uring patch adding IORING_OP_DUP.

> diff --git a/io_uring/openclose.c b/io_uring/openclose.c
> index c71242915dad..2658adbfd17a 100644
> --- a/io_uring/openclose.c
> +++ b/io_uring/openclose.c
> @@ -446,3 +454,175 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
>  		fput(files[1]);
>  	return ret;
>  }
> +
> +void io_dup_cleanup(struct io_kiocb *req)
> +{
> +	struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
> +
> +	if (id->rsrc_node)
> +		io_put_rsrc_node(req->ctx, id->rsrc_node);
> +}

Probably doesn't hurt to be defensive and clear ->rsrc_node when put.

> +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_dup *id;
> +
> +	if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
> +		return -EINVAL;
> +
> +	id = io_kiocb_to_cmd(req, struct io_dup);
> +	id->flags = READ_ONCE(sqe->dup_flags);
> +	if (id->flags & ~(IORING_DUP_NO_CLOEXEC | IORING_DUP_OLD_FIXED |
> +			  IORING_DUP_NEW_FIXED))
> +		return -EINVAL;

Would be cleaner with an IORING_DUP_FLAGS mask above io_dup_prep().

> +static struct file *io_dup_get_old_file_fixed(struct io_kiocb *req,
> +					      unsigned int issue_flags,
> +					      unsigned int file_slot)
> +{
> +	struct io_dup *id = io_kiocb_to_cmd(req, struct io_dup);
> +	struct file *file = NULL;
> +
> +	if (!id->rsrc_node)
> +		id->rsrc_node =
> +			io_file_get_fixed_node(req, file_slot, issue_flags);

Just use the full line length, io_uring isn't a stickler on 80 chars and
it'd read better as:

	if (!id->rsrc_node)
		id->rsrc_node = io_file_get_fixed_node(req, file_slot, issue_flags);

Need to check further, but I'm assuming the difference here is for retry
where the node could already be assigned?


> +static int io_dup_to_fd(struct io_kiocb *req, unsigned int issue_flags,
> +			bool old_fixed, int old_fd, int new_fd, int o_flags)
> +{
> +	struct file *old_file, *to_replace, *to_close = NULL;
> +	bool non_block = issue_flags & IO_URING_F_NONBLOCK;
> +	struct files_struct *files = current->files;
> +	int ret;
> +
> +	if (new_fd >= rlimit(RLIMIT_NOFILE))
> +		return -EBADF;
> +
> +	if (old_fixed)
> +		old_file = io_dup_get_old_file_fixed(req, issue_flags, old_fd);
> +
> +	spin_lock(&files->file_lock);

If you use:

	guard(spinlock)(&files->file_lock);

and move:

> +
> +	/* Do we need to expand? If so, be safe and punt to async */
> +	if (new_fd >= files_fdtable(files)->max_fds && non_block)
> +		goto out_again;
> +	ret = expand_files(files, new_fd);
> +	if (ret < 0)
> +		goto out_unlock;
> +
> +	if (!old_fixed)
> +		old_file = files_lookup_fd_locked(files, old_fd);
> +
> +	ret = -EBADF;
> +	if (!old_file)
> +		goto out_unlock;
> +
> +	to_replace = files_lookup_fd_locked(files, new_fd);
> +	if (to_replace) {
> +		if (io_is_uring_fops(to_replace))
> +			goto out_unlock;
> +
> +		/* if the file has a flush method, be safe and punt to async */
> +		if (to_replace->f_op->flush && non_block)
> +			goto out_again;
> +	}
> +	to_close = do_replace_fd_locked(files, old_file, new_fd, o_flags);
> +	ret = new_fd;

into a helper, all of this somewhat messy goto error_handling stuff can
go away.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-03-20 19:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-20 18:23 [PATCH v2 0/2] New IORING_OP_DUP Daniele Di Proietto
2026-03-20 18:23 ` [PATCH v2 1/2] io_uring: Extract io_file_get_fixed_node helper Daniele Di Proietto
2026-03-20 18:23 ` [PATCH v2 2/2] io_uring: Add IORING_OP_DUP Daniele Di Proietto
2026-03-20 19:34   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox