public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] io_uring: Add IORING_OP_DUP
@ 2026-03-10 15:49 Daniele Di Proietto
  2026-03-10 16:14 ` Keith Busch
  2026-03-10 16:24 ` Jens Axboe
  0 siblings, 2 replies; 5+ messages in thread
From: Daniele Di Proietto @ 2026-03-10 15:49 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe, Daniele Di Proietto

The new operation is like dup3(). The source file can be a regular file
descriptor or a direct descriptor. The destination is a regular file
descriptor.

The direct descriptor variant is useful to move a descriptor to an fd
and close the existing fd with a single acquisition of the `struct
files_struct` `file_lock`. Combined with IORING_OP_ACCEPT or
IORING_OP_OPENAT2 with direct descriptors, it can reduce lock contention
for multithreaded applications.

Signed-off-by: Daniele Di Proietto <daniele.di.proietto@gmail.com>
---
 include/uapi/linux/io_uring.h | 10 +++++++
 io_uring/opdef.c              |  8 ++++++
 io_uring/openclose.c          | 49 +++++++++++++++++++++++++++++++++++
 io_uring/openclose.h          |  3 +++
 4 files changed, 70 insertions(+)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1ff16141c8a5..472bebeb569d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -74,6 +74,7 @@ struct io_uring_sqe {
 		__u32		install_fd_flags;
 		__u32		nop_flags;
 		__u32		pipe_flags;
+		__u32		dup_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -90,6 +91,7 @@ struct io_uring_sqe {
 		__u32	file_index;
 		__u32	zcrx_ifq_idx;
 		__u32	optlen;
+		__s32	dup_new_fd;
 		struct {
 			__u16	addr_len;
 			__u16	__pad3[1];
@@ -316,6 +318,7 @@ enum io_uring_op {
 	IORING_OP_PIPE,
 	IORING_OP_NOP128,
 	IORING_OP_URING_CMD128,
+	IORING_OP_DUP,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -475,6 +478,13 @@ enum io_uring_msg_ring_flags {
  */
 #define IORING_FIXED_FD_NO_CLOEXEC	(1U << 0)
 
+/*
+ * IORING_OP_DUP flags (sqe->dup_flags)
+ *
+ * IORING_DUP_NO_CLOEXEC	Don't mark the fd as O_CLOEXEC
+ */
+#define IORING_DUP_NO_CLOEXEC (1U << 0)
+
 /*
  * IORING_OP_NOP flags (sqe->nop_flags)
  *
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 91a23baf415e..34103b9108f6 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -599,6 +599,11 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_uring_cmd_prep,
 		.issue			= io_uring_cmd,
 	},
+	[IORING_OP_DUP] = {
+		.needs_file		= 1,
+		.prep			= io_dup_prep,
+		.issue			= io_dup,
+	},
 };
 
 const struct io_cold_def io_cold_defs[] = {
@@ -857,6 +862,9 @@ const struct io_cold_def io_cold_defs[] = {
 		.sqe_copy		= io_uring_cmd_sqe_copy,
 		.cleanup		= io_uring_cmd_cleanup,
 	},
+	[IORING_OP_DUP] = {
+		.name			= "DUP",
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index c71242915dad..f7a6d45cba17 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -39,6 +39,12 @@ struct io_fixed_install {
 	unsigned int			o_flags;
 };
 
+struct io_dup {
+	struct file *file;
+	int new_fd;
+	unsigned int o_flags;
+};
+
 static bool io_openat_force_async(struct io_open *open)
 {
 	/*
@@ -446,3 +452,46 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
 		fput(files[1]);
 	return ret;
 }
+
+int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	unsigned int flags;
+	struct io_dup *id;
+	int new_fd;
+
+	if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
+		return -EINVAL;
+
+	flags = READ_ONCE(sqe->dup_flags);
+	if (flags & ~IORING_DUP_NO_CLOEXEC)
+		return -EINVAL;
+
+	new_fd = READ_ONCE(sqe->dup_new_fd);
+	if (new_fd < 0)
+		return -EBADF;
+
+	/* ensure the task's creds are used when installing/receiving fds */
+	if (req->flags & REQ_F_CREDS)
+		return -EPERM;
+
+	id = io_kiocb_to_cmd(req, struct io_dup);
+	id->o_flags = O_CLOEXEC;
+	if (flags & IORING_DUP_NO_CLOEXEC)
+		id->o_flags = 0;
+	id->new_fd = new_fd;
+
+	return 0;
+}
+
+int io_dup(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_dup *id;
+	int ret;
+
+	id = io_kiocb_to_cmd(req, struct io_dup);
+	ret = replace_fd(id->new_fd, id->file, id->o_flags);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_COMPLETE;
+}
diff --git a/io_uring/openclose.h b/io_uring/openclose.h
index 566739920658..86c91ad33714 100644
--- a/io_uring/openclose.h
+++ b/io_uring/openclose.h
@@ -21,3 +21,6 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags);
 
 int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_dup(struct io_kiocb *req, unsigned int issue_flags);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: Add IORING_OP_DUP
  2026-03-10 15:49 [PATCH] io_uring: Add IORING_OP_DUP Daniele Di Proietto
@ 2026-03-10 16:14 ` Keith Busch
  2026-03-10 18:42   ` Daniele Di Proietto
  2026-03-10 16:24 ` Jens Axboe
  1 sibling, 1 reply; 5+ messages in thread
From: Keith Busch @ 2026-03-10 16:14 UTC (permalink / raw)
  To: Daniele Di Proietto; +Cc: io-uring, Jens Axboe

On Tue, Mar 10, 2026 at 03:49:33PM +0000, Daniele Di Proietto wrote:
> +int io_dup(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +	struct io_dup *id;
> +	int ret;
> +
> +	id = io_kiocb_to_cmd(req, struct io_dup);
> +	ret = replace_fd(id->new_fd, id->file, id->o_flags);

It looks like there are a few conditions where replace_fd may block,
so it may be a problem to call it from the uring enter context since it
will block progress through the sq ring for subsequent commands.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: Add IORING_OP_DUP
  2026-03-10 15:49 [PATCH] io_uring: Add IORING_OP_DUP Daniele Di Proietto
  2026-03-10 16:14 ` Keith Busch
@ 2026-03-10 16:24 ` Jens Axboe
  2026-03-10 18:42   ` Daniele Di Proietto
  1 sibling, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2026-03-10 16:24 UTC (permalink / raw)
  To: Daniele Di Proietto, io-uring

On 3/10/26 9:49 AM, Daniele Di Proietto wrote:
> The new operation is like dup3(). The source file can be a regular file
> descriptor or a direct descriptor. The destination is a regular file
> descriptor.
> 
> The direct descriptor variant is useful to move a descriptor to an fd
> and close the existing fd with a single acquisition of the `struct
> files_struct` `file_lock`. Combined with IORING_OP_ACCEPT or
> IORING_OP_OPENAT2 with direct descriptors, it can reduce lock contention
> for multithreaded applications.

Overall comment - how does this interact with direct descriptors? Feels
like this should support both, rather than just normal file descriptors.

> @@ -446,3 +452,46 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
>  		fput(files[1]);
>  	return ret;
>  }
> +
> +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	unsigned int flags;
> +	struct io_dup *id;
> +	int new_fd;
> +
> +	if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
> +		return -EINVAL;
> +
> +	flags = READ_ONCE(sqe->dup_flags);
> +	if (flags & ~IORING_DUP_NO_CLOEXEC)
> +		return -EINVAL;
> +
> +	new_fd = READ_ONCE(sqe->dup_new_fd);
> +	if (new_fd < 0)
> +		return -EBADF;

Is this necessary? Yes it'll help fail early, but do we care about that?

> +	/* ensure the task's creds are used when installing/receiving fds */
> +	if (req->flags & REQ_F_CREDS)
> +		return -EPERM;

Not sure that's sane. Let's say you mark this request as IOSQE_ASYNC,
then it'd fail even if REQ_F_CREDS would then be set, and creds would
match the original task.


> +
> +	id = io_kiocb_to_cmd(req, struct io_dup);
> +	id->o_flags = O_CLOEXEC;
> +	if (flags & IORING_DUP_NO_CLOEXEC)
> +		id->o_flags = 0;
> +	id->new_fd = new_fd;
> +
> +	return 0;
> +}
> +
> +int io_dup(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +	struct io_dup *id;
> +	int ret;
> +
> +	id = io_kiocb_to_cmd(req, struct io_dup);
> +	ret = replace_fd(id->new_fd, id->file, id->o_flags);
> +	if (ret < 0)
> +		req_set_fail(req);
> +	io_req_set_res(req, ret, 0);
> +	return IOU_COMPLETE;

And like Keith said here, we might need to punt it to io-wq if the file
has a ->flush() method.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: Add IORING_OP_DUP
  2026-03-10 16:24 ` Jens Axboe
@ 2026-03-10 18:42   ` Daniele Di Proietto
  0 siblings, 0 replies; 5+ messages in thread
From: Daniele Di Proietto @ 2026-03-10 18:42 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Tue, Mar 10, 2026 at 4:24 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 3/10/26 9:49 AM, Daniele Di Proietto wrote:
> > The new operation is like dup3(). The source file can be a regular file
> > descriptor or a direct descriptor. The destination is a regular file
> > descriptor.
> >
> > The direct descriptor variant is useful to move a descriptor to an fd
> > and close the existing fd with a single acquisition of the `struct
> > files_struct` `file_lock`. Combined with IORING_OP_ACCEPT or
> > IORING_OP_OPENAT2 with direct descriptors, it can reduce lock contention
> > for multithreaded applications.
>
> Overall comment - how does this interact with direct descriptors? Feels
> like this should support both, rather than just normal file descriptors.

As implemented, the operation supports:
1. src: direct, dst: normal (this is the use case I mostly care about)
2. src: normal, dst: normal

I can extend it to also support
3. src: direct, dst: direct
4. src: normal, dst: direct

I can use IOSQE_FIXED_FILE to pick the source and I guess I can use a
bit in dup_flags (something like IORING_DUP_DIRECT) to decide whether
the destination is a direct descriptor or normal.

Does that make sense?

>
> > @@ -446,3 +452,46 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
> >               fput(files[1]);
> >       return ret;
> >  }
> > +
> > +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> > +{
> > +     unsigned int flags;
> > +     struct io_dup *id;
> > +     int new_fd;
> > +
> > +     if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3)
> > +             return -EINVAL;
> > +
> > +     flags = READ_ONCE(sqe->dup_flags);
> > +     if (flags & ~IORING_DUP_NO_CLOEXEC)
> > +             return -EINVAL;
> > +
> > +     new_fd = READ_ONCE(sqe->dup_new_fd);
> > +     if (new_fd < 0)
> > +             return -EBADF;
>
> Is this necessary? Yes it'll help fail early, but do we care about that?

You're right, we don't really care about that. I'll remove it.

>
> > +     /* ensure the task's creds are used when installing/receiving fds */
> > +     if (req->flags & REQ_F_CREDS)
> > +             return -EPERM;
>
> Not sure that's sane. Let's say you mark this request as IOSQE_ASYNC,
> then it'd fail even if REQ_F_CREDS would then be set, and creds would
> match the original task.

I'm not sure either, I mostly added this because it's in
io_install_fixed_fd_prep, I assume the same rationale applies here,
right?

>
>
> > +
> > +     id = io_kiocb_to_cmd(req, struct io_dup);
> > +     id->o_flags = O_CLOEXEC;
> > +     if (flags & IORING_DUP_NO_CLOEXEC)
> > +             id->o_flags = 0;
> > +     id->new_fd = new_fd;
> > +
> > +     return 0;
> > +}
> > +
> > +int io_dup(struct io_kiocb *req, unsigned int issue_flags)
> > +{
> > +     struct io_dup *id;
> > +     int ret;
> > +
> > +     id = io_kiocb_to_cmd(req, struct io_dup);
> > +     ret = replace_fd(id->new_fd, id->file, id->o_flags);
> > +     if (ret < 0)
> > +             req_set_fail(req);
> > +     io_req_set_res(req, ret, 0);
> > +     return IOU_COMPLETE;
>
> And like Keith said here, we might need to punt it to io-wq if the file
> has a ->flush() method.

Makes sense, thanks.

Thanks for the review!

>
> --
> Jens Axboe

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] io_uring: Add IORING_OP_DUP
  2026-03-10 16:14 ` Keith Busch
@ 2026-03-10 18:42   ` Daniele Di Proietto
  0 siblings, 0 replies; 5+ messages in thread
From: Daniele Di Proietto @ 2026-03-10 18:42 UTC (permalink / raw)
  To: Keith Busch; +Cc: io-uring, Jens Axboe

On Tue, Mar 10, 2026 at 4:14 PM Keith Busch <kbusch@kernel.org> wrote:
>
> On Tue, Mar 10, 2026 at 03:49:33PM +0000, Daniele Di Proietto wrote:
> > +int io_dup(struct io_kiocb *req, unsigned int issue_flags)
> > +{
> > +     struct io_dup *id;
> > +     int ret;
> > +
> > +     id = io_kiocb_to_cmd(req, struct io_dup);
> > +     ret = replace_fd(id->new_fd, id->file, id->o_flags);
>
> It looks like there are a few conditions where replace_fd may block,
> so it may be a problem to call it from the uring enter context since it
> will block progress through the sq ring for subsequent commands.

You're right, thanks!

I can punt the request to io-wq if the file to be closed has a
->flush() method, as asked by Jens.

Any other conditions you're worried about? expand_files() might block
if another thread is trying to expand the file table concurrently, but
io_install_fixed_fd() has the same problem, right?

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-03-10 18:42 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-10 15:49 [PATCH] io_uring: Add IORING_OP_DUP Daniele Di Proietto
2026-03-10 16:14 ` Keith Busch
2026-03-10 18:42   ` Daniele Di Proietto
2026-03-10 16:24 ` Jens Axboe
2026-03-10 18:42   ` Daniele Di Proietto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox