* [PATCH 1/5] eventpoll: pass struct epoll_filefd through ep_find() and ep_insert()
2026-05-03 8:49 [PATCHSET 0/5] io_uring related epoll cleanups Jens Axboe
@ 2026-05-03 8:49 ` Jens Axboe
2026-05-03 8:49 ` [PATCH 2/5] eventpoll: export is_file_epoll() Jens Axboe
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2026-05-03 8:49 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, brauner, Jens Axboe
Have ep_find() and ep_insert() take a struct epoll_filefd rather
than a file/fd tuple. Kill off ep_set_ffd() as it's no longer
needed.
No functional change. This is a prep patch for adding a file based
do_epoll_ctl() variant.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/eventpoll.c | 34 ++++++++++++++--------------------
1 file changed, 14 insertions(+), 20 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a3090b446af1..f464f2f39e0e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -339,14 +339,6 @@ static inline int is_file_epoll(struct file *f)
return f->f_op == &eventpoll_fops;
}
-/* Setup the structure that is used as key for the RB tree */
-static inline void ep_set_ffd(struct epoll_filefd *ffd,
- struct file *file, int fd)
-{
- ffd->file = file;
- ffd->fd = fd;
-}
-
/* Compare RB tree keys */
static inline int ep_cmp_ffd(struct epoll_filefd *p1,
struct epoll_filefd *p2)
@@ -1173,17 +1165,15 @@ static int ep_alloc(struct eventpoll **pep)
* are protected by the "mtx" mutex, and ep_find() must be called with
* "mtx" held.
*/
-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
+static struct epitem *ep_find(struct eventpoll *ep, struct epoll_filefd *tf)
{
int kcmp;
struct rb_node *rbp;
struct epitem *epi, *epir = NULL;
- struct epoll_filefd ffd;
- ep_set_ffd(&ffd, file, fd);
for (rbp = ep->rbr.rb_root.rb_node; rbp; ) {
epi = rb_entry(rbp, struct epitem, rbn);
- kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
+ kcmp = ep_cmp_ffd(tf, &epi->ffd);
if (kcmp > 0)
rbp = rbp->rb_right;
else if (kcmp < 0)
@@ -1564,7 +1554,7 @@ static int attach_epitem(struct file *file, struct epitem *epi)
* Must be called with "mtx" held.
*/
static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
- struct file *tfile, int fd, int full_check)
+ struct epoll_filefd *tf, int full_check)
{
int error, pwake = 0;
__poll_t revents;
@@ -1572,8 +1562,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
struct ep_pqueue epq;
struct eventpoll *tep = NULL;
- if (is_file_epoll(tfile))
- tep = tfile->private_data;
+ if (is_file_epoll(tf->file))
+ tep = tf->file->private_data;
lockdep_assert_irqs_enabled();
@@ -1590,14 +1580,14 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
/* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
epi->ep = ep;
- ep_set_ffd(&epi->ffd, tfile, fd);
+ epi->ffd = *tf;
epi->event = *event;
epi->next = EP_UNACTIVE_PTR;
if (tep)
mutex_lock_nested(&tep->mtx, 1);
/* Add the current item to the list of active epoll hook for this file */
- if (unlikely(attach_epitem(tfile, epi) < 0)) {
+ if (unlikely(attach_epitem(tf->file, epi) < 0)) {
if (tep)
mutex_unlock(&tep->mtx);
kmem_cache_free(epi_cache, epi);
@@ -1606,7 +1596,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
if (full_check && !tep)
- list_file(tfile);
+ list_file(tf->file);
/*
* Add the current item to the RB tree. All RB tree operations are
@@ -2243,6 +2233,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
struct eventpoll *ep;
struct epitem *epi;
struct eventpoll *tep = NULL;
+ struct epoll_filefd efd;
CLASS(fd, f)(epfd);
if (fd_empty(f))
@@ -2253,6 +2244,9 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
if (fd_empty(tf))
return -EBADF;
+ efd.file = fd_file(tf);
+ efd.fd = fd;
+
/* The target file descriptor must support poll */
if (!file_can_poll(fd_file(tf)))
return -EPERM;
@@ -2333,14 +2327,14 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
* above, we can be sure to be able to use the item looked up by
* ep_find() till we release the mutex.
*/
- epi = ep_find(ep, fd_file(tf), fd);
+ epi = ep_find(ep, &efd);
error = -EINVAL;
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
epds->events |= EPOLLERR | EPOLLHUP;
- error = ep_insert(ep, epds, fd_file(tf), fd, full_check);
+ error = ep_insert(ep, epds, &efd, full_check);
} else
error = -EEXIST;
break;
--
2.53.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/5] eventpoll: export is_file_epoll()
2026-05-03 8:49 [PATCHSET 0/5] io_uring related epoll cleanups Jens Axboe
2026-05-03 8:49 ` [PATCH 1/5] eventpoll: pass struct epoll_filefd through ep_find() and ep_insert() Jens Axboe
@ 2026-05-03 8:49 ` Jens Axboe
2026-05-03 8:49 ` [PATCH 3/5] eventpoll: add file based control interface Jens Axboe
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2026-05-03 8:49 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, brauner, Jens Axboe
Make is_file_epoll() available outside of epoll. This is in preparation
for using it from io_uring.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/eventpoll.c | 2 +-
include/linux/eventpoll.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f464f2f39e0e..9ea6a2bd3d87 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -334,7 +334,7 @@ static void __init epoll_sysctls_init(void)
static const struct file_operations eventpoll_fops;
-static inline int is_file_epoll(struct file *f)
+int is_file_epoll(struct file *f)
{
return f->f_op == &eventpoll_fops;
}
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 728fb5dee5ed..7bf30e9f90d7 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -63,6 +63,7 @@ static inline void eventpoll_release(struct file *file)
int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
bool nonblock);
+int is_file_epoll(struct file *f);
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
--
2.53.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 3/5] eventpoll: add file based control interface
2026-05-03 8:49 [PATCHSET 0/5] io_uring related epoll cleanups Jens Axboe
2026-05-03 8:49 ` [PATCH 1/5] eventpoll: pass struct epoll_filefd through ep_find() and ep_insert() Jens Axboe
2026-05-03 8:49 ` [PATCH 2/5] eventpoll: export is_file_epoll() Jens Axboe
@ 2026-05-03 8:49 ` Jens Axboe
2026-05-03 8:49 ` [PATCH 4/5] io_uring/epoll: switch to using do_epoll_ctl_file() interface Jens Axboe
2026-05-03 8:49 ` [PATCH 5/5] io_uring/epoll: disallow adding an epoll file to an epoll context Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2026-05-03 8:49 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, brauner, Jens Axboe
Add do_epoll_ctl_file(), which takes a pre-resolved epoll file and a
struct epoll_filefd for the target rather than two integer file
descriptors. do_epoll_ctl() remains as a thin wrapper.
In preparation for using the file based interface from io_uring.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/eventpoll.c | 62 ++++++++++++++++++++-------------------
include/linux/eventpoll.h | 7 +++++
2 files changed, 39 insertions(+), 30 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9ea6a2bd3d87..1c7001866340 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -99,11 +99,6 @@
#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
-struct epoll_filefd {
- struct file *file;
- int fd;
-} __packed;
-
/* Wait structure used by the poll hooks */
struct eppoll_entry {
/* List header used to link this structure to the "struct epitem" */
@@ -2225,30 +2220,17 @@ static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
return -EAGAIN;
}
-int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
- bool nonblock)
+int do_epoll_ctl_file(struct file *f, int op, struct epoll_filefd *tf,
+ struct epoll_event *epds, bool nonblock)
{
int error;
int full_check = 0;
struct eventpoll *ep;
struct epitem *epi;
struct eventpoll *tep = NULL;
- struct epoll_filefd efd;
-
- CLASS(fd, f)(epfd);
- if (fd_empty(f))
- return -EBADF;
-
- /* Get the "struct file *" for the target file */
- CLASS(fd, tf)(fd);
- if (fd_empty(tf))
- return -EBADF;
-
- efd.file = fd_file(tf);
- efd.fd = fd;
/* The target file descriptor must support poll */
- if (!file_can_poll(fd_file(tf)))
+ if (!file_can_poll(tf->file))
return -EPERM;
/* Check if EPOLLWAKEUP is allowed */
@@ -2261,7 +2243,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
* adding an epoll file descriptor inside itself.
*/
error = -EINVAL;
- if (fd_file(f) == fd_file(tf) || !is_file_epoll(fd_file(f)))
+ if (f == tf->file || !is_file_epoll(f))
goto error_tgt_fput;
/*
@@ -2272,7 +2254,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) {
if (op == EPOLL_CTL_MOD)
goto error_tgt_fput;
- if (op == EPOLL_CTL_ADD && (is_file_epoll(fd_file(tf)) ||
+ if (op == EPOLL_CTL_ADD && (is_file_epoll(tf->file) ||
(epds->events & ~EPOLLEXCLUSIVE_OK_BITS)))
goto error_tgt_fput;
}
@@ -2281,7 +2263,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
* At this point it is safe to assume that the "private_data" contains
* our own data structure.
*/
- ep = fd_file(f)->private_data;
+ ep = f->private_data;
/*
* When we insert an epoll file descriptor inside another epoll file
@@ -2302,16 +2284,16 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
if (error)
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD) {
- if (READ_ONCE(fd_file(f)->f_ep) || ep->gen == loop_check_gen ||
- is_file_epoll(fd_file(tf))) {
+ if (READ_ONCE(f->f_ep) || ep->gen == loop_check_gen ||
+ is_file_epoll(tf->file)) {
mutex_unlock(&ep->mtx);
error = epoll_mutex_lock(&epnested_mutex, 0, nonblock);
if (error)
goto error_tgt_fput;
loop_check_gen++;
full_check = 1;
- if (is_file_epoll(fd_file(tf))) {
- tep = fd_file(tf)->private_data;
+ if (is_file_epoll(tf->file)) {
+ tep = tf->file->private_data;
error = -ELOOP;
if (ep_loop_check(ep, tep) != 0)
goto error_tgt_fput;
@@ -2327,14 +2309,14 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
* above, we can be sure to be able to use the item looked up by
* ep_find() till we release the mutex.
*/
- epi = ep_find(ep, &efd);
+ epi = ep_find(ep, tf);
error = -EINVAL;
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
epds->events |= EPOLLERR | EPOLLHUP;
- error = ep_insert(ep, epds, &efd, full_check);
+ error = ep_insert(ep, epds, tf, full_check);
} else
error = -EEXIST;
break;
@@ -2369,6 +2351,26 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
mutex_unlock(&epnested_mutex);
}
return error;
+
+}
+
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+ bool nonblock)
+{
+ struct epoll_filefd efd;
+
+ CLASS(fd, f)(epfd);
+ if (fd_empty(f))
+ return -EBADF;
+
+ /* Get the "struct file *" for the target file */
+ CLASS(fd, tf)(fd);
+ if (fd_empty(tf))
+ return -EBADF;
+
+ efd.file = fd_file(tf);
+ efd.fd = fd;
+ return do_epoll_ctl_file(fd_file(f), op, &efd, epds, nonblock);
}
/*
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 7bf30e9f90d7..4a6fe989810b 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,13 @@ static inline void eventpoll_release(struct file *file)
eventpoll_release_file(file);
}
+struct epoll_filefd {
+ struct file *file;
+ int fd;
+} __packed;
+
+int do_epoll_ctl_file(struct file *f, int op, struct epoll_filefd *tf,
+ struct epoll_event *epds, bool nonblock);
int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
bool nonblock);
int is_file_epoll(struct file *f);
--
2.53.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 4/5] io_uring/epoll: switch to using do_epoll_ctl_file() interface
2026-05-03 8:49 [PATCHSET 0/5] io_uring related epoll cleanups Jens Axboe
` (2 preceding siblings ...)
2026-05-03 8:49 ` [PATCH 3/5] eventpoll: add file based control interface Jens Axboe
@ 2026-05-03 8:49 ` Jens Axboe
2026-05-03 8:49 ` [PATCH 5/5] io_uring/epoll: disallow adding an epoll file to an epoll context Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2026-05-03 8:49 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, brauner, Jens Axboe
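Resolve the epoll and target files in io_epoll_ctl() and pass them to
do_epoll_ctl_file(), rather than handing raw file descriptors to
do_epoll_ctl().
No functional changes in this patch.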
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/epoll.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 8d4610246ba0..59cd4f009648 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -51,10 +51,21 @@ int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_epoll *ie = io_kiocb_to_cmd(req, struct io_epoll);
- int ret;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct epoll_filefd efd;
+ int ret;
+
+ CLASS(fd, f)(ie->epfd);
+ if (fd_empty(f))
+ return -EBADF;
+
+ CLASS(fd, tf)(ie->fd);
+ if (fd_empty(tf))
+ return -EBADF;
- ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
+ efd.file = fd_file(tf);
+ efd.fd = ie->fd;
+ ret = do_epoll_ctl_file(fd_file(f), ie->op, &efd, &ie->event, force_nonblock);
if (force_nonblock && ret == -EAGAIN)
return -EAGAIN;
--
2.53.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 5/5] io_uring/epoll: disallow adding an epoll file to an epoll context
2026-05-03 8:49 [PATCHSET 0/5] io_uring related epoll cleanups Jens Axboe
` (3 preceding siblings ...)
2026-05-03 8:49 ` [PATCH 4/5] io_uring/epoll: switch to using do_epoll_ctl_file() interface Jens Axboe
@ 2026-05-03 8:49 ` Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2026-05-03 8:49 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, brauner, Jens Axboe, Linus Torvalds
One of the nastier things about epoll is how it allows adding epoll
files to epoll contexts. This leads to all sorts of loop detection
code, and has been a source of issues in the past.
Arguably adding IORING_OP_EPOLL_CTL was a historical mistake on the
io_uring side, but we're kind of stuck with it now as it does seem
to be in use according to code searches. But we can at least minimize
the damage a bit and disallow, for the io_uring path, adding an epoll
file to an epoll context, which is where nesting issues can arise.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/epoll.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 59cd4f009648..42057aab9124 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -62,6 +62,9 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
CLASS(fd, tf)(ie->fd);
if (fd_empty(tf))
return -EBADF;
+ /* disallow adding an epoll context to another epoll context */
+ if (ie->op == EPOLL_CTL_ADD && is_file_epoll(fd_file(tf)))
+ return -EINVAL;
efd.file = fd_file(tf);
efd.fd = ie->fd;
--
2.53.0
^ permalink raw reply related [flat|nested] 6+ messages in thread