* [PATCH 1/3] eventpoll: abstract out epoll_ctl() handler
2020-01-22 16:02 [PATCHSET 0/3] Add io_uring support for epoll_ctl Jens Axboe
@ 2020-01-22 16:02 ` Jens Axboe
2020-01-22 16:02 ` [PATCH 2/3] eventpoll: support non-blocking do_epoll_ctl() calls Jens Axboe
2020-01-22 16:02 ` [PATCH 3/3] io_uring: add support for epoll_ctl(2) Jens Axboe
2 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-01-22 16:02 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, Jens Axboe
No functional changes in this patch.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/eventpoll.c | 45 +++++++++++++++++++++++++--------------------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 67a395039268..cd848e8d08e2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2074,27 +2074,15 @@ SYSCALL_DEFINE1(epoll_create, int, size)
return do_epoll_create(0);
}
-/*
- * The following function implements the controller interface for
- * the eventpoll file that enables the insertion/removal/change of
- * file descriptors inside the interest set.
- */
-SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
- struct epoll_event __user *, event)
+static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
{
int error;
int full_check = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
- struct epoll_event epds;
struct eventpoll *tep = NULL;
- error = -EFAULT;
- if (ep_op_has_event(op) &&
- copy_from_user(&epds, event, sizeof(struct epoll_event)))
- goto error_return;
-
error = -EBADF;
f = fdget(epfd);
if (!f.file)
@@ -2112,7 +2100,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/* Check if EPOLLWAKEUP is allowed */
if (ep_op_has_event(op))
- ep_take_care_of_epollwakeup(&epds);
+ ep_take_care_of_epollwakeup(epds);
/*
* We have to check that the file structure underneath the file descriptor
@@ -2128,11 +2116,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
* Also, we do not currently supported nested exclusive wakeups.
*/
- if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
+ if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) {
if (op == EPOLL_CTL_MOD)
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
- (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+ (epds->events & ~EPOLLEXCLUSIVE_OK_BITS)))
goto error_tgt_fput;
}
@@ -2192,8 +2180,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
- epds.events |= EPOLLERR | EPOLLHUP;
- error = ep_insert(ep, &epds, tf.file, fd, full_check);
+ epds->events |= EPOLLERR | EPOLLHUP;
+ error = ep_insert(ep, epds, tf.file, fd, full_check);
} else
error = -EEXIST;
if (full_check)
@@ -2208,8 +2196,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_MOD:
if (epi) {
if (!(epi->event.events & EPOLLEXCLUSIVE)) {
- epds.events |= EPOLLERR | EPOLLHUP;
- error = ep_modify(ep, epi, &epds);
+ epds->events |= EPOLLERR | EPOLLHUP;
+ error = ep_modify(ep, epi, epds);
}
} else
error = -ENOENT;
@@ -2231,6 +2219,23 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
return error;
}
+/*
+ * The following function implements the controller interface for
+ * the eventpoll file that enables the insertion/removal/change of
+ * file descriptors inside the interest set.
+ *
+ * This is only the syscall entry point: when the operation takes an
+ * event (see ep_op_has_event()) the user-supplied epoll_event is copied
+ * into a kernel-side copy, and the remaining work is handed to
+ * do_epoll_ctl().  Returns -EFAULT if that copy fails, otherwise
+ * whatever do_epoll_ctl() returns.
+ */
+SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ struct epoll_event __user *, event)
+{
+ struct epoll_event epds;
+
+ /* epds is left uninitialized when the op carries no event */
+ if (ep_op_has_event(op) &&
+ copy_from_user(&epds, event, sizeof(struct epoll_event)))
+ return -EFAULT;
+
+ return do_epoll_ctl(epfd, op, fd, &epds);
+}
+
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_wait(2).
--
2.25.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] eventpoll: support non-blocking do_epoll_ctl() calls
2020-01-22 16:02 [PATCHSET 0/3] Add io_uring support for epoll_ctl Jens Axboe
2020-01-22 16:02 ` [PATCH 1/3] eventpoll: abstract out epoll_ctl() handler Jens Axboe
@ 2020-01-22 16:02 ` Jens Axboe
2020-01-22 16:20 ` Jann Horn
2020-01-22 16:02 ` [PATCH 3/3] io_uring: add support for epoll_ctl(2) Jens Axboe
2 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2020-01-22 16:02 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, Jens Axboe
Also make it available outside of epoll, along with the helper that
decides if we need to copy the passed in epoll_event.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/eventpoll.c | 42 ++++++++++++++++++++++++++++-----------
include/linux/eventpoll.h | 9 +++++++++
2 files changed, 39 insertions(+), 12 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cd848e8d08e2..162af749ea50 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -354,12 +354,6 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
return container_of(p, struct ep_pqueue, pt)->epi;
}
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-static inline int ep_op_has_event(int op)
-{
- return op != EPOLL_CTL_DEL;
-}
-
/* Initialize the poll safe wake up structure */
static void ep_nested_calls_init(struct nested_calls *ncalls)
{
@@ -2074,7 +2068,20 @@ SYSCALL_DEFINE1(epoll_create, int, size)
return do_epoll_create(0);
}
-static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
+/*
+ * Take @mutex on behalf of do_epoll_ctl().
+ *
+ * With @nonblock false this sleeps in mutex_lock_nested(), using @depth as
+ * the lockdep subclass, and always returns 0.  With @nonblock true the
+ * mutex is only tried: returns 0 if it was acquired, or -EAGAIN if it is
+ * contended.  On -EAGAIN the mutex is NOT held, so the caller must not
+ * unlock it.
+ */
+static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
+ bool nonblock)
+{
+ if (!nonblock) {
+ mutex_lock_nested(mutex, depth);
+ return 0;
+ }
+ /* mutex_trylock() returns 1 when the lock was taken, 0 on contention */
+ if (mutex_trylock(mutex))
+ return 0;
+ return -EAGAIN;
+}
+
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+ bool nonblock)
{
int error;
int full_check = 0;
@@ -2145,13 +2152,18 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
* deep wakeup paths from forming in parallel through multiple
* EPOLL_CTL_ADD operations.
*/
- mutex_lock_nested(&ep->mtx, 0);
+ error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+ if (error)
+ goto error_tgt_fput;
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
is_file_epoll(tf.file)) {
- full_check = 1;
mutex_unlock(&ep->mtx);
- mutex_lock(&epmutex);
+ error = epoll_mutex_lock(&epmutex, 0, nonblock);
+ if (error)
+ goto error_tgt_fput;
+ /* error_tgt_fput unlocks epmutex iff full_check: set it only once held */
+ full_check = 1;
if (is_file_epoll(tf.file)) {
error = -ELOOP;
if (ep_loop_check(ep, tf.file) != 0) {
@@ -2161,10 +2173,20 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
} else
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
- mutex_lock_nested(&ep->mtx, 0);
+ error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+ if (error) {
+out_del:
+ list_del(&tf.file->f_tfile_llink);
+ goto error_tgt_fput;
+ }
if (is_file_epoll(tf.file)) {
tep = tf.file->private_data;
- mutex_lock_nested(&tep->mtx, 1);
+ error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
+ if (error) {
+ /* ep->mtx was taken just above; out_del does not drop it */
+ mutex_unlock(&ep->mtx);
+ goto out_del;
+ }
}
}
}
@@ -2233,7 +2255,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
copy_from_user(&epds, event, sizeof(struct epoll_event)))
return -EFAULT;
- return do_epoll_ctl(epfd, op, fd, &epds);
+ return do_epoll_ctl(epfd, op, fd, &epds, false);
}
/*
/*
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index bc6d79b00c4e..8f000fada5a4 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,15 @@ static inline void eventpoll_release(struct file *file)
eventpoll_release_file(file);
}
+/*
+ * Core of epoll_ctl(2), exported so code outside fs/eventpoll.c can call
+ * it.  @epds points at a kernel-side event (the syscall wrapper copies it
+ * in from userspace before calling).  @nonblock asks for the internal
+ * mutexes to be tried rather than slept on; callers passing true should be
+ * prepared for -EAGAIN.
+ */
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+ bool nonblock);
+
+/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
+static inline int ep_op_has_event(int op)
+{
+ /* every operation except EPOLL_CTL_DEL carries an event */
+ return op != EPOLL_CTL_DEL;
+}
+
#else
static inline void eventpoll_init_file(struct file *file) {}
--
2.25.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] eventpoll: support non-blocking do_epoll_ctl() calls
2020-01-22 16:02 ` [PATCH 2/3] eventpoll: support non-blocking do_epoll_ctl() calls Jens Axboe
@ 2020-01-22 16:20 ` Jann Horn
2020-01-22 16:23 ` Jens Axboe
0 siblings, 1 reply; 7+ messages in thread
From: Jann Horn @ 2020-01-22 16:20 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, linux-fsdevel
On Wed, Jan 22, 2020 at 5:02 PM Jens Axboe <[email protected]> wrote:
> Also make it available outside of epoll, along with the helper that
> decides if we need to copy the passed in epoll_event.
[...]
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index cd848e8d08e2..162af749ea50 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
[...]
> -static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
> +static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
> + bool nonblock)
> +{
> + if (!nonblock) {
> + mutex_lock_nested(mutex, depth);
> + return 0;
> + }
> + if (!mutex_trylock(mutex))
> + return 0;
> + return -EAGAIN;
The documentation for mutex_trylock() says:
* Try to acquire the mutex atomically. Returns 1 if the mutex
* has been acquired successfully, and 0 on contention.
So in the success case, this evaluates to:
if (!1)
return 0;
return -EAGAIN;
which is
if (0)
return 0;
return -EAGAIN;
which is
return -EAGAIN;
I think you'll have to get rid of the negation.
> +}
> +
> +int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
> + bool nonblock)
> {
> int error;
> int full_check = 0;
> @@ -2145,13 +2152,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
> * deep wakeup paths from forming in parallel through multiple
> * EPOLL_CTL_ADD operations.
> */
> - mutex_lock_nested(&ep->mtx, 0);
> + error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
> + if (error)
> + goto error_tgt_fput;
> if (op == EPOLL_CTL_ADD) {
> if (!list_empty(&f.file->f_ep_links) ||
> is_file_epoll(tf.file)) {
> full_check = 1;
> mutex_unlock(&ep->mtx);
> - mutex_lock(&epmutex);
> + error = epoll_mutex_lock(&epmutex, 0, nonblock);
> + if (error)
> + goto error_tgt_fput;
When we reach the "goto", full_check==1 and epmutex is not held. But
at the jump target, this code runs:
error_tgt_fput:
if (full_check) // true
mutex_unlock(&epmutex);
So I think we're releasing a lock that we don't hold.
> if (is_file_epoll(tf.file)) {
> error = -ELOOP;
> if (ep_loop_check(ep, tf.file) != 0) {
> @@ -2161,10 +2172,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
> } else
> list_add(&tf.file->f_tfile_llink,
> &tfile_check_list);
> - mutex_lock_nested(&ep->mtx, 0);
> + error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
> + if (error) {
> +out_del:
> + list_del(&tf.file->f_tfile_llink);
> + goto error_tgt_fput;
> + }
> if (is_file_epoll(tf.file)) {
> tep = tf.file->private_data;
> - mutex_lock_nested(&tep->mtx, 1);
> + error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
> + if (error)
> + goto out_del;
When we reach this "goto", ep->mtx is held and never dropped.
> }
> }
> }
> @@ -2233,7 +2251,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
> copy_from_user(&epds, event, sizeof(struct epoll_event)))
> return -EFAULT;
>
> - return do_epoll_ctl(epfd, op, fd, &epds);
> + return do_epoll_ctl(epfd, op, fd, &epds, false);
> }
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] eventpoll: support non-blocking do_epoll_ctl() calls
2020-01-22 16:20 ` Jann Horn
@ 2020-01-22 16:23 ` Jens Axboe
0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-01-22 16:23 UTC (permalink / raw)
To: Jann Horn; +Cc: io-uring, linux-fsdevel
On 1/22/20 9:20 AM, Jann Horn wrote:
> On Wed, Jan 22, 2020 at 5:02 PM Jens Axboe <[email protected]> wrote:
>> Also make it available outside of epoll, along with the helper that
>> decides if we need to copy the passed in epoll_event.
> [...]
>> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
>> index cd848e8d08e2..162af749ea50 100644
>> --- a/fs/eventpoll.c
>> +++ b/fs/eventpoll.c
> [...]
>> -static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>> +static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
>> + bool nonblock)
>> +{
>> + if (!nonblock) {
>> + mutex_lock_nested(mutex, depth);
>> + return 0;
>> + }
>> + if (!mutex_trylock(mutex))
>> + return 0;
>> + return -EAGAIN;
>
> The documentation for mutex_trylock() says:
>
> * Try to acquire the mutex atomically. Returns 1 if the mutex
> * has been acquired successfully, and 0 on contention.
>
> So in the success case, this evaluates to:
>
> if (!1)
> return 0;
> return -EAGAIN;
>
> which is
>
> if (0)
> return 0;
> return -EAGAIN;
>
> which is
>
> return -EAGAIN;
>
> I think you'll have to get rid of the negation.
Doh indeed. I'll rework and run the test case, just rebased this and I
think I inadvertently used an older version. Ditto for the below.
--
Jens Axboe
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/3] io_uring: add support for epoll_ctl(2)
2020-01-22 16:02 [PATCHSET 0/3] Add io_uring support for epoll_ctl Jens Axboe
2020-01-22 16:02 ` [PATCH 1/3] eventpoll: abstract out epoll_ctl() handler Jens Axboe
2020-01-22 16:02 ` [PATCH 2/3] eventpoll: support non-blocking do_epoll_ctl() calls Jens Axboe
@ 2020-01-22 16:02 ` Jens Axboe
2 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-01-22 16:02 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, Jens Axboe
This adds IORING_OP_EPOLL_CTL, which can perform the same work as the
epoll_ctl(2) system call.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 72 +++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 1 +
2 files changed, 73 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 09503d1e9e45..b3bff464d2e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -74,6 +74,7 @@
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/fadvise.h>
+#include <linux/eventpoll.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -421,6 +422,14 @@ struct io_madvise {
u32 advice;
};
+/* Per-request state for IORING_OP_EPOLL_CTL, filled by io_epoll_ctl_prep() */
+struct io_epoll {
+ /* NOTE(review): never written by the epoll code in this patch; presumably kept as the leading member to match the other io_kiocb union members -- confirm */
+ struct file *file;
+ /* epoll instance descriptor, read from sqe->fd */
+ int epfd;
+ /* EPOLL_CTL_* operation, read from sqe->len */
+ int op;
+ /* target descriptor, read from sqe->off */
+ int fd;
+ /* kernel copy of the user's event; only filled when ep_op_has_event(op) */
+ struct epoll_event event;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -534,6 +543,7 @@ struct io_kiocb {
struct io_files_update files_update;
struct io_fadvise fadvise;
struct io_madvise madvise;
+ struct io_epoll epoll;
};
struct io_async_ctx *io;
@@ -719,6 +729,9 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.fd_non_neg = 1,
},
+ [IORING_OP_EPOLL_CTL] = {
+ .unbound_nonreg_file = 1,
+ },
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2578,6 +2591,54 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
return io_openat2(req, nxt, force_nonblock);
}
+/*
+ * Decode an IORING_OP_EPOLL_CTL submission into req->epoll.
+ *
+ * SQE field mapping: fd -> epoll instance fd, len -> EPOLL_CTL_* op,
+ * off -> target fd, addr -> user pointer to the struct epoll_event (only
+ * read when ep_op_has_event() says the op takes one).
+ *
+ * Returns 0 on success, -EINVAL if ioprio or buf_index is set, -EFAULT if
+ * the event cannot be copied in, -EOPNOTSUPP without CONFIG_EPOLL.
+ */
+static int io_epoll_ctl_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+#if defined(CONFIG_EPOLL)
+ /*
+ * sqe->off is deliberately not part of this check: it carries the
+ * target fd, so rejecting a non-zero value would limit the op to fd 0.
+ */
+ if (sqe->ioprio || sqe->buf_index)
+ return -EINVAL;
+
+ req->epoll.epfd = READ_ONCE(sqe->fd);
+ req->epoll.op = READ_ONCE(sqe->len);
+ req->epoll.fd = READ_ONCE(sqe->off);
+
+ if (ep_op_has_event(req->epoll.op)) {
+ struct epoll_event __user *ev;
+
+ ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
+ return -EFAULT;
+ }
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+/*
+ * Issue a prepared IORING_OP_EPOLL_CTL request through do_epoll_ctl().
+ *
+ * If a non-blocking attempt reports -EAGAIN, the request is flagged with
+ * IO_WQ_WORK_NEEDS_FILES and -EAGAIN is returned without posting a
+ * completion (presumably so the caller retries it from a context that may
+ * block -- confirm against the io_issue_sqe() callers).  Otherwise the
+ * do_epoll_ctl() result is posted as the completion value, linked requests
+ * are failed when it is negative, and 0 is returned.
+ * Returns -EOPNOTSUPP without CONFIG_EPOLL.
+ */
+static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+#if defined(CONFIG_EPOLL)
+ struct io_epoll *ie = &req->epoll;
+ int ret;
+
+ ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
+ if (force_nonblock && ret == -EAGAIN) {
+ req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ return -EAGAIN;
+ }
+
+ /* final result: fail any linked requests on error, then complete */
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, nxt);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
@@ -4039,6 +4100,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_OPENAT2:
ret = io_openat2_prep(req, sqe);
break;
+ case IORING_OP_EPOLL_CTL:
+ ret = io_epoll_ctl_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4267,6 +4331,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_openat2(req, nxt, force_nonblock);
break;
+ case IORING_OP_EPOLL_CTL:
+ if (sqe) {
+ ret = io_epoll_ctl_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_epoll_ctl(req, nxt, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 57d05cc5e271..cffa6fd33827 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -106,6 +106,7 @@ enum {
IORING_OP_SEND,
IORING_OP_RECV,
IORING_OP_OPENAT2,
+ IORING_OP_EPOLL_CTL,
/* this goes last, obviously */
IORING_OP_LAST,
--
2.25.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] io_uring: add support for epoll_ctl(2)
2020-01-22 16:42 [PATCHSET v2 0/3] Add io_uring support for epoll_ctl Jens Axboe
@ 2020-01-22 16:42 ` Jens Axboe
0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-01-22 16:42 UTC (permalink / raw)
To: io-uring; +Cc: linux-fsdevel, jannh, Jens Axboe
This adds IORING_OP_EPOLL_CTL, which can perform the same work as the
epoll_ctl(2) system call.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 72 +++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 1 +
2 files changed, 73 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 09503d1e9e45..64dc9e5df6d4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -74,6 +74,7 @@
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/fadvise.h>
+#include <linux/eventpoll.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -421,6 +422,14 @@ struct io_madvise {
u32 advice;
};
+/* Per-request state for IORING_OP_EPOLL_CTL, filled by io_epoll_ctl_prep() */
+struct io_epoll {
+ /* NOTE(review): never written by the epoll code in this patch; presumably kept as the leading member to match the other io_kiocb union members -- confirm */
+ struct file *file;
+ /* epoll instance descriptor, read from sqe->fd */
+ int epfd;
+ /* EPOLL_CTL_* operation, read from sqe->len */
+ int op;
+ /* target descriptor, read from sqe->off */
+ int fd;
+ /* kernel copy of the user's event; only filled when ep_op_has_event(op) */
+ struct epoll_event event;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -534,6 +543,7 @@ struct io_kiocb {
struct io_files_update files_update;
struct io_fadvise fadvise;
struct io_madvise madvise;
+ struct io_epoll epoll;
};
struct io_async_ctx *io;
@@ -719,6 +729,9 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.fd_non_neg = 1,
},
+ [IORING_OP_EPOLL_CTL] = {
+ .unbound_nonreg_file = 1,
+ },
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2578,6 +2591,54 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
return io_openat2(req, nxt, force_nonblock);
}
+/*
+ * Decode an IORING_OP_EPOLL_CTL submission into req->epoll.
+ *
+ * SQE field mapping: fd -> epoll instance fd, len -> EPOLL_CTL_* op,
+ * off -> target fd, addr -> user pointer to the struct epoll_event (only
+ * read when ep_op_has_event() says the op takes one).
+ *
+ * Returns 0 on success, -EINVAL if ioprio or buf_index is set, -EFAULT if
+ * the event cannot be copied in, -EOPNOTSUPP without CONFIG_EPOLL.
+ */
+static int io_epoll_ctl_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+#if defined(CONFIG_EPOLL)
+ /* fields this opcode does not use must be zero */
+ if (sqe->ioprio || sqe->buf_index)
+ return -EINVAL;
+
+ req->epoll.epfd = READ_ONCE(sqe->fd);
+ req->epoll.op = READ_ONCE(sqe->len);
+ req->epoll.fd = READ_ONCE(sqe->off);
+
+ /* the event is copied in at prep time, before the request is issued */
+ if (ep_op_has_event(req->epoll.op)) {
+ struct epoll_event __user *ev;
+
+ ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
+ return -EFAULT;
+ }
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+/*
+ * Issue a prepared IORING_OP_EPOLL_CTL request through do_epoll_ctl().
+ *
+ * If a non-blocking attempt reports -EAGAIN, the request is flagged with
+ * IO_WQ_WORK_NEEDS_FILES and -EAGAIN is returned without posting a
+ * completion (presumably so the caller retries it from a context that may
+ * block -- confirm against the io_issue_sqe() callers).  Otherwise the
+ * do_epoll_ctl() result is posted as the completion value, linked requests
+ * are failed when it is negative, and 0 is returned.
+ * Returns -EOPNOTSUPP without CONFIG_EPOLL.
+ */
+static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+#if defined(CONFIG_EPOLL)
+ struct io_epoll *ie = &req->epoll;
+ int ret;
+
+ ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
+ if (force_nonblock && ret == -EAGAIN) {
+ req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ return -EAGAIN;
+ }
+
+ /* final result: fail any linked requests on error, then complete */
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, nxt);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
@@ -4039,6 +4100,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_OPENAT2:
ret = io_openat2_prep(req, sqe);
break;
+ case IORING_OP_EPOLL_CTL:
+ ret = io_epoll_ctl_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4267,6 +4331,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_openat2(req, nxt, force_nonblock);
break;
+ case IORING_OP_EPOLL_CTL:
+ if (sqe) {
+ ret = io_epoll_ctl_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_epoll_ctl(req, nxt, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 57d05cc5e271..cffa6fd33827 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -106,6 +106,7 @@ enum {
IORING_OP_SEND,
IORING_OP_RECV,
IORING_OP_OPENAT2,
+ IORING_OP_EPOLL_CTL,
/* this goes last, obviously */
IORING_OP_LAST,
--
2.25.0
^ permalink raw reply related [flat|nested] 7+ messages in thread