public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Cc: [email protected], [email protected]
Subject: Re: [PATCH 7/7] io_uring/epoll: add support for IORING_OP_EPOLL_WAIT
Date: Sun, 9 Feb 2025 00:24:52 +0000	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 2/8/25 23:27, Pavel Begunkov wrote:
...
> But it might be better to just poll the epoll fd, reuse all the
> io_uring polling machinery, and implement IO_URING_F_MULTISHOT for
> the epoll opcode.
> 
> epoll_issue(issue_flags) {
>      if (!(flags & IO_URING_F_MULTISHOT))
>          return -EAGAIN;
> 
>      res = epoll_check_events();
>      post_cqe(res);
>      etc.
> }
> 
> I think that would make this patch quite trivial, including
> the multishot mode.

Something like this instead of the last patch. Completely untested;
the eventpoll.c hunk is dirty and might be incorrect, it still needs to
pass the right mask for polling, and all that. At least it looks simpler,
and probably doesn't need half of the prep patches.


diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b96cc9193517..99dd8c1a2f2c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1996,33 +1996,6 @@ static int ep_try_send_events(struct eventpoll *ep,
  	return res;
  }
  
-static int ep_poll_queue(struct eventpoll *ep,
-			 struct epoll_event __user *events, int maxevents,
-			 struct wait_queue_entry *wait)
-{
-	int res = 0, eavail;
-
-	/* See ep_poll() for commentary */
-	eavail = ep_events_available(ep);
-	while (1) {
-		if (eavail) {
-			res = ep_try_send_events(ep, events, maxevents);
-			if (res)
-				return res;
-		}
-		if (!list_empty_careful(&wait->entry))
-			break;
-		write_lock_irq(&ep->lock);
-		eavail = ep_events_available(ep);
-		if (!eavail)
-			__add_wait_queue_exclusive(&ep->wq, wait);
-		write_unlock_irq(&ep->lock);
-		if (!eavail)
-			break;
-	}
-	return -EIOCBQUEUED;
-}
-
  static int __epoll_wait_remove(struct eventpoll *ep,
  			       struct wait_queue_entry *wait, int timed_out)
  {
@@ -2517,16 +2490,22 @@ static int ep_check_params(struct file *file, struct epoll_event __user *evs,
  	return 0;
  }
  
-int epoll_queue(struct file *file, struct epoll_event __user *events,
-		int maxevents, struct wait_queue_entry *wait)
+int epoll_sendevents(struct file *file, struct epoll_event __user *events,
+		     int maxevents)
  {
-	int ret;
+	int res = 0, eavail;
  
  	ret = ep_check_params(file, events, maxevents);
  	if (unlikely(ret))
  		return ret;
  
-	return ep_poll_queue(file->private_data, events, maxevents, wait);
+	eavail = ep_events_available(ep);
+	if (eavail) {
+		res = ep_try_send_events(ep, events, maxevents);
+		if (res)
+			return res;
+	}
+	return 0;
  }
  
  /*
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6c088d5e945b..751e3f325927 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -25,9 +25,8 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t
  /* Used to release the epoll bits inside the "struct file" */
  void eventpoll_release_file(struct file *file);
  
-/* Use to reap events, and/or queue for a callback on new events */
-int epoll_queue(struct file *file, struct epoll_event __user *events,
-		int maxevents, struct wait_queue_entry *wait);
+int epoll_sendevents(struct file *file, struct epoll_event __user *events,
+		int maxevents);
  
  /* Remove wait entry */
  int epoll_wait_remove(struct file *file, struct wait_queue_entry *wait);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e11c82638527..a559e1e1544a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -278,6 +278,7 @@ enum io_uring_op {
  	IORING_OP_FTRUNCATE,
  	IORING_OP_BIND,
  	IORING_OP_LISTEN,
+	IORING_OP_EPOLL_WAIT,
  
  	/* this goes last, obviously */
  	IORING_OP_LAST,
diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 7848d9cc073d..6d2c48ba1923 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -20,6 +20,12 @@ struct io_epoll {
  	struct epoll_event		event;
  };
  
+struct io_epoll_wait {
+	struct file			*file;
+	int				maxevents;
+	struct epoll_event __user	*events;
+};
+
  int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
  	struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
@@ -57,3 +63,30 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
  	io_req_set_res(req, ret, 0);
  	return IOU_OK;
  }
+
+int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+
+	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+		return -EINVAL;
+
+	iew->maxevents = READ_ONCE(sqe->len);
+	iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	return 0;
+}
+
+int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+	int ret;
+
+	ret = epoll_sendevents(req->file, iew->events, iew->maxevents);
+	if (ret == 0)
+		return -EAGAIN;
+	if (ret < 0)
+		req_set_fail(req);
+
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/epoll.h b/io_uring/epoll.h
index 870cce11ba98..4111997c360b 100644
--- a/io_uring/epoll.h
+++ b/io_uring/epoll.h
@@ -3,4 +3,6 @@
  #if defined(CONFIG_EPOLL)
  int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
  int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags);
+int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags);
  #endif
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index e8baef4e5146..bd62d6068b61 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -514,6 +514,18 @@ const struct io_issue_def io_issue_defs[] = {
  		.async_size		= sizeof(struct io_async_msghdr),
  #else
  		.prep			= io_eopnotsupp_prep,
+#endif
+	},
+	[IORING_OP_EPOLL_WAIT] = {
+		.needs_file		= 1,
+		.audit_skip		= 1,
+		.pollout		= 1,
+		.pollin			= 1,
+#if defined(CONFIG_EPOLL)
+		.prep			= io_epoll_wait_prep,
+		.issue			= io_epoll_wait,
+#else
+		.prep			= io_eopnotsupp_prep,
  #endif
  	},
  };
@@ -745,6 +757,9 @@ const struct io_cold_def io_cold_defs[] = {
  	[IORING_OP_LISTEN] = {
  		.name			= "LISTEN",
  	},
+	[IORING_OP_EPOLL_WAIT] = {
+		.name			= "EPOLL_WAIT",
+	},
  };
  
  const char *io_uring_get_opcode(u8 opcode)


  reply	other threads:[~2025-02-09  0:24 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-07 17:32 [PATCHSET v3 0/7] io_uring epoll wait support Jens Axboe
2025-02-07 17:32 ` [PATCH 1/7] eventpoll: abstract out ep_try_send_events() helper Jens Axboe
2025-02-07 17:32 ` [PATCH 2/7] eventpoll: abstract out parameter sanity checking Jens Axboe
2025-02-07 17:32 ` [PATCH 3/7] eventpoll: add epoll_queue() interface Jens Axboe
2025-02-07 17:32 ` [PATCH 4/7] eventpoll: add helper to remove wait entry from wait queue head Jens Axboe
2025-02-07 17:32 ` [PATCH 5/7] io_uring/epoll: remove CONFIG_EPOLL guards Jens Axboe
2025-02-07 17:32 ` [PATCH 6/7] io_uring/poll: pull ownership handling into poll.h Jens Axboe
2025-02-07 17:32 ` [PATCH 7/7] io_uring/epoll: add support for IORING_OP_EPOLL_WAIT Jens Axboe
2025-02-08 23:27   ` Pavel Begunkov
2025-02-09  0:24     ` Pavel Begunkov [this message]
2025-02-09 16:19       ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox