From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
Date: Mon, 10 Feb 2020 13:56:50 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
Some file descriptors use separate waitqueues for their f_ops->poll()
handler, most commonly one for read and one for write. The io_uring
poll implementation doesn't work with that, as the 2nd poll_wait()
call will cause the io_uring poll request to fail with -EINVAL.
This is particularly a problem now that pipes were switched to using
multiple wait queues (commit 0ddad21d3e99), but it also affects tty
devices and /dev/random. This is a big problem for event loops
where some file descriptors work, and others don't.
With this fix, io_uring handles multiple waitqueues.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 70 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 123e6424a050..72bc378edebc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3439,10 +3439,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
#endif
}
+/* Detach the extra waitqueue entry hung off req->io, if there is one. */
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+	struct io_poll_iocb *double_poll = (struct io_poll_iocb *) req->io;
+	unsigned long flags;
+
+	if (!double_poll || !double_poll->head)
+		return;
+	spin_lock_irqsave(&double_poll->head->lock, flags);
+	list_del_init(&double_poll->wait.entry);
+	if (double_poll->wait.private)	/* ref taken in io_poll_queue_proc() */
+		refcount_dec(&req->refs);
+	spin_unlock_irqrestore(&double_poll->head->lock, flags);
+}
+
static void io_poll_remove_one(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
+ io_poll_remove_double(req);
+
spin_lock(&poll->head->lock);
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
@@ -3678,10 +3695,39 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && !(mask & poll->events))
return 0;
+ io_poll_remove_double(req);
__io_poll_wake(req, &req->poll, mask);
return 1;
}
+/* Wake callback for the extra waitqueue entry hung off req->io. */
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+			       int sync, void *key)
+{
+	struct io_kiocb *req = wait->private;
+	struct io_poll_iocb *poll = (void *) req->io;
+	__poll_t mask = key_to_poll(key);
+	bool done = true;
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & poll->events))
+		return 0;
+
+	if (req->poll.head) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&req->poll.head->lock, flags);
+		done = list_empty(&req->poll.wait.entry);
+		if (!done)
+			list_del_init(&req->poll.wait.entry);
+		spin_unlock_irqrestore(&req->poll.head->lock, flags);
+	}
+	if (!done)
+		__io_poll_wake(req, poll, mask);
+	refcount_dec(&req->refs);
+	return 1;	/* event handled, same result as io_poll_wake() */
+}
+
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
@@ -3692,15 +3738,33 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct poll_table_struct *p)
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+ struct io_kiocb *req = pt->req;
+ struct io_poll_iocb *poll = &req->poll;
- if (unlikely(pt->req->poll.head)) {
- pt->error = -EINVAL;
- return;
+ /*
+ * If poll->head is already set, it's because the file being polled
+ * uses multiple waitqueues for poll handling (eg one for read, one
+ * for write). Set up a separate io_poll_iocb if this happens.
+ */
+ if (unlikely(poll->head)) {
+ poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+ if (!poll) {
+ pt->error = -ENOMEM;
+ return;
+ }
+ poll->done = false;
+ poll->canceled = false;
+ poll->events = req->poll.events;
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
+ refcount_inc(&req->refs);
+ poll->wait.private = req;
+ req->io = (void *) poll;
}
pt->error = 0;
- pt->req->poll.head = head;
- add_wait_queue(head, &pt->req->poll.wait);
+ poll->head = head;
+ add_wait_queue(head, &poll->wait);
}
static void io_poll_req_insert(struct io_kiocb *req)
@@ -3777,6 +3841,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
}
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
+ io_poll_remove_double(req);
io_poll_complete(req, mask, 0);
}
spin_unlock_irq(&ctx->completion_lock);
--
2.25.0
next prev parent reply other threads:[~2020-02-10 20:56 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-10 20:56 [PATCHSET 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
2020-02-10 20:56 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
2020-02-10 20:56 ` [PATCH 2/3] io_uring: abstract out main poll wake handler Jens Axboe
2020-02-10 20:56 ` Jens Axboe [this message]
2020-02-11 20:22 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Pavel Begunkov
2020-02-11 20:27 ` Jens Axboe
2020-02-11 20:01 ` [PATCHSET 0/3] io_uring: make POLL_ADD support multiple waitqs Pavel Begunkov
2020-02-11 20:06 ` Jens Axboe
-- strict thread matches above, loose matches on Subject: below --
2020-02-12 20:25 [PATCHSET v2 " Jens Axboe
2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
2020-02-13 15:50 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox