* [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
2020-02-10 20:56 [PATCHSET 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
@ 2020-02-10 20:56 ` Jens Axboe
2020-02-11 20:22 ` Pavel Begunkov
0 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2020-02-10 20:56 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Some file descriptors use separate waitqueues for their f_ops->poll()
handler, most commonly one for read and one for write. The io_uring
poll implementation doesn't work with that, as the 2nd poll_wait()
call will cause the io_uring poll request to fail with -EINVAL.
This is particularly a problem now that pipes were switched to using
multiple wait queues (commit 0ddad21d3e99), but it also affects tty
devices and /dev/random. This is a big problem for event loops where
some file descriptors work and others don't.
With this fix, io_uring handles multiple waitqueues.
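For illustration, a ->poll() handler using separate read/write
waitqueues looks roughly like the sketch below (the struct and helper
names are placeholders and it would need linux/poll.h and friends to
build; the two poll_wait() calls are the part that matters):

struct example_dev {
	wait_queue_head_t read_wait;
	wait_queue_head_t write_wait;
	/* ... */
};

static __poll_t example_poll(struct file *file, poll_table *pt)
{
	struct example_dev *dev = file->private_data;
	__poll_t mask = 0;

	/* each poll_wait() invokes the caller's queue proc once */
	poll_wait(file, &dev->read_wait, pt);
	/* 2nd call: this is where the io_uring poll request used to
	 * fail with -EINVAL */
	poll_wait(file, &dev->write_wait, pt);

	if (example_data_ready(dev))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (example_space_free(dev))
		mask |= EPOLLOUT | EPOLLWRNORM;
	return mask;
}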
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 70 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 123e6424a050..72bc378edebc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3439,10 +3439,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
#endif
}
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+ struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+
+ if (poll && poll->head) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&poll->head->lock, flags);
+ list_del_init(&poll->wait.entry);
+ if (poll->wait.private)
+ refcount_dec(&req->refs);
+ spin_unlock_irqrestore(&poll->head->lock, flags);
+ }
+}
+
static void io_poll_remove_one(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
+ io_poll_remove_double(req);
+
spin_lock(&poll->head->lock);
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
@@ -3678,10 +3695,39 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && !(mask & poll->events))
return 0;
+ io_poll_remove_double(req);
__io_poll_wake(req, &req->poll, mask);
return 1;
}
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+ int sync, void *key)
+{
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = (void *) req->io;
+ __poll_t mask = key_to_poll(key);
+ bool done = true;
+ int ret;
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & poll->events))
+ return 0;
+
+ if (req->poll.head) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&req->poll.head->lock, flags);
+ done = list_empty(&req->poll.wait.entry);
+ if (!done)
+ list_del_init(&req->poll.wait.entry);
+ spin_unlock_irqrestore(&req->poll.head->lock, flags);
+ }
+ if (!done)
+ __io_poll_wake(req, poll, mask);
+ refcount_dec(&req->refs);
+ return ret;
+}
+
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
@@ -3692,15 +3738,33 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct poll_table_struct *p)
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+ struct io_kiocb *req = pt->req;
+ struct io_poll_iocb *poll = &req->poll;
- if (unlikely(pt->req->poll.head)) {
- pt->error = -EINVAL;
- return;
+ /*
+ * If poll->head is already set, it's because the file being polled
+ * use multiple waitqueues for poll handling (eg one for read, one
+ * for write). Setup a separate io_poll_iocb if this happens.
+ */
+ if (unlikely(poll->head)) {
+ poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+ if (!poll) {
+ pt->error = -ENOMEM;
+ return;
+ }
+ poll->done = false;
+ poll->canceled = false;
+ poll->events = req->poll.events;
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
+ refcount_inc(&req->refs);
+ poll->wait.private = req;
+ req->io = (void *) poll;
}
pt->error = 0;
- pt->req->poll.head = head;
- add_wait_queue(head, &pt->req->poll.wait);
+ poll->head = head;
+ add_wait_queue(head, &poll->wait);
}
static void io_poll_req_insert(struct io_kiocb *req)
@@ -3777,6 +3841,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
}
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
+ io_poll_remove_double(req);
io_poll_complete(req, mask, 0);
}
spin_unlock_irq(&ctx->completion_lock);
--
2.25.0
* Re: [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
2020-02-10 20:56 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
@ 2020-02-11 20:22 ` Pavel Begunkov
2020-02-11 20:27 ` Jens Axboe
0 siblings, 1 reply; 8+ messages in thread
From: Pavel Begunkov @ 2020-02-11 20:22 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 10/02/2020 23:56, Jens Axboe wrote:
> Some file descriptors use separate waitqueues for their f_ops->poll()
> handler, most commonly one for read and one for write. The io_uring
> poll implementation doesn't work with that, as the 2nd poll_wait()
> call will cause the io_uring poll request to fail with -EINVAL.
>
> This is particularly a problem now that pipes were switched to using
> multiple wait queues (commit 0ddad21d3e99), but it also affects tty
> devices and /dev/random. This is a big problem for event loops where
> some file descriptors work and others don't.
>
> With this fix, io_uring handles multiple waitqueues.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 70 insertions(+), 5 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 123e6424a050..72bc378edebc 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -3439,10 +3439,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
> #endif
> }
>
> +static void io_poll_remove_double(struct io_kiocb *req)
> +{
> + struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
> +
> + if (poll && poll->head) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&poll->head->lock, flags);
> + list_del_init(&poll->wait.entry);
> + if (poll->wait.private)
> + refcount_dec(&req->refs);
> + spin_unlock_irqrestore(&poll->head->lock, flags);
> + }
> +}
> +
> static void io_poll_remove_one(struct io_kiocb *req)
> {
> struct io_poll_iocb *poll = &req->poll;
>
> + io_poll_remove_double(req);
> +
> spin_lock(&poll->head->lock);
> WRITE_ONCE(poll->canceled, true);
> if (!list_empty(&poll->wait.entry)) {
> @@ -3678,10 +3695,39 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
> if (mask && !(mask & poll->events))
> return 0;
>
> + io_poll_remove_double(req);
> __io_poll_wake(req, &req->poll, mask);
> return 1;
> }
>
> +static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
> + int sync, void *key)
> +{
> + struct io_kiocb *req = wait->private;
> + struct io_poll_iocb *poll = (void *) req->io;
> + __poll_t mask = key_to_poll(key);
> + bool done = true;
> + int ret;
> +
> + /* for instances that support it check for an event match first: */
> + if (mask && !(mask & poll->events))
> + return 0;
> +
> + if (req->poll.head) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&req->poll.head->lock, flags);
> + done = list_empty(&req->poll.wait.entry);
> + if (!done)
> + list_del_init(&req->poll.wait.entry);
> + spin_unlock_irqrestore(&req->poll.head->lock, flags);
> + }
> + if (!done)
> + __io_poll_wake(req, poll, mask);
> + refcount_dec(&req->refs);
> + return ret;
> +}
> +
> struct io_poll_table {
> struct poll_table_struct pt;
> struct io_kiocb *req;
> @@ -3692,15 +3738,33 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
> struct poll_table_struct *p)
> {
> struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
> + struct io_kiocb *req = pt->req;
> + struct io_poll_iocb *poll = &req->poll;
>
> - if (unlikely(pt->req->poll.head)) {
> - pt->error = -EINVAL;
> - return;
> + /*
> + * If poll->head is already set, it's because the file being polled
> + * use multiple waitqueues for poll handling (eg one for read, one
> + * for write). Setup a separate io_poll_iocb if this happens.
> + */
> + if (unlikely(poll->head)) {
I'll keep looking, but I guess there should be:
if (req->io)
return -EINVAL;
> + poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
> + if (!poll) {
> + pt->error = -ENOMEM;
> + return;
> + }
> + poll->done = false;
> + poll->canceled = false;
> + poll->events = req->poll.events;
> + INIT_LIST_HEAD(&poll->wait.entry);
> + init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
> + refcount_inc(&req->refs);
> + poll->wait.private = req;
> + req->io = (void *) poll;
> }
>
> pt->error = 0;
> - pt->req->poll.head = head;
> - add_wait_queue(head, &pt->req->poll.wait);
> + poll->head = head;
> + add_wait_queue(head, &poll->wait);
> }
>
> static void io_poll_req_insert(struct io_kiocb *req)
> @@ -3777,6 +3841,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
> }
> if (mask) { /* no async, we'd stolen it */
> ipt.error = 0;
> + io_poll_remove_double(req);
> io_poll_complete(req, mask, 0);
> }
> spin_unlock_irq(&ctx->completion_lock);
>
--
Pavel Begunkov
* Re: [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
2020-02-11 20:22 ` Pavel Begunkov
@ 2020-02-11 20:27 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2020-02-11 20:27 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 2/11/20 1:22 PM, Pavel Begunkov wrote:
>> @@ -3692,15 +3738,33 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>> struct poll_table_struct *p)
>> {
>> struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
>> + struct io_kiocb *req = pt->req;
>> + struct io_poll_iocb *poll = &req->poll;
>>
>> - if (unlikely(pt->req->poll.head)) {
>> - pt->error = -EINVAL;
>> - return;
>> + /*
>> + * If poll->head is already set, it's because the file being polled
>> + * use multiple waitqueues for poll handling (eg one for read, one
>> + * for write). Setup a separate io_poll_iocb if this happens.
>> + */
>> + if (unlikely(poll->head)) {
>
> I'll keep looking, but I guess there should be :
>
> if (req->io)
> return -EINVAL;
Yes, I've changed it to:
if (unlikely(poll->head)) {
/* already have a 2nd entry, fail a third attempt */
if (req->io) {
pt->error = -EINVAL;
return;
}
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
if (!poll) {
pt->error = -ENOMEM;
return;
}
[...]
--
Jens Axboe
* [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs
@ 2020-02-12 20:25 Jens Axboe
2020-02-12 20:25 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC (permalink / raw)
To: io-uring
Here's v2 of the "let's make POLL_ADD work on everything" patchset. As
before, patches 1-2 are just basic prep patches, and should not have any
functional changes in them. Patch 3 adds support for allocating a new
io_poll_iocb unit if we get multiple additions through our queue proc
for the wait queues. This new 'poll' addition is queued up as well, and
it grabs a reference to the original poll request.
Changes since v1:
- Fix unused 'ret' variable in io_poll_double_wake()
- Fail if we get an attempt at a third waitqueue addition (Pavel)
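For reference, the pipe case can be exercised from userspace with a
sketch along these lines (assumes liburing; without this series the
poll CQE comes back with -EINVAL, with it the result is the POLLIN
mask):

#include <liburing.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[2];

	if (pipe(fds) < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* queue a poll request for readability on the pipe read end */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, fds[0], POLLIN);
	io_uring_submit(&ring);

	/* make the pipe readable so the poll can complete */
	if (write(fds[1], "x", 1) < 0)
		return 1;

	io_uring_wait_cqe(&ring, &cqe);
	printf("res=%d\n", cqe->res);	/* POLLIN with this series, -EINVAL without */
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}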
--
Jens Axboe
* [PATCH 1/3] io_uring: store io_kiocb in wait->private
2020-02-12 20:25 [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
@ 2020-02-12 20:25 ` Jens Axboe
2020-02-12 20:25 ` [PATCH 2/3] io_uring: abstract out main poll wake handler Jens Axboe
2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
2 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Store the io_kiocb in the private field instead of the poll entry; this
is in preparation for allowing multiple waitqueues.
No functional changes in this patch.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6d4e20d59729..08ffeb7df4f5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3625,8 +3625,8 @@ static void io_poll_trigger_evfd(struct io_wq_work **workptr)
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
- struct io_poll_iocb *poll = wait->private;
- struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
__poll_t mask = key_to_poll(key);
@@ -3749,7 +3749,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
/* initialized the list so that we can do list_empty checks */
INIT_LIST_HEAD(&poll->wait.entry);
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
- poll->wait.private = poll;
+ poll->wait.private = req;
INIT_LIST_HEAD(&req->list);
--
2.25.0
* [PATCH 2/3] io_uring: abstract out main poll wake handler
2020-02-12 20:25 [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
2020-02-12 20:25 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
@ 2020-02-12 20:25 ` Jens Axboe
2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
2 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
In preparation for having multiple poll waitqueues, abstract out the
main wake handler so we can call it with the desired data.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 74 +++++++++++++++++++++++++++------------------------
1 file changed, 39 insertions(+), 35 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 08ffeb7df4f5..9cd2ce3b8ad9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3622,17 +3622,11 @@ static void io_poll_trigger_evfd(struct io_wq_work **workptr)
io_put_req(req);
}
-static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
- void *key)
+static void __io_poll_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+ __poll_t mask)
{
- struct io_kiocb *req = wait->private;
- struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
- __poll_t mask = key_to_poll(key);
-
- /* for instances that support it check for an event match first: */
- if (mask && !(mask & poll->events))
- return 0;
+ unsigned long flags;
list_del_init(&poll->wait.entry);
@@ -3642,40 +3636,50 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
* If we have a link timeout we're going to need the completion_lock
* for finalizing the request, mark us as having grabbed that already.
*/
- if (mask) {
- unsigned long flags;
+ if (llist_empty(&ctx->poll_llist) && !req->io &&
+ spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+ bool trigger_ev;
- if (llist_empty(&ctx->poll_llist) &&
- spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- bool trigger_ev;
-
- hash_del(&req->hash_node);
- io_poll_complete(req, mask, 0);
+ hash_del(&req->hash_node);
+ io_poll_complete(req, mask, 0);
- trigger_ev = io_should_trigger_evfd(ctx);
- if (trigger_ev && eventfd_signal_count()) {
- trigger_ev = false;
- req->work.func = io_poll_trigger_evfd;
- } else {
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req(req);
- req = NULL;
- }
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- __io_cqring_ev_posted(ctx, trigger_ev);
+ trigger_ev = io_should_trigger_evfd(ctx);
+ if (trigger_ev && eventfd_signal_count()) {
+ trigger_ev = false;
+ req->work.func = io_poll_trigger_evfd;
} else {
- req->result = mask;
- req->llist_node.next = NULL;
- /* if the list wasn't empty, we're done */
- if (!llist_add(&req->llist_node, &ctx->poll_llist))
- req = NULL;
- else
- req->work.func = io_poll_flush;
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req(req);
+ req = NULL;
}
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ __io_cqring_ev_posted(ctx, trigger_ev);
+ } else {
+ req->result = mask;
+ req->llist_node.next = NULL;
+ /* if the list wasn't empty, we're done */
+ if (!llist_add(&req->llist_node, &ctx->poll_llist))
+ req = NULL;
+ else
+ req->work.func = io_poll_flush;
}
+
if (req)
io_queue_async_work(req);
+}
+
+static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = &req->poll;
+ __poll_t mask = key_to_poll(key);
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & poll->events))
+ return 0;
+ __io_poll_wake(req, &req->poll, mask);
return 1;
}
--
2.25.0
* [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
2020-02-12 20:25 [PATCHSET v2 0/3] io_uring: make POLL_ADD support multiple waitqs Jens Axboe
2020-02-12 20:25 ` [PATCH 1/3] io_uring: store io_kiocb in wait->private Jens Axboe
2020-02-12 20:25 ` [PATCH 2/3] io_uring: abstract out main poll wake handler Jens Axboe
@ 2020-02-12 20:25 ` Jens Axboe
2020-02-13 15:50 ` Pavel Begunkov
2 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2020-02-12 20:25 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Some file descriptors use separate waitqueues for their f_ops->poll()
handler, most commonly one for read and one for write. The io_uring
poll implementation doesn't work with that, as the 2nd poll_wait()
call will cause the io_uring poll request to fail with -EINVAL.
This is particularly a problem now that pipes were switched to using
multiple wait queues (commit 0ddad21d3e99), but it also affects tty
devices and /dev/random. This is a big problem for event loops where
some file descriptors work and others don't.
With this fix, io_uring handles multiple waitqueues.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 74 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9cd2ce3b8ad9..9f00f30e1790 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3440,10 +3440,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
#endif
}
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+ struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+
+ if (poll && poll->head) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&poll->head->lock, flags);
+ list_del_init(&poll->wait.entry);
+ if (poll->wait.private)
+ refcount_dec(&req->refs);
+ spin_unlock_irqrestore(&poll->head->lock, flags);
+ }
+}
+
static void io_poll_remove_one(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
+ io_poll_remove_double(req);
+
spin_lock(&poll->head->lock);
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
@@ -3679,10 +3696,38 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && !(mask & poll->events))
return 0;
+ io_poll_remove_double(req);
__io_poll_wake(req, &req->poll, mask);
return 1;
}
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+ int sync, void *key)
+{
+ struct io_kiocb *req = wait->private;
+ struct io_poll_iocb *poll = (void *) req->io;
+ __poll_t mask = key_to_poll(key);
+ bool done = true;
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & poll->events))
+ return 0;
+
+ if (req->poll.head) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&req->poll.head->lock, flags);
+ done = list_empty(&req->poll.wait.entry);
+ if (!done)
+ list_del_init(&req->poll.wait.entry);
+ spin_unlock_irqrestore(&req->poll.head->lock, flags);
+ }
+ if (!done)
+ __io_poll_wake(req, poll, mask);
+ refcount_dec(&req->refs);
+ return 1;
+}
+
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
@@ -3693,15 +3738,38 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct poll_table_struct *p)
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+ struct io_kiocb *req = pt->req;
+ struct io_poll_iocb *poll = &req->poll;
- if (unlikely(pt->req->poll.head)) {
- pt->error = -EINVAL;
- return;
+ /*
+ * If poll->head is already set, it's because the file being polled
+ * use multiple waitqueues for poll handling (eg one for read, one
+ * for write). Setup a separate io_poll_iocb if this happens.
+ */
+ if (unlikely(poll->head)) {
+ /* already have a 2nd entry, fail a third attempt */
+ if (req->io) {
+ pt->error = -EINVAL;
+ return;
+ }
+ poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+ if (!poll) {
+ pt->error = -ENOMEM;
+ return;
+ }
+ poll->done = false;
+ poll->canceled = false;
+ poll->events = req->poll.events;
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
+ refcount_inc(&req->refs);
+ poll->wait.private = req;
+ req->io = (void *) poll;
}
pt->error = 0;
- pt->req->poll.head = head;
- add_wait_queue(head, &pt->req->poll.wait);
+ poll->head = head;
+ add_wait_queue(head, &poll->wait);
}
static void io_poll_req_insert(struct io_kiocb *req)
@@ -3778,6 +3846,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
}
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
+ io_poll_remove_double(req);
io_poll_complete(req, mask, 0);
}
spin_unlock_irq(&ctx->completion_lock);
--
2.25.0
* Re: [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users
2020-02-12 20:25 ` [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users Jens Axboe
@ 2020-02-13 15:50 ` Pavel Begunkov
0 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2020-02-13 15:50 UTC (permalink / raw)
To: Jens Axboe, io-uring
Hi,
There are a couple of comments below.
On 2/12/2020 11:25 PM, Jens Axboe wrote:
> Some file descriptors use separate waitqueues for their f_ops->poll()
> handler, most commonly one for read and one for write. The io_uring
> poll implementation doesn't work with that, as the 2nd poll_wait()
> call will cause the io_uring poll request to fail with -EINVAL.
>
> This is particularly a problem now that pipes were switched to using
> multiple wait queues (commit 0ddad21d3e99), but it also affects tty
> devices and /dev/random. This is a big problem for event loops where
> some file descriptors work and others don't.
>
> With this fix, io_uring handles multiple waitqueues.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 74 insertions(+), 5 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 9cd2ce3b8ad9..9f00f30e1790 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -3440,10 +3440,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
> #endif
> }
>
> +static void io_poll_remove_double(struct io_kiocb *req)
> +{
> + struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
> +
> + if (poll && poll->head) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&poll->head->lock, flags);
> + list_del_init(&poll->wait.entry);
> + if (poll->wait.private)
> + refcount_dec(&req->refs);
> + spin_unlock_irqrestore(&poll->head->lock, flags);
> + }
> +}
> +
> static void io_poll_remove_one(struct io_kiocb *req)
> {
> struct io_poll_iocb *poll = &req->poll;
>
> + io_poll_remove_double(req);
> +
> spin_lock(&poll->head->lock);
> WRITE_ONCE(poll->canceled, true);
> if (!list_empty(&poll->wait.entry)) {
> @@ -3679,10 +3696,38 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
> if (mask && !(mask & poll->events))
> return 0;
>
> + io_poll_remove_double(req);
> __io_poll_wake(req, &req->poll, mask);
> return 1;
> }
>
> +static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
> + int sync, void *key)
> +{
> + struct io_kiocb *req = wait->private;
> + struct io_poll_iocb *poll = (void *) req->io;
> + __poll_t mask = key_to_poll(key);
> + bool done = true;
> +
> + /* for instances that support it check for an event match first: */
> + if (mask && !(mask & poll->events))
> + return 0;
> +
> + if (req->poll.head) {
Can there be concurrency problems?
1. io_poll_wake() -> io_poll_remove_double() is running while the
second io_poll_queue_proc() is called. Then there will be a race
for req->io.
2. Concurrent io_poll_wake() and io_poll_double_wake().
> + unsigned long flags;
> +
> + spin_lock_irqsave(&req->poll.head->lock, flags);
> + done = list_empty(&req->poll.wait.entry);
> + if (!done)
> + list_del_init(&req->poll.wait.entry);
> + spin_unlock_irqrestore(&req->poll.head->lock, flags);
> + }
> + if (!done)
> + __io_poll_wake(req, poll, mask);
It's always false if we didn't hit the block under `req->poll.head`, so
this call may be moved into that block, along with the @done declaration.
> + refcount_dec(&req->refs);
> + return 1;
> +}
> +
> struct io_poll_table {
> struct poll_table_struct pt;
> struct io_kiocb *req;
> @@ -3693,15 +3738,38 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
> struct poll_table_struct *p)
> {
May this be called concurrently? (at least in theory)
> struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
> + struct io_kiocb *req = pt->req;
> + struct io_poll_iocb *poll = &req->poll;
>
> - if (unlikely(pt->req->poll.head)) {
> - pt->error = -EINVAL;
> - return;
> + /*
> + * If poll->head is already set, it's because the file being polled
> + * use multiple waitqueues for poll handling (eg one for read, one
> + * for write). Setup a separate io_poll_iocb if this happens.
> + */
> + if (unlikely(poll->head)) {
> + /* already have a 2nd entry, fail a third attempt */
> + if (req->io) {
> + pt->error = -EINVAL;
> + return;
> + }
> + poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
Don't see where this is freed
> + if (!poll) {
> + pt->error = -ENOMEM;
> + return;
> + }
> + poll->done = false;
> + poll->canceled = false;
> + poll->events = req->poll.events;
> + INIT_LIST_HEAD(&poll->wait.entry);
> + init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
> + refcount_inc(&req->refs);
> + poll->wait.private = req;
> + req->io = (void *) poll;
> }
>
> pt->error = 0;
> - pt->req->poll.head = head;
> - add_wait_queue(head, &pt->req->poll.wait);
> + poll->head = head;
> + add_wait_queue(head, &poll->wait);
> }
>
> static void io_poll_req_insert(struct io_kiocb *req)
> @@ -3778,6 +3846,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
> }
> if (mask) { /* no async, we'd stolen it */
> ipt.error = 0;
> + io_poll_remove_double(req);
> io_poll_complete(req, mask, 0);
> }
> spin_unlock_irq(&ctx->completion_lock);
>
--
Pavel Begunkov