On 10/02/2020 23:56, Jens Axboe wrote:
> Some file descriptors use separate waitqueues for their f_ops->poll()
> handler, most commonly one for read and one for write. The io_uring
> poll implementation doesn't work with that, as the 2nd poll_wait()
> call will cause the io_uring poll request to -EINVAL.
>
> This is particularly a problem now that pipes were switched to using
> multiple wait queues (commit 0ddad21d3e99), but it also affects tty
> devices and /dev/random as well. This is a big problem for event loops
> where some file descriptors work, and others don't.
>
> With this fix, io_uring handles multiple waitqueues.
>
> Signed-off-by: Jens Axboe
> ---
>  fs/io_uring.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 70 insertions(+), 5 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 123e6424a050..72bc378edebc 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -3439,10 +3439,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
>  #endif
>  }
>
> +static void io_poll_remove_double(struct io_kiocb *req)
> +{
> +	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
> +
> +	if (poll && poll->head) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&poll->head->lock, flags);
> +		list_del_init(&poll->wait.entry);
> +		if (poll->wait.private)
> +			refcount_dec(&req->refs);
> +		spin_unlock_irqrestore(&poll->head->lock, flags);
> +	}
> +}
> +
>  static void io_poll_remove_one(struct io_kiocb *req)
>  {
>  	struct io_poll_iocb *poll = &req->poll;
>
> +	io_poll_remove_double(req);
> +
>  	spin_lock(&poll->head->lock);
>  	WRITE_ONCE(poll->canceled, true);
>  	if (!list_empty(&poll->wait.entry)) {
> @@ -3678,10 +3695,39 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
>  	if (mask && !(mask & poll->events))
>  		return 0;
>
> +	io_poll_remove_double(req);
>  	__io_poll_wake(req, &req->poll, mask);
>  	return 1;
>  }
>
> +static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
> +			       int sync, void *key)
> +{
> +	struct io_kiocb *req = wait->private;
> +	struct io_poll_iocb *poll = (void *) req->io;
> +	__poll_t mask = key_to_poll(key);
> +	bool done = true;
> +	int ret;
> +
> +	/* for instances that support it check for an event match first: */
> +	if (mask && !(mask & poll->events))
> +		return 0;
> +
> +	if (req->poll.head) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&req->poll.head->lock, flags);
> +		done = list_empty(&req->poll.wait.entry);
> +		if (!done)
> +			list_del_init(&req->poll.wait.entry);
> +		spin_unlock_irqrestore(&req->poll.head->lock, flags);
> +	}
> +	if (!done)
> +		__io_poll_wake(req, poll, mask);
> +	refcount_dec(&req->refs);
> +	return ret;
> +}
> +
>  struct io_poll_table {
>  	struct poll_table_struct pt;
>  	struct io_kiocb *req;
> @@ -3692,15 +3738,33 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>  			       struct poll_table_struct *p)
>  {
>  	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
> +	struct io_kiocb *req = pt->req;
> +	struct io_poll_iocb *poll = &req->poll;
>
> -	if (unlikely(pt->req->poll.head)) {
> -		pt->error = -EINVAL;
> -		return;
> +	/*
> +	 * If poll->head is already set, it's because the file being polled
> +	 * use multiple waitqueues for poll handling (eg one for read, one
> +	 * for write). Setup a separate io_poll_iocb if this happens.
> +	 */
> +	if (unlikely(poll->head)) {

I'll keep looking, but I guess there should be something like:

	if (req->io)
		return -EINVAL;

> +		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
> +		if (!poll) {
> +			pt->error = -ENOMEM;
> +			return;
> +		}
> +		poll->done = false;
> +		poll->canceled = false;
> +		poll->events = req->poll.events;
> +		INIT_LIST_HEAD(&poll->wait.entry);
> +		init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
> +		refcount_inc(&req->refs);
> +		poll->wait.private = req;
> +		req->io = (void *) poll;
>  	}
>
>  	pt->error = 0;
> -	pt->req->poll.head = head;
> -	add_wait_queue(head, &pt->req->poll.wait);
> +	poll->head = head;
> +	add_wait_queue(head, &poll->wait);
>  }
>
>  static void io_poll_req_insert(struct io_kiocb *req)
> @@ -3777,6 +3841,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
>  	}
>  	if (mask) { /* no async, we'd stolen it */
>  		ipt.error = 0;
> +		io_poll_remove_double(req);
>  		io_poll_complete(req, mask, 0);
>  	}
>  	spin_unlock_irq(&ctx->completion_lock);
>
--
Pavel Begunkov
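
For reference, the user-visible effect described in the commit message can be seen with a minimal liburing sketch along the following lines (illustrative only, not from this thread, error handling omitted): it arms a POLL_ADD on the read end of a pipe, which completes with res == -EINVAL on kernels where pipes use split read/write waitqueues and io_uring lacks this handling, and reports a mask containing POLLIN once data is written with the patch applied.

#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[2];

	pipe(fds);
	io_uring_queue_init(4, &ring, 0);

	/* POLL_ADD on the pipe read end, the double-waitqueue case */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, fds[0], POLLIN);
	io_uring_submit(&ring);

	/* make the read end readable so the poll can fire */
	write(fds[1], "x", 1);

	io_uring_wait_cqe(&ring, &cqe);
	printf("poll result: %d\n", cqe->res); /* -EINVAL before, POLLIN after */
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}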