* [PATCHSET 0/2] Use signal based task_work running
From: Jens Axboe @ 2020-06-30 18:45 UTC
To: io-uring; +Cc: oleg, peterz, linux-kernel
This series fixes a regression in 5.7 with io_uring, introduced when we
started using task_work for certain async events. Details are in
patch 2/2.
--
Jens Axboe
* [PATCH 1/2] task_work: teach task_work_add() to do signal_wake_up()
From: Jens Axboe @ 2020-06-30 18:45 UTC
To: io-uring; +Cc: oleg, peterz, linux-kernel, stable, Jens Axboe
From: Oleg Nesterov <[email protected]>
So that the target task will exit the wait_event_interruptible-like
loop and call task_work_run() asap.
The patch turns "bool notify" into 0,TWA_RESUME,TWA_SIGNAL enum, the
new TWA_SIGNAL flag implies signal_wake_up(). However, it needs to
avoid the race with recalc_sigpending(), so the patch also adds the
new JOBCTL_TASK_WORK bit included in JOBCTL_PENDING_MASK.
TODO: once this patch is merged we need to change all current users
of task_work_add(notify = true) to use TWA_RESUME.
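
As a rough illustration (an editorial sketch, not part of this patch;
queue_work_on_task() and its caller are made-up names), the new flag
would be used like this:

	#include <linux/task_work.h>

	/*
	 * Queue @work on @task so that it runs even while @task is
	 * blocked in an interruptible wait in the kernel.
	 */
	static int queue_work_on_task(struct task_struct *task,
				      struct callback_head *work,
				      task_work_func_t func)
	{
		init_task_work(work, func);
		/*
		 * TWA_SIGNAL sets JOBCTL_TASK_WORK under ->siglock and does
		 * signal_wake_up(), so signal_pending() becomes true, the
		 * task exits its interruptible wait, and get_signal() runs
		 * the queued work.
		 */
		return task_work_add(task, work, TWA_SIGNAL);
	}

task_work_add() returns -ESRCH if the task is already exiting, in which
case the work is not queued and the caller must handle it itself.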
Cc: [email protected] # v5.7
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/sched/jobctl.h | 4 +++-
include/linux/task_work.h | 5 ++++-
kernel/signal.c | 10 +++++++---
kernel/task_work.c | 16 ++++++++++++++--
4 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index fa067de9f1a9..d2b4204ba4d3 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -19,6 +19,7 @@ struct task_struct;
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
+#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */
#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
@@ -28,9 +29,10 @@ struct task_struct;
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
+#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT)
#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
-#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK)
extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
extern void task_clear_jobctl_trapping(struct task_struct *task);
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index bd9a6a91c097..0fb93aafa478 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
twork->func = func;
}
-int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
+#define TWA_RESUME 1
+#define TWA_SIGNAL 2
+int task_work_add(struct task_struct *task, struct callback_head *twork, int);
+
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
void task_work_run(void);
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ca48cc5da76..ee22ec78fd6d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
struct signal_struct *signal = current->signal;
int signr;
- if (unlikely(current->task_works))
- task_work_run();
-
if (unlikely(uprobe_deny_signal()))
return false;
@@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig)
relock:
spin_lock_irq(&sighand->siglock);
+ current->jobctl &= ~JOBCTL_TASK_WORK;
+ if (unlikely(current->task_works)) {
+ spin_unlock_irq(&sighand->siglock);
+ task_work_run();
+ goto relock;
+ }
+
/*
* Every stopped thread goes here after wakeup. Check to see if
* we should notify the parent, prepare_signal(SIGCONT) encodes
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 825f28259a19..5c0848ca1287 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* 0 if succeeds or -ESRCH.
*/
int
-task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *work, int notify)
{
struct callback_head *head;
+ unsigned long flags;
do {
head = READ_ONCE(task->task_works);
@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
work->next = head;
} while (cmpxchg(&task->task_works, head, work) != head);
- if (notify)
+ switch (notify) {
+ case TWA_RESUME:
set_notify_resume(task);
+ break;
+ case TWA_SIGNAL:
+ if (lock_task_sighand(task, &flags)) {
+ task->jobctl |= JOBCTL_TASK_WORK;
+ signal_wake_up(task, 0);
+ unlock_task_sighand(task, &flags);
+ }
+ break;
+ }
+
return 0;
}
--
2.27.0
* [PATCH 2/2] io_uring: use signal based task_work running
From: Jens Axboe @ 2020-06-30 18:45 UTC
To: io-uring; +Cc: oleg, peterz, linux-kernel, Jens Axboe, stable
Since 5.7, we've been using task_work to trigger async running of
requests in the context of the original task. This generally works
great, but there's one case where it breaks down: if the task is currently
blocked in the kernel waiting on a condition to become true, it won't
process task_work. Even though the task is woken, it just re-checks
whatever condition it's waiting on, and goes back to sleep if it's still
false. This is a problem if that very condition only becomes true once the
task_work is run. An example is a task that has registered an eventfd with
io_uring and is now blocked waiting on an eventfd read. That read could
depend on a completion event, and that completion event won't get
triggered until the task_work has been run.

Use the TWA_SIGNAL notification for task_work, so that the task is
guaranteed to run the work as soon as it is queued.
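
As a minimal sketch of the pattern being fixed (placeholder names, not
code from this patch): the task blocks in a loop like the one below, while
the event that sets @done is itself driven by task_work queued to this
very task:

	#include <linux/sched/signal.h>
	#include <linux/wait.h>

	static int wait_for_event(wait_queue_head_t *wq, bool *done)
	{
		DEFINE_WAIT(wait);
		int ret = 0;

		for (;;) {
			prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
			if (*done)	/* only set once task_work runs */
				break;
			if (signal_pending(current)) {
				/* TWA_SIGNAL trips this */
				ret = -ERESTARTSYS;
				break;
			}
			/* a plain wakeup re-checks *done, sleeps again */
			schedule();
		}
		finish_wait(wq, &wait);
		return ret;
	}

With the old notify mechanism, the wakeup only sets TIF_NOTIFY_RESUME,
which is processed on return to userspace, so the loop keeps sleeping.
With TWA_SIGNAL, signal_pending() becomes true, the loop exits, and the
queued work runs from get_signal().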
Cc: [email protected] # v5.7
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e507737f044e..476f03b42777 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4072,6 +4072,23 @@ struct io_poll_table {
int error;
};
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
+ int notify)
+{
+ const bool is_sqthread = (req->ctx->flags & IORING_SETUP_SQPOLL) != 0;
+ struct task_struct *tsk = req->task;
+ int ret;
+
+ if (is_sqthread)
+ notify = 0;
+
+ ret = task_work_add(tsk, cb, notify);
+
+ if (!ret && is_sqthread)
+ wake_up_process(tsk);
+ return ret;
+}
+
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, task_work_func_t func)
{
@@ -4095,13 +4112,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
* of executing it. We can't safely execute it anyway, as we may not
* have the needed state needed for it anyway.
*/
- ret = task_work_add(tsk, &req->task_work, true);
+ ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
if (unlikely(ret)) {
WRITE_ONCE(poll->canceled, true);
tsk = io_wq_get_task(req->ctx->io_wq);
- task_work_add(tsk, &req->task_work, true);
+ task_work_add(tsk, &req->task_work, 0);
+ wake_up_process(tsk);
}
- wake_up_process(tsk);
return 1;
}
@@ -6182,15 +6199,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
do {
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
+ /* make sure we run task_work before checking for signals */
if (current->task_works)
task_work_run();
- if (io_should_wake(&iowq, false))
- break;
- schedule();
if (signal_pending(current)) {
ret = -EINTR;
break;
}
+ if (io_should_wake(&iowq, false))
+ break;
+ schedule();
} while (1);
finish_wait(&ctx->wait, &iowq.wq);
--
2.27.0