* [PATCH 1/4] kernel: add task_sigpending() helper
From: Jens Axboe @ 2020-10-26 20:32 UTC
To: linux-kernel, io-uring; +Cc: peterz, oleg, tglx, Jens Axboe
This is in preparation for maintaining signal_pending() as the decider
of whether a schedule() loop should be broken or continue sleeping.
This is different from the core signal use cases, where we really want
to know whether an actual signal is pending or not.
task_sigpending() returns non-zero if TIF_SIGPENDING is set.
Only core kernel use cases should care about the distinction between
the two; make sure those use the task_sigpending() helper.
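As an illustration (a minimal sketch, not part of this patch;
done_waiting stands in for whatever condition the caller sleeps on), a
typical schedule() wait loop keys off signal_pending(), which after this
series can also trip on deferred-work notifications, while core signal
code should use task_sigpending() and react only to real signals:

        /* wait loop: break out for anything that needs attention */
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (done_waiting)
                        break;
                if (signal_pending(current)) {
                        __set_current_state(TASK_RUNNING);
                        return -ERESTARTSYS;
                }
                schedule();
        }
        __set_current_state(TASK_RUNNING);

        /* core signal code: only an actual queued signal matters */
        if (task_sigpending(current))
                /* e.g. deliver or retarget a real signal */;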
Reviewed-by: Thomas Gleixner <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/sched/signal.h | 9 +++++++--
kernel/events/uprobes.c | 2 +-
kernel/signal.c | 8 ++++----
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 1bad18a1d8ba..404145dc536e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -353,11 +353,16 @@ static inline int restart_syscall(void)
return -ERESTARTNOINTR;
}
-static inline int signal_pending(struct task_struct *p)
+static inline int task_sigpending(struct task_struct *p)
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
+static inline int signal_pending(struct task_struct *p)
+{
+ return task_sigpending(p);
+}
+
static inline int __fatal_signal_pending(struct task_struct *p)
{
return unlikely(sigismember(&p->pending.signal, SIGKILL));
@@ -365,7 +370,7 @@ static inline int __fatal_signal_pending(struct task_struct *p)
static inline int fatal_signal_pending(struct task_struct *p)
{
- return signal_pending(p) && __fatal_signal_pending(p);
+ return task_sigpending(p) && __fatal_signal_pending(p);
}
static inline int signal_pending_state(long state, struct task_struct *p)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 00b0358739ab..bf9edd8d75be 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1973,7 +1973,7 @@ bool uprobe_deny_signal(void)
WARN_ON_ONCE(utask->state != UTASK_SSTEP);
- if (signal_pending(t)) {
+ if (task_sigpending(t)) {
spin_lock_irq(&t->sighand->siglock);
clear_tsk_thread_flag(t, TIF_SIGPENDING);
spin_unlock_irq(&t->sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index a38b3edc6851..9f86246a8637 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -983,7 +983,7 @@ static inline bool wants_signal(int sig, struct task_struct *p)
if (task_is_stopped_or_traced(p))
return false;
- return task_curr(p) || !signal_pending(p);
+ return task_curr(p) || !task_sigpending(p);
}
static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
@@ -2822,7 +2822,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
/* Remove the signals this thread can handle. */
sigandsets(&retarget, &retarget, &t->blocked);
- if (!signal_pending(t))
+ if (!task_sigpending(t))
signal_wake_up(t, 0);
if (sigisemptyset(&retarget))
@@ -2856,7 +2856,7 @@ void exit_signals(struct task_struct *tsk)
cgroup_threadgroup_change_end(tsk);
- if (!signal_pending(tsk))
+ if (!task_sigpending(tsk))
goto out;
unblocked = tsk->blocked;
@@ -2900,7 +2900,7 @@ long do_no_restart_syscall(struct restart_block *param)
static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
{
- if (signal_pending(tsk) && !thread_group_empty(tsk)) {
+ if (task_sigpending(tsk) && !thread_group_empty(tsk)) {
sigset_t newblocked;
/* A set of now blocked but previously unblocked signals. */
sigandnsets(&newblocked, newset, &current->blocked);
--
2.29.0
* [PATCH 2/4] kernel: add support for TIF_NOTIFY_SIGNAL
From: Jens Axboe @ 2020-10-26 20:32 UTC
To: linux-kernel, io-uring; +Cc: peterz, oleg, tglx, Jens Axboe
This adds TIF_NOTIFY_SIGNAL handling to the generic entry code: if the
flag is set, signal_pending() returns true when used in a wait loop.
That causes an exit of the loop so that notify-signal tracehooks can be
run. If the wait loop is currently inside a system call, the system
call is restarted once task_work has been processed.
In preparation for having arch_do_signal() handle only the syscall
restart when _TIF_SIGPENDING isn't set, rename it to
arch_do_signal_or_restart(). Pass in a boolean that tells the arch
signal handler whether it should attempt to get a signal, or just
process a potential syscall restart.
For !CONFIG_GENERIC_ENTRY archs, we add the TIF_NOTIFY_SIGNAL handling
to get_signal(). This is done to minimize the needed arch changes to
support this feature.
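As a rough sketch of what that looks like (modeled on a typical arch
do_signal() path, not code from this series), an arch without
CONFIG_GENERIC_ENTRY keeps its signal path unchanged and get_signal()
picks up the notify-signal work:

        static void do_signal(struct pt_regs *regs)
        {
                struct ksignal ksig;

                /*
                 * get_signal() now also clears TIF_NOTIFY_SIGNAL and
                 * runs pending task_work before looking for a signal
                 */
                if (get_signal(&ksig)) {
                        handle_signal(&ksig, regs);
                        return;
                }

                /* no signal to deliver: restore the saved sigmask */
                restore_saved_sigmask();
        }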
Reviewed-by: Oleg Nesterov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
arch/x86/kernel/signal.c | 4 ++--
include/linux/entry-common.h | 11 ++++++++---
include/linux/entry-kvm.h | 4 ++--
include/linux/sched/signal.h | 11 ++++++++++-
include/linux/tracehook.h | 27 +++++++++++++++++++++++++++
kernel/entry/common.c | 14 +++++++++++---
kernel/entry/kvm.c | 3 +++
kernel/signal.c | 14 ++++++++++++++
8 files changed, 77 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index be0d7d4152ec..ea794a083c44 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -804,11 +804,11 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*/
-void arch_do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
{
struct ksignal ksig;
- if (get_signal(&ksig)) {
+ if (has_signal && get_signal(&ksig)) {
/* Whee! Actually deliver the signal. */
handle_signal(&ksig, regs);
return;
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 474f29638d2c..b9711e813ec2 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -37,6 +37,10 @@
# define _TIF_UPROBE (0)
#endif
+#ifndef _TIF_NOTIFY_SIGNAL
+# define _TIF_NOTIFY_SIGNAL (0)
+#endif
+
/*
* TIF flags handled in syscall_enter_from_user_mode()
*/
@@ -69,7 +73,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \
+ _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
@@ -259,12 +263,13 @@ static __always_inline void arch_exit_to_user_mode(void) { }
#endif
/**
- * arch_do_signal - Architecture specific signal delivery function
+ * arch_do_signal_or_restart - Architecture specific signal delivery function
* @regs: Pointer to currents pt_regs
+ * @has_signal: actual signal to handle
*
* Invoked from exit_to_user_mode_loop().
*/
-void arch_do_signal(struct pt_regs *regs);
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);
/**
* arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 0cef17afb41a..9b93f8584ff7 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -11,8 +11,8 @@
# define ARCH_XFER_TO_GUEST_MODE_WORK (0)
#endif
-#define XFER_TO_GUEST_MODE_WORK \
- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define XFER_TO_GUEST_MODE_WORK \
+ (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
_TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 404145dc536e..bd5afa076189 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -360,6 +360,15 @@ static inline int task_sigpending(struct task_struct *p)
static inline int signal_pending(struct task_struct *p)
{
+#if defined(TIF_NOTIFY_SIGNAL)
+ /*
+ * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same
+ * behavior in terms of ensuring that we break out of wait loops
+ * so that notify signal callbacks can be processed.
+ */
+ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL)))
+ return 1;
+#endif
return task_sigpending(p);
}
@@ -507,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize);
static inline void restore_saved_sigmask_unless(bool interrupted)
{
if (interrupted)
- WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+ WARN_ON(!signal_pending(current));
else
restore_saved_sigmask();
}
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b480e1a07ed8..f7d82e4fafd6 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -198,4 +198,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
blkcg_maybe_throttle_current();
}
+/*
+ * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This
+ * is currently used by TWA_SIGNAL based task_work, which requires breaking
+ * wait loops to ensure that task_work is noticed and run.
+ */
+static inline void tracehook_notify_signal(void)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
+ smp_mb__after_atomic();
+ if (current->task_works)
+ task_work_run();
+#endif
+}
+
+/*
+ * Called when we have work to process from exit_to_user_mode_loop()
+ */
+static inline void set_notify_signal(struct task_struct *task)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+ !wake_up_state(task, TASK_INTERRUPTIBLE))
+ kick_process(task);
+#endif
+}
+
#endif /* <linux/tracehook.h> */
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 2b8366693d5c..54c07d386aeb 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -135,7 +135,15 @@ static __always_inline void exit_to_user_mode(void)
}
/* Workaround to allow gradual conversion of architecture code */
-void __weak arch_do_signal(struct pt_regs *regs) { }
+void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { }
+
+static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
+{
+ if (ti_work & _TIF_NOTIFY_SIGNAL)
+ tracehook_notify_signal();
+
+ arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
+}
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
unsigned long ti_work)
@@ -157,8 +165,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
if (ti_work & _TIF_PATCH_PENDING)
klp_update_patch_state(current);
- if (ti_work & _TIF_SIGPENDING)
- arch_do_signal(regs);
+ if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+ handle_signal_work(regs, ti_work);
if (ti_work & _TIF_NOTIFY_RESUME) {
tracehook_notify_resume(regs);
diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
index b6678a5e3cf6..49972ee99aff 100644
--- a/kernel/entry/kvm.c
+++ b/kernel/entry/kvm.c
@@ -8,6 +8,9 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
do {
int ret;
+ if (ti_work & _TIF_NOTIFY_SIGNAL)
+ tracehook_notify_signal();
+
if (ti_work & _TIF_SIGPENDING) {
kvm_handle_signal_exit(vcpu);
return -EINTR;
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f86246a8637..f67ea9a08ac0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2529,6 +2529,20 @@ bool get_signal(struct ksignal *ksig)
struct signal_struct *signal = current->signal;
int signr;
+ /*
+ * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so
+ * that the arch handlers don't all have to do it. If we get here
+ * without TIF_SIGPENDING, just exit after running signal work.
+ */
+#ifdef TIF_NOTIFY_SIGNAL
+ if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
+ if (test_thread_flag(TIF_NOTIFY_SIGNAL))
+ tracehook_notify_signal();
+ if (!task_sigpending(current))
+ return false;
+ }
+#endif
+
if (unlikely(uprobe_deny_signal()))
return false;
--
2.29.0
* [PATCH 3/4] x86: wire up TIF_NOTIFY_SIGNAL
From: Jens Axboe @ 2020-10-26 20:32 UTC
To: linux-kernel, io-uring; +Cc: peterz, oleg, tglx, Jens Axboe
All we need to do is define _TIF_NOTIFY_SIGNAL; the generic entry code
already handles everything else for us.
Signed-off-by: Jens Axboe <[email protected]>
---
arch/x86/include/asm/thread_info.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 44733a4bfc42..b3b32f4c4d2d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_SLD 18 /* Restore split lock detection on context switch */
+#define TIF_NOTIFY_SIGNAL 19 /* signal notifications exist */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -122,6 +123,7 @@ struct thread_info {
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_SLD (1 << TIF_SLD)
+#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
--
2.29.0
* [PATCH 4/4] task_work: use TIF_NOTIFY_SIGNAL if available
From: Jens Axboe @ 2020-10-26 20:32 UTC
To: linux-kernel, io-uring; +Cc: peterz, oleg, tglx, Jens Axboe, Roman Gershman
If the arch supports TIF_NOTIFY_SIGNAL, then use that for TWA_SIGNAL as
it's more efficient than using the signal delivery method. This is
especially true for threaded applications, where ->sighand is shared
across threads, but it's also lighter weight in the non-shared case.
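For context, a rough sketch of how a TWA_SIGNAL consumer queues work
(the callback and request names here are hypothetical, not io_uring's
actual ones):

        static void my_work_func(struct callback_head *head)
        {
                /* runs in the target task's context once the
                 * notification is seen, e.g. to complete a request */
        }

        init_task_work(&req->work, my_work_func);
        if (task_work_add(task, &req->work, TWA_SIGNAL)) {
                /*
                 * -ESRCH: the task is already exiting, so the caller
                 * must complete the work some other way
                 */
        }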
io_uring is a heavy consumer of TWA_SIGNAL based task_work. On my test
box, even just using 16 threads shows a nice improvement running an
io_uring based echo server.
stock kernel:
0.01% <= 0.1 milliseconds
95.86% <= 0.2 milliseconds
98.27% <= 0.3 milliseconds
99.71% <= 0.4 milliseconds
100.00% <= 0.5 milliseconds
100.00% <= 0.6 milliseconds
100.00% <= 0.7 milliseconds
100.00% <= 0.8 milliseconds
100.00% <= 0.9 milliseconds
100.00% <= 1.0 milliseconds
100.00% <= 1.1 milliseconds
100.00% <= 2 milliseconds
100.00% <= 3 milliseconds
100.00% <= 3 milliseconds
1378930.00 requests per second
~1600% CPU
1.38M requests/second, and all 16 CPUs are maxed out.
patched kernel:
0.01% <= 0.1 milliseconds
98.24% <= 0.2 milliseconds
99.47% <= 0.3 milliseconds
99.99% <= 0.4 milliseconds
100.00% <= 0.5 milliseconds
100.00% <= 0.6 milliseconds
100.00% <= 0.7 milliseconds
100.00% <= 0.8 milliseconds
100.00% <= 0.9 milliseconds
100.00% <= 1.2 milliseconds
1666111.38 requests per second
~1450% CPU
1.67M requests/second, and we're no longer just hammering on the sighand
lock. The original reporter states:
"For 5.7.15 my benchmark achieves 1.6M qps and system cpu is at ~80%.
for 5.7.16 or later it achieves only 1M qps and the system cpu is
at ~100%"
with the only difference being that TWA_SIGNAL is used unconditionally
in 5.7.16, since we need it to be able to run task_work even when the
application is already waiting in the kernel on an event that needs
task_work run to be satisfied. Also see commit 0ba9c9edcd15.
Reported-by: Roman Gershman <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
kernel/task_work.c | 41 +++++++++++++++++++++++++++++------------
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 8d6e1217c451..15b087286bea 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -5,6 +5,34 @@
static struct callback_head work_exited; /* all we need is ->next == NULL */
+/*
+ * TWA_SIGNAL signaling - use TIF_NOTIFY_SIGNAL, if available, as it's faster
+ * than TIF_SIGPENDING as there's no dependency on ->sighand. The latter is
+ * shared for threads, and can cause contention on sighand->lock. Even for
+ * the non-threaded case TIF_NOTIFY_SIGNAL is more efficient, as no locking
+ * or IRQ disabling is involved for notification (or running) purposes.
+ */
+static void task_work_notify_signal(struct task_struct *task)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ set_notify_signal(task);
+#else
+ unsigned long flags;
+
+ /*
+ * Only grab the sighand lock if we don't already have some
+ * task_work pending. This pairs with the smp_store_mb()
+ * in get_signal(), see comment there.
+ */
+ if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) &&
+ lock_task_sighand(task, &flags)) {
+ task->jobctl |= JOBCTL_TASK_WORK;
+ signal_wake_up(task, 0);
+ unlock_task_sighand(task, &flags);
+ }
+#endif
+}
+
/**
* task_work_add - ask the @task to execute @work->func()
* @task: the task which should run the callback
@@ -33,7 +61,6 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
enum task_work_notify_mode notify)
{
struct callback_head *head;
- unsigned long flags;
do {
head = READ_ONCE(task->task_works);
@@ -49,17 +76,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
set_notify_resume(task);
break;
case TWA_SIGNAL:
- /*
- * Only grab the sighand lock if we don't already have some
- * task_work pending. This pairs with the smp_store_mb()
- * in get_signal(), see comment there.
- */
- if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) &&
- lock_task_sighand(task, &flags)) {
- task->jobctl |= JOBCTL_TASK_WORK;
- signal_wake_up(task, 0);
- unlock_task_sighand(task, &flags);
- }
+ task_work_notify_signal(task);
break;
default:
WARN_ON_ONCE(1);
--
2.29.0