* [RESEND RFC PATCH v8 1/3] selftests/x86: sysret_rip: Handle syscall on the Intel FRED architecture
2023-02-20 3:09 [RESEND RFC PATCH v8 0/3] Intel FRED architecture support for the sysret_rip selftest Ammar Faizi
@ 2023-02-20 3:09 ` Ammar Faizi
2023-02-20 3:09 ` [RESEND RFC PATCH v8 2/3] selftests/x86: sysret_rip: Add more tests to verify the 'syscall' behavior Ammar Faizi
2023-02-20 3:09 ` [RESEND RFC PATCH v8 3/3] selftests/x86: sysret_rip: Test SYSRET with a signal handler Ammar Faizi
2 siblings, 0 replies; 4+ messages in thread
From: Ammar Faizi @ 2023-02-20 3:09 UTC (permalink / raw)
To: Ingo Molnar, H. Peter Anvin, Dave Hansen, Xin Li
Cc: Ammar Faizi, Andrew Cooper, Andy Lutomirski, Borislav Petkov,
Thomas Gleixner, Dave Hansen, Peter Zijlstra, Kirill A. Shutemov,
x86 Mailing List, Linux Kernel Mailing List,
Linux Kselftest Mailing List, GNU/Weeb Mailing List
The current selftest asserts %r11 == %rflags after the 'syscall' returns
to userspace. However, such an assertion doesn't apply to the Intel FRED
system because, in that system, the 'syscall' instruction does not set
%r11=%rflags and %rcx=%rip.
Handle the FRED system case. Now, test that:
A) 'syscall' in a FRED system preserves %rcx and %r11.
B) 'syscall' in a non-FRED system sets %rcx=%rip and %r11=%rflags.
Note for the '__raise()' helper:
Those tests must manipulate registers before the 'syscall' instruction
is invoked. However, the current 'raise()' function from libc cannot
accomplish it. Therefore, create a syscall wrapper in inline Assembly to
control them.
Fixes: 660602140103 ("selftests/x86: Add a selftest for SYSRET to noncanonical addresses")
Link: https://lore.kernel.org/lkml/[email protected]
Reported-by: Xin Li <[email protected]>
Co-developed-by: H. Peter Anvin (Intel) <[email protected]>
Signed-off-by: H. Peter Anvin (Intel) <[email protected]>
Signed-off-by: Ammar Faizi <[email protected]>
---
tools/testing/selftests/x86/sysret_rip.c | 122 +++++++++++++++++++++--
1 file changed, 115 insertions(+), 7 deletions(-)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index 84d74be1d90207ab..300104900192d396 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -39,6 +39,112 @@ asm (
extern const char test_page[];
static void const *current_test_page_addr = test_page;
+/*
+ * Arbitrary values.
+ */
+static const unsigned long r11_sentinel = 0xfeedfacedeadbeef;
+static const unsigned long rcx_sentinel = 0x5ca1ab1e0b57ac1e;
+
+/*
+ * An arbitrary *valid* RFLAGS value.
+ */
+static const unsigned long rflags_sentinel = 0x200a93;
+
+enum regs_ok {
+ REGS_UNDEFINED = -1,
+ REGS_SAVED = 0, /* Registers properly preserved (Intel FRED). */
+ REGS_SYSRET = 1 /* Registers match syscall/sysret. */
+};
+
+/*
+ * @rbx should be set to the syscall return %rip.
+ */
+static void check_regs_result(unsigned long r11, unsigned long rcx,
+ unsigned long rbx)
+{
+ static enum regs_ok regs_ok_state = REGS_UNDEFINED;
+ enum regs_ok ret;
+
+ if (r11 == r11_sentinel && rcx == rcx_sentinel) {
+ ret = REGS_SAVED;
+ } else if (r11 == rflags_sentinel && rcx == rbx) {
+ ret = REGS_SYSRET;
+ } else {
+ printf("[FAIL] check_regs_result\n");
+ printf(" r11_sentinel = %#lx; %%r11 = %#lx;\n", r11_sentinel, r11);
+ printf(" rcx_sentinel = %#lx; %%rcx = %#lx;\n", rcx_sentinel, rcx);
+ printf(" rflags_sentinel = %#lx\n", rflags_sentinel);
+ exit(1);
+ }
+
+
+ /*
+ * Test that we don't get a mix of REGS_SAVED and REGS_SYSRET.
+ * It needs at least calling check_regs_result() twice to assert.
+ */
+ if (regs_ok_state == REGS_UNDEFINED) {
+ /*
+ * First time calling check_regs_result().
+ */
+ regs_ok_state = ret;
+ } else {
+ assert(regs_ok_state == ret);
+ }
+}
+
+/*
+ * There are two cases:
+ *
+ * A) 'syscall' in a FRED system preserves %rcx and %r11.
+ * B) 'syscall' in a non-FRED system sets %rcx=%rip and %r11=%rflags.
+ *
+ * When the do_syscall() function is called for the first time,
+ * check_regs_result() will memorize the behavior, either (A) or (B).
+ * Then, the next do_syscall() call will verify that the 'syscall'
+ * behavior is the same.
+ *
+ * This function needs to be called at least twice to assert.
+ */
+static long do_syscall(long nr_syscall, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5, unsigned long arg6)
+{
+ unsigned long rbx;
+ unsigned long rcx = rcx_sentinel;
+ register unsigned long r11 __asm__("%r11") = r11_sentinel;
+ register unsigned long r10 __asm__("%r10") = arg4;
+ register unsigned long r8 __asm__("%r8") = arg5;
+ register unsigned long r9 __asm__("%r9") = arg6;
+
+ __asm__ volatile (
+ "movq -8(%%rsp), %%r12\n\t" // Do not clobber the red zone.
+ "pushq %[rflags_sentinel]\n\t"
+ "popfq\n\t"
+ "movq %%r12, -8(%%rsp)\n\t"
+ "leaq 1f(%%rip), %[rbx]\n\t"
+ "syscall\n"
+ "1:"
+
+ : "+a" (nr_syscall),
+ "+r" (r11),
+ "+c" (rcx),
+ [rbx] "=b" (rbx)
+
+ : [rflags_sentinel] "g" (rflags_sentinel),
+ "D" (arg1), /* %rdi */
+ "S" (arg2), /* %rsi */
+ "d" (arg3), /* %rdx */
+ "r" (r10),
+ "r" (r8),
+ "r" (r9)
+
+ : "r12", "memory"
+ );
+
+ check_regs_result(r11, rcx, rbx);
+ return nr_syscall;
+}
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -88,24 +194,26 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ check_regs_result(ctx->uc_mcontext.gregs[REG_R11],
+ ctx->uc_mcontext.gregs[REG_RCX],
+ ctx->uc_mcontext.gregs[REG_RBX]);
+
/* Set IP and CX to match so that SYSRET can happen. */
ctx->uc_mcontext.gregs[REG_RIP] = rip;
ctx->uc_mcontext.gregs[REG_RCX] = rip;
-
- /* R11 and EFLAGS should already match. */
- assert(ctx->uc_mcontext.gregs[REG_EFL] ==
- ctx->uc_mcontext.gregs[REG_R11]);
-
sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
+}
- return;
+static void __raise(int sig)
+{
+ do_syscall(__NR_kill, getpid(), sig, 0, 0, 0, 0);
}
static void test_sigreturn_to(unsigned long ip)
{
rip = ip;
printf("[RUN]\tsigreturn to 0x%lx\n", ip);
- raise(SIGUSR1);
+ __raise(SIGUSR1);
}
static jmp_buf jmpbuf;
--
Ammar Faizi
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [RESEND RFC PATCH v8 2/3] selftests/x86: sysret_rip: Add more tests to verify the 'syscall' behavior
2023-02-20 3:09 [RESEND RFC PATCH v8 0/3] Intel FRED architecture support for the sysret_rip selftest Ammar Faizi
2023-02-20 3:09 ` [RESEND RFC PATCH v8 1/3] selftests/x86: sysret_rip: Handle syscall on the Intel FRED architecture Ammar Faizi
@ 2023-02-20 3:09 ` Ammar Faizi
2023-02-20 3:09 ` [RESEND RFC PATCH v8 3/3] selftests/x86: sysret_rip: Test SYSRET with a signal handler Ammar Faizi
2 siblings, 0 replies; 4+ messages in thread
From: Ammar Faizi @ 2023-02-20 3:09 UTC (permalink / raw)
To: Ingo Molnar, H. Peter Anvin, Dave Hansen, Xin Li
Cc: Ammar Faizi, Andrew Cooper, Andy Lutomirski, Borislav Petkov,
Thomas Gleixner, Dave Hansen, Peter Zijlstra, Kirill A. Shutemov,
x86 Mailing List, Linux Kernel Mailing List,
Linux Kselftest Mailing List, GNU/Weeb Mailing List
There are two cases:
A) 'syscall' in a FRED system preserves %rcx and %r11.
B) 'syscall' in a non-FRED system sets %rcx=%rip and %r11=%rflags.
When the do_syscall() function is called for the first time, it will
memorize the behavior, either (A) or (B). Then, the next do_syscall()
call will verify that the 'syscall' behavior is the same.
Test them with trivial system calls like __NR_getppid and friends, which
are highly likely to return with SYSRET on an IDT system.
The purposes of this test are:
- Ensure that the syscall behavior is consistent. It must always be
(A) or always be (B). Not a mix of them.
- Ensure that the kernel doesn't leak its internal data when returning
to userspace.
Cc: Xin Li <[email protected]>
Link: https://lore.kernel.org/lkml/[email protected]
Co-developed-by: H. Peter Anvin (Intel) <[email protected]>
Signed-off-by: H. Peter Anvin (Intel) <[email protected]>
Signed-off-by: Ammar Faizi <[email protected]>
---
tools/testing/selftests/x86/sysret_rip.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index 300104900192d396..1531593b50d02150 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -266,8 +266,21 @@ static void test_syscall_fallthrough_to(unsigned long ip)
printf("[OK]\tWe survived\n");
}
+/* See the comment in do_syscall(). */
+static void test_syscall_rcx_r11_consistent(void)
+{
+ do_syscall(__NR_getpid, 0, 0, 0, 0, 0, 0);
+ do_syscall(__NR_gettid, 0, 0, 0, 0, 0, 0);
+ do_syscall(__NR_getppid, 0, 0, 0, 0, 0, 0);
+}
+
int main()
{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ test_syscall_rcx_r11_consistent();
+
/*
* When the kernel returns from a slow-path syscall, it will
* detect whether SYSRET is appropriate. If it incorrectly
@@ -275,7 +288,7 @@ int main()
* it'll crash on Intel CPUs.
*/
sethandler(SIGUSR1, sigusr1, 0);
- for (int i = 47; i < 64; i++)
+ for (i = 47; i < 64; i++)
test_sigreturn_to(1UL<<i);
clearhandler(SIGUSR1);
@@ -286,7 +299,7 @@ int main()
test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE);
/* These are the interesting cases. */
- for (int i = 47; i < 64; i++) {
+ for (i = 47; i < 64; i++) {
test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE);
test_syscall_fallthrough_to(1UL<<i);
}
--
Ammar Faizi
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [RESEND RFC PATCH v8 3/3] selftests/x86: sysret_rip: Test SYSRET with a signal handler
2023-02-20 3:09 [RESEND RFC PATCH v8 0/3] Intel FRED architecture support for the sysret_rip selftest Ammar Faizi
2023-02-20 3:09 ` [RESEND RFC PATCH v8 1/3] selftests/x86: sysret_rip: Handle syscall on the Intel FRED architecture Ammar Faizi
2023-02-20 3:09 ` [RESEND RFC PATCH v8 2/3] selftests/x86: sysret_rip: Add more tests to verify the 'syscall' behavior Ammar Faizi
@ 2023-02-20 3:09 ` Ammar Faizi
2 siblings, 0 replies; 4+ messages in thread
From: Ammar Faizi @ 2023-02-20 3:09 UTC (permalink / raw)
To: Ingo Molnar, H. Peter Anvin, Dave Hansen, Xin Li
Cc: Ammar Faizi, Andrew Cooper, Andy Lutomirski, Borislav Petkov,
Thomas Gleixner, Dave Hansen, Peter Zijlstra, Kirill A. Shutemov,
x86 Mailing List, Linux Kernel Mailing List,
Linux Kselftest Mailing List, GNU/Weeb Mailing List
The current test_sigreturn_to() goes to the slow-path syscall with
IRET due to non-canonical addresses. It uses the SIGUSR1 signal to
perform the test.
Add a similar test that goes to the SYSRET path instead of IRET using
the SIGUSR2 signal. There are two cases:
A) 'syscall' in a FRED system preserves %rcx and %r11.
B) 'syscall' in a non-FRED system sets %rcx=%rip and %r11=%rflags.
The __raise(SIGUSR2) call verifies the 'syscall' behavior consistency
when dealing with a signal handler. It must always be (A) or always be
(B). Not a mix of them.
Cc: Xin Li <[email protected]>
Link: https://lore.kernel.org/lkml/[email protected]
Suggested-by: H. Peter Anvin (Intel) <[email protected]>
Signed-off-by: Ammar Faizi <[email protected]>
---
tools/testing/selftests/x86/sysret_rip.c | 30 ++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index 1531593b50d02150..746801675fe77e9c 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -274,6 +274,28 @@ static void test_syscall_rcx_r11_consistent(void)
do_syscall(__NR_getppid, 0, 0, 0, 0, 0, 0);
}
+static unsigned long usr2_rcx;
+static unsigned long usr2_r11;
+
+static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ usr2_r11 = ctx->uc_mcontext.gregs[REG_R11];
+ usr2_rcx = ctx->uc_mcontext.gregs[REG_RCX];
+
+ check_regs_result(ctx->uc_mcontext.gregs[REG_R11],
+ ctx->uc_mcontext.gregs[REG_RCX],
+ ctx->uc_mcontext.gregs[REG_RBX]);
+}
+
+static void test_sysret_consistent(void)
+{
+ printf("[RUN]\ttest_sysret_consistent\n");
+ __raise(SIGUSR2);
+ printf("[OK]\tRCX = %#lx; R11 = %#lx\n", usr2_rcx, usr2_r11);
+}
+
int main()
{
int i;
@@ -291,6 +313,14 @@ int main()
for (i = 47; i < 64; i++)
test_sigreturn_to(1UL<<i);
+ /*
+ * test_sigreturn_to() above will test the IRET path. Now test
+ * the SYSRET path.
+ */
+ sethandler(SIGUSR2, sigusr2, 0);
+ for (i = 0; i < 32; i++)
+ test_sysret_consistent();
+
clearhandler(SIGUSR1);
sethandler(SIGSEGV, sigsegv_for_fallthrough, 0);
--
Ammar Faizi
^ permalink raw reply related [flat|nested] 4+ messages in thread