From 4b420f6e71f4242cb43adbd44b025eb4c70aa20c Mon Sep 17 00:00:00 2001 From: Christian Prochaska Date: Fri, 31 Jan 2014 16:50:08 +0100 Subject: [PATCH] Fix stack pointer alignment for x86_64 platforms The x86_64 ABI requires the stack pointer to be 16-byte aligned before the call of a function and decreased by 8 at the function entrypoint (after the return address has been pushed to the stack). Currently, when a new Genode thread gets created, the initial stack pointer is aligned to 16 byte. On Genode/Linux, the thread entry function is entered by a 'call' instruction, so the stack pointer alignment at the function entrypoint is correct. On Fiasco.OC and NOVA, however, the thread entry function gets executed without a return address being pushed to the stack, so at the function entrypoint the stack pointer is still aligned to 16 byte, which can cause problems with compiler-generated SSE instructions. With this patch, the stack pointer given to a new thread gets aligned to 16 bytes and decreased by 8 by default, since most of the currently supported base platforms execute the thread entry function without pushing a return address to the stack. For base-linux, the stack pointer gets realigned to 16 bytes before the thread entry function gets called. Fixes #1043. --- base-foc/src/base/thread/thread.cc | 6 ++++++ base-linux/src/base/thread/thread_linux.cc | 4 +--- base-linux/src/platform/x86_32/lx_clone.S | 3 +-- base-linux/src/platform/x86_64/lx_clone.S | 5 ++++- base/include/base/thread.h | 11 ++++++++--- base/src/base/thread/thread.cc | 6 ++++++ 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/base-foc/src/base/thread/thread.cc b/base-foc/src/base/thread/thread.cc index e9ceace89..f569c6fc6 100644 --- a/base-foc/src/base/thread/thread.cc +++ b/base-foc/src/base/thread/thread.cc @@ -153,6 +153,12 @@ Thread_base::Context *Thread_base::_alloc_context(size_t stack_size) context->stack_base = ds_addr; context->ds_cap = ds_cap; + /* + * The value at the top of the stack might get interpreted as return + * address of the thread start function by GDB, so we set it to 0. + */ + *(addr_t*)context->stack_top() = 0; + return context; } diff --git a/base-linux/src/base/thread/thread_linux.cc b/base-linux/src/base/thread/thread_linux.cc index d28cd76b1..94046d1e1 100644 --- a/base-linux/src/base/thread/thread_linux.cc +++ b/base-linux/src/base/thread/thread_linux.cc @@ -122,9 +122,7 @@ void Thread_base::start() threadlib_initialized = true; } - /* align initial stack to 16 byte boundary */ - void *thread_sp = (void *)((addr_t)(stack_top()) & ~0xf); - _tid.tid = lx_create_thread(Thread_base::_thread_start, thread_sp, this); + _tid.tid = lx_create_thread(Thread_base::_thread_start, stack_top(), this); _tid.pid = lx_getpid(); /* wait until the 'thread_start' function got entered */ diff --git a/base-linux/src/platform/x86_32/lx_clone.S b/base-linux/src/platform/x86_32/lx_clone.S index cc290f656..e162e718b 100644 --- a/base-linux/src/platform/x86_32/lx_clone.S +++ b/base-linux/src/platform/x86_32/lx_clone.S @@ -83,8 +83,7 @@ L(thread_start): .cfi_startproc; /* Clearing frame pointer is insufficient, use CFI. */ .cfi_undefined %eip; - /* Note: %esi is zero. */ - movl %esi,%ebp /* terminate the stack frame */ + xorl %ebp,%ebp /* terminate the stack frame */ call *%ebx #ifdef PIC call L(here) diff --git a/base-linux/src/platform/x86_64/lx_clone.S b/base-linux/src/platform/x86_64/lx_clone.S index 8edeea931..35f81b2d9 100644 --- a/base-linux/src/platform/x86_64/lx_clone.S +++ b/base-linux/src/platform/x86_64/lx_clone.S @@ -17,6 +17,9 @@ lx_clone: .cfi_startproc + /* Align the new stack pointer to 16 bytes. */ + andq $0xfffffffffffffff0, %rsi + /* Insert the argument onto the new stack. */ subq $16,%rsi movq %rcx,8(%rsi) @@ -51,7 +54,7 @@ L(thread_start): /* Clear the frame pointer. The ABI suggests this be done, to mark the outermost frame obviously. */ - xorl %ebp, %ebp + xorq %rbp, %rbp /* Set up arguments for the function call. */ popq %rax /* Function to call. */ diff --git a/base/include/base/thread.h b/base/include/base/thread.h index 3dd4f3839..47db0df41 100644 --- a/base/include/base/thread.h +++ b/base/include/base/thread.h @@ -100,11 +100,16 @@ namespace Genode { public: /** - * Top of stack aligned to 16 byte + * Top of stack * - * The alignment is also sufficient for the AMD64 ABI. + * The alignment matches an initial stack frame, which is + * sufficient for the AMD64 ABI (stack top + 8 is 16-byte + * aligned). */ - addr_t stack_top() const { return (addr_t)_stack & ~0xf; } + addr_t stack_top() const + { + return ((addr_t)_stack & ~0xf) - sizeof(addr_t); + } /** * Virtual address of the start of the stack diff --git a/base/src/base/thread/thread.cc b/base/src/base/thread/thread.cc index 868b339d2..8c9092689 100644 --- a/base/src/base/thread/thread.cc +++ b/base/src/base/thread/thread.cc @@ -153,6 +153,12 @@ Thread_base::Context *Thread_base::_alloc_context(size_t stack_size) context->stack_base = ds_addr; context->ds_cap = ds_cap; + /* + * The value at the top of the stack might get interpreted as return + * address of the thread start function by GDB, so we set it to 0. + */ + *(addr_t*)context->stack_top() = 0; + return context; }