X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-assem.S;h=fba970ba1a3beb9c5233da8dcd5860e2f8f1b45b;hb=4ba392170e98744f0ef0b8e08a5d42b988f1d0c9;hp=4a5090ce9606c646d58564223dd016b09123da24;hpb=485e3b5fa7cf787bf2e341b746a51c6c41b103c6;p=sbcl.git diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index 4a5090c..fba970b 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -51,10 +51,12 @@ #define align_4byte 4 #define align_8byte 8 #define align_16byte 16 +#define align_page 4096 #else #define align_4byte 2 #define align_8byte 3 #define align_16byte 4 +#define align_page 12 #endif /* @@ -71,6 +73,92 @@ #define SIZE(name) #endif +/* Helper macros for access to thread-locals slots for both OS types: + * ------------------------------------------------------------------------ + * + * Windows TEB block + * ================== __________ + * | Win32 %FS base | ----> | | 0 + * ================== | | 1 + * z z + * TLS slots start here> |XXXXXXXX| e10 = TEB_STATIC_TLS_SLOTS_OFFSET + * |XXXXXXXX| e11 + * z ... z + * |XXXXXXXX| e4e + * TLS ends here> ,- |XXXXXXXX| e4f = TEB_STATIC_TLS_SLOTS_OFFSET+63 + * / z z + * | ---------- "os_address" ----. + * | | + * | big blob of SBCL-specific thread-local data | + * | |----------------------------------------| <--' + * | | CONTROL, BINDING, ALIEN STACK | + * | z z + * ================== | |----------------------------------------| + * | Linux %FS base | -->| | FFI stack pointer | + * ================== | | (extra page for mprotect) | + * \ |----------------------------------------| + * (union p_t_d) -----> \-> | struct thread { | dynamic_values[0] | + * . | ... | [1] | + * . z ... z ... z + * [tls data begins] | } | ... | <- + * [declared end of p_t_d] |----------------------------------------| . | + * . | ... | . | + * . | [TLS_SIZE-1] | <-| + * [tls data actually ends] |----------------------------------------| | + * . | ALTSTACK | | + * . |----------------------------------------| | + * . | struct nonpointer_thread_data { } | | + * . ------------------------------------------ | + * [blob actually ends] | + * / + * / + * / + * ______________________ / + * | struct symbol { | / + * z ... z / + * | fixnum tls_index; // fixnum value relative to union / + * | } | (< TLS_SIZE = 4096) + * ---------------------| + */ +#ifdef LISP_FEATURE_WIN32 +# define TEB_STATIC_TLS_SLOTS_OFFSET 0xE10 +# define TEB_SBCL_THREAD_BASE_OFFSET (TEB_STATIC_TLS_SLOTS_OFFSET+(63*4)) +# define SBCL_THREAD_BASE_EA %fs:TEB_SBCL_THREAD_BASE_OFFSET +# define MAYBE_FS(addr) addr +# define LoadTlSymbolValueAddress(symbol,reg) ; \ + movl SBCL_THREAD_BASE_EA, reg ; \ + addl (symbol+SYMBOL_TLS_INDEX_OFFSET), reg ; +# define LoadCurrentThreadSlot(offset,reg); \ + movl SBCL_THREAD_BASE_EA, reg ; \ + movl offset(reg), reg ; +#elif defined(LISP_FEATURE_LINUX) || defined(LISP_FEATURE_SUNOS) || defined(LISP_FEATURE_FREEBSD) + /* see comment in arch_os_thread_init */ +# define SBCL_THREAD_BASE_EA %fs:THREAD_SELFPTR_OFFSET +# define MAYBE_FS(addr) addr +#else + /* perhaps there's an OS out there that actually supports %fs without + * jumping through hoops, so just in case, here a default definition: */ +# define SBCL_THREAD_BASE_EA $0 +# define MAYBE_FS(addr) %fs:addr +#endif + +/* gas can't parse 4096LU; redefine */ +#if BACKEND_PAGE_BYTES == 4096 +# undef BACKEND_PAGE_BYTES +# define BACKEND_PAGE_BYTES 4096 +#elif BACKEND_PAGE_BYTES == 32768 +# undef BACKEND_PAGE_BYTES +# define BACKEND_PAGE_BYTES 32768 +#else +# error BACKEND_PAGE_BYTES mismatch +#endif + +/* OAOOM because we don't have the C headers here */ +#define THREAD_CSP_PAGE_SIZE BACKEND_PAGE_BYTES + +/* the CSP page sits right before the thread */ +#define THREAD_SAVED_CSP_OFFSET (-THREAD_CSP_PAGE_SIZE) + /* * x86/darwin (as of MacOS X 10.4.5) doesn't reliably file signal * handlers (SIGTRAP or Mach exception handlers) for 0xCC, wo we have @@ -79,7 +167,7 @@ * for this instruction in the SIGILL handler and if we see it, we * advance the EIP by two bytes to skip over ud2 instruction and * call sigtrap_handler. */ -#if defined(LISP_FEATURE_DARWIN) +#if defined(LISP_FEATURE_UD2_BREAKPOINTS) #define END() #define TRAP ud2 #else @@ -104,6 +192,58 @@ * FIXME & OAOOM: This duplicates call-out in src/compiler/x86/c-call.lisp, * so if you tweak this, change that too! */ +/* + * Note on sections specific to LISP_FEATURE_SB_SAFEPOINT: + * + * The code below is essential to safepoint-based garbage collection, + * and several details need to be considered for correct implementation. + * + * The stack spilling approach: + * On SB-SAFEPOINT platforms, the CALL-OUT vop is defined to spill all + * live Lisp TNs to the stack to provide information for conservative + * GC cooperatively (avoiding the need to retrieve register values + * from POSIX signal contexts or Windows GetThreadContext()). + * + * Finding the SP at all: + * The main remaining value needed by GC is the stack pointer (SP) at + * the moment of entering the foreign function. For this purpose, a + * thread-local field for the SP is used. Two stores to that field + * are done for each C call, one to save the SP before calling out and + * and one to undo that store afterwards. + * + * Stores as synchronization points: + * These two stores delimit the C call: While the SP is set, our + * thread is known not to run Lisp code: During GC, memory protection + * ensures that no thread proceeds across stores. + * + * The return PC issue: + * (Note that CALL-OUT has, in principle, two versions: Inline + * assembly in the VOP -or- alternatively the out-of-line version you + * are currently reading. In reality, safepoint builds currently + * lack the inline code entirely.) + * + * Both versions need to take special care with the return PC: + * - In the inline version of the code (if it existed), the two stores + * would be done directly in the CALL-OUT vop. In that theoretical + * implementation, there is a time interval between return of the + * actual C call and a second SP store during which the return + * address might not be on the stack anymore. + * - In this out-of-line version, the stores are done during + * call_into_c's frame, but an equivalent problem arises: In order + * to present the stack of arguments as our foreign function expects + * them, call_into_c has to pop the Lisp return address into a + * register first; this register has to be preserved by GENCGC + * separately: our return address is not in the stack anymore. + * In both case, stack scanning alone is not sufficient to pin + * the return address, and we communicate it to GC explicitly + * in addition to the SP. + * + * Note on look-alike accessor macros with vastly different behaviour: + * THREAD_PC_AROUND_FOREIGN_CALL_OFFSET is an "ordinary" field of the + * struct thread, whereas THREAD_SAVED_CSP_OFFSET is a synchronization + * point on a potentially write-protected page. +*/ + .text .align align_16byte,0x90 .globl GNAME(call_into_c) @@ -113,6 +253,11 @@ GNAME(call_into_c): popl %ebx /* Setup the NPX for C */ + /* The VOP says regarding CLD: "Clear out DF: Darwin, Windows, + * and Solaris at least require this, and it should not hurt + * others either." call_into_c didn't have it, but better safe than + * sorry. */ + cld fstp %st(0) fstp %st(0) fstp %st(0) @@ -122,8 +267,25 @@ GNAME(call_into_c): fstp %st(0) fstp %st(0) +#ifdef LISP_FEATURE_SB_SAFEPOINT + /* enter safe region: store SP and return PC */ + movl SBCL_THREAD_BASE_EA,%edi + movl %esp,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi)) + movl %ebx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi)) +#endif + + /* foreign call, preserving ESI, EDI, and EBX */ call *%eax # normal callout using Lisp stack - movl %eax,%ecx # remember integer return value + /* return values now in eax/edx OR st(0) */ + +#ifdef LISP_FEATURE_SB_SAFEPOINT + /* leave region: clear the SP! (Also unpin the return PC.) */ + xorl %ecx,%ecx + movl %ecx,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi)) + movl %ecx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi)) +#endif + + movl %eax,%ecx # remember integer return value /* Check for a return FP value. */ fxam @@ -163,7 +325,7 @@ Lfp_rtn_value: /* We don't need to restore eax, because the result is in st(0). */ -/* Return. FIXME: It would be nice to restructure this to use RET. */ +/* Return. FIXME: It would be nice to restructure this to use RET. */ jmp *%ebx SIZE(GNAME(call_into_c)) @@ -173,8 +335,7 @@ Lfp_rtn_value: .globl GNAME(call_into_lisp_first_time) TYPE(GNAME(call_into_lisp_first_time)) -/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when - * the stack changes. We don't worry too much about saving registers +/* We don't worry too much about saving registers * here, because we never expect to return from the initial call to lisp * anyway */ @@ -183,7 +344,6 @@ GNAME(call_into_lisp_first_time): pushl %ebp # Save old frame pointer. movl %esp,%ebp # Establish new frame. #ifndef LISP_FEATURE_WIN32 - movl %esp,ALIEN_STACK + SYMBOL_VALUE_OFFSET movl GNAME(all_threads),%eax /* pthread machinery takes care of this for other threads */ movl THREAD_CONTROL_STACK_END_OFFSET(%eax) ,%esp @@ -204,6 +364,7 @@ GNAME(call_into_lisp_first_time): GNAME(call_into_lisp): pushl %ebp # Save old frame pointer. movl %esp,%ebp # Establish new frame. + Lstack: /* Save the NPX state */ fwait # Catch any pending NPX exceptions. @@ -263,8 +424,13 @@ Ldone: #ifdef LISP_FEATURE_WIN32 /* Establish an SEH frame. */ #ifdef LISP_FEATURE_SB_THREAD - /* FIXME: need to save BSP here. */ -#error "need to save BSP here, but don't know how yet." + /* Save binding stack pointer */ + subl $4, %esp + pushl %eax + movl SBCL_THREAD_BASE_EA, %eax + movl THREAD_BINDING_STACK_POINTER_OFFSET(%eax), %eax + movl %eax, 4(%esp) + popl %eax #else pushl BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET #endif @@ -274,10 +440,10 @@ Ldone: #endif /* Alloc new frame. */ - mov %esp,%ebx # The current sp marks start of new frame. - push %ebp # fp in save location S0 - sub $8,%esp # Ensure 3 slots are allocated, one above. - mov %ebx,%ebp # Switch to new frame. + push %ebp # Dummy for return address + push %ebp # fp in save location S1 + mov %esp,%ebp # The current sp marks start of new frame. + sub $4,%esp # Ensure 3 slots are allocated, two above. call *CLOSURE_FUN_OFFSET(%eax) @@ -290,6 +456,7 @@ LsingleValue: #ifdef LISP_FEATURE_WIN32 /* Remove our SEH frame. */ + mov %fs:0,%esp popl %fs:0 add $8, %esp #endif @@ -335,11 +502,12 @@ GNAME(fpu_restore): * the undefined-function trampoline */ .text - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(undefined_tramp) TYPE(GNAME(undefined_tramp)) .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG GNAME(undefined_tramp): + pop 4(%ebp) # Save return PC for backtrace. TRAP .byte trap_Error .byte 2 @@ -348,11 +516,15 @@ GNAME(undefined_tramp): ret SIZE(GNAME(undefined_tramp)) +/* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs + * to know the name of the function immediately following the + * undefined-function trampoline. */ + /* * the closure trampoline */ .text - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(closure_tramp) TYPE(GNAME(closure_tramp)) .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG @@ -368,7 +540,7 @@ GNAME(closure_tramp): SIZE(GNAME(closure_tramp)) .text - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(funcallable_instance_tramp) TYPE(GNAME(funcallable_instance_tramp)) GNAME(funcallable_instance_tramp): @@ -381,9 +553,15 @@ GNAME(funcallable_instance_tramp): /* * fun-end breakpoint magic */ + +/* + * For an explanation of the magic involved in function-end + * breakpoints, see the implementation in ppc-assem.S. + */ + .text .globl GNAME(fun_end_breakpoint_guts) - .align align_4byte + .align align_16byte GNAME(fun_end_breakpoint_guts): /* Multiple Value return */ jc multiple_value_return @@ -410,7 +588,7 @@ GNAME(fun_end_breakpoint_end): .globl GNAME(do_pending_interrupt) TYPE(GNAME(do_pending_interrupt)) - .align align_4byte,0x90 + .align align_16byte,0x90 GNAME(do_pending_interrupt): TRAP .byte trap_PendingInterrupt @@ -451,7 +629,7 @@ GNAME(do_pending_interrupt): #define DEFINE_ALLOC_TO_EAX(name,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %ecx; /* Save ECX and EDX */ \ pushl %edx; \ @@ -464,7 +642,7 @@ GNAME(name): \ #define DEFINE_ALLOC_TO_ECX(name,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %eax; /* Save EAX and EDX */ \ pushl %edx; \ @@ -478,7 +656,7 @@ GNAME(name): \ #define DEFINE_ALLOC_TO_EDX(name,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %eax; /* Save EAX and ECX */ \ pushl %ecx; \ @@ -492,7 +670,7 @@ GNAME(name): \ #define DEFINE_ALLOC_TO_REG(name,reg,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %eax; /* Save EAX, ECX, and EDX */ \ pushl %ecx; \ @@ -540,45 +718,53 @@ DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16) #define START_REGION GNAME(boxed_region) #endif -#define ALLOC_OVERFLOW(size) \ - /* Calculate the size for the allocation. */ \ - subl START_REGION,size; \ - ALLOC(size) +#if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32) +#define ALLOC_OVERFLOW(size,scratch) \ + movl SBCL_THREAD_BASE_EA, scratch; \ + /* Calculate the size for the allocation. */ \ + subl THREAD_ALLOC_REGION_OFFSET(scratch),size; \ + ALLOC(size) +#else +#define ALLOC_OVERFLOW(size,scratch) \ + /* Calculate the size for the allocation. */ \ + subl START_REGION,size; \ + ALLOC(size) +#endif /* This routine handles an overflow with eax=crfp+size. So the size=eax-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_eax) TYPE(GNAME(alloc_overflow_eax)) GNAME(alloc_overflow_eax): pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%eax) + ALLOC_OVERFLOW(%eax,%edx) popl %edx # Restore edx. popl %ecx # Restore ecx. ret SIZE(GNAME(alloc_overflow_eax)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_ecx) TYPE(GNAME(alloc_overflow_ecx)) GNAME(alloc_overflow_ecx): pushl %eax # Save eax pushl %edx # Save edx - ALLOC_OVERFLOW(%ecx) + ALLOC_OVERFLOW(%ecx,%edx) movl %eax,%ecx # setup the destination. popl %edx # Restore edx. popl %eax # Restore eax. ret SIZE(GNAME(alloc_overflow_ecx)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_edx) TYPE(GNAME(alloc_overflow_edx)) GNAME(alloc_overflow_edx): pushl %eax # Save eax pushl %ecx # Save ecx - ALLOC_OVERFLOW(%edx) + ALLOC_OVERFLOW(%edx,%ecx) movl %eax,%edx # setup the destination. popl %ecx # Restore ecx. popl %eax # Restore eax. @@ -587,14 +773,14 @@ GNAME(alloc_overflow_edx): /* This routine handles an overflow with ebx=crfp+size. So the size=ebx-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_ebx) TYPE(GNAME(alloc_overflow_ebx)) GNAME(alloc_overflow_ebx): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%ebx) + ALLOC_OVERFLOW(%ebx,%edx) movl %eax,%ebx # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -604,14 +790,14 @@ GNAME(alloc_overflow_ebx): /* This routine handles an overflow with esi=crfp+size. So the size=esi-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_esi) TYPE(GNAME(alloc_overflow_esi)) GNAME(alloc_overflow_esi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%esi) + ALLOC_OVERFLOW(%esi,%edx) movl %eax,%esi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -619,14 +805,14 @@ GNAME(alloc_overflow_esi): ret SIZE(GNAME(alloc_overflow_esi)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_edi) TYPE(GNAME(alloc_overflow_edi)) GNAME(alloc_overflow_edi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%edi) + ALLOC_OVERFLOW(%edi,%edx) movl %eax,%edi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -642,7 +828,7 @@ GNAME(alloc_overflow_edi): * and EIP from the exception context and use it to fake * up a stack frame which will skip over the system SEH * code. */ - .align align_4byte + .align align_16byte .globl GNAME(exception_handler_wrapper) TYPE(GNAME(exception_handler_wrapper)) GNAME(exception_handler_wrapper): @@ -673,7 +859,7 @@ GNAME(exception_handler_wrapper): #endif #ifdef LISP_FEATURE_DARWIN - .align align_4byte + .align align_16byte .globl GNAME(call_into_lisp_tramp) TYPE(GNAME(call_into_lisp_tramp)) GNAME(call_into_lisp_tramp): @@ -722,7 +908,7 @@ GNAME(call_into_lisp_tramp): SIZE(call_into_lisp_tramp) #endif - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(post_signal_tramp) TYPE(GNAME(post_signal_tramp)) GNAME(post_signal_tramp): @@ -747,7 +933,7 @@ GNAME(post_signal_tramp): .globl GNAME(fast_bzero_pointer) .data - .align align_4byte + .align align_16byte GNAME(fast_bzero_pointer): /* Variable containing a pointer to the bzero function to use. * Initially points to a basic function. Change this variable @@ -755,7 +941,7 @@ GNAME(fast_bzero_pointer): .long GNAME(fast_bzero_base) .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero) TYPE(GNAME(fast_bzero)) GNAME(fast_bzero): @@ -765,7 +951,7 @@ GNAME(fast_bzero): .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero_detect) TYPE(GNAME(fast_bzero_detect)) GNAME(fast_bzero_detect): @@ -803,7 +989,7 @@ Lrestore: .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero_sse) TYPE(GNAME(fast_bzero_sse)) @@ -849,7 +1035,7 @@ Lend_sse: .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero_base) TYPE(GNAME(fast_bzero_base)) @@ -876,6 +1062,81 @@ Lend_base: pop %eax ret SIZE(GNAME(fast_bzero_base)) - - + + +/* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub + * the control stack from C, largely due to not knowing where the + * active stack frame ends. On such platforms, we reimplement the + * core scrubbing logic in assembly, in this case here: + */ + .text + .align align_16byte,0x90 + .globl GNAME(arch_scrub_control_stack) + TYPE(GNAME(arch_scrub_control_stack)) +GNAME(arch_scrub_control_stack): + /* We are passed three parameters: + * A (struct thread *) at [ESP+4], + * the address of the guard page at [ESP+8], and + * the address of the hard guard page at [ESP+12]. + * We may trash EAX, ECX, and EDX with impunity. + * [ESP] is our return address, [ESP-4] is the first + * stack slot to scrub. */ + + /* We start by setting up our scrub pointer in EAX, our + * guard page upper bound in ECX, and our hard guard + * page upper bound in EDX. */ + lea -4(%esp), %eax + mov GNAME(os_vm_page_size),%edx + mov %edx, %ecx + add 8(%esp), %ecx + add 12(%esp), %edx + + /* We need to do a memory operation relative to the + * thread pointer, so put it in %ecx and our guard + * page upper bound in 4(%esp). */ + xchg 4(%esp), %ecx + + /* Now we begin our main scrub loop. */ +ascs_outer_loop: + + /* If we're about to scrub the hard guard page, exit. */ + cmp %edx, %eax + jae ascs_check_guard_page + cmp 12(%esp), %eax + ja ascs_finished + +ascs_check_guard_page: + /* If we're about to scrub the guard page, and the guard + * page is protected, exit. */ + cmp 4(%esp), %eax + jae ascs_clear_loop + cmp 8(%esp), %eax + jbe ascs_clear_loop + cmpl $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%ecx) + jne ascs_finished + + /* Clear memory backwards to the start of the (4KiB) page */ +ascs_clear_loop: + movl $0, (%eax) + test $0xfff, %eax + lea -4(%eax), %eax + jnz ascs_clear_loop + + /* If we're about to hit the hard guard page, exit. */ + cmp %edx, %eax + jae ascs_finished + + /* If the next (previous?) 4KiB page contains a non-zero + * word, continue scrubbing. */ +ascs_check_loop: + testl $-1, (%eax) + jnz ascs_outer_loop + test $0xfff, %eax + lea -4(%eax), %eax + jnz ascs_check_loop + +ascs_finished: + ret + SIZE(GNAME(arch_scrub_control_stack)) + END()