X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-assem.S;h=fba970ba1a3beb9c5233da8dcd5860e2f8f1b45b;hb=c8617f57d0413beb2890e94dabe227cef9c5ddad;hp=f8bf28e73ce3007ffd82ed11c4a01a5b127d533e;hpb=e4fab3873443ccb15db76b05f9be7eddc60fe37d;p=sbcl.git diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index f8bf28e..fba970b 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -51,10 +51,12 @@ #define align_4byte 4 #define align_8byte 8 #define align_16byte 16 +#define align_page 4096 #else #define align_4byte 2 #define align_8byte 3 #define align_16byte 4 +#define align_page 12 #endif /* @@ -71,6 +73,92 @@ #define SIZE(name) #endif +/* Helper macros for access to thread-locals slots for both OS types: + * ------------------------------------------------------------------------ + * + * Windows TEB block + * ================== __________ + * | Win32 %FS base | ----> | | 0 + * ================== | | 1 + * z z + * TLS slots start here> |XXXXXXXX| e10 = TEB_STATIC_TLS_SLOTS_OFFSET + * |XXXXXXXX| e11 + * z ... z + * |XXXXXXXX| e4e + * TLS ends here> ,- |XXXXXXXX| e4f = TEB_STATIC_TLS_SLOTS_OFFSET+63 + * / z z + * | ---------- "os_address" ----. + * | | + * | big blob of SBCL-specific thread-local data | + * | |----------------------------------------| <--' + * | | CONTROL, BINDING, ALIEN STACK | + * | z z + * ================== | |----------------------------------------| + * | Linux %FS base | -->| | FFI stack pointer | + * ================== | | (extra page for mprotect) | + * \ |----------------------------------------| + * (union p_t_d) -----> \-> | struct thread { | dynamic_values[0] | + * . | ... | [1] | + * . z ... z ... z + * [tls data begins] | } | ... | <- + * [declared end of p_t_d] |----------------------------------------| . | + * . | ... | . | + * . | [TLS_SIZE-1] | <-| + * [tls data actually ends] |----------------------------------------| | + * . | ALTSTACK | | + * . 
|----------------------------------------| | + * . | struct nonpointer_thread_data { } | | + * . ------------------------------------------ | + * [blob actually ends] | + * / + * / + * / + * ______________________ / + * | struct symbol { | / + * z ... z / + * | fixnum tls_index; // fixnum value relative to union / + * | } | (< TLS_SIZE = 4096) + * ---------------------| + */ +#ifdef LISP_FEATURE_WIN32 +# define TEB_STATIC_TLS_SLOTS_OFFSET 0xE10 +# define TEB_SBCL_THREAD_BASE_OFFSET (TEB_STATIC_TLS_SLOTS_OFFSET+(63*4)) +# define SBCL_THREAD_BASE_EA %fs:TEB_SBCL_THREAD_BASE_OFFSET +# define MAYBE_FS(addr) addr +# define LoadTlSymbolValueAddress(symbol,reg) ; \ + movl SBCL_THREAD_BASE_EA, reg ; \ + addl (symbol+SYMBOL_TLS_INDEX_OFFSET), reg ; +# define LoadCurrentThreadSlot(offset,reg); \ + movl SBCL_THREAD_BASE_EA, reg ; \ + movl offset(reg), reg ; +#elif defined(LISP_FEATURE_LINUX) || defined(LISP_FEATURE_SUNOS) || defined(LISP_FEATURE_FREEBSD) + /* see comment in arch_os_thread_init */ +# define SBCL_THREAD_BASE_EA %fs:THREAD_SELFPTR_OFFSET +# define MAYBE_FS(addr) addr +#else + /* perhaps there's an OS out there that actually supports %fs without + * jumping through hoops, so just in case, here a default definition: */ +# define SBCL_THREAD_BASE_EA $0 +# define MAYBE_FS(addr) %fs:addr +#endif + +/* gas can't parse 4096LU; redefine */ +#if BACKEND_PAGE_BYTES == 4096 +# undef BACKEND_PAGE_BYTES +# define BACKEND_PAGE_BYTES 4096 +#elif BACKEND_PAGE_BYTES == 32768 +# undef BACKEND_PAGE_BYTES +# define BACKEND_PAGE_BYTES 32768 +#else +# error BACKEND_PAGE_BYTES mismatch +#endif + +/* OAOOM because we don't have the C headers here */ +#define THREAD_CSP_PAGE_SIZE BACKEND_PAGE_BYTES + +/* the CSP page sits right before the thread */ +#define THREAD_SAVED_CSP_OFFSET (-THREAD_CSP_PAGE_SIZE) + /* * x86/darwin (as of MacOS X 10.4.5) doesn't reliably file signal * handlers (SIGTRAP or Mach exception handlers) for 0xCC, wo we have @@ -79,7 +167,7 @@ * for this instruction in 
the SIGILL handler and if we see it, we * advance the EIP by two bytes to skip over ud2 instruction and * call sigtrap_handler. */ -#if defined(LISP_FEATURE_DARWIN) +#if defined(LISP_FEATURE_UD2_BREAKPOINTS) #define END() #define TRAP ud2 #else @@ -104,6 +192,58 @@ * FIXME & OAOOM: This duplicates call-out in src/compiler/x86/c-call.lisp, * so if you tweak this, change that too! */ +/* + * Note on sections specific to LISP_FEATURE_SB_SAFEPOINT: + * + * The code below is essential to safepoint-based garbage collection, + * and several details need to be considered for correct implementation. + * + * The stack spilling approach: + * On SB-SAFEPOINT platforms, the CALL-OUT vop is defined to spill all + * live Lisp TNs to the stack to provide information for conservative + * GC cooperatively (avoiding the need to retrieve register values + * from POSIX signal contexts or Windows GetThreadContext()). + * + * Finding the SP at all: + * The main remaining value needed by GC is the stack pointer (SP) at + * the moment of entering the foreign function. For this purpose, a + * thread-local field for the SP is used. Two stores to that field + * are done for each C call, one to save the SP before calling out and + * one to undo that store afterwards. + * + * Stores as synchronization points: + * These two stores delimit the C call: While the SP is set, our + * thread is known not to run Lisp code: During GC, memory protection + * ensures that no thread proceeds across stores. + * + * The return PC issue: + * (Note that CALL-OUT has, in principle, two versions: Inline + * assembly in the VOP -or- alternatively the out-of-line version you + * are currently reading. In reality, safepoint builds currently + * lack the inline code entirely.) + * + * Both versions need to take special care with the return PC: + * - In the inline version of the code (if it existed), the two stores + * would be done directly in the CALL-OUT vop. 
In that theoretical + * implementation, there is a time interval between return of the + * actual C call and a second SP store during which the return + * address might not be on the stack anymore. + * - In this out-of-line version, the stores are done during + * call_into_c's frame, but an equivalent problem arises: In order + * to present the stack of arguments as our foreign function expects + * them, call_into_c has to pop the Lisp return address into a + * register first; this register has to be preserved by GENCGC + * separately: our return address is not in the stack anymore. + * In both cases, stack scanning alone is not sufficient to pin + * the return address, and we communicate it to GC explicitly + * in addition to the SP. + * + * Note on look-alike accessor macros with vastly different behaviour: + * THREAD_PC_AROUND_FOREIGN_CALL_OFFSET is an "ordinary" field of the + * struct thread, whereas THREAD_SAVED_CSP_OFFSET is a synchronization + * point on a potentially write-protected page. +*/ + .text .align align_16byte,0x90 .globl GNAME(call_into_c) @@ -113,6 +253,11 @@ GNAME(call_into_c): popl %ebx /* Setup the NPX for C */ + /* The VOP says regarding CLD: "Clear out DF: Darwin, Windows, + * and Solaris at least require this, and it should not hurt + * others either." call_into_c didn't have it, but better safe than + * sorry. 
*/ + cld fstp %st(0) fstp %st(0) fstp %st(0) @@ -122,15 +267,29 @@ GNAME(call_into_c): fstp %st(0) fstp %st(0) - cld # clear out DF: Darwin, Solaris and Win32 at - # least need this, and it should not hurt others - +#ifdef LISP_FEATURE_SB_SAFEPOINT + /* enter safe region: store SP and return PC */ + movl SBCL_THREAD_BASE_EA,%edi + movl %esp,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi)) + movl %ebx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi)) +#endif + + /* foreign call, preserving ESI, EDI, and EBX */ call *%eax # normal callout using Lisp stack - movl %eax,%ecx # remember integer return value + /* return values now in eax/edx OR st(0) */ + +#ifdef LISP_FEATURE_SB_SAFEPOINT + /* leave region: clear the SP! (Also unpin the return PC.) */ + xorl %ecx,%ecx + movl %ecx,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi)) + movl %ecx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi)) +#endif + + movl %eax,%ecx # remember integer return value /* Check for a return FP value. */ fxam - fnstsw %eax + fnstsw %ax andl $0x4500,%eax cmpl $0x4100,%eax jne Lfp_rtn_value @@ -166,7 +325,7 @@ Lfp_rtn_value: /* We don't need to restore eax, because the result is in st(0). */ -/* Return. FIXME: It would be nice to restructure this to use RET. */ +/* Return. FIXME: It would be nice to restructure this to use RET. */ jmp *%ebx SIZE(GNAME(call_into_c)) @@ -176,8 +335,7 @@ Lfp_rtn_value: .globl GNAME(call_into_lisp_first_time) TYPE(GNAME(call_into_lisp_first_time)) -/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when - * the stack changes. We don't worry too much about saving registers +/* We don't worry too much about saving registers * here, because we never expect to return from the initial call to lisp * anyway */ @@ -186,12 +344,9 @@ GNAME(call_into_lisp_first_time): pushl %ebp # Save old frame pointer. movl %esp,%ebp # Establish new frame. 
#ifndef LISP_FEATURE_WIN32 - movl %esp,ALIEN_STACK + SYMBOL_VALUE_OFFSET movl GNAME(all_threads),%eax - movl THREAD_CONTROL_STACK_START_OFFSET(%eax) ,%esp - /* don't think too hard about what happens if we get interrupted - * here */ - addl $(THREAD_CONTROL_STACK_SIZE),%esp + /* pthread machinery takes care of this for other threads */ + movl THREAD_CONTROL_STACK_END_OFFSET(%eax) ,%esp #else /* Win32 -really- doesn't like you switching stacks out from under it. */ movl GNAME(all_threads),%eax @@ -209,6 +364,7 @@ GNAME(call_into_lisp_first_time): GNAME(call_into_lisp): pushl %ebp # Save old frame pointer. movl %esp,%ebp # Establish new frame. + Lstack: /* Save the NPX state */ fwait # Catch any pending NPX exceptions. @@ -268,8 +424,13 @@ Ldone: #ifdef LISP_FEATURE_WIN32 /* Establish an SEH frame. */ #ifdef LISP_FEATURE_SB_THREAD - /* FIXME: need to save BSP here. */ -#error "need to save BSP here, but don't know how yet." + /* Save binding stack pointer */ + subl $4, %esp + pushl %eax + movl SBCL_THREAD_BASE_EA, %eax + movl THREAD_BINDING_STACK_POINTER_OFFSET(%eax), %eax + movl %eax, 4(%esp) + popl %eax #else pushl BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET #endif @@ -279,10 +440,10 @@ Ldone: #endif /* Alloc new frame. */ - mov %esp,%ebx # The current sp marks start of new frame. - push %ebp # fp in save location S0 - sub $8,%esp # Ensure 3 slots are allocated, one above. - mov %ebx,%ebp # Switch to new frame. + push %ebp # Dummy for return address + push %ebp # fp in save location S1 + mov %esp,%ebp # The current sp marks start of new frame. + sub $4,%esp # Ensure 3 slots are allocated, two above. call *CLOSURE_FUN_OFFSET(%eax) @@ -295,6 +456,7 @@ LsingleValue: #ifdef LISP_FEATURE_WIN32 /* Remove our SEH frame. 
*/ + mov %fs:0,%esp popl %fs:0 add $8, %esp #endif @@ -340,11 +502,12 @@ GNAME(fpu_restore): * the undefined-function trampoline */ .text - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(undefined_tramp) TYPE(GNAME(undefined_tramp)) .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG GNAME(undefined_tramp): + pop 4(%ebp) # Save return PC for backtrace. TRAP .byte trap_Error .byte 2 @@ -353,11 +516,15 @@ GNAME(undefined_tramp): ret SIZE(GNAME(undefined_tramp)) +/* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs + * to know the name of the function immediately following the + * undefined-function trampoline. */ + /* * the closure trampoline */ .text - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(closure_tramp) TYPE(GNAME(closure_tramp)) .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG @@ -373,7 +540,7 @@ GNAME(closure_tramp): SIZE(GNAME(closure_tramp)) .text - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(funcallable_instance_tramp) TYPE(GNAME(funcallable_instance_tramp)) GNAME(funcallable_instance_tramp): @@ -386,9 +553,15 @@ GNAME(funcallable_instance_tramp): /* * fun-end breakpoint magic */ + +/* + * For an explanation of the magic involved in function-end + * breakpoints, see the implementation in ppc-assem.S. + */ + .text .globl GNAME(fun_end_breakpoint_guts) - .align align_4byte + .align align_16byte GNAME(fun_end_breakpoint_guts): /* Multiple Value return */ jc multiple_value_return @@ -415,7 +588,7 @@ GNAME(fun_end_breakpoint_end): .globl GNAME(do_pending_interrupt) TYPE(GNAME(do_pending_interrupt)) - .align align_4byte,0x90 + .align align_16byte,0x90 GNAME(do_pending_interrupt): TRAP .byte trap_PendingInterrupt @@ -432,27 +605,23 @@ GNAME(do_pending_interrupt): * So only eax, ecx, and edx need special care here. * * ALLOC factors out the logic of calling alloc(): stack alignment, etc. - * The TMP argument must be a preserved register different from the the - * SIZE argument (if it is a register.) 
* * DEFINE_ALLOC_TO_FOO defines an alloction routine. */ #ifdef LISP_FEATURE_DARWIN -#define ALLOC(size,tmp) \ - pushl tmp; /* Save TMP */ \ - movl %esp,tmp; /* Save ESP to TMP */ \ - andl $0xfffffff0,%esp; /* Align stack */ \ - pushl $0; /* Padding */ \ - pushl size; /* Argument to alloc */ \ - cld; /* Clear DF */ \ +#define ALLOC(size) \ + pushl %ebp; /* Save EBP */ \ + movl %esp,%ebp; /* Save ESP to EBP */ \ + pushl $0; /* Reserve space for arg */ \ + andl $0xfffffff0,%esp; /* Align stack to 16bytes */ \ + movl size, (%esp); /* Argument to alloc */ \ call GNAME(alloc); \ - movl tmp,%esp; /* Restore ESP from TMP */ \ - popl tmp; /* Restore TMP */ + movl %ebp,%esp; /* Restore ESP from EBP */ \ + popl %ebp; /* Restore EBP */ #else -#define ALLOC(size,tmp) \ +#define ALLOC(size) \ pushl size; /* Argument to alloc */ \ - cld; /* Clear DF */ \ call GNAME(alloc); \ addl $4,%esp; /* Pop argument */ #endif @@ -460,11 +629,11 @@ GNAME(do_pending_interrupt): #define DEFINE_ALLOC_TO_EAX(name,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %ecx; /* Save ECX and EDX */ \ pushl %edx; \ - ALLOC(size,%esi) \ + ALLOC(size) \ popl %edx; /* Restore ECX and EDX */ \ popl %ecx; \ ret; \ @@ -473,11 +642,11 @@ GNAME(name): \ #define DEFINE_ALLOC_TO_ECX(name,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %eax; /* Save EAX and EDX */ \ pushl %edx; \ - ALLOC(size,%esi) \ + ALLOC(size) \ movl %eax,%ecx; /* Result to destination */ \ popl %edx; \ popl %eax; \ @@ -487,26 +656,26 @@ GNAME(name): \ #define DEFINE_ALLOC_TO_EDX(name,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %eax; /* Save EAX and ECX */ \ pushl %ecx; \ - ALLOC(size,%edi) \ + ALLOC(size) \ movl %eax,%edx; /* Restore EAX and ECX */ \ popl %ecx; \ popl %eax; \ ret; \ 
SIZE(GNAME(name)) -#define DEFINE_ALLOC_TO_REG(name,reg,size,tmp) \ +#define DEFINE_ALLOC_TO_REG(name,reg,size) \ .globl GNAME(name); \ TYPE(GNAME(name)); \ - .align align_4byte,0x90; \ + .align align_16byte,0x90; \ GNAME(name): \ pushl %eax; /* Save EAX, ECX, and EDX */ \ pushl %ecx; \ pushl %edx; \ - ALLOC(size,tmp) \ + ALLOC(size) \ movl %eax,reg; /* Restore them */ \ popl %edx; \ popl %ecx; \ @@ -526,17 +695,17 @@ DEFINE_ALLOC_TO_EDX(alloc_to_edx,%edx) DEFINE_ALLOC_TO_EDX(alloc_8_to_edx,$8) DEFINE_ALLOC_TO_EDX(alloc_16_to_edx,$16) -DEFINE_ALLOC_TO_REG(alloc_to_ebx,%ebx,%ebx,%edi) -DEFINE_ALLOC_TO_REG(alloc_8_to_ebx,%ebx,$8,%edi) -DEFINE_ALLOC_TO_REG(alloc_16_to_ebx,%ebx,$16,%esi) +DEFINE_ALLOC_TO_REG(alloc_to_ebx,%ebx,%ebx) +DEFINE_ALLOC_TO_REG(alloc_8_to_ebx,%ebx,$8) +DEFINE_ALLOC_TO_REG(alloc_16_to_ebx,%ebx,$16) -DEFINE_ALLOC_TO_REG(alloc_to_esi,%esi,%esi,%edi) -DEFINE_ALLOC_TO_REG(alloc_8_to_esi,%esi,$8,%edi) -DEFINE_ALLOC_TO_REG(alloc_16_to_esi,%esi,$16,%edi) +DEFINE_ALLOC_TO_REG(alloc_to_esi,%esi,%esi) +DEFINE_ALLOC_TO_REG(alloc_8_to_esi,%esi,$8) +DEFINE_ALLOC_TO_REG(alloc_16_to_esi,%esi,$16) -DEFINE_ALLOC_TO_REG(alloc_to_edi,%edi,%edi,%esi) -DEFINE_ALLOC_TO_REG(alloc_8_to_edi,%edi,$8,%esi) -DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16,%esi) +DEFINE_ALLOC_TO_REG(alloc_to_edi,%edi,%edi) +DEFINE_ALLOC_TO_REG(alloc_8_to_edi,%edi,$8) +DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16) /* Called from lisp when an inline allocation overflows. * Every register except the result needs to be preserved. @@ -549,45 +718,53 @@ DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16,%esi) #define START_REGION GNAME(boxed_region) #endif -#define ALLOC_OVERFLOW(size,tmp) \ - /* Calculate the size for the allocation. */ \ - subl START_REGION,size; \ - ALLOC(size,tmp) +#if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32) +#define ALLOC_OVERFLOW(size,scratch) \ + movl SBCL_THREAD_BASE_EA, scratch; \ + /* Calculate the size for the allocation. 
*/ \ + subl THREAD_ALLOC_REGION_OFFSET(scratch),size; \ + ALLOC(size) +#else +#define ALLOC_OVERFLOW(size,scratch) \ + /* Calculate the size for the allocation. */ \ + subl START_REGION,size; \ + ALLOC(size) +#endif /* This routine handles an overflow with eax=crfp+size. So the size=eax-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_eax) TYPE(GNAME(alloc_overflow_eax)) GNAME(alloc_overflow_eax): pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%eax,%esi) + ALLOC_OVERFLOW(%eax,%edx) popl %edx # Restore edx. popl %ecx # Restore ecx. ret SIZE(GNAME(alloc_overflow_eax)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_ecx) TYPE(GNAME(alloc_overflow_ecx)) GNAME(alloc_overflow_ecx): pushl %eax # Save eax pushl %edx # Save edx - ALLOC_OVERFLOW(%ecx,%esi) + ALLOC_OVERFLOW(%ecx,%edx) movl %eax,%ecx # setup the destination. popl %edx # Restore edx. popl %eax # Restore eax. ret SIZE(GNAME(alloc_overflow_ecx)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_edx) TYPE(GNAME(alloc_overflow_edx)) GNAME(alloc_overflow_edx): pushl %eax # Save eax pushl %ecx # Save ecx - ALLOC_OVERFLOW(%edx,%esi) + ALLOC_OVERFLOW(%edx,%ecx) movl %eax,%edx # setup the destination. popl %ecx # Restore ecx. popl %eax # Restore eax. @@ -596,14 +773,14 @@ GNAME(alloc_overflow_edx): /* This routine handles an overflow with ebx=crfp+size. So the size=ebx-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_ebx) TYPE(GNAME(alloc_overflow_ebx)) GNAME(alloc_overflow_ebx): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%ebx,%edi) + ALLOC_OVERFLOW(%ebx,%edx) movl %eax,%ebx # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -613,14 +790,14 @@ GNAME(alloc_overflow_ebx): /* This routine handles an overflow with esi=crfp+size. So the size=esi-crfp. 
*/ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_esi) TYPE(GNAME(alloc_overflow_esi)) GNAME(alloc_overflow_esi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%esi,%edi) + ALLOC_OVERFLOW(%esi,%edx) movl %eax,%esi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -628,14 +805,14 @@ GNAME(alloc_overflow_esi): ret SIZE(GNAME(alloc_overflow_esi)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_edi) TYPE(GNAME(alloc_overflow_edi)) GNAME(alloc_overflow_edi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%edi,%esi) + ALLOC_OVERFLOW(%edi,%edx) movl %eax,%edi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -651,7 +828,7 @@ GNAME(alloc_overflow_edi): * and EIP from the exception context and use it to fake * up a stack frame which will skip over the system SEH * code. */ - .align align_4byte + .align align_16byte .globl GNAME(exception_handler_wrapper) TYPE(GNAME(exception_handler_wrapper)) GNAME(exception_handler_wrapper): @@ -682,7 +859,7 @@ GNAME(exception_handler_wrapper): #endif #ifdef LISP_FEATURE_DARWIN - .align align_4byte + .align align_16byte .globl GNAME(call_into_lisp_tramp) TYPE(GNAME(call_into_lisp_tramp)) GNAME(call_into_lisp_tramp): @@ -731,7 +908,7 @@ GNAME(call_into_lisp_tramp): SIZE(call_into_lisp_tramp) #endif - .align align_4byte,0x90 + .align align_16byte,0x90 .globl GNAME(post_signal_tramp) TYPE(GNAME(post_signal_tramp)) GNAME(post_signal_tramp): @@ -756,7 +933,7 @@ GNAME(post_signal_tramp): .globl GNAME(fast_bzero_pointer) .data - .align align_4byte + .align align_16byte GNAME(fast_bzero_pointer): /* Variable containing a pointer to the bzero function to use. * Initially points to a basic function. 
Change this variable @@ -764,7 +941,7 @@ GNAME(fast_bzero_pointer): .long GNAME(fast_bzero_base) .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero) TYPE(GNAME(fast_bzero)) GNAME(fast_bzero): @@ -774,7 +951,7 @@ GNAME(fast_bzero): .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero_detect) TYPE(GNAME(fast_bzero_detect)) GNAME(fast_bzero_detect): @@ -812,7 +989,7 @@ Lrestore: .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero_sse) TYPE(GNAME(fast_bzero_sse)) @@ -858,7 +1035,7 @@ Lend_sse: .text - .align align_8byte,0x90 + .align align_16byte,0x90 .globl GNAME(fast_bzero_base) TYPE(GNAME(fast_bzero_base)) @@ -874,7 +1051,6 @@ GNAME(fast_bzero_base): xor %eax, %eax /* Zero EAX */ shr $2, %ecx /* Amount of 4-byte blocks to copy */ jz Lend_base - cld /* Set direction of STOSL to increment */ rep stosl /* Store EAX to *EDI, ECX times, incrementing @@ -886,7 +1062,81 @@ Lend_base: pop %eax ret SIZE(GNAME(fast_bzero_base)) - - + + +/* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub + * the control stack from C, largely due to not knowing where the + * active stack frame ends. On such platforms, we reimplement the + * core scrubbing logic in assembly, in this case here: + */ + .text + .align align_16byte,0x90 + .globl GNAME(arch_scrub_control_stack) + TYPE(GNAME(arch_scrub_control_stack)) +GNAME(arch_scrub_control_stack): + /* We are passed three parameters: + * A (struct thread *) at [ESP+4], + * the address of the guard page at [ESP+8], and + * the address of the hard guard page at [ESP+12]. + * We may trash EAX, ECX, and EDX with impunity. + * [ESP] is our return address, [ESP-4] is the first + * stack slot to scrub. */ + + /* We start by setting up our scrub pointer in EAX, our + * guard page upper bound in ECX, and our hard guard + * page upper bound in EDX. 
*/ + lea -4(%esp), %eax + mov GNAME(os_vm_page_size),%edx + mov %edx, %ecx + add 8(%esp), %ecx + add 12(%esp), %edx + + /* We need to do a memory operation relative to the + * thread pointer, so put it in %ecx and our guard + * page upper bound in 4(%esp). */ + xchg 4(%esp), %ecx + + /* Now we begin our main scrub loop. */ +ascs_outer_loop: + + /* If we're about to scrub the hard guard page, exit. */ + cmp %edx, %eax + jae ascs_check_guard_page + cmp 12(%esp), %eax + ja ascs_finished + +ascs_check_guard_page: + /* If we're about to scrub the guard page, and the guard + * page is protected, exit. */ + cmp 4(%esp), %eax + jae ascs_clear_loop + cmp 8(%esp), %eax + jbe ascs_clear_loop + cmpl $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%ecx) + jne ascs_finished + + /* Clear memory backwards to the start of the (4KiB) page */ +ascs_clear_loop: + movl $0, (%eax) + test $0xfff, %eax + lea -4(%eax), %eax + jnz ascs_clear_loop + + /* If we're about to hit the hard guard page, exit. */ + cmp %edx, %eax + jae ascs_finished + + /* If the next (previous?) 4KiB page contains a non-zero + * word, continue scrubbing. */ +ascs_check_loop: + testl $-1, (%eax) + jnz ascs_outer_loop + test $0xfff, %eax + lea -4(%eax), %eax + jnz ascs_check_loop + +ascs_finished: + ret + SIZE(GNAME(arch_scrub_control_stack)) + END() - \ No newline at end of file