X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-assem.S;h=fba970ba1a3beb9c5233da8dcd5860e2f8f1b45b;hb=7f1e94ae961a198e00daf281eb1dc858e5b2dcc7;hp=e3b36c7d8c513cdd9e6b65600c83cb5c12fc8f9a;hpb=f78e2d271f540d68d35b4f41696ce746ff129ee3;p=sbcl.git diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index e3b36c7..fba970b 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -17,6 +17,7 @@ #include "sbcl.h" #include "validate.h" #include "genesis/closure.h" +#include "genesis/funcallable-instance.h" #include "genesis/fdefn.h" #include "genesis/static-symbols.h" #include "genesis/symbol.h" @@ -50,10 +51,12 @@ #define align_4byte 4 #define align_8byte 8 #define align_16byte 16 +#define align_page 4096 #else #define align_4byte 2 #define align_8byte 3 #define align_16byte 4 +#define align_page 12 #endif /* @@ -62,7 +65,7 @@ * that are defined to be no-ops on win32. Hopefully this still works on * other platforms. */ -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN) #define TYPE(name) .type name,@function #define SIZE(name) .size name,.-name #else @@ -70,9 +73,110 @@ #define SIZE(name) #endif +/* Helper macros for access to thread-locals slots for both OS types: + * ------------------------------------------------------------------------ + * + * Windows TEB block + * ================== __________ + * | Win32 %FS base | ----> | | 0 + * ================== | | 1 + * z z + * TLS slots start here> |XXXXXXXX| e10 = TEB_STATIC_TLS_SLOTS_OFFSET + * |XXXXXXXX| e11 + * z ... z + * |XXXXXXXX| e4e + * TLS ends here> ,- |XXXXXXXX| e4f = TEB_STATIC_TLS_SLOTS_OFFSET+63 + * / z z + * | ---------- "os_address" ----. 
+ * | | + * | big blob of SBCL-specific thread-local data | + * | |----------------------------------------| <--' + * | | CONTROL, BINDING, ALIEN STACK | + * | z z + * ================== | |----------------------------------------| + * | Linux %FS base | -->| | FFI stack pointer | + * ================== | | (extra page for mprotect) | + * \ |----------------------------------------| + * (union p_t_d) -----> \-> | struct thread { | dynamic_values[0] | + * . | ... | [1] | + * . z ... z ... z + * [tls data begins] | } | ... | <- + * [declared end of p_t_d] |----------------------------------------| . | + * . | ... | . | + * . | [TLS_SIZE-1] | <-| + * [tls data actually ends] |----------------------------------------| | + * . | ALTSTACK | | + * . |----------------------------------------| | + * . | struct nonpointer_thread_data { } | | + * . ------------------------------------------ | + * [blob actually ends] | + * / + * / + * / + * ______________________ / + * | struct symbol { | / + * z ... 
z / + * | fixnum tls_index; // fixnum value relative to union / + * | } | (< TLS_SIZE = 4096) + * ---------------------| + */ +#ifdef LISP_FEATURE_WIN32 +# define TEB_STATIC_TLS_SLOTS_OFFSET 0xE10 +# define TEB_SBCL_THREAD_BASE_OFFSET (TEB_STATIC_TLS_SLOTS_OFFSET+(63*4)) +# define SBCL_THREAD_BASE_EA %fs:TEB_SBCL_THREAD_BASE_OFFSET +# define MAYBE_FS(addr) addr +# define LoadTlSymbolValueAddress(symbol,reg) ; \ + movl SBCL_THREAD_BASE_EA, reg ; \ + addl (symbol+SYMBOL_TLS_INDEX_OFFSET), reg ; +# define LoadCurrentThreadSlot(offset,reg); \ + movl SBCL_THREAD_BASE_EA, reg ; \ + movl offset(reg), reg ; +#elif defined(LISP_FEATURE_LINUX) || defined(LISP_FEATURE_SUNOS) || defined(LISP_FEATURE_FREEBSD) + /* see comment in arch_os_thread_init */ +# define SBCL_THREAD_BASE_EA %fs:THREAD_SELFPTR_OFFSET +# define MAYBE_FS(addr) addr +#else + /* perhaps there's an OS out there that actually supports %fs without + * jumping through hoops, so just in case, here is a default definition: */ +# define SBCL_THREAD_BASE_EA $0 +# define MAYBE_FS(addr) %fs:addr +#endif + +/* gas can't parse 4096LU; redefine */ +#if BACKEND_PAGE_BYTES == 4096 +# undef BACKEND_PAGE_BYTES +# define BACKEND_PAGE_BYTES 4096 +#elif BACKEND_PAGE_BYTES == 32768 +# undef BACKEND_PAGE_BYTES +# define BACKEND_PAGE_BYTES 32768 +#else +# error BACKEND_PAGE_BYTES mismatch +#endif + +/* OAOOM because we don't have the C headers here */ +#define THREAD_CSP_PAGE_SIZE BACKEND_PAGE_BYTES + +/* the CSP page sits right before the thread */ +#define THREAD_SAVED_CSP_OFFSET (-THREAD_CSP_PAGE_SIZE) + +/* + * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal + * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have + * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or + * 0F 0B in little-endian notation, that causes SIGILL to fire. We check + * for this instruction in the SIGILL handler and if we see it, we + * advance the EIP by two bytes to skip over the ud2 instruction and + * call sigtrap_handler. 
*/ +#if defined(LISP_FEATURE_UD2_BREAKPOINTS) +#define END() +#define TRAP ud2 +#else +#define END() .end +#define TRAP int3 +#endif + .text - .global GNAME(foreign_function_call_active) - .global GNAME(all_threads) + .globl GNAME(all_threads) /* * A call to call_into_c preserves esi, edi, and ebp. @@ -84,18 +188,76 @@ * floats. * * This should work for Lisp calls C calls Lisp calls C.. + * + * FIXME & OAOOM: This duplicates call-out in src/compiler/x86/c-call.lisp, + * so if you tweak this, change that too! */ +/* + * Note on sections specific to LISP_FEATURE_SB_SAFEPOINT: + * + * The code below is essential to safepoint-based garbage collection, + * and several details need to be considered for correct implementation. + * + * The stack spilling approach: + * On SB-SAFEPOINT platforms, the CALL-OUT vop is defined to spill all + * live Lisp TNs to the stack to provide information for conservative + * GC cooperatively (avoiding the need to retrieve register values + * from POSIX signal contexts or Windows GetThreadContext()). + * + * Finding the SP at all: + * The main remaining value needed by GC is the stack pointer (SP) at + * the moment of entering the foreign function. For this purpose, a + * thread-local field for the SP is used. Two stores to that field + * are done for each C call, one to save the SP before calling out + * and one to undo that store afterwards. + * + * Stores as synchronization points: + * These two stores delimit the C call: While the SP is set, our + * thread is known not to run Lisp code: During GC, memory protection + * ensures that no thread proceeds across stores. + * + * The return PC issue: + * (Note that CALL-OUT has, in principle, two versions: Inline + * assembly in the VOP -or- alternatively the out-of-line version you + * are currently reading. In reality, safepoint builds currently + * lack the inline code entirely.) 
+ * + * Both versions need to take special care with the return PC: + * - In the inline version of the code (if it existed), the two stores + * would be done directly in the CALL-OUT vop. In that theoretical + * implementation, there is a time interval between return of the + * actual C call and a second SP store during which the return + * address might not be on the stack anymore. + * - In this out-of-line version, the stores are done during + * call_into_c's frame, but an equivalent problem arises: In order + * to present the stack of arguments as our foreign function expects + * them, call_into_c has to pop the Lisp return address into a + * register first; this register has to be preserved by GENCGC + * separately: our return address is not in the stack anymore. + * In both cases, stack scanning alone is not sufficient to pin + * the return address, and we communicate it to GC explicitly + * in addition to the SP. + * + * Note on look-alike accessor macros with vastly different behaviour: + * THREAD_PC_AROUND_FOREIGN_CALL_OFFSET is an "ordinary" field of the + * struct thread, whereas THREAD_SAVED_CSP_OFFSET is a synchronization + * point on a potentially write-protected page. +*/ + .text .align align_16byte,0x90 - .global GNAME(call_into_c) + .globl GNAME(call_into_c) TYPE(GNAME(call_into_c)) GNAME(call_into_c): - movl $1,GNAME(foreign_function_call_active) - /* Save the return Lisp address in ebx. */ popl %ebx /* Setup the NPX for C */ + /* The VOP says regarding CLD: "Clear out DF: Darwin, Windows, + * and Solaris at least require this, and it should not hurt + * others either." call_into_c didn't have it, but better safe than + * sorry. 
*/ + cld fstp %st(0) fstp %st(0) fstp %st(0) @@ -105,17 +267,29 @@ GNAME(call_into_c): fstp %st(0) fstp %st(0) -#ifdef LISP_FEATURE_WIN32 - cld +#ifdef LISP_FEATURE_SB_SAFEPOINT + /* enter safe region: store SP and return PC */ + movl SBCL_THREAD_BASE_EA,%edi + movl %esp,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi)) + movl %ebx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi)) #endif - call *%eax # normal callout using Lisp stack + /* foreign call, preserving ESI, EDI, and EBX */ + call *%eax # normal callout using Lisp stack + /* return values now in eax/edx OR st(0) */ + +#ifdef LISP_FEATURE_SB_SAFEPOINT + /* leave region: clear the SP! (Also unpin the return PC.) */ + xorl %ecx,%ecx + movl %ecx,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi)) + movl %ecx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi)) +#endif - movl %eax,%ecx # remember integer return value + movl %eax,%ecx # remember integer return value /* Check for a return FP value. */ fxam - fnstsw %eax + fnstsw %ax andl $0x4500,%eax cmpl $0x4100,%eax jne Lfp_rtn_value @@ -134,7 +308,6 @@ GNAME(call_into_c): /* Restore the return value. */ movl %ecx,%eax # maybe return value - movl $0,GNAME(foreign_function_call_active) /* Return. */ jmp *%ebx @@ -152,19 +325,17 @@ Lfp_rtn_value: /* We don't need to restore eax, because the result is in st(0). */ - movl $0,GNAME(foreign_function_call_active) -/* Return. */ +/* Return. FIXME: It would be nice to restructure this to use RET. */ jmp *%ebx SIZE(GNAME(call_into_c)) .text - .global GNAME(call_into_lisp_first_time) + .globl GNAME(call_into_lisp_first_time) TYPE(GNAME(call_into_lisp_first_time)) -/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when - * the stack changes. We don't worry too much about saving registers +/* We don't worry too much about saving registers * here, because we never expect to return from the initial call to lisp * anyway */ @@ -173,12 +344,9 @@ GNAME(call_into_lisp_first_time): pushl %ebp # Save old frame pointer. 
movl %esp,%ebp # Establish new frame. #ifndef LISP_FEATURE_WIN32 - movl %esp,ALIEN_STACK + SYMBOL_VALUE_OFFSET movl GNAME(all_threads),%eax - movl THREAD_CONTROL_STACK_START_OFFSET(%eax) ,%esp - /* don't think too hard about what happens if we get interrupted - * here */ - addl $THREAD_CONTROL_STACK_SIZE-4,%esp + /* pthread machinery takes care of this for other threads */ + movl THREAD_CONTROL_STACK_END_OFFSET(%eax) ,%esp #else /* Win32 -really- doesn't like you switching stacks out from under it. */ movl GNAME(all_threads),%eax @@ -186,7 +354,7 @@ GNAME(call_into_lisp_first_time): jmp Lstack .text - .global GNAME(call_into_lisp) + .globl GNAME(call_into_lisp) TYPE(GNAME(call_into_lisp)) /* The C conventions require that ebx, esi, edi, and ebp be preserved @@ -196,6 +364,7 @@ GNAME(call_into_lisp_first_time): GNAME(call_into_lisp): pushl %ebp # Save old frame pointer. movl %esp,%ebp # Establish new frame. + Lstack: /* Save the NPX state */ fwait # Catch any pending NPX exceptions. @@ -232,8 +401,6 @@ Lstack: xorl %esi,%esi # third arg /* no longer in function call */ - movl %eax, GNAME(foreign_function_call_active) - movl %esp,%ebx # remember current stack pushl %ebx # Save entry stack on (maybe) new stack. @@ -254,11 +421,29 @@ Lstack: Ldone: /* Registers eax, ecx, edx, edi, and esi are now live. */ +#ifdef LISP_FEATURE_WIN32 + /* Establish an SEH frame. */ +#ifdef LISP_FEATURE_SB_THREAD + /* Save binding stack pointer */ + subl $4, %esp + pushl %eax + movl SBCL_THREAD_BASE_EA, %eax + movl THREAD_BINDING_STACK_POINTER_OFFSET(%eax), %eax + movl %eax, 4(%esp) + popl %eax +#else + pushl BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET +#endif + pushl $GNAME(exception_handler_wrapper) + pushl %fs:0 + movl %esp, %fs:0 +#endif + /* Alloc new frame. */ - mov %esp,%ebx # The current sp marks start of new frame. - push %ebp # fp in save location S0 - sub $8,%esp # Ensure 3 slots are allocated, one above. - mov %ebx,%ebp # Switch to new frame. 
+ push %ebp # Dummy for return address + push %ebp # fp in save location S1 + mov %esp,%ebp # The current sp marks start of new frame. + sub $4,%esp # Ensure 3 slots are allocated, two above. call *CLOSURE_FUN_OFFSET(%eax) @@ -269,6 +454,13 @@ Ldone: LsingleValue: /* A singled value function returns here */ +#ifdef LISP_FEATURE_WIN32 + /* Remove our SEH frame. */ + mov %fs:0,%esp + popl %fs:0 + add $8, %esp +#endif + /* Restore the stack, in case there was a stack change. */ popl %esp # c-sp @@ -288,7 +480,7 @@ LsingleValue: /* support for saving and restoring the NPX state from C */ .text - .global GNAME(fpu_save) + .globl GNAME(fpu_save) TYPE(GNAME(fpu_save)) .align 2,0x90 GNAME(fpu_save): @@ -297,7 +489,7 @@ GNAME(fpu_save): ret SIZE(GNAME(fpu_save)) - .global GNAME(fpu_restore) + .globl GNAME(fpu_restore) TYPE(GNAME(fpu_restore)) .align 2,0x90 GNAME(fpu_restore): @@ -310,12 +502,13 @@ GNAME(fpu_restore): * the undefined-function trampoline */ .text - .align align_4byte,0x90 - .global GNAME(undefined_tramp) + .align align_16byte,0x90 + .globl GNAME(undefined_tramp) TYPE(GNAME(undefined_tramp)) .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG GNAME(undefined_tramp): - int3 + pop 4(%ebp) # Save return PC for backtrace. + TRAP .byte trap_Error .byte 2 .byte UNDEFINED_FUN_ERROR @@ -323,12 +516,16 @@ GNAME(undefined_tramp): ret SIZE(GNAME(undefined_tramp)) +/* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs + * to know the name of the function immediately following the + * undefined-function trampoline. 
*/ + /* * the closure trampoline */ .text - .align align_4byte,0x90 - .global GNAME(closure_tramp) + .align align_16byte,0x90 + .globl GNAME(closure_tramp) TYPE(GNAME(closure_tramp)) .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG GNAME(closure_tramp): @@ -342,12 +539,29 @@ GNAME(closure_tramp): jmp *CLOSURE_FUN_OFFSET(%eax) SIZE(GNAME(closure_tramp)) + .text + .align align_16byte,0x90 + .globl GNAME(funcallable_instance_tramp) + TYPE(GNAME(funcallable_instance_tramp)) +GNAME(funcallable_instance_tramp): + movl FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%eax),%eax + /* KLUDGE: on this platform, whatever kind of function is in %eax + * now, the first word of it contains the address to jump to. */ + jmp *CLOSURE_FUN_OFFSET(%eax) + SIZE(GNAME(funcallable_instance_tramp)) + /* * fun-end breakpoint magic */ + +/* + * For an explanation of the magic involved in function-end + * breakpoints, see the implementation in ppc-assem.S. + */ + .text - .global GNAME(fun_end_breakpoint_guts) - .align align_4byte + .globl GNAME(fun_end_breakpoint_guts) + .align align_16byte GNAME(fun_end_breakpoint_guts): /* Multiple Value return */ jc multiple_value_return @@ -357,33 +571,31 @@ GNAME(fun_end_breakpoint_guts): movl %esp,%ebx # Setup ebx - the ofp. subl $4,%esp # Allocate one stack slot for the return value movl $4,%ecx # Setup ecx for one return value. - movl $NIL,%edi # default second value - movl $NIL,%esi # default third value + movl $(NIL),%edi # default second value + movl $(NIL),%esi # default third value multiple_value_return: - .global GNAME(fun_end_breakpoint_trap) + .globl GNAME(fun_end_breakpoint_trap) GNAME(fun_end_breakpoint_trap): - int3 + TRAP .byte trap_FunEndBreakpoint hlt # We should never return here. 
- .global GNAME(fun_end_breakpoint_end) + .globl GNAME(fun_end_breakpoint_end) GNAME(fun_end_breakpoint_end): - .global GNAME(do_pending_interrupt) + .globl GNAME(do_pending_interrupt) TYPE(GNAME(do_pending_interrupt)) - .align align_4byte,0x90 + .align align_16byte,0x90 GNAME(do_pending_interrupt): - int3 + TRAP .byte trap_PendingInterrupt ret SIZE(GNAME(do_pending_interrupt)) - -/* - * Allocate bytes and return the start of the allocated space +/* Allocate bytes and return the start of the allocated space * in the specified destination register. * * In the general case the size will be in the destination register. @@ -391,361 +603,168 @@ GNAME(do_pending_interrupt): * All registers must be preserved except the destination. * The C conventions will preserve ebx, esi, edi, and ebp. * So only eax, ecx, and edx need special care here. + * + * ALLOC factors out the logic of calling alloc(): stack alignment, etc. + * + * DEFINE_ALLOC_TO_FOO defines an allocation routine. */ - - .globl GNAME(alloc_to_eax) - TYPE(GNAME(alloc_to_eax)) - .align align_4byte,0x90 -GNAME(alloc_to_eax): - pushl %ecx # Save ecx and edx as C could destroy them. - pushl %edx - pushl %eax # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - popl %edx # Restore ecx and edx. - popl %ecx - ret - SIZE(GNAME(alloc_to_eax)) - - .globl GNAME(alloc_8_to_eax) - TYPE(GNAME(alloc_8_to_eax)) - .align align_4byte,0x90 -GNAME(alloc_8_to_eax): - pushl %ecx # Save ecx and edx as C could destroy them. - pushl %edx - pushl $8 # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - popl %edx # Restore ecx and edx. - popl %ecx - ret - SIZE(GNAME(alloc_8_to_eax)) - - .globl GNAME(alloc_8_to_eax) - TYPE(GNAME(alloc_8_to_eax)) - .align align_4byte,0x90 - - .globl GNAME(alloc_16_to_eax) - TYPE(GNAME(alloc_16_to_eax)) - .align align_4byte,0x90 -GNAME(alloc_16_to_eax): - pushl %ecx # Save ecx and edx as C could destroy them. - pushl %edx - pushl $16 # Push the size. 
- call GNAME(alloc) - addl $4,%esp # Pop the size arg. - popl %edx # Restore ecx and edx. - popl %ecx - ret - SIZE(GNAME(alloc_16_to_eax)) - - .globl GNAME(alloc_to_ecx) - TYPE(GNAME(alloc_to_ecx)) - .align align_4byte,0x90 -GNAME(alloc_to_ecx): - pushl %eax # Save eax and edx as C could destroy them. - pushl %edx - pushl %ecx # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%ecx # Set up the destination. - popl %edx # Restore eax and edx. - popl %eax - ret - SIZE(GNAME(alloc_to_ecx)) - - .globl GNAME(alloc_8_to_ecx) - TYPE(GNAME(alloc_8_to_ecx)) - .align align_4byte,0x90 -GNAME(alloc_8_to_ecx): - pushl %eax # Save eax and edx as C could destroy them. - pushl %edx - pushl $8 # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%ecx # Set up the destination. - popl %edx # Restore eax and edx. - popl %eax - ret - SIZE(GNAME(alloc_8_to_ecx)) - - .globl GNAME(alloc_16_to_ecx) - TYPE(GNAME(alloc_16_to_ecx)) - .align align_4byte,0x90 -GNAME(alloc_16_to_ecx): - pushl %eax # Save eax and edx as C could destroy them. - pushl %edx - pushl $16 # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%ecx # Set up the destination. - popl %edx # Restore eax and edx. - popl %eax - ret - SIZE(GNAME(alloc_16_to_ecx)) - - - .globl GNAME(alloc_to_edx) - TYPE(GNAME(alloc_to_edx)) - .align align_4byte,0x90 -GNAME(alloc_to_edx): - pushl %eax # Save eax and ecx as C could destroy them. - pushl %ecx - pushl %edx # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%edx # Set up the destination. - popl %ecx # Restore eax and ecx. - popl %eax - ret - SIZE(GNAME(alloc_to_edx)) - - .globl GNAME(alloc_8_to_edx) - TYPE(GNAME(alloc_8_to_edx)) - .align align_4byte,0x90 -GNAME(alloc_8_to_edx): - pushl %eax # Save eax and ecx as C could destroy them. - pushl %ecx - pushl $8 # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. 
- movl %eax,%edx # Set up the destination. - popl %ecx # Restore eax and ecx. - popl %eax - ret - SIZE(GNAME(alloc_8_to_edx)) - - .globl GNAME(alloc_16_to_edx) - TYPE(GNAME(alloc_16_to_edx)) - .align align_4byte,0x90 -GNAME(alloc_16_to_edx): - pushl %eax # Save eax and ecx as C could destroy them. - pushl %ecx - pushl $16 # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%edx # Set up the destination. - popl %ecx # Restore eax and ecx. - popl %eax - ret - SIZE(GNAME(alloc_16_to_edx)) - - - - .globl GNAME(alloc_to_ebx) - TYPE(GNAME(alloc_to_ebx)) - .align align_4byte,0x90 -GNAME(alloc_to_ebx): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl %ebx # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%ebx # Set up the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_to_ebx)) - - .globl GNAME(alloc_8_to_ebx) - TYPE(GNAME(alloc_8_to_ebx)) - .align align_4byte,0x90 -GNAME(alloc_8_to_ebx): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl $8 # Push the size. - call GNAME(alloc) - addl $4,%esp # Pop the size arg. - movl %eax,%ebx # Set up the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_8_to_ebx)) - - .globl GNAME(alloc_16_to_ebx) - TYPE(GNAME(alloc_16_to_ebx)) - .align align_4byte,0x90 -GNAME(alloc_16_to_ebx): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl $16 # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%ebx # setup the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_16_to_ebx)) - - - - .globl GNAME(alloc_to_esi) - TYPE(GNAME(alloc_to_esi)) - .align align_4byte,0x90 -GNAME(alloc_to_esi): - pushl %eax # Save eax, ecx, and edx as C could destroy them. 
- pushl %ecx - pushl %edx - pushl %esi # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%esi # setup the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_to_esi)) - - .globl GNAME(alloc_8_to_esi) - TYPE(GNAME(alloc_8_to_esi)) - .align align_4byte,0x90 -GNAME(alloc_8_to_esi): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl $8 # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%esi # setup the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_8_to_esi)) - - .globl GNAME(alloc_16_to_esi) - TYPE(GNAME(alloc_16_to_esi)) - .align align_4byte,0x90 -GNAME(alloc_16_to_esi): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl $16 # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%esi # setup the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_16_to_esi)) - - - .globl GNAME(alloc_to_edi) - TYPE(GNAME(alloc_to_edi)) - .align align_4byte,0x90 -GNAME(alloc_to_edi): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl %edi # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%edi # setup the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_to_edi)) - - .globl GNAME(alloc_8_to_edi) - TYPE(GNAME(alloc_8_to_edi)) - .align align_4byte,0x90 -GNAME(alloc_8_to_edi): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl $8 # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%edi # setup the destination. - popl %edx # Restore eax, ecx and edx. 
- popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_8_to_edi)) - - .globl GNAME(alloc_16_to_edi) - TYPE(GNAME(alloc_16_to_edi)) - .align align_4byte,0x90 -GNAME(alloc_16_to_edi): - pushl %eax # Save eax, ecx, and edx as C could destroy them. - pushl %ecx - pushl %edx - pushl $16 # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%edi # setup the destination. - popl %edx # Restore eax, ecx and edx. - popl %ecx - popl %eax - ret - SIZE(GNAME(alloc_16_to_edi)) - +#ifdef LISP_FEATURE_DARWIN +#define ALLOC(size) \ + pushl %ebp; /* Save EBP */ \ + movl %esp,%ebp; /* Save ESP to EBP */ \ + pushl $0; /* Reserve space for arg */ \ + andl $0xfffffff0,%esp; /* Align stack to 16bytes */ \ + movl size, (%esp); /* Argument to alloc */ \ + call GNAME(alloc); \ + movl %ebp,%esp; /* Restore ESP from EBP */ \ + popl %ebp; /* Restore EBP */ +#else +#define ALLOC(size) \ + pushl size; /* Argument to alloc */ \ + call GNAME(alloc); \ + addl $4,%esp; /* Pop argument */ +#endif + +#define DEFINE_ALLOC_TO_EAX(name,size) \ + .globl GNAME(name); \ + TYPE(GNAME(name)); \ + .align align_16byte,0x90; \ +GNAME(name): \ + pushl %ecx; /* Save ECX and EDX */ \ + pushl %edx; \ + ALLOC(size) \ + popl %edx; /* Restore ECX and EDX */ \ + popl %ecx; \ + ret; \ + SIZE(GNAME(name)) + +#define DEFINE_ALLOC_TO_ECX(name,size) \ + .globl GNAME(name); \ + TYPE(GNAME(name)); \ + .align align_16byte,0x90; \ +GNAME(name): \ + pushl %eax; /* Save EAX and EDX */ \ + pushl %edx; \ + ALLOC(size) \ + movl %eax,%ecx; /* Result to destination */ \ + popl %edx; \ + popl %eax; \ + ret; \ + SIZE(GNAME(name)) + +#define DEFINE_ALLOC_TO_EDX(name,size) \ + .globl GNAME(name); \ + TYPE(GNAME(name)); \ + .align align_16byte,0x90; \ +GNAME(name): \ + pushl %eax; /* Save EAX and ECX */ \ + pushl %ecx; \ + ALLOC(size) \ + movl %eax,%edx; /* Restore EAX and ECX */ \ + popl %ecx; \ + popl %eax; \ + ret; \ + SIZE(GNAME(name)) + +#define DEFINE_ALLOC_TO_REG(name,reg,size) \ + .globl GNAME(name); \ + 
TYPE(GNAME(name)); \ + .align align_16byte,0x90; \ +GNAME(name): \ + pushl %eax; /* Save EAX, ECX, and EDX */ \ + pushl %ecx; \ + pushl %edx; \ + ALLOC(size) \ + movl %eax,reg; /* Restore them */ \ + popl %edx; \ + popl %ecx; \ + popl %eax; \ + ret; \ + SIZE(GNAME(name)) + +DEFINE_ALLOC_TO_EAX(alloc_to_eax,%eax) +DEFINE_ALLOC_TO_EAX(alloc_8_to_eax,$8) +DEFINE_ALLOC_TO_EAX(alloc_16_to_eax,$16) + +DEFINE_ALLOC_TO_ECX(alloc_to_ecx,%ecx) +DEFINE_ALLOC_TO_ECX(alloc_8_to_ecx,$8) +DEFINE_ALLOC_TO_ECX(alloc_16_to_ecx,$16) + +DEFINE_ALLOC_TO_EDX(alloc_to_edx,%edx) +DEFINE_ALLOC_TO_EDX(alloc_8_to_edx,$8) +DEFINE_ALLOC_TO_EDX(alloc_16_to_edx,$16) + +DEFINE_ALLOC_TO_REG(alloc_to_ebx,%ebx,%ebx) +DEFINE_ALLOC_TO_REG(alloc_8_to_ebx,%ebx,$8) +DEFINE_ALLOC_TO_REG(alloc_16_to_ebx,%ebx,$16) + +DEFINE_ALLOC_TO_REG(alloc_to_esi,%esi,%esi) +DEFINE_ALLOC_TO_REG(alloc_8_to_esi,%esi,$8) +DEFINE_ALLOC_TO_REG(alloc_16_to_esi,%esi,$16) + +DEFINE_ALLOC_TO_REG(alloc_to_edi,%edi,%edi) +DEFINE_ALLOC_TO_REG(alloc_8_to_edi,%edi,$8) +DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16) + /* Called from lisp when an inline allocation overflows. - Every register except the result needs to be preserved. - We depend on C to preserve ebx, esi, edi, and ebp. - But where necessary must save eax, ecx, edx. */ + * Every register except the result needs to be preserved. + * We depend on C to preserve ebx, esi, edi, and ebp. + * But where necessary must save eax, ecx, edx. */ #ifdef LISP_FEATURE_SB_THREAD #define START_REGION %fs:THREAD_ALLOC_REGION_OFFSET #else #define START_REGION GNAME(boxed_region) #endif - + +#if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32) +#define ALLOC_OVERFLOW(size,scratch) \ + movl SBCL_THREAD_BASE_EA, scratch; \ + /* Calculate the size for the allocation. */ \ + subl THREAD_ALLOC_REGION_OFFSET(scratch),size; \ + ALLOC(size) +#else +#define ALLOC_OVERFLOW(size,scratch) \ + /* Calculate the size for the allocation. 
*/ \ + subl START_REGION,size; \ + ALLOC(size) +#endif + /* This routine handles an overflow with eax=crfp+size. So the size=eax-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_eax) TYPE(GNAME(alloc_overflow_eax)) GNAME(alloc_overflow_eax): pushl %ecx # Save ecx pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl START_REGION,%eax - pushl %eax # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. + ALLOC_OVERFLOW(%eax,%edx) popl %edx # Restore edx. popl %ecx # Restore ecx. ret SIZE(GNAME(alloc_overflow_eax)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_ecx) TYPE(GNAME(alloc_overflow_ecx)) GNAME(alloc_overflow_ecx): pushl %eax # Save eax pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl START_REGION,%ecx - pushl %ecx # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. + ALLOC_OVERFLOW(%ecx,%edx) movl %eax,%ecx # setup the destination. popl %edx # Restore edx. popl %eax # Restore eax. ret SIZE(GNAME(alloc_overflow_ecx)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_edx) TYPE(GNAME(alloc_overflow_edx)) GNAME(alloc_overflow_edx): pushl %eax # Save eax pushl %ecx # Save ecx - /* Calculate the size for the allocation. */ - subl START_REGION,%edx - pushl %edx # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. + ALLOC_OVERFLOW(%edx,%ecx) movl %eax,%edx # setup the destination. popl %ecx # Restore ecx. popl %eax # Restore eax. @@ -754,18 +773,14 @@ GNAME(alloc_overflow_edx): /* This routine handles an overflow with ebx=crfp+size. So the size=ebx-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_ebx) TYPE(GNAME(alloc_overflow_ebx)) GNAME(alloc_overflow_ebx): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - /* Calculate the size for the allocation. 
*/ - subl START_REGION,%ebx - pushl %ebx # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. + ALLOC_OVERFLOW(%ebx,%edx) movl %eax,%ebx # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -775,18 +790,14 @@ GNAME(alloc_overflow_ebx): /* This routine handles an overflow with esi=crfp+size. So the size=esi-crfp. */ - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_esi) TYPE(GNAME(alloc_overflow_esi)) GNAME(alloc_overflow_esi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl START_REGION,%esi - pushl %esi # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. + ALLOC_OVERFLOW(%esi,%edx) movl %eax,%esi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -794,18 +805,14 @@ GNAME(alloc_overflow_esi): ret SIZE(GNAME(alloc_overflow_esi)) - .align align_4byte + .align align_16byte .globl GNAME(alloc_overflow_edi) TYPE(GNAME(alloc_overflow_edi)) GNAME(alloc_overflow_edi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl START_REGION,%edi - pushl %edi # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. + ALLOC_OVERFLOW(%edi,%edx) movl %eax,%edi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -813,7 +820,95 @@ GNAME(alloc_overflow_edi): ret SIZE(GNAME(alloc_overflow_edi)) - .align align_4byte,0x90 + +#ifdef LISP_FEATURE_WIN32 + /* The guts of the exception-handling system doesn't use + * frame pointers, which manages to throw off backtraces + * rather badly. So here we grab the (known-good) EBP + * and EIP from the exception context and use it to fake + * up a stack frame which will skip over the system SEH + * code. 
*/ + .align align_16byte + .globl GNAME(exception_handler_wrapper) + TYPE(GNAME(exception_handler_wrapper)) +GNAME(exception_handler_wrapper): + /* Context layout is: */ + /* 7 dwords before FSA. (0x1c) */ + /* 8 dwords and 0x50 bytes in the FSA. (0x70/0x8c) */ + /* 4 dwords segregs. (0x10/0x9c) */ + /* 6 dwords non-stack GPRs. (0x18/0xb4) */ + /* EBP (at 0xb4) */ + /* EIP (at 0xb8) */ +#define CONTEXT_EBP_OFFSET 0xb4 +#define CONTEXT_EIP_OFFSET 0xb8 + /* some other stuff we don't care about. */ + pushl %ebp + movl 0x10(%esp), %ebp /* context */ + pushl CONTEXT_EIP_OFFSET(%ebp) + pushl CONTEXT_EBP_OFFSET(%ebp) + movl %esp, %ebp + pushl 0x1c(%esp) + pushl 0x1c(%esp) + pushl 0x1c(%esp) + pushl 0x1c(%esp) + call GNAME(handle_exception) + lea 8(%ebp), %esp + popl %ebp + ret + SIZE(GNAME(exception_handler_wrapper)) +#endif + +#ifdef LISP_FEATURE_DARWIN + .align align_16byte + .globl GNAME(call_into_lisp_tramp) + TYPE(GNAME(call_into_lisp_tramp)) +GNAME(call_into_lisp_tramp): + /* 1. build the stack frame from the block that's pointed to by ECX + 2. free the block + 3. set ECX to 0 + 4. call the function via call_into_lisp + */ + pushl 0(%ecx) /* return address */ + + pushl %ebp + movl %esp, %ebp + + pushl 32(%ecx) /* eflags */ + pushl 28(%ecx) /* EAX */ + pushl 20(%ecx) /* ECX */ + pushl 16(%ecx) /* EDX */ + pushl 24(%ecx) /* EBX */ + pushl $0 /* popal is going to ignore esp */ + pushl %ebp /* is this right?? 
*/ + pushl 12(%ecx) /* ESI */ + pushl 8(%ecx) /* EDI */ + pushl $0 /* args for call_into_lisp */ + pushl $0 + pushl 4(%ecx) /* function to call */ + + /* free our save block */ + pushl %ecx /* reserve sufficient space on stack for args */ + pushl %ecx + andl $0xfffffff0, %esp /* align stack */ + movl $0x40, 4(%esp) + movl %ecx, (%esp) + call GNAME(os_invalidate) + + /* call call_into_lisp */ + leal -48(%ebp), %esp + call GNAME(call_into_lisp) + + /* Clean up our mess */ + leal -36(%ebp), %esp + popal + popfl + leave + ret + + SIZE(call_into_lisp_tramp) +#endif + + .align align_16byte,0x90 .globl GNAME(post_signal_tramp) TYPE(GNAME(post_signal_tramp)) GNAME(post_signal_tramp): @@ -823,51 +918,22 @@ GNAME(post_signal_tramp): addl $12,%esp /* clear call_into_lisp args from stack */ popal /* restore registers */ popfl +#ifdef LISP_FEATURE_DARWIN + /* skip two padding words */ + addl $8,%esp +#endif leave ret SIZE(GNAME(post_signal_tramp)) -#ifdef LISP_FEATURE_WIN32 - /* - * This is part of the funky magic for exception handling on win32. - * see sigtrap_emulator() in win32-os.c for details. - */ - .global GNAME(sigtrap_trampoline) -GNAME(sigtrap_trampoline): - pushl %eax - pushl %ebp - movl %esp, %ebp - call GNAME(sigtrap_wrapper) - pop %eax - pop %eax - int3 - .byte trap_ContextRestore - hlt # We should never return here. - - /* - * This is part of the funky magic for exception handling on win32. - * see handle_exception() in win32-os.c for details. - */ - .global GNAME(exception_trampoline) -GNAME(exception_trampoline): - pushl %eax - pushl %ebp - movl %esp, %ebp - call GNAME(handle_win32_exception_wrapper) - pop %eax - pop %eax - int3 - .byte trap_ContextRestore - hlt # We should never return here. -#endif /* fast_bzero implementations and code to detect which implementation * to use. 
*/ - .global GNAME(fast_bzero_pointer) + .globl GNAME(fast_bzero_pointer) .data - .align 4 + .align align_16byte GNAME(fast_bzero_pointer): /* Variable containing a pointer to the bzero function to use. * Initially points to a basic function. Change this variable @@ -875,8 +941,8 @@ GNAME(fast_bzero_pointer): .long GNAME(fast_bzero_base) .text - .align align_8byte,0x90 - .global GNAME(fast_bzero) + .align align_16byte,0x90 + .globl GNAME(fast_bzero) TYPE(GNAME(fast_bzero)) GNAME(fast_bzero): /* Indirect function call */ @@ -885,8 +951,8 @@ GNAME(fast_bzero): .text - .align align_8byte,0x90 - .global GNAME(fast_bzero_detect) + .align align_16byte,0x90 + .globl GNAME(fast_bzero_detect) TYPE(GNAME(fast_bzero_detect)) GNAME(fast_bzero_detect): /* Decide whether to use SSE, MMX or REP version */ @@ -906,10 +972,10 @@ GNAME(fast_bzero_detect): * for all non-SSE2 processors. */ Lbase: - movl $GNAME(fast_bzero_base), GNAME(fast_bzero_pointer) + movl $(GNAME(fast_bzero_base)), GNAME(fast_bzero_pointer) jmp Lrestore Lsse2: - movl $GNAME(fast_bzero_sse), GNAME(fast_bzero_pointer) + movl $(GNAME(fast_bzero_sse)), GNAME(fast_bzero_pointer) jmp Lrestore Lrestore: @@ -923,8 +989,8 @@ Lrestore: .text - .align align_8byte,0x90 - .global GNAME(fast_bzero_sse) + .align align_16byte,0x90 + .globl GNAME(fast_bzero_sse) TYPE(GNAME(fast_bzero_sse)) GNAME(fast_bzero_sse): @@ -940,7 +1006,7 @@ GNAME(fast_bzero_sse): movups %xmm7, -16(%esp) /* Save XMM register */ xorps %xmm7, %xmm7 /* Zero the XMM register */ jmp Lloop_sse - .align 16 + .align align_16byte Lloop_sse: /* Copy the 16 zeroes from xmm7 to memory, 4 times. 
MOVNTDQ is the @@ -969,8 +1035,8 @@ Lend_sse: .text - .align align_8byte,0x90 - .global GNAME(fast_bzero_base) + .align align_16byte,0x90 + .globl GNAME(fast_bzero_base) TYPE(GNAME(fast_bzero_base)) GNAME(fast_bzero_base): @@ -985,15 +1051,92 @@ GNAME(fast_bzero_base): xor %eax, %eax /* Zero EAX */ shr $2, %ecx /* Amount of 4-byte blocks to copy */ jz Lend_base - cld /* Set direction of STOSL to increment */ - rep stosl /* Store EAX to *EDI, ECX times, incrementing + + rep + stosl /* Store EAX to *EDI, ECX times, incrementing * EDI by 4 after each store */ + Lend_base: pop %edi /* Restore temp registers */ pop %ecx pop %eax ret SIZE(GNAME(fast_bzero_base)) - - - .end + + +/* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub + * the control stack from C, largely due to not knowing where the + * active stack frame ends. On such platforms, we reimplement the + * core scrubbing logic in assembly, in this case here: + */ + .text + .align align_16byte,0x90 + .globl GNAME(arch_scrub_control_stack) + TYPE(GNAME(arch_scrub_control_stack)) +GNAME(arch_scrub_control_stack): + /* We are passed three parameters: + * A (struct thread *) at [ESP+4], + * the address of the guard page at [ESP+8], and + * the address of the hard guard page at [ESP+12]. + * We may trash EAX, ECX, and EDX with impunity. + * [ESP] is our return address, [ESP-4] is the first + * stack slot to scrub. */ + + /* We start by setting up our scrub pointer in EAX, our + * guard page upper bound in ECX, and our hard guard + * page upper bound in EDX. */ + lea -4(%esp), %eax + mov GNAME(os_vm_page_size),%edx + mov %edx, %ecx + add 8(%esp), %ecx + add 12(%esp), %edx + + /* We need to do a memory operation relative to the + * thread pointer, so put it in %ecx and our guard + * page upper bound in 4(%esp). */ + xchg 4(%esp), %ecx + + /* Now we begin our main scrub loop. */ +ascs_outer_loop: + + /* If we're about to scrub the hard guard page, exit. 
*/ + cmp %edx, %eax + jae ascs_check_guard_page + cmp 12(%esp), %eax + ja ascs_finished + +ascs_check_guard_page: + /* If we're about to scrub the guard page, and the guard + * page is protected, exit. */ + cmp 4(%esp), %eax + jae ascs_clear_loop + cmp 8(%esp), %eax + jbe ascs_clear_loop + cmpl $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%ecx) + jne ascs_finished + + /* Clear memory backwards to the start of the (4KiB) page */ +ascs_clear_loop: + movl $0, (%eax) + test $0xfff, %eax + lea -4(%eax), %eax + jnz ascs_clear_loop + + /* If we're about to hit the hard guard page, exit. */ + cmp %edx, %eax + jae ascs_finished + + /* If the next (previous?) 4KiB page contains a non-zero + * word, continue scrubbing. */ +ascs_check_loop: + testl $-1, (%eax) + jnz ascs_outer_loop + test $0xfff, %eax + lea -4(%eax), %eax + jnz ascs_check_loop + +ascs_finished: + ret + SIZE(GNAME(arch_scrub_control_stack)) + + END()