X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-assem.S;h=e1eba22f47e55e2dfa30d032611eae4b3dca04a0;hb=2e47ed527bdcb76cf5eb52f66cc08f4fb0a0041d;hp=0a65cd552af39a7acb0f744fdcc7f1bf6ce394ea;hpb=7fb597b585fc715537ea644f7d84440eca217ca1;p=sbcl.git

diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S
index 0a65cd5..e1eba22 100644
--- a/src/runtime/x86-assem.S
+++ b/src/runtime/x86-assem.S
@@ -17,6 +17,7 @@
 #include "sbcl.h"
 #include "validate.h"
 #include "genesis/closure.h"
+#include "genesis/funcallable-instance.h"
 #include "genesis/fdefn.h"
 #include "genesis/static-symbols.h"
 #include "genesis/symbol.h"
@@ -62,7 +63,7 @@
  * that are defined to be no-ops on win32. Hopefully this still works on
  * other platforms.
  */
-#ifndef LISP_FEATURE_WIN32
+#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
 #define TYPE(name) .type name,@function
 #define SIZE(name) .size name,.-name
 #else
@@ -70,9 +71,24 @@
 #define SIZE(name)
 #endif
 
+/*
+ * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
+ * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
+ * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
+ * 0F 0B in little-endian notation, that causes SIGILL to fire. We check
+ * for this instruction in the SIGILL handler and if we see it, we
+ * advance the EIP by two bytes to skip over the ud2 instruction and
+ * call sigtrap_handler. */
+#if defined(LISP_FEATURE_DARWIN)
+#define END()
+#define TRAP ud2
+#else
+#define END() .end
+#define TRAP int3
+#endif
+
 	.text
-	.global	GNAME(foreign_function_call_active)
-	.global	GNAME(all_threads)
+	.globl	GNAME(all_threads)
 
 /*
  * A call to call_into_c preserves esi, edi, and ebp.
@@ -84,14 +100,15 @@
  * floats.
 *
 * This should work for Lisp calls C calls Lisp calls C..
+ *
+ * FIXME & OAOOM: This duplicates call-out in src/compiler/x86/c-call.lisp,
+ * so if you tweak this, change that too!
 */
 	.text
 	.align	align_16byte,0x90
-	.global GNAME(call_into_c)
+	.globl GNAME(call_into_c)
 	TYPE(GNAME(call_into_c))
GNAME(call_into_c):
-	movl	$1,GNAME(foreign_function_call_active)
-
 /* Save the return Lisp address in ebx. */
 	popl	%ebx
@@ -109,6 +126,9 @@ GNAME(call_into_c):
 	cld
 #endif
 
+#ifdef LISP_FEATURE_DARWIN
+	andl	$0xfffffff0,%esp	# align stack to 16-byte boundary before calling C
+#endif
 	call	*%eax		# normal callout using Lisp stack
 	movl	%eax,%ecx	# remember integer return value
@@ -134,7 +154,6 @@ GNAME(call_into_c):
 
 /* Restore the return value. */
 	movl	%ecx,%eax	# maybe return value
-	movl	$0,GNAME(foreign_function_call_active)
 /* Return. */
 	jmp	*%ebx
@@ -152,7 +171,6 @@ Lfp_rtn_value:
 
 /* We don't need to restore eax, because the result is in st(0). */
 
-	movl	$0,GNAME(foreign_function_call_active)
 /* Return. */
 	jmp	*%ebx
@@ -160,7 +178,7 @@ Lfp_rtn_value:
 
 	.text
-	.global GNAME(call_into_lisp_first_time)
+	.globl GNAME(call_into_lisp_first_time)
 	TYPE(GNAME(call_into_lisp_first_time))
 
 /* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
@@ -178,7 +196,7 @@ GNAME(call_into_lisp_first_time):
 	movl    THREAD_CONTROL_STACK_START_OFFSET(%eax) ,%esp
 	/* don't think too hard about what happens if we get interrupted
 	* here */
-	addl	$THREAD_CONTROL_STACK_SIZE-4,%esp
+	addl	$(THREAD_CONTROL_STACK_SIZE),%esp
 #else
 /* Win32 -really- doesn't like you switching stacks out from under it. */
 	movl	GNAME(all_threads),%eax
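
The x86/darwin comment near the top of this patch describes the other half of the TRAP change: the SIGILL handler recognises the ud2 opcode, advances the saved EIP past it, and then calls sigtrap_handler just as if an int3 had fired. A minimal C sketch of that check follows; only sigtrap_handler, the 0F 0B encoding and the two-byte skip come from the comment, while the handler name, the headers and the os_context_pc_addr() accessor are assumptions about the surrounding runtime, not part of this patch.

#include <signal.h>

#include "os.h"         /* os_context_t, os_context_pc_addr() (assumed) */
#include "interrupt.h"  /* sigtrap_handler() (assumed) */

/* Sketch only: if the faulting instruction is ud2 (0F 0B), treat it as
 * a trap: step over the two opcode bytes and let sigtrap_handler() read
 * the trap-code byte that follows, exactly as it would after an int3. */
static void
sigill_handler(int signal, siginfo_t *info, void *void_context)
{
    os_context_t *context = (os_context_t *)void_context;
    unsigned char *pc = (unsigned char *)*os_context_pc_addr(context);

    if (pc[0] == 0x0f && pc[1] == 0x0b) {      /* ud2 */
        *os_context_pc_addr(context) += 2;     /* skip over the ud2 */
        sigtrap_handler(signal, info, void_context);
        return;
    }
    /* otherwise this is a genuine illegal instruction */
}
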
@@ -186,7 +204,7 @@ GNAME(call_into_lisp_first_time):
 	jmp     Lstack
 
 	.text
-	.global GNAME(call_into_lisp)
+	.globl GNAME(call_into_lisp)
 	TYPE(GNAME(call_into_lisp))
 
 /* The C conventions require that ebx, esi, edi, and ebp be preserved
@@ -232,8 +250,6 @@ Lstack:
 	xorl	%esi,%esi	# third arg
 
 /* no longer in function call */
-	movl	%eax, GNAME(foreign_function_call_active)
-
 	movl	%esp,%ebx	# remember current stack
 	pushl	%ebx		# Save entry stack on (maybe) new stack.
@@ -254,6 +270,19 @@ Lstack:
 Ldone:
 	/* Registers eax, ecx, edx, edi, and esi are now live. */
 
+#ifdef LISP_FEATURE_WIN32
+	/* Establish an SEH frame. */
+#ifdef LISP_FEATURE_SB_THREAD
+	/* FIXME: need to save BSP here. */
+#error "need to save BSP here, but don't know how yet."
+#else
+	pushl	BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET
+#endif
+	pushl	$GNAME(exception_handler_wrapper)
+	pushl	%fs:0
+	movl	%esp, %fs:0
+#endif
+
 	/* Alloc new frame. */
 	mov	%esp,%ebx	# The current sp marks start of new frame.
 	push	%ebp		# fp in save location S0
@@ -264,9 +293,17 @@ Ldone:
 
 	/* If the function returned multiple values, it will return to
 	   this point.  Lose them */
+	jnc	LsingleValue
 	mov	%ebx, %esp
+LsingleValue:
 
 	/* A singled value function returns here */
 
+#ifdef LISP_FEATURE_WIN32
+	/* Remove our SEH frame. */
+	popl	%fs:0
+	add	$8, %esp
+#endif
+
 /* Restore the stack, in case there was a stack change. */
 	popl	%esp		# c-sp
@@ -286,7 +323,7 @@ Ldone:
 
 /* support for saving and restoring the NPX state from C */
 	.text
-	.global	GNAME(fpu_save)
+	.globl	GNAME(fpu_save)
 	TYPE(GNAME(fpu_save))
 	.align	2,0x90
 GNAME(fpu_save):
@@ -295,7 +332,7 @@ GNAME(fpu_save):
 	ret
 	SIZE(GNAME(fpu_save))
 
-	.global	GNAME(fpu_restore)
+	.globl	GNAME(fpu_restore)
 	TYPE(GNAME(fpu_restore))
 	.align	2,0x90
 GNAME(fpu_restore):
@@ -309,11 +346,11 @@ GNAME(fpu_restore):
 */
 	.text
 	.align	align_4byte,0x90
-	.global	GNAME(undefined_tramp)
+	.globl	GNAME(undefined_tramp)
 	TYPE(GNAME(undefined_tramp))
        .byte   0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG
 GNAME(undefined_tramp):
-	int3
+	TRAP
 	.byte	trap_Error
        .byte   2
        .byte   UNDEFINED_FUN_ERROR
@@ -326,7 +363,7 @@ GNAME(undefined_tramp):
 */
 	.text
 	.align	align_4byte,0x90
-	.global	GNAME(closure_tramp)
+	.globl	GNAME(closure_tramp)
 	TYPE(GNAME(closure_tramp))
        .byte   0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG
 GNAME(closure_tramp):
@@ -340,41 +377,52 @@ GNAME(closure_tramp):
 	jmp	*CLOSURE_FUN_OFFSET(%eax)
 	SIZE(GNAME(closure_tramp))
 
+	.text
+	.align	align_4byte,0x90
+	.globl	GNAME(funcallable_instance_tramp)
+	TYPE(GNAME(funcallable_instance_tramp))
+GNAME(funcallable_instance_tramp):
+	movl	FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%eax),%eax
+	/* KLUDGE: on this platform, whatever kind of function is in %eax
+	 * now, the first word of it contains the address to jump to. */
+	jmp	*CLOSURE_FUN_OFFSET(%eax)
+	SIZE(GNAME(funcallable_instance_tramp))
+
 /*
 * fun-end breakpoint magic
 */
 	.text
-	.global	GNAME(fun_end_breakpoint_guts)
+	.globl	GNAME(fun_end_breakpoint_guts)
 	.align	align_4byte
 GNAME(fun_end_breakpoint_guts):
 	/* Multiple Value return */
-	jmp	multiple_value_return
+	jc	multiple_value_return
 	/* Single value return: The eventual return will now use the
 	   multiple values return convention but with a return values
 	   count of one. */
 	movl	%esp,%ebx	# Setup ebx - the ofp.
 	subl	$4,%esp		# Allocate one stack slot for the return value
 	movl	$4,%ecx		# Setup ecx for one return value.
-	movl	$NIL,%edi	# default second value
-	movl	$NIL,%esi	# default third value
+	movl	$(NIL),%edi	# default second value
+	movl	$(NIL),%esi	# default third value
 
 multiple_value_return:
 
-	.global GNAME(fun_end_breakpoint_trap)
+	.globl GNAME(fun_end_breakpoint_trap)
 GNAME(fun_end_breakpoint_trap):
-	int3
+	TRAP
 	.byte 	trap_FunEndBreakpoint
 	hlt			# We should never return here.
 
-	.global GNAME(fun_end_breakpoint_end)
+	.globl GNAME(fun_end_breakpoint_end)
 GNAME(fun_end_breakpoint_end):
 
-	.global	GNAME(do_pending_interrupt)
+	.globl	GNAME(do_pending_interrupt)
 	TYPE(GNAME(do_pending_interrupt))
 	.align	align_4byte,0x90
 GNAME(do_pending_interrupt):
-	int3
+	TRAP
 	.byte 	trap_PendingInterrupt
 	ret
 	SIZE(GNAME(do_pending_interrupt))
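
undefined_tramp, fun_end_breakpoint_trap and do_pending_interrupt above all use the same convention: TRAP (int3, or ud2 on Darwin) followed by a .byte naming the trap, with any error details in further bytes. The C handler locates that byte through the saved program counter, which ends up pointing just past the trapping instruction. A small sketch of that read, assuming the pc has already been pulled out of the signal context; the helper name is invented for illustration:

#include <stdint.h>

/* After the trap, *pc is the trap code (trap_PendingInterrupt,
 * trap_FunEndBreakpoint, trap_Error, ...).  For trap_Error the bytes
 * that follow describe the error, as in undefined_tramp above
 * (.byte 2, .byte UNDEFINED_FUN_ERROR). */
static uint8_t
trap_code_at(const uint8_t *pc)
{
    return pc[0];
}
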
@@ -811,6 +859,94 @@ GNAME(alloc_overflow_edi):
 	ret
 	SIZE(GNAME(alloc_overflow_edi))
 
+
+#ifdef LISP_FEATURE_WIN32
+	/* The guts of the exception-handling system don't use
+	 * frame pointers, which manages to throw off backtraces
+	 * rather badly.  So here we grab the (known-good) EBP
+	 * and EIP from the exception context and use them to fake
+	 * up a stack frame which will skip over the system SEH
+	 * code. */
+	.align	align_4byte
+	.globl	GNAME(exception_handler_wrapper)
+	TYPE(GNAME(exception_handler_wrapper))
+GNAME(exception_handler_wrapper):
+	/* Context layout is: */
+	/* 7 dwords before FSA. (0x1c) */
+	/* 8 dwords and 0x50 bytes in the FSA. (0x70/0x8c) */
+	/* 4 dwords segregs. (0x10/0x9c) */
+	/* 6 dwords non-stack GPRs. (0x18/0xb4) */
+	/* EBP (at 0xb4) */
+	/* EIP (at 0xb8) */
+#define CONTEXT_EBP_OFFSET 0xb4
+#define CONTEXT_EIP_OFFSET 0xb8
+	/* some other stuff we don't care about. */
+	pushl	%ebp
+	movl	0x10(%esp), %ebp	/* context */
+	pushl	CONTEXT_EIP_OFFSET(%ebp)
+	pushl	CONTEXT_EBP_OFFSET(%ebp)
+	movl	%esp, %ebp
+	pushl	0x1c(%esp)
+	pushl	0x1c(%esp)
+	pushl	0x1c(%esp)
+	pushl	0x1c(%esp)
+	call	GNAME(handle_exception)
+	lea	8(%ebp), %esp
+	popl	%ebp
+	ret
+	SIZE(GNAME(exception_handler_wrapper))
+#endif
+
+#ifdef LISP_FEATURE_DARWIN
+	.align	align_4byte
+	.globl	GNAME(call_into_lisp_tramp)
+	TYPE(GNAME(call_into_lisp_tramp))
+GNAME(call_into_lisp_tramp):
+	/* 1. build the stack frame from the block that's pointed to by ECX
+	   2. free the block
+	   3. set ECX to 0
+	   4. call the function via call_into_lisp
+	*/
+	pushl	0(%ecx)		/* return address */
+
+	pushl	%ebp
+	movl	%esp, %ebp
+
+	pushl	32(%ecx)	/* eflags */
+	pushl	28(%ecx)	/* EAX */
+	pushl	20(%ecx)	/* ECX */
+	pushl	16(%ecx)	/* EDX */
+	pushl	24(%ecx)	/* EBX */
+	pushl	$0		/* popal is going to ignore esp */
+	pushl	%ebp		/* is this right?? */
+	pushl	12(%ecx)	/* ESI */
+	pushl	8(%ecx)		/* EDI */
+	pushl	$0		/* args for call_into_lisp */
+	pushl	$0
+	pushl	4(%ecx)		/* function to call */
+
+	/* free our save block */
+	pushl	%ecx		/* reserve sufficient space on stack for args */
+	pushl	%ecx
+	andl	$0xfffffff0, %esp	/* align stack */
+	movl	$0x40, 4(%esp)
+	movl	%ecx, (%esp)
+	call	GNAME(os_invalidate)
+
+	/* call call_into_lisp */
+	leal	-48(%ebp), %esp
+	call	GNAME(call_into_lisp)
+
+	/* Clean up our mess */
+	leal	-36(%ebp), %esp
+	popal
+	popfl
+	leave
+	ret
+
+	SIZE(call_into_lisp_tramp)
+#endif
+
 	.align	align_4byte,0x90
 	.globl	GNAME(post_signal_tramp)
 	TYPE(GNAME(post_signal_tramp))
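
call_into_lisp_tramp above unpacks a 0x40-byte save block passed in %ecx and then releases it with os_invalidate(). For reference, the layout it expects, written as a C struct; the field names are inferred from the offsets the trampoline uses, and the code that fills the block in (on the Darwin signal-handling side) is not part of this patch:

#include <stdint.h>

/* Mirrors the offsets consumed by call_into_lisp_tramp above;
 * illustrative only, the runtime does not necessarily declare
 * such a struct. */
struct call_into_lisp_save_block {
    uint32_t return_address;   /*  0(%ecx): where to return afterwards */
    uint32_t function;         /*  4(%ecx): the Lisp function to call  */
    uint32_t edi;              /*  8(%ecx) */
    uint32_t esi;              /* 12(%ecx) */
    uint32_t edx;              /* 16(%ecx) */
    uint32_t ecx;              /* 20(%ecx) */
    uint32_t ebx;              /* 24(%ecx) */
    uint32_t eax;              /* 28(%ecx) */
    uint32_t eflags;           /* 32(%ecx) */
    /* padded out to the 0x40 bytes handed to os_invalidate() */
};
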
@@ -821,42 +957,152 @@ GNAME(post_signal_tramp):
 	addl	$12,%esp	/* clear call_into_lisp args from stack */
 	popal			/* restore registers */
 	popfl
+#ifdef LISP_FEATURE_DARWIN
+	/* skip two padding words */
+	addl	$8,%esp
+#endif
 	leave
 	ret
 	SIZE(GNAME(post_signal_tramp))
 
-#ifdef LISP_FEATURE_WIN32
-
-	/*
-	 * This is part of the funky magic for exception handling on win32.
-	 * see sigtrap_emulator() in win32-os.c for details.
-	 */
-	.global GNAME(sigtrap_trampoline)
-GNAME(sigtrap_trampoline):
-	pushl	%eax
-	pushl	%ebp
-	movl	%esp, %ebp
-	call	GNAME(sigtrap_wrapper)
-	pop	%eax
-	pop	%eax
-	int3
-	.byte	trap_ContextRestore
-	hlt			# We should never return here.
-
-	/*
-	 * This is part of the funky magic for exception handling on win32.
-	 * see handle_exception() in win32-os.c for details.
-	 */
-	.global GNAME(exception_trampoline)
-GNAME(exception_trampoline):
-	pushl	%eax
-	pushl	%ebp
-	movl	%esp, %ebp
-	call	GNAME(handle_win32_exception_wrapper)
-	pop	%eax
-	pop	%eax
-	int3
-	.byte	trap_ContextRestore
-	hlt			# We should never return here.
-#endif
-
-	.end
+	/* fast_bzero implementations and code to detect which implementation
+	 * to use.
+	 */
+
+	.globl GNAME(fast_bzero_pointer)
+	.data
+	.align	align_4byte
+GNAME(fast_bzero_pointer):
+	/* Variable containing a pointer to the bzero function to use.
+	 * Initially points to a basic function.  Point this variable at
+	 * fast_bzero_detect if the OS supports SSE. */
+	.long GNAME(fast_bzero_base)
+
+	.text
+	.align	align_8byte,0x90
+	.globl GNAME(fast_bzero)
+	TYPE(GNAME(fast_bzero))
+GNAME(fast_bzero):
+	/* Indirect function call */
+	jmp *GNAME(fast_bzero_pointer)
+	SIZE(GNAME(fast_bzero))
+
+
+	.text
+	.align	align_8byte,0x90
+	.globl GNAME(fast_bzero_detect)
+	TYPE(GNAME(fast_bzero_detect))
+GNAME(fast_bzero_detect):
+	/* Decide whether to use SSE, MMX or REP version */
+	push %eax /* CPUID uses EAX-EDX */
+	push %ebx
+	push %ecx
+	push %edx
+	mov $1, %eax
+	cpuid
+	test $0x04000000, %edx	/* SSE2 needed for MOVNTDQ */
+	jnz Lsse2
+	/* Originally there was another case here for using the
+	 * MOVNTQ instruction for processors that supported MMX but
+	 * not SSE2. This turned out to be a loss especially on
+	 * Athlons (where this instruction is apparently microcoded
+	 * somewhat slowly). So for simplicity revert to REP STOSL
+	 * for all non-SSE2 processors.
+	 */
+Lbase:
+	movl $(GNAME(fast_bzero_base)), GNAME(fast_bzero_pointer)
+	jmp Lrestore
+Lsse2:
+	movl $(GNAME(fast_bzero_sse)), GNAME(fast_bzero_pointer)
+	jmp Lrestore
+
+Lrestore:
+	pop %edx
+	pop %ecx
+	pop %ebx
+	pop %eax
+	jmp *GNAME(fast_bzero_pointer)
+
+	SIZE(GNAME(fast_bzero_detect))
+
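
fast_bzero above is a single indirect jump through fast_bzero_pointer, which starts out pointing at fast_bzero_base; the detection path repoints it once CPUID has been consulted, so the cost of detection is paid at most once. The same self-patching dispatch written in C for reference; all of these C names are invented, and GCC's __builtin_cpu_supports stands in for the CPUID test above:

#include <stddef.h>
#include <string.h>

typedef void (*bzero_fn)(void *dst, size_t nbytes);

static void bzero_base(void *dst, size_t nbytes)   /* REP STOSL analogue */
{
    memset(dst, 0, nbytes);
}

static void bzero_sse(void *dst, size_t nbytes)    /* MOVNTDQ analogue */
{
    /* the real routine uses non-temporal stores; see fast_bzero_sse
     * below and the intrinsics sketch at the end of this patch */
    memset(dst, 0, nbytes);
}

static void bzero_detect(void *dst, size_t nbytes);

/* Like GNAME(fast_bzero_pointer): starts at the basic routine; the
 * OS/CPU probe may repoint it. */
static bzero_fn bzero_pointer = bzero_base;

void fast_bzero_c(void *dst, size_t nbytes)        /* like GNAME(fast_bzero) */
{
    bzero_pointer(dst, nbytes);                    /* one indirect call */
}

static void bzero_detect(void *dst, size_t nbytes) /* like fast_bzero_detect */
{
    /* CPUID.1:EDX bit 26 is SSE2, which MOVNTDQ needs; stand-in check: */
    bzero_pointer = __builtin_cpu_supports("sse2") ? bzero_sse : bzero_base;
    bzero_pointer(dst, nbytes);                    /* finish this first call */
}
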
+ */ + push %esi /* Save temporary registers */ + push %edi + mov 16(%esp), %esi /* Parameter: amount of bytes to fill */ + mov 12(%esp), %edi /* Parameter: start address */ + shr $6, %esi /* Amount of 64-byte blocks to copy */ + jz Lend_sse /* If none, stop */ + movups %xmm7, -16(%esp) /* Save XMM register */ + xorps %xmm7, %xmm7 /* Zero the XMM register */ + jmp Lloop_sse + .align align_16byte +Lloop_sse: + + /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the + * non-caching double-quadword moving variant, i.e. the memory areas + * we're touching are not fetched into the L1 cache, since we're just + * going to overwrite the memory soon anyway. + */ + movntdq %xmm7, 0(%edi) + movntdq %xmm7, 16(%edi) + movntdq %xmm7, 32(%edi) + movntdq %xmm7, 48(%edi) + + add $64, %edi /* Advance pointer */ + dec %esi /* Decrement 64-byte block count */ + jnz Lloop_sse + movups -16(%esp), %xmm7 /* Restore the XMM register */ + sfence /* Ensure that weakly ordered writes are flushed. */ +Lend_sse: + mov 12(%esp), %esi /* Parameter: start address */ + prefetcht0 0(%esi) /* Prefetch the start of the block into cache, + * since it's likely to be used immediately. */ + pop %edi /* Restore temp registers */ + pop %esi + ret + SIZE(GNAME(fast_bzero_sse)) + + + .text + .align align_8byte,0x90 + .globl GNAME(fast_bzero_base) + TYPE(GNAME(fast_bzero_base)) + +GNAME(fast_bzero_base): + /* A fast routine for zero-filling blocks of memory that are + * guaranteed to start and end at a 4096-byte aligned address. + */ + push %eax /* Save temporary registers */ + push %ecx + push %edi + mov 20(%esp), %ecx /* Parameter: amount of bytes to fill */ + mov 16(%esp), %edi /* Parameter: start address */ + xor %eax, %eax /* Zero EAX */ + shr $2, %ecx /* Amount of 4-byte blocks to copy */ + jz Lend_base + cld /* Set direction of STOSL to increment */ + + rep + stosl /* Store EAX to *EDI, ECX times, incrementing + * EDI by 4 after each store */ + +Lend_base: + pop %edi /* Restore temp registers */ + pop %ecx + pop %eax + ret + SIZE(GNAME(fast_bzero_base)) + + + END() + \ No newline at end of file