X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-assem.S;h=0deae4f381350445a3c2d966d6c2bdd0b14ce69e;hb=5de74c72e5a9522c7fdd3dbb31a39641e9de8877;hp=02504792fb53b9e76dfefa0619627327d835e944;hpb=a530bbe337109d898d5b4a001fc8f1afa3b5dc39;p=sbcl.git diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index 0250479..0deae4f 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -1,7 +1,5 @@ /* * very-low-level utilities for runtime support - * - * $Header$ */ /* @@ -15,21 +13,52 @@ * files for more information. */ -#include "x86-validate.h" - #define LANGUAGE_ASSEMBLY #include "sbcl.h" - -/* Minimize conditionalization for different OS naming schemes. */ -#if defined __linux__ || defined __FreeBSD__ /* (but *not* OpenBSD) */ +#include "validate.h" +#include "genesis/closure.h" +#include "genesis/fdefn.h" +#include "genesis/static-symbols.h" +#include "genesis/symbol.h" +#include "genesis/thread.h" + +/* Minimize conditionalization for different OS naming schemes. + * + * (As of sbcl-0.8.10, this seems no longer to be much of an issue, + * since everyone has converged on ELF. If this generality really + * turns out not to matter, perhaps it's just clutter we could get + * rid of? -- WHN 2004-04-18) + * + * (Except Win32, which is unlikely ever to be ELF, sorry. -- AB 2005-12-08) + */ +#if defined __linux__ || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ || defined __sun #define GNAME(var) var #else #define GNAME(var) _##var #endif -/* Get the right type of alignment. Linux and FreeBSD (but not OpenBSD) - * want alignment in bytes. */ -#if defined(__linux__) || defined(__FreeBSD__) +#if defined __linux__ || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ || defined __sun +#define GNAMEDOLLAR(var) $##var +#else +#define GNAMEDOLLAR(var) $_##var +#endif + +#if defined __linux__ || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ || defined __sun +#define DOLLARLITERAL(var) $##var +#else +#define DOLLARLITERAL(var) $##(var) +#endif + +/* Get the right type of alignment. Linux, FreeBSD and NetBSD (but not OpenBSD) + * want alignment in bytes. + * + * (As in the GNAME() definitions above, as of sbcl-0.8.10, this seems + * no longer to be much of an issue, since everyone has converged on + * the same value. If this generality really turns out not to + * matter any more, perhaps it's just clutter we could get + * rid of? -- WHN 2004-04-18) + */ +#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__sun) || defined(LISP_FEATURE_WIN32) #define align_4byte 4 #define align_8byte 8 #define align_16byte 16 @@ -39,9 +68,39 @@ #define align_16byte 4 #endif +/* + * The assembler used for win32 doesn't like .type or .size directives, + * so we want to conditionally kill them out. So let's wrap them in macros + * that are defined to be no-ops on win32. Hopefully this still works on + * other platforms. + */ +#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN) +#define TYPE(name) .type name,@function +#define SIZE(name) .size name,.-name +#else +#define TYPE(name) +#define SIZE(name) +#endif + +/* + * x86/darwin (as of MacOS X 10.4.5) doesn't reliably file signal + * handlers (SIGTRAP or Mach exception handlers) for 0xCC, wo we have + * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or + * 0F 0B in low-endian notation, that causes SIGILL to fire. 
We check + * for this instruction in the SIGILL handler and if we see it, we + * advance the EIP by two bytes to skip over ud2 instruction and + * call sigtrap_handler. */ +#if defined(LISP_FEATURE_DARWIN) +#define END() +#define TRAP ud2 +#else +#define END() .end +#define TRAP int3 +#endif + .text - .global GNAME(foreign_function_call_active) - + .globl GNAME(foreign_function_call_active) + .globl GNAME(all_threads) /* * A call to call_into_c preserves esi, edi, and ebp. @@ -56,8 +115,8 @@ */ .text .align align_16byte,0x90 - .global GNAME(call_into_c) - .type GNAME(call_into_c),@function + .globl GNAME(call_into_c) + TYPE(GNAME(call_into_c)) GNAME(call_into_c): movl $1,GNAME(foreign_function_call_active) @@ -74,6 +133,13 @@ GNAME(call_into_c): fstp %st(0) fstp %st(0) +#ifdef LISP_FEATURE_WIN32 + cld +#endif + +#ifdef LISP_FEATURE_DARWIN + andl $0xfffffff0,%esp # align stack to 16-byte boundary before calling C +#endif call *%eax # normal callout using Lisp stack movl %eax,%ecx # remember integer return value @@ -121,31 +187,55 @@ Lfp_rtn_value: /* Return. */ jmp *%ebx - .size GNAME(call_into_c), . - GNAME(call_into_c) + SIZE(GNAME(call_into_c)) .text - .global GNAME(call_into_lisp) - .type GNAME(call_into_lisp),@function + .globl GNAME(call_into_lisp_first_time) + TYPE(GNAME(call_into_lisp_first_time)) + +/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when + * the stack changes. We don't worry too much about saving registers + * here, because we never expect to return from the initial call to lisp + * anyway */ + + .align align_16byte,0x90 +GNAME(call_into_lisp_first_time): + pushl %ebp # Save old frame pointer. + movl %esp,%ebp # Establish new frame. +#ifndef LISP_FEATURE_WIN32 + movl %esp,ALIEN_STACK + SYMBOL_VALUE_OFFSET + movl GNAME(all_threads),%eax + movl THREAD_CONTROL_STACK_START_OFFSET(%eax) ,%esp + /* don't think too hard about what happens if we get interrupted + * here */ + addl DOLLARLITERAL(THREAD_CONTROL_STACK_SIZE),%esp +#else +/* Win32 -really- doesn't like you switching stacks out from under it. */ + movl GNAME(all_threads),%eax +#endif + jmp Lstack + + .text + .globl GNAME(call_into_lisp) + TYPE(GNAME(call_into_lisp)) /* The C conventions require that ebx, esi, edi, and ebp be preserved * across function calls. */ -/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when - * the stack changes. */ .align align_16byte,0x90 GNAME(call_into_lisp): pushl %ebp # Save old frame pointer. movl %esp,%ebp # Establish new frame. - +Lstack: /* Save the NPX state */ fwait # Catch any pending NPX exceptions. subl $108,%esp # Make room for the NPX state. - fnsave (%esp) # resets NPX + fnsave (%esp) # save and reset NPX movl (%esp),%eax # Load NPX control word. - andl $0xfffff3ff,%eax # Set rounding mode to nearest. - orl $0x00000300,%eax # Set precision to 64 bits. + andl $0xfffff2ff,%eax # Set rounding mode to nearest. + orl $0x00000200,%eax # Set precision to 64 bits. (53-bit mantissa) pushl %eax fldcw (%esp) # Recover modes. popl %eax @@ -176,15 +266,6 @@ GNAME(call_into_lisp): movl %eax, GNAME(foreign_function_call_active) movl %esp,%ebx # remember current stack - cmpl $CONTROL_STACK_START,%esp - jbe ChangeToLispStack - cmpl $CONTROL_STACK_END,%esp - jbe OnLispStack -ChangeToLispStack: - /* Setup the *alien-stack* pointer */ - movl %esp,ALIEN_STACK + SYMBOL_VALUE_OFFSET - movl $CONTROL_STACK_END,%esp # new stack -OnLispStack: pushl %ebx # Save entry stack on (maybe) new stack. /* Establish Lisp args. 
*/ @@ -210,12 +291,14 @@ Ldone: sub $8,%esp # Ensure 3 slots are allocated, one above. mov %ebx,%ebp # Switch to new frame. - /* Indirect the closure. */ - call *CLOSURE_FUNCTION_OFFSET(%eax) + call *CLOSURE_FUN_OFFSET(%eax) - /* Multi-value return; blow off any extra values. */ + /* If the function returned multiple values, it will return to + this point. Lose them */ + jnc LsingleValue mov %ebx, %esp - /* single value return */ +LsingleValue: + /* A singled value function returns here */ /* Restore the stack, in case there was a stack change. */ popl %esp # c-sp @@ -232,183 +315,102 @@ Ldone: popl %ebp # c-sp movl %edx,%eax # c-val ret - .size GNAME(call_into_lisp), . - GNAME(call_into_lisp) + SIZE(GNAME(call_into_lisp)) /* support for saving and restoring the NPX state from C */ .text - .global GNAME(fpu_save) - .type GNAME(fpu_save),@function + .globl GNAME(fpu_save) + TYPE(GNAME(fpu_save)) .align 2,0x90 GNAME(fpu_save): movl 4(%esp),%eax fnsave (%eax) # Save the NPX state. (resets NPX) ret - .size GNAME(fpu_save),.-GNAME(fpu_save) + SIZE(GNAME(fpu_save)) - .global GNAME(fpu_restore) - .type GNAME(fpu_restore),@function + .globl GNAME(fpu_restore) + TYPE(GNAME(fpu_restore)) .align 2,0x90 GNAME(fpu_restore): movl 4(%esp),%eax frstor (%eax) # Restore the NPX state. ret - .size GNAME(fpu_restore),.-GNAME(fpu_restore) + SIZE(GNAME(fpu_restore)) /* * the undefined-function trampoline */ .text .align align_4byte,0x90 - .global GNAME(undefined_tramp) - .type GNAME(undefined_tramp),@function + .globl GNAME(undefined_tramp) + TYPE(GNAME(undefined_tramp)) + .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG GNAME(undefined_tramp): - int3 + TRAP .byte trap_Error .byte 2 -#ifdef type_LongFloat - .byte 24 -#else - .byte 23 -#endif + .byte UNDEFINED_FUN_ERROR .byte sc_DescriptorReg # eax in the Descriptor-reg SC ret - .size GNAME(undefined_tramp), .-GNAME(undefined_tramp) + SIZE(GNAME(undefined_tramp)) /* * the closure trampoline */ .text .align align_4byte,0x90 - .global GNAME(closure_tramp) - .type GNAME(closure_tramp),@function + .globl GNAME(closure_tramp) + TYPE(GNAME(closure_tramp)) + .byte 0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG GNAME(closure_tramp): - movl FDEFN_FUNCTION_OFFSET(%eax),%eax + movl FDEFN_FUN_OFFSET(%eax),%eax /* FIXME: The '*' after "jmp" in the next line is from PVE's * patch posted to the CMU CL mailing list Oct 6, 1999. It looks * reasonable, and it certainly seems as though if CMU CL needs it, * SBCL needs it too, but I haven't actually verified that it's * right. It would be good to find a way to force the flow of * control through here to test it. */ - jmp *CLOSURE_FUNCTION_OFFSET(%eax) - .size GNAME(closure_tramp), .-GNAME(closure_tramp) + jmp *CLOSURE_FUN_OFFSET(%eax) + SIZE(GNAME(closure_tramp)) /* - * function-end breakpoint magic + * fun-end breakpoint magic */ .text - .global GNAME(function_end_breakpoint_guts) + .globl GNAME(fun_end_breakpoint_guts) .align align_4byte -GNAME(function_end_breakpoint_guts): +GNAME(fun_end_breakpoint_guts): /* Multiple Value return */ - jmp multiple_value_return + jc multiple_value_return /* Single value return: The eventual return will now use the multiple values return convention but with a return values count of one. */ movl %esp,%ebx # Setup ebx - the ofp. subl $4,%esp # Allocate one stack slot for the return value movl $4,%ecx # Setup ecx for one return value. 
- movl $NIL,%edi # default second value - movl $NIL,%esi # default third value + movl DOLLARLITERAL(NIL),%edi # default second value + movl DOLLARLITERAL(NIL),%esi # default third value multiple_value_return: - .global GNAME(function_end_breakpoint_trap) -GNAME(function_end_breakpoint_trap): - int3 - .byte trap_FunctionEndBreakpoint + .globl GNAME(fun_end_breakpoint_trap) +GNAME(fun_end_breakpoint_trap): + TRAP + .byte trap_FunEndBreakpoint hlt # We should never return here. - .global GNAME(function_end_breakpoint_end) -GNAME(function_end_breakpoint_end): + .globl GNAME(fun_end_breakpoint_end) +GNAME(fun_end_breakpoint_end): - .global GNAME(do_pending_interrupt) - .type GNAME(do_pending_interrupt),@function + .globl GNAME(do_pending_interrupt) + TYPE(GNAME(do_pending_interrupt)) .align align_4byte,0x90 GNAME(do_pending_interrupt): - int3 + TRAP .byte trap_PendingInterrupt ret - .size GNAME(do_pending_interrupt),.-GNAME(do_pending_interrupt) - -#ifdef WANT_CGC -/* This is a copy function which is optimized for the Pentium and - * works OK on 486 as well. This assumes (does not check) that the - * input byte count is a multiple of 8 bytes (one Lisp object). - * This code takes advantage of pairing in the Pentium as well - * as the 128-bit cache line. - */ - .global GNAME(fastcopy16) - .type GNAME(fastcopy16),@function - .align align_4byte,0x90 -GNAME(fastcopy16): - pushl %ebp - movl %esp,%ebp - movl 8(%ebp), %edx # dst - movl 12(%ebp),%eax # src - movl 16(%ebp),%ecx # bytes - pushl %ebx - pushl %esi - pushl %edi - movl %edx,%edi - movl %eax,%esi - sarl $3,%ecx # number 8-byte units - testl $1,%ecx # odd? - jz Lquad - movl (%esi),%eax - movl 4(%esi),%ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - leal 8(%esi),%esi - leal 8(%edi),%edi -Lquad: sarl $1,%ecx # count 16-byte units - jz Lend - movl %ecx,%ebp # use ebp for loop counter - .align align_16byte,0x90 -Ltop: - movl (%edi),%eax # prefetch! MAJOR Pentium win.. - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edx - movl %eax, (%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx,12(%edi) - leal 16(%esi),%esi - leal 16(%edi),%edi - decl %ebp - jnz Ltop # non-prefixed jump saves cycles -Lend: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret - .size GNAME(fastcopy16),.-GNAME(fastcopy16) -#endif - -#ifdef GENCGC -/* This is a fast bzero using the FPU. The first argument is the start - * address which needs to be aligned on an 8 byte boundary, the second - * argument is the number of bytes, which must be a nonzero multiple - * of 8 bytes. */ - .text - .globl GNAME(i586_bzero) - .type GNAME(i586_bzero),@function - .align align_4byte,0x90 -GNAME(i586_bzero): - movl 4(%esp),%edx # Load the start address. - movl 8(%esp),%eax # Load the number of bytes. - fldz -l1: fstl 0(%edx) - addl $8,%edx - subl $8,%eax - jnz l1 - fstp %st(0) - ret - .size GNAME(i586_bzero),.-GNAME(i586_bzero) -#endif + SIZE(GNAME(do_pending_interrupt)) /* @@ -423,7 +425,7 @@ l1: fstl 0(%edx) */ .globl GNAME(alloc_to_eax) - .type GNAME(alloc_to_eax),@function + TYPE(GNAME(alloc_to_eax)) .align align_4byte,0x90 GNAME(alloc_to_eax): pushl %ecx # Save ecx and edx as C could destroy them. @@ -434,10 +436,10 @@ GNAME(alloc_to_eax): popl %edx # Restore ecx and edx. 
popl %ecx ret - .size GNAME(alloc_to_eax),.-GNAME(alloc_to_eax) + SIZE(GNAME(alloc_to_eax)) .globl GNAME(alloc_8_to_eax) - .type GNAME(alloc_8_to_eax),@function + TYPE(GNAME(alloc_8_to_eax)) .align align_4byte,0x90 GNAME(alloc_8_to_eax): pushl %ecx # Save ecx and edx as C could destroy them. @@ -448,14 +450,14 @@ GNAME(alloc_8_to_eax): popl %edx # Restore ecx and edx. popl %ecx ret - .size GNAME(alloc_8_to_eax),.-GNAME(alloc_8_to_eax) + SIZE(GNAME(alloc_8_to_eax)) .globl GNAME(alloc_8_to_eax) - .type GNAME(alloc_8_to_eax),@function + TYPE(GNAME(alloc_8_to_eax)) .align align_4byte,0x90 .globl GNAME(alloc_16_to_eax) - .type GNAME(alloc_16_to_eax),@function + TYPE(GNAME(alloc_16_to_eax)) .align align_4byte,0x90 GNAME(alloc_16_to_eax): pushl %ecx # Save ecx and edx as C could destroy them. @@ -466,10 +468,10 @@ GNAME(alloc_16_to_eax): popl %edx # Restore ecx and edx. popl %ecx ret - .size GNAME(alloc_16_to_eax),.-GNAME(alloc_16_to_eax) + SIZE(GNAME(alloc_16_to_eax)) .globl GNAME(alloc_to_ecx) - .type GNAME(alloc_to_ecx),@function + TYPE(GNAME(alloc_to_ecx)) .align align_4byte,0x90 GNAME(alloc_to_ecx): pushl %eax # Save eax and edx as C could destroy them. @@ -481,10 +483,10 @@ GNAME(alloc_to_ecx): popl %edx # Restore eax and edx. popl %eax ret - .size GNAME(alloc_to_ecx),.-GNAME(alloc_to_ecx) + SIZE(GNAME(alloc_to_ecx)) .globl GNAME(alloc_8_to_ecx) - .type GNAME(alloc_8_to_ecx),@function + TYPE(GNAME(alloc_8_to_ecx)) .align align_4byte,0x90 GNAME(alloc_8_to_ecx): pushl %eax # Save eax and edx as C could destroy them. @@ -496,10 +498,10 @@ GNAME(alloc_8_to_ecx): popl %edx # Restore eax and edx. popl %eax ret - .size GNAME(alloc_8_to_ecx),.-GNAME(alloc_8_to_ecx) + SIZE(GNAME(alloc_8_to_ecx)) .globl GNAME(alloc_16_to_ecx) - .type GNAME(alloc_16_to_ecx),@function + TYPE(GNAME(alloc_16_to_ecx)) .align align_4byte,0x90 GNAME(alloc_16_to_ecx): pushl %eax # Save eax and edx as C could destroy them. @@ -511,11 +513,11 @@ GNAME(alloc_16_to_ecx): popl %edx # Restore eax and edx. popl %eax ret - .size GNAME(alloc_16_to_ecx),.-GNAME(alloc_16_to_ecx) + SIZE(GNAME(alloc_16_to_ecx)) .globl GNAME(alloc_to_edx) - .type GNAME(alloc_to_edx),@function + TYPE(GNAME(alloc_to_edx)) .align align_4byte,0x90 GNAME(alloc_to_edx): pushl %eax # Save eax and ecx as C could destroy them. @@ -527,10 +529,10 @@ GNAME(alloc_to_edx): popl %ecx # Restore eax and ecx. popl %eax ret - .size GNAME(alloc_to_edx),.-GNAME(alloc_to_edx) + SIZE(GNAME(alloc_to_edx)) .globl GNAME(alloc_8_to_edx) - .type GNAME(alloc_8_to_edx),@function + TYPE(GNAME(alloc_8_to_edx)) .align align_4byte,0x90 GNAME(alloc_8_to_edx): pushl %eax # Save eax and ecx as C could destroy them. @@ -542,10 +544,10 @@ GNAME(alloc_8_to_edx): popl %ecx # Restore eax and ecx. popl %eax ret - .size GNAME(alloc_8_to_edx),.-GNAME(alloc_8_to_edx) + SIZE(GNAME(alloc_8_to_edx)) .globl GNAME(alloc_16_to_edx) - .type GNAME(alloc_16_to_edx),@function + TYPE(GNAME(alloc_16_to_edx)) .align align_4byte,0x90 GNAME(alloc_16_to_edx): pushl %eax # Save eax and ecx as C could destroy them. @@ -557,12 +559,12 @@ GNAME(alloc_16_to_edx): popl %ecx # Restore eax and ecx. popl %eax ret - .size GNAME(alloc_16_to_edx),.-GNAME(alloc_16_to_edx) + SIZE(GNAME(alloc_16_to_edx)) .globl GNAME(alloc_to_ebx) - .type GNAME(alloc_to_ebx),@function + TYPE(GNAME(alloc_to_ebx)) .align align_4byte,0x90 GNAME(alloc_to_ebx): pushl %eax # Save eax, ecx, and edx as C could destroy them. 
@@ -576,10 +578,10 @@ GNAME(alloc_to_ebx): popl %ecx popl %eax ret - .size GNAME(alloc_to_ebx),.-GNAME(alloc_to_ebx) + SIZE(GNAME(alloc_to_ebx)) .globl GNAME(alloc_8_to_ebx) - .type GNAME(alloc_8_to_ebx),@function + TYPE(GNAME(alloc_8_to_ebx)) .align align_4byte,0x90 GNAME(alloc_8_to_ebx): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -593,10 +595,10 @@ GNAME(alloc_8_to_ebx): popl %ecx popl %eax ret - .size GNAME(alloc_8_to_ebx),.-GNAME(alloc_8_to_ebx) + SIZE(GNAME(alloc_8_to_ebx)) .globl GNAME(alloc_16_to_ebx) - .type GNAME(alloc_16_to_ebx),@function + TYPE(GNAME(alloc_16_to_ebx)) .align align_4byte,0x90 GNAME(alloc_16_to_ebx): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -610,12 +612,12 @@ GNAME(alloc_16_to_ebx): popl %ecx popl %eax ret - .size GNAME(alloc_16_to_ebx),.-GNAME(alloc_16_to_ebx) + SIZE(GNAME(alloc_16_to_ebx)) .globl GNAME(alloc_to_esi) - .type GNAME(alloc_to_esi),@function + TYPE(GNAME(alloc_to_esi)) .align align_4byte,0x90 GNAME(alloc_to_esi): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -629,10 +631,10 @@ GNAME(alloc_to_esi): popl %ecx popl %eax ret - .size GNAME(alloc_to_esi),.-GNAME(alloc_to_esi) + SIZE(GNAME(alloc_to_esi)) .globl GNAME(alloc_8_to_esi) - .type GNAME(alloc_8_to_esi),@function + TYPE(GNAME(alloc_8_to_esi)) .align align_4byte,0x90 GNAME(alloc_8_to_esi): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -646,10 +648,10 @@ GNAME(alloc_8_to_esi): popl %ecx popl %eax ret - .size GNAME(alloc_8_to_esi),.-GNAME(alloc_8_to_esi) + SIZE(GNAME(alloc_8_to_esi)) .globl GNAME(alloc_16_to_esi) - .type GNAME(alloc_16_to_esi),@function + TYPE(GNAME(alloc_16_to_esi)) .align align_4byte,0x90 GNAME(alloc_16_to_esi): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -663,11 +665,11 @@ GNAME(alloc_16_to_esi): popl %ecx popl %eax ret - .size GNAME(alloc_16_to_esi),.-GNAME(alloc_16_to_esi) + SIZE(GNAME(alloc_16_to_esi)) .globl GNAME(alloc_to_edi) - .type GNAME(alloc_to_edi),@function + TYPE(GNAME(alloc_to_edi)) .align align_4byte,0x90 GNAME(alloc_to_edi): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -681,10 +683,10 @@ GNAME(alloc_to_edi): popl %ecx popl %eax ret - .size GNAME(alloc_to_edi),.-GNAME(alloc_to_edi) + SIZE(GNAME(alloc_to_edi)) .globl GNAME(alloc_8_to_edi) - .type GNAME(alloc_8_to_edi),@function + TYPE(GNAME(alloc_8_to_edi)) .align align_4byte,0x90 GNAME(alloc_8_to_edi): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -698,10 +700,10 @@ GNAME(alloc_8_to_edi): popl %ecx popl %eax ret - .size GNAME(alloc_8_to_edi),.-GNAME(alloc_8_to_edi) + SIZE(GNAME(alloc_8_to_edi)) .globl GNAME(alloc_16_to_edi) - .type GNAME(alloc_16_to_edi),@function + TYPE(GNAME(alloc_16_to_edi)) .align align_4byte,0x90 GNAME(alloc_16_to_edi): pushl %eax # Save eax, ecx, and edx as C could destroy them. @@ -715,142 +717,318 @@ GNAME(alloc_16_to_edi): popl %ecx popl %eax ret - .size GNAME(alloc_16_to_edi),.-GNAME(alloc_16_to_edi) - - - -#ifdef GENCGC + SIZE(GNAME(alloc_16_to_edi)) -/* These routines are called from Lisp when an inline allocation - * overflows. Every register except the result needs to be preserved. - * We depend on C to preserve ebx, esi, edi, and ebp. - * But where necessary must save eax, ecx, edx. */ + +/* Called from lisp when an inline allocation overflows. + Every register except the result needs to be preserved. + We depend on C to preserve ebx, esi, edi, and ebp. + But where necessary must save eax, ecx, edx. 
*/ +#ifdef LISP_FEATURE_SB_THREAD +#define START_REGION %fs:THREAD_ALLOC_REGION_OFFSET +#else +#define START_REGION GNAME(boxed_region) +#endif + /* This routine handles an overflow with eax=crfp+size. So the - * size=eax-crfp. */ - .align align_4byte - .globl GNAME(alloc_overflow_eax) - .type GNAME(alloc_overflow_eax),@function + size=eax-crfp. */ + .align align_4byte + .globl GNAME(alloc_overflow_eax) + TYPE(GNAME(alloc_overflow_eax)) GNAME(alloc_overflow_eax): - pushl %ecx # Save ecx - pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl GNAME(current_region_free_pointer),%eax - pushl %eax # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - popl %edx # Restore edx. - popl %ecx # Restore ecx. - addl $6,(%esp) # Adjust the return address to skip the next inst. - ret - .size GNAME(alloc_overflow_eax),.-GNAME(alloc_overflow_eax) - -/* This routine handles an overflow with ecx=crfp+size. So the - * size=ecx-crfp. */ - .align align_4byte - .globl GNAME(alloc_overflow_ecx) - .type GNAME(alloc_overflow_ecx),@function + pushl %ecx # Save ecx + pushl %edx # Save edx + /* Calculate the size for the allocation. */ + subl START_REGION,%eax + pushl %eax # Push the size + call GNAME(alloc) + addl $4,%esp # pop the size arg. + popl %edx # Restore edx. + popl %ecx # Restore ecx. + ret + SIZE(GNAME(alloc_overflow_eax)) + + .align align_4byte + .globl GNAME(alloc_overflow_ecx) + TYPE(GNAME(alloc_overflow_ecx)) GNAME(alloc_overflow_ecx): - pushl %eax # Save eax - pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl GNAME(current_region_free_pointer),%ecx - pushl %ecx # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%ecx # setup the destination. - popl %edx # Restore edx. - popl %eax # Restore eax. - addl $6,(%esp) # Adjust the return address to skip the next inst. - ret - .size GNAME(alloc_overflow_ecx),.-GNAME(alloc_overflow_ecx) - -/* This routine handles an overflow with edx=crfp+size. So the - * size=edx-crfp. */ - .align align_4byte - .globl GNAME(alloc_overflow_edx) - .type GNAME(alloc_overflow_edx),@function + pushl %eax # Save eax + pushl %edx # Save edx + /* Calculate the size for the allocation. */ + subl START_REGION,%ecx + pushl %ecx # Push the size + call GNAME(alloc) + addl $4,%esp # pop the size arg. + movl %eax,%ecx # setup the destination. + popl %edx # Restore edx. + popl %eax # Restore eax. + ret + SIZE(GNAME(alloc_overflow_ecx)) + + .align align_4byte + .globl GNAME(alloc_overflow_edx) + TYPE(GNAME(alloc_overflow_edx)) GNAME(alloc_overflow_edx): - pushl %eax # Save eax - pushl %ecx # Save ecx - /* Calculate the size for the allocation. */ - subl GNAME(current_region_free_pointer),%edx - pushl %edx # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%edx # setup the destination. - popl %ecx # Restore ecx. - popl %eax # Restore eax. - addl $6,(%esp) # Adjust the return address to skip the next inst. - ret - .size GNAME(alloc_overflow_edx),.-GNAME(alloc_overflow_edx) + pushl %eax # Save eax + pushl %ecx # Save ecx + /* Calculate the size for the allocation. */ + subl START_REGION,%edx + pushl %edx # Push the size + call GNAME(alloc) + addl $4,%esp # pop the size arg. + movl %eax,%edx # setup the destination. + popl %ecx # Restore ecx. + popl %eax # Restore eax. + ret + SIZE(GNAME(alloc_overflow_edx)) /* This routine handles an overflow with ebx=crfp+size. So the - * size=ebx-crfp. 
*/ - .align align_4byte - .globl GNAME(alloc_overflow_ebx) - .type GNAME(alloc_overflow_ebx),@function + size=ebx-crfp. */ + .align align_4byte + .globl GNAME(alloc_overflow_ebx) + TYPE(GNAME(alloc_overflow_ebx)) GNAME(alloc_overflow_ebx): - pushl %eax # Save eax - pushl %ecx # Save ecx - pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl GNAME(current_region_free_pointer),%ebx - pushl %ebx # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%ebx # setup the destination. - popl %edx # Restore edx. - popl %ecx # Restore ecx. - popl %eax # Restore eax. - addl $6,(%esp) # Adjust the return address to skip the next inst. - ret - .size GNAME(alloc_overflow_ebx),.-GNAME(alloc_overflow_ebx) + pushl %eax # Save eax + pushl %ecx # Save ecx + pushl %edx # Save edx + /* Calculate the size for the allocation. */ + subl START_REGION,%ebx + pushl %ebx # Push the size + call GNAME(alloc) + addl $4,%esp # pop the size arg. + movl %eax,%ebx # setup the destination. + popl %edx # Restore edx. + popl %ecx # Restore ecx. + popl %eax # Restore eax. + ret + SIZE(GNAME(alloc_overflow_ebx)) /* This routine handles an overflow with esi=crfp+size. So the - * size=esi-crfp. */ - .align align_4byte - .globl GNAME(alloc_overflow_esi) - .type GNAME(alloc_overflow_esi),@function + size=esi-crfp. */ + .align align_4byte + .globl GNAME(alloc_overflow_esi) + TYPE(GNAME(alloc_overflow_esi)) GNAME(alloc_overflow_esi): - pushl %eax # Save eax - pushl %ecx # Save ecx - pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl GNAME(current_region_free_pointer),%esi - pushl %esi # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%esi # setup the destination. - popl %edx # Restore edx. - popl %ecx # Restore ecx. - popl %eax # Restore eax. - addl $6,(%esp) # Adjust the return address to skip the next inst. - ret - .size GNAME(alloc_overflow_esi),.-GNAME(alloc_overflow_esi) - -/* This routine handles an overflow with edi=crfp+size. So the - * size=edi-crfp. */ - .align align_4byte - .globl GNAME(alloc_overflow_edi) - .type GNAME(alloc_overflow_edi),@function + pushl %eax # Save eax + pushl %ecx # Save ecx + pushl %edx # Save edx + /* Calculate the size for the allocation. */ + subl START_REGION,%esi + pushl %esi # Push the size + call GNAME(alloc) + addl $4,%esp # pop the size arg. + movl %eax,%esi # setup the destination. + popl %edx # Restore edx. + popl %ecx # Restore ecx. + popl %eax # Restore eax. + ret + SIZE(GNAME(alloc_overflow_esi)) + + .align align_4byte + .globl GNAME(alloc_overflow_edi) + TYPE(GNAME(alloc_overflow_edi)) GNAME(alloc_overflow_edi): - pushl %eax # Save eax - pushl %ecx # Save ecx - pushl %edx # Save edx - /* Calculate the size for the allocation. */ - subl GNAME(current_region_free_pointer),%edi - pushl %edi # Push the size - call GNAME(alloc) - addl $4,%esp # pop the size arg. - movl %eax,%edi # setup the destination. - popl %edx # Restore edx. - popl %ecx # Restore ecx. - popl %eax # Restore eax. - addl $6,(%esp) # Adjust the return address to skip the next inst. - ret - .size GNAME(alloc_overflow_edi),.-GNAME(alloc_overflow_edi) + pushl %eax # Save eax + pushl %ecx # Save ecx + pushl %edx # Save edx + /* Calculate the size for the allocation. */ + subl START_REGION,%edi + pushl %edi # Push the size + call GNAME(alloc) + addl $4,%esp # pop the size arg. + movl %eax,%edi # setup the destination. + popl %edx # Restore edx. + popl %ecx # Restore ecx. + popl %eax # Restore eax. 
+ ret + SIZE(GNAME(alloc_overflow_edi)) + .align align_4byte,0x90 + .globl GNAME(post_signal_tramp) + TYPE(GNAME(post_signal_tramp)) +GNAME(post_signal_tramp): + /* this is notionally the second half of a function whose first half + * doesn't exist. This is where call_into_lisp returns when called + * using return_to_lisp_function */ + addl $12,%esp /* clear call_into_lisp args from stack */ + popal /* restore registers */ + popfl + leave + ret + SIZE(GNAME(post_signal_tramp)) + +#ifdef LISP_FEATURE_WIN32 + /* + * This is part of the funky magic for exception handling on win32. + * see sigtrap_emulator() in win32-os.c for details. + */ + .globl GNAME(sigtrap_trampoline) +GNAME(sigtrap_trampoline): + pushl %eax + pushl %ebp + movl %esp, %ebp + call GNAME(sigtrap_wrapper) + pop %eax + pop %eax + TRAP + .byte trap_ContextRestore + hlt # We should never return here. + + /* + * This is part of the funky magic for exception handling on win32. + * see handle_exception() in win32-os.c for details. + */ + .globl GNAME(exception_trampoline) +GNAME(exception_trampoline): + pushl %eax + pushl %ebp + movl %esp, %ebp + call GNAME(handle_win32_exception_wrapper) + pop %eax + pop %eax + TRAP + .byte trap_ContextRestore + hlt # We should never return here. #endif - .end + /* fast_bzero implementations and code to detect which implementation + * to use. + */ + + .globl GNAME(fast_bzero_pointer) + .data + .align align_4byte +GNAME(fast_bzero_pointer): + /* Variable containing a pointer to the bzero function to use. + * Initially points to a basic function. Change this variable + * to fast_bzero_detect if OS supports SSE. */ + .long GNAME(fast_bzero_base) + + .text + .align align_8byte,0x90 + .globl GNAME(fast_bzero) + TYPE(GNAME(fast_bzero)) +GNAME(fast_bzero): + /* Indirect function call */ + jmp *GNAME(fast_bzero_pointer) + SIZE(GNAME(fast_bzero)) + + + .text + .align align_8byte,0x90 + .globl GNAME(fast_bzero_detect) + TYPE(GNAME(fast_bzero_detect)) +GNAME(fast_bzero_detect): + /* Decide whether to use SSE, MMX or REP version */ + push %eax /* CPUID uses EAX-EDX */ + push %ebx + push %ecx + push %edx + mov $1, %eax + cpuid + test $0x04000000, %edx /* SSE2 needed for MOVNTDQ */ + jnz Lsse2 + /* Originally there was another case here for using the + * MOVNTQ instruction for processors that supported MMX but + * not SSE2. This turned out to be a loss especially on + * Athlons (where this instruction is apparently microcoded + * somewhat slowly). So for simplicity revert to REP STOSL + * for all non-SSE2 processors. + */ +Lbase: + movl GNAMEDOLLAR(fast_bzero_base), GNAME(fast_bzero_pointer) + jmp Lrestore +Lsse2: + movl GNAMEDOLLAR(fast_bzero_sse), GNAME(fast_bzero_pointer) + jmp Lrestore + +Lrestore: + pop %edx + pop %ecx + pop %ebx + pop %eax + jmp *GNAME(fast_bzero_pointer) + + SIZE(GNAME(fast_bzero_detect)) + + + .text + .align align_8byte,0x90 + .globl GNAME(fast_bzero_sse) + TYPE(GNAME(fast_bzero_sse)) + +GNAME(fast_bzero_sse): + /* A fast routine for zero-filling blocks of memory that are + * guaranteed to start and end at a 4096-byte aligned address. 
+ */ + push %esi /* Save temporary registers */ + push %edi + mov 16(%esp), %esi /* Parameter: amount of bytes to fill */ + mov 12(%esp), %edi /* Parameter: start address */ + shr $6, %esi /* Amount of 64-byte blocks to copy */ + jz Lend_sse /* If none, stop */ + movups %xmm7, -16(%esp) /* Save XMM register */ + xorps %xmm7, %xmm7 /* Zero the XMM register */ + jmp Lloop_sse + .align align_16byte +Lloop_sse: + + /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the + * non-caching double-quadword moving variant, i.e. the memory areas + * we're touching are not fetched into the L1 cache, since we're just + * going to overwrite the memory soon anyway. + */ + movntdq %xmm7, 0(%edi) + movntdq %xmm7, 16(%edi) + movntdq %xmm7, 32(%edi) + movntdq %xmm7, 48(%edi) + + add $64, %edi /* Advance pointer */ + dec %esi /* Decrement 64-byte block count */ + jnz Lloop_sse + movups -16(%esp), %xmm7 /* Restore the XMM register */ + sfence /* Ensure that weakly ordered writes are flushed. */ +Lend_sse: + mov 12(%esp), %esi /* Parameter: start address */ + prefetcht0 0(%esi) /* Prefetch the start of the block into cache, + * since it's likely to be used immediately. */ + pop %edi /* Restore temp registers */ + pop %esi + ret + SIZE(GNAME(fast_bzero_sse)) + + + .text + .align align_8byte,0x90 + .globl GNAME(fast_bzero_base) + TYPE(GNAME(fast_bzero_base)) + +GNAME(fast_bzero_base): + /* A fast routine for zero-filling blocks of memory that are + * guaranteed to start and end at a 4096-byte aligned address. + */ + push %eax /* Save temporary registers */ + push %ecx + push %edi + mov 20(%esp), %ecx /* Parameter: amount of bytes to fill */ + mov 16(%esp), %edi /* Parameter: start address */ + xor %eax, %eax /* Zero EAX */ + shr $2, %ecx /* Amount of 4-byte blocks to copy */ + jz Lend_base + cld /* Set direction of STOSL to increment */ + + rep + stosl /* Store EAX to *EDI, ECX times, incrementing + * EDI by 4 after each store */ + +Lend_base: + pop %edi /* Restore temp registers */ + pop %ecx + pop %eax + ret + SIZE(GNAME(fast_bzero_base)) + + + END() + \ No newline at end of file
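

The Darwin-specific TRAP macro introduced near the top of this diff depends on the SIGILL handler recognising the ud2 opcode (0F 0B) and rerouting it to the ordinary sigtrap machinery, as the comment there describes. Below is a minimal C sketch of that handler logic, for illustration only: it assumes a Linux-style ucontext layout (REG_EIP) rather than Darwin's real structures, and sigtrap_handler is a hypothetical stand-in for whatever the runtime's actual SIGTRAP entry point is.

#define _GNU_SOURCE             /* for REG_EIP in <sys/ucontext.h> on glibc */
#include <signal.h>
#include <stdint.h>
#include <ucontext.h>

/* Assumed to be the runtime's normal SIGTRAP entry point (hypothetical name). */
extern void sigtrap_handler(int sig, siginfo_t *info, void *void_context);

static void sigill_handler(int sig, siginfo_t *info, void *void_context)
{
    ucontext_t *context = void_context;
    uint8_t *pc = (uint8_t *) (uintptr_t) context->uc_mcontext.gregs[REG_EIP];

    if (pc[0] == 0x0F && pc[1] == 0x0B) {             /* the ud2 opcode      */
        context->uc_mcontext.gregs[REG_EIP] += 2;     /* skip over ud2       */
        sigtrap_handler(sig, info, void_context);     /* handle it as a trap */
        return;
    }
    /* Any other SIGILL really is an illegal instruction. */
}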
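
The call_into_lisp prologue rewrites the saved x87 control word with "andl $0xfffff2ff" / "orl $0x00000200"; bits 8-9 are the precision-control field and bits 10-11 the rounding-control field, so the net effect is 53-bit (double) precision with round-to-nearest. The self-contained C check below just verifies that bit arithmetic; the 0x037f starting value is only the x87 power-on default used as an example, not anything taken from the runtime.

#include <stdio.h>

int main(void)
{
    unsigned int cw = 0x037f;   /* example: the x87 power-on default control word  */
    cw &= 0xfffff2ff;           /* clear PC bit 8 and RC bits 10-11, keep PC bit 9 */
    cw |= 0x00000200;           /* PC = 10b: 53-bit mantissa (double) precision    */
    printf("PC=%u RC=%u\n", (cw >> 8) & 3u, (cw >> 10) & 3u);   /* prints PC=2 RC=0 */
    return 0;
}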
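
The fast_bzero code added at the end of this diff dispatches through a writable function pointer that initially targets a CPUID-probing stub; the first call patches the pointer to the best routine for the CPU (SSE2/MOVNTDQ if EDX bit 26 is set, otherwise REP STOSL). The C sketch below mirrors that dispatch shape only: the symbol names echo the assembly, but both bodies are memset stand-ins rather than the runtime's actual implementations.

#include <stddef.h>
#include <string.h>

/* Portable fallback, analogous to the REP STOSL routine (fast_bzero_base). */
static void fast_bzero_base(void *ptr, size_t nbytes) { memset(ptr, 0, nbytes); }

/* Stand-in for the MOVNTDQ routine (fast_bzero_sse); the real one uses
 * non-temporal 16-byte stores on 4096-byte-aligned blocks. */
static void fast_bzero_sse(void *ptr, size_t nbytes) { memset(ptr, 0, nbytes); }

static void fast_bzero_detect(void *ptr, size_t nbytes);

/* All callers go through this pointer; the first call lands in the detect
 * stub, which overwrites the pointer with the routine chosen for this CPU. */
static void (*fast_bzero_pointer)(void *, size_t) = fast_bzero_detect;

static void fast_bzero_detect(void *ptr, size_t nbytes)
{
    unsigned eax = 1, ebx, ecx, edx;
    __asm__ volatile ("cpuid"                        /* CPUID leaf 1 */
                      : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));
    (void) ebx;
    (void) ecx;
    fast_bzero_pointer = (edx & 0x04000000)          /* EDX bit 26: SSE2 */
                         ? fast_bzero_sse
                         : fast_bzero_base;
    fast_bzero_pointer(ptr, nbytes);
}

void fast_bzero(void *ptr, size_t nbytes)
{
    (*fast_bzero_pointer)(ptr, nbytes);              /* indirect call, as in the .S file */
}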