src/runtime/x86-assem.S

   1 /*
   2  * very-low-level utilities for runtime support
   3  */
   4
   5 /*
   6  * This software is part of the SBCL system. See the README file for
   7  * more information.
   8  *
   9  * This software is derived from the CMU CL system, which was
  10  * written at Carnegie Mellon University and released into the
  11  * public domain. The software is in the public domain and is
  12  * provided with absolutely no warranty. See the COPYING and CREDITS
  13  * files for more information.
  14  */
  15 \f
  16 #define LANGUAGE_ASSEMBLY
  17 #include "sbcl.h"
  18 #include "validate.h"
  19 #include "genesis/closure.h"
  20 #include "genesis/funcallable-instance.h"
  21 #include "genesis/fdefn.h"
  22 #include "genesis/static-symbols.h"
  23 #include "genesis/symbol.h"
  24 #include "genesis/thread.h"
  25
  26 /* Minimize conditionalization for different OS naming schemes.
  27  *
  28  * (As of sbcl-0.8.10, this seems no longer to be much of an issue,
  29  * since everyone has converged on ELF. If this generality really
  30  * turns out not to matter, perhaps it's just clutter we could get
  31  * rid of? -- WHN 2004-04-18)
  32  *
  33  * (Except Win32, which is unlikely ever to be ELF, sorry. -- AB 2005-12-08)
  34  */
  35 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__sun)
  36 # define GNAME(var) var         /* symbol name is used exactly as written */
  37 #else
  38 # define GNAME(var) _##var      /* symbol name gets a leading underscore */
  39 #endif
  40
  41 /* Get the right type of alignment. Linux, FreeBSD, NetBSD, OpenBSD, Solaris
  42  * and Win32 want alignment in bytes; everything else gets a power-of-two exponent.
  43  *
  44  * (As in the GNAME() definitions above, as of sbcl-0.8.10, this seems
  45  * no longer to be much of an issue, since everyone has converged on
  46  * the same value. If this generality really turns out not to
  47  * matter any more, perhaps it's just clutter we could get
  48  * rid of? -- WHN 2004-04-18)
  49  */
  50 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__sun) || defined(LISP_FEATURE_WIN32)
  51 #define align_4byte     4       /* this branch: operand is a byte count */
  52 #define align_8byte     8
  53 #define align_16byte    16
  54 #define align_page      4096
  55 #else
  56 #define align_4byte     2       /* this branch: operand is log2 of the byte count */
  57 #define align_8byte     3
  58 #define align_16byte    4
  59 #define align_page      12
  60 #endif /* alignment units */
  61
  62 /*
  63  * The assembler used for win32 doesn't like .type or .size directives,
  64  * so we want to conditionally kill them out. So let's wrap them in macros
  65  * that are defined to be no-ops on win32. Hopefully this still works on
  66  * other platforms.
  67  */
  68 #if defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_DARWIN)
  69 # define TYPE(name)                         /* expands to nothing on these targets */
  70 # define SIZE(name)                         /* expands to nothing on these targets */
  71 #else
  72 # define TYPE(name) .type name,@function    /* tag name as a function symbol */
  73 # define SIZE(name) .size name,.-name       /* size = current location minus name */
  74 #endif
  75
  76 /* Helper macros for access to thread-locals slots for both OS types:
  77  * ------------------------------------------------------------------------
  78  *
  79  *                          Windows TEB block
  80  * ==================        __________
  81  * | Win32 %FS base | ---->  |        | 0
  82  * ==================        |        | 1
  83  *                           z        z
  84  *     TLS slots start here> |XXXXXXXX| e10 = TEB_STATIC_TLS_SLOTS_OFFSET
  85  *                           |XXXXXXXX| e11
  86  *                           z   ...  z
  87  *                           |XXXXXXXX| e4e
  88  *     TLS ends here>     ,- |XXXXXXXX| e4f = TEB_STATIC_TLS_SLOTS_OFFSET+63
  89  *                       /   z        z
  90  *                       |   ----------                    "os_address" ----.
  91  *                       |                                                   |
  92  *                       |   big blob of SBCL-specific thread-local data     |
  93  *                       |     |----------------------------------------| <--'
  94  *                       |     |   CONTROL, BINDING, ALIEN STACK        |
  95  *                       |     z                                        z
  96  * ==================    |     |----------------------------------------|
  97  * | Linux %FS base | -->|     |   FFI stack pointer                    |
  98  * ==================    |     |    (extra page for mprotect)           |
  99  *                        \    |----------------------------------------|
 100  *   (union p_t_d) ----->  \-> | struct thread {   | dynamic_values[0]  |
 101  *   .                         |   ...             |               [1]  |
 102  *   .                         z   ...             z               ...  z
 103  *   [tls data begins]         | }                 |               ...  | <-
 104  *   [declared end of p_t_d]   |----------------------------------------| . |
 105  *   .                         |                                   ...  | . |
 106  *   .                         |                           [TLS_SIZE-1] | <-|
 107  *   [tls data actually ends]  |----------------------------------------|   |
 108  *   .                         | ALTSTACK                               |   |
 109  *   .                         |----------------------------------------|   |
 110  *   .                         | struct nonpointer_thread_data { }      |   |
 111  *   .                         ------------------------------------------   |
 112  *   [blob actually ends]                                                   |
 113  *                                                                         /
 114  *                                                                        /
 115  *                                                                       /
 116  *          ______________________                                      /
 117  *          | struct symbol {    |                                     /
 118  *          z   ...              z                                    /
 119  *          |   fixnum tls_index;  // fixnum value relative to union /
 120  *          | }                  |           (< TLS_SIZE = 4096)
 121  *          ---------------------|
 122  */
 123 #ifdef LISP_FEATURE_WIN32
 124 # define TEB_STATIC_TLS_SLOTS_OFFSET 0xE10 /* byte offset where the TLS slots start (see diagram) */
 125 # define TEB_SBCL_THREAD_BASE_OFFSET (TEB_STATIC_TLS_SLOTS_OFFSET+(63*4)) /* slot 63, at 4 bytes per slot */
 126 # define SBCL_THREAD_BASE_EA %fs:TEB_SBCL_THREAD_BASE_OFFSET /* memory operand that yields the thread base */
 127 # define MAYBE_FS(addr) addr /* addr is used as-is, without a segment override */
 128 # define LoadTlSymbolValueAddress(symbol,reg) ;         \
 129         movl    SBCL_THREAD_BASE_EA, reg ; /* reg = thread base */ \
 130         addl    (symbol+SYMBOL_TLS_INDEX_OFFSET), reg ; /* reg += the symbol's tls_index field */
 131 # define LoadCurrentThreadSlot(offset,reg);     \
 132         movl    SBCL_THREAD_BASE_EA, reg ; /* reg = thread base */ \
 133         movl    offset(reg), reg ; /* reg = word at thread base + offset */
 134 #elif defined(LISP_FEATURE_LINUX) || defined(LISP_FEATURE_SUNOS) || defined(LISP_FEATURE_FREEBSD)
 135   /* see comment in arch_os_thread_init */
 136 # define SBCL_THREAD_BASE_EA %fs:THREAD_SELFPTR_OFFSET /* thread base is read from the %fs-relative self-pointer slot */
 137 # define MAYBE_FS(addr) addr /* addr is used as-is, without a segment override */
 138 #else
 139   /* perhaps there's an OS out there that actually supports %fs without
 140    * jumping through hoops, so just in case, here a default definition: */
 141 # define SBCL_THREAD_BASE_EA $0 /* base is the constant zero ... */
 142 # define MAYBE_FS(addr) %fs:addr /* ... and every access carries the %fs override instead */
 143 #endif
 144
 145 /* gas can't parse 4096LU; redefine */
 146 #if BACKEND_PAGE_BYTES == 4096
 147 # undef BACKEND_PAGE_BYTES
 148 # define BACKEND_PAGE_BYTES 4096 /* same value, spelled as a plain literal the assembler accepts */
 149 #elif BACKEND_PAGE_BYTES == 32768
 150 # undef BACKEND_PAGE_BYTES
 151 # define BACKEND_PAGE_BYTES 32768 /* same value, spelled as a plain literal the assembler accepts */
 152 #else
 153 # error BACKEND_PAGE_BYTES mismatch
 154 #endif /* only 4096 and 32768 are handled; anything else stops the build */
 155
 156 /* OAOOM because we don't have the C headers here */
 157 #define THREAD_CSP_PAGE_SIZE BACKEND_PAGE_BYTES /* the CSP page is one backend page */
 158
 159 /* the CSP page sits right before the thread */
 160 #define THREAD_SAVED_CSP_OFFSET (-THREAD_CSP_PAGE_SIZE) /* negative offset, applied to the thread base pointer */
 161
 162 /*
 163  * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
 164  * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
 165  * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
 166  * 0F 0B in little-endian notation, that causes SIGILL to fire. We check
 167  * for this instruction in the SIGILL handler and if we see it, we
 168  * advance the EIP by two bytes to skip over ud2 instruction and
 169  * call sigtrap_handler. */
 170 #if defined(LISP_FEATURE_UD2_BREAKPOINTS)
 171 #define END()           /* expands to nothing in this configuration */
 172 #define TRAP ud2        /* trap with the undefined-opcode instruction */
 173 #else
 174 #define END() .end      /* expands to the assembler's .end directive */
 175 #define TRAP int3       /* trap with the breakpoint instruction */
 176 #endif
 177
 178         .text
 179         .globl  GNAME(all_threads)      /* not defined in the visible code; read by call_into_lisp_first_time */
 180 \f
 181 /*
 182  * A call to call_into_c preserves esi, edi, and ebp.
 183  * (The C function will preserve ebx, esi, edi, and ebp across its
 184  * function call, but we trash ebx ourselves by using it to save the
 185  * return Lisp address.)
 186  *
 187  * Return values are in eax and maybe edx for quads, or st(0) for
 188  * floats.
 189  *
 190  * This should work for Lisp calls C calls Lisp calls C..
 191  *
 192  * FIXME & OAOOM: This duplicates call-out in src/compiler/x86/c-call.lisp,
 193  * so if you tweak this, change that too!
 194  */
 195 /*
 196  * Note on sections specific to LISP_FEATURE_SB_SAFEPOINT:
 197  *
 198  * The code below is essential to safepoint-based garbage collection,
 199  * and several details need to be considered for correct implementation.
 200  *
 201  * The stack spilling approach:
 202  *   On SB-SAFEPOINT platforms, the CALL-OUT vop is defined to spill all
 203  *   live Lisp TNs to the stack to provide information for conservative
 204  *   GC cooperatively (avoiding the need to retrieve register values
 205  *   from POSIX signal contexts or Windows GetThreadContext()).
 206  *
 207  * Finding the SP at all:
 208  *   The main remaining value needed by GC is the stack pointer (SP) at
 209  *   the moment of entering the foreign function.  For this purpose, a
 210  *   thread-local field for the SP is used.  Two stores to that field
 211  *   are done for each C call, one to save the SP before calling out and
 212  *   one to undo that store afterwards.
 213  *
 214  * Stores as synchronization points:
 215  *   These two stores delimit the C call: While the SP is set, our
 216  *   thread is known not to run Lisp code: During GC, memory protection
 217  *   ensures that no thread proceeds across stores.
 218  *
 219  * The return PC issue:
 220  *   (Note that CALL-OUT has, in principle, two versions: Inline
 221  *   assembly in the VOP -or- alternatively the out-of-line version you
 222  *   are currently reading.  In reality, safepoint builds currently
 223  *   lack the inline code entirely.)
 224  *
 225  *   Both versions need to take special care with the return PC:
 226  *   - In the inline version of the code (if it existed), the two stores
 227  *     would be done directly in the CALL-OUT vop.  In that theoretical
 228  *     implementation, there is a time interval between return of the
 229  *     actual C call and a second SP store during which the return
 230  *     address might not be on the stack anymore.
 231  *   - In this out-of-line version, the stores are done during
 232  *     call_into_c's frame, but an equivalent problem arises: In order
 233  *     to present the stack of arguments as our foreign function expects
 234  *     them, call_into_c has to pop the Lisp return address into a
 235  *     register first; this register has to be preserved by GENCGC
 236  *     separately: our return address is not in the stack anymore.
 237  *   In both cases, stack scanning alone is not sufficient to pin
 238  *   the return address, and we communicate it to GC explicitly
 239  *   in addition to the SP.
 240  *
 241  * Note on look-alike accessor macros with vastly different behaviour:
 242  *   THREAD_PC_AROUND_FOREIGN_CALL_OFFSET is an "ordinary" field of the
 243  *   struct thread, whereas THREAD_SAVED_CSP_OFFSET is a synchronization
 244  *   point on a potentially write-protected page.
 245 */
 246
 247         .text
 248         .align  align_16byte,0x90
 249         .globl GNAME(call_into_c)
 250         TYPE(GNAME(call_into_c))
 251 GNAME(call_into_c):
 252 /* Save the return Lisp address in ebx; eax holds the address of the C function to call. */
 253         popl    %ebx
 254
 255 /* Setup the NPX for C */
 256         /* The VOP says regarding CLD: "Clear out DF: Darwin, Windows,
 257          * and Solaris at least require this, and it should not hurt
 258          * others either." call_into_c didn't have it, but better safe than
 259          * sorry. */
 260         cld
 261         fstp    %st(0)          /* eight pops: leave every x87 register empty before the C call */
 262         fstp    %st(0)
 263         fstp    %st(0)
 264         fstp    %st(0)
 265         fstp    %st(0)
 266         fstp    %st(0)
 267         fstp    %st(0)
 268         fstp    %st(0)
 269
 270 #ifdef LISP_FEATURE_SB_SAFEPOINT
 271         /* enter safe region: store SP and return PC */
 272         movl    SBCL_THREAD_BASE_EA,%edi        /* NOTE(review): overwrites edi although the header comment says edi is preserved -- confirm callers */
 273         movl    %esp,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi))
 274         movl    %ebx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi))
 275 #endif
 276
 277         /* foreign call, preserving ESI, EDI, and EBX */
 278         call    *%eax             # normal callout using Lisp stack
 279         /* return values now in eax/edx OR st(0) */
 280
 281 #ifdef LISP_FEATURE_SB_SAFEPOINT
 282         /* leave region: clear the SP!  (Also unpin the return PC.) */
 283         xorl    %ecx,%ecx       /* ecx = 0; eax/edx still carry the C return value and are left alone */
 284         movl    %ecx,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi))
 285         movl    %ecx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi))
 286 #endif
 287
 288         movl    %eax,%ecx         # remember integer return value
 289
 290 /* Check for a return FP value. */
 291         fxam                    /* classify st(0) into condition bits C3/C2/C0 */
 292         fnstsw  %ax             /* ax = x87 status word (this is why eax was copied to ecx) */
 293         andl    $0x4500,%eax    /* keep only C3 (0x4000), C2 (0x0400) and C0 (0x0100) */
 294         cmpl    $0x4100,%eax    /* C3=1, C2=0, C0=1 is the "empty register" class */
 295         jne     Lfp_rtn_value   /* st(0) not empty: the C function returned a float */
 296
 297 /* The return value is in eax, or eax,edx? */
 298 /* Set up the NPX stack for Lisp. */
 299         fldz                    # Ensure no regs are empty.
 300         fldz
 301         fldz
 302         fldz
 303         fldz
 304         fldz
 305         fldz
 306         fldz
 307
 308 /* Restore the return value. */
 309         movl    %ecx,%eax       # maybe return value
 310
 311 /* Return. */
 312         jmp     *%ebx           /* return address was popped into ebx on entry */
 313
 314 Lfp_rtn_value:
 315 /* The return result is in st(0). */
 316 /* Set up the NPX stack for Lisp, placing the result in st(0). */
 317         fldz                    # Ensure no regs are empty.
 318         fldz                    /* only seven loads here: the result already occupies one register */
 319         fldz
 320         fldz
 321         fldz
 322         fldz
 323         fldz
 324         fxch    %st(7)          # Move the result back to st(0).
 325
 326 /* We don't need to restore eax, because the result is in st(0). */
 327
 328 /* Return. FIXME: It would be nice to restructure this to use RET. */
 329         jmp     *%ebx
 330
 331         SIZE(GNAME(call_into_c))
 332
 333 \f
 334         .text
 335         .globl GNAME(call_into_lisp_first_time)
 336         TYPE(GNAME(call_into_lisp_first_time))
 337
 338 /* We don't worry too much about saving registers
 339  * here, because we never expect to return from the initial call to lisp
 340  * anyway */
 341
 342         .align  align_16byte,0x90
 343 GNAME(call_into_lisp_first_time):
 344         pushl   %ebp            # Save old frame pointer.
 345         movl    %esp,%ebp       # Establish new frame.
 346 #ifndef LISP_FEATURE_WIN32
 347         movl    GNAME(all_threads),%eax         /* eax = value stored in all_threads, used as a thread pointer */
 348         /* pthread machinery takes care of this for other threads */
 349         movl    THREAD_CONTROL_STACK_END_OFFSET(%eax) ,%esp     /* switch esp to that thread's control stack end */
 350 #else
 351 /* Win32 -really- doesn't like you switching stacks out from under it. */
 352         movl    GNAME(all_threads),%eax         /* loaded, but esp is left where it is */
 353 #endif
 354         jmp     Lstack          /* continue in the body shared with call_into_lisp */
 355 \f
 356         .text
 357         .globl GNAME(call_into_lisp)
 358         TYPE(GNAME(call_into_lisp))
 359
 360 /* The C conventions require that ebx, esi, edi, and ebp be preserved
 361  * across function calls. */
 362
 363         .align  align_16byte,0x90
 364 GNAME(call_into_lisp):
 365         pushl   %ebp            # Save old frame pointer.
 366         movl    %esp,%ebp       # Establish new frame.
 367
 368 Lstack:
 369 /* Save the NPX state */
 370         fwait                   # Catch any pending NPX exceptions.
 371         subl    $108,%esp       # Make room for the NPX state.
 372         fnsave  (%esp)          # save and reset NPX
 373
 374         movl    (%esp),%eax     # Load NPX control word.
 375         andl    $0xfffff2ff,%eax        # Set rounding mode to nearest.
 376         orl     $0x00000200,%eax        # Set precision to 64 bits.  (53-bit mantissa)
 377         pushl   %eax            /* fldcw needs a memory operand, so go through the stack */
 378         fldcw   (%esp)          # Recover modes.
 379         popl    %eax
 380
 381         fldz                    # Ensure no FP regs are empty.
 382         fldz
 383         fldz
 384         fldz
 385         fldz
 386         fldz
 387         fldz
 388         fldz
 389
 390 /* Save C regs: ebx esi edi. */
 391         pushl   %ebx
 392         pushl   %esi
 393         pushl   %edi
 394
 395 /* Clear descriptor regs. */
 396         xorl    %eax,%eax       # lexenv
 397         xorl    %ebx,%ebx       # available
 398         xorl    %ecx,%ecx       # arg count
 399         xorl    %edx,%edx       # first arg
 400         xorl    %edi,%edi       # second arg
 401         xorl    %esi,%esi       # third arg
 402
 403 /* no longer in function call */
 404         movl    %esp,%ebx       # remember current stack
 405         pushl   %ebx            # Save entry stack on (maybe) new stack.
 406
 407         /* Establish Lisp args. */
 408         movl     8(%ebp),%eax   # lexenv?
 409         movl    12(%ebp),%ebx   # address of arg vec
 410         movl    16(%ebp),%ecx   # num args
 411         shll    $2,%ecx         # Make num args into fixnum.
 412         cmpl    $0,%ecx         /* no args: leave edx/edi/esi zeroed */
 413         je      Ldone
 414         movl    (%ebx),%edx     # arg0
 415         cmpl    $4,%ecx         /* 4 = one arg after the shift by 2 */
 416         je      Ldone
 417         movl    4(%ebx),%edi    # arg1
 418         cmpl    $8,%ecx         /* 8 = two args after the shift by 2 */
 419         je      Ldone
 420         movl    8(%ebx),%esi    # arg2
 421 Ldone:
 422         /* Registers eax, ecx, edx, edi, and esi are now live. */
 423
 424 #ifdef LISP_FEATURE_WIN32
 425         /* Establish an SEH frame. */
 426 #ifdef LISP_FEATURE_SB_THREAD
 427         /* Save binding stack pointer */
 428         subl $4, %esp           /* reserve the slot that will hold it */
 429         pushl %eax              /* eax is live (see Ldone); borrow it temporarily */
 430         movl SBCL_THREAD_BASE_EA, %eax
 431         movl THREAD_BINDING_STACK_POINTER_OFFSET(%eax), %eax
 432         movl %eax, 4(%esp)      /* fill the reserved slot, just above the saved eax */
 433         popl %eax               /* eax is back to its live value */
 434 #else
 435         pushl   BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET
 436 #endif
 437         pushl   $GNAME(exception_handler_wrapper)       /* handler address */
 438         pushl   %fs:0           /* previous contents of fs:0 */
 439         movl    %esp, %fs:0     /* fs:0 now points at the record just pushed */
 440 #endif
 441
 442         /* Alloc new frame. */
 443         push    %ebp            # Dummy for return address
 444         push    %ebp            # fp in save location S1
 445         mov     %esp,%ebp       # The current sp marks start of new frame.
 446         sub     $4,%esp         # Ensure 3 slots are allocated, two above.
 447
 448         call    *CLOSURE_FUN_OFFSET(%eax)
 449
 450         /* If the function returned multiple values, it will return to
 451            this point.  Lose them */
 452         jnc     LsingleValue    /* carry clear: single value, nothing to discard */
 453         mov     %ebx, %esp      /* carry set: reset esp from ebx to drop the extra values */
 454 LsingleValue:
 455         /* A single-value function returns here */
 456
 457 #ifdef LISP_FEATURE_WIN32
 458         /* Remove our SEH frame. */
 459         mov     %fs:0,%esp      /* esp = the record installed above */
 460         popl    %fs:0           /* put back the previous fs:0 contents */
 461         add     $8, %esp        /* skip the handler address and the saved binding stack pointer */
 462 #endif
 463
 464 /* Restore the stack, in case there was a stack change. */
 465         popl    %esp            # c-sp
 466
 467 /* Restore C regs: ebx esi edi. */
 468         popl    %edi
 469         popl    %esi
 470         popl    %ebx
 471
 472 /* Restore the NPX state. */
 473         frstor  (%esp)
 474         addl    $108, %esp      /* release the 108-byte NPX save area reserved at Lstack */
 475
 476         popl    %ebp            # Restore old frame pointer.
 477         movl    %edx,%eax       # c-val
 478         ret
 479         SIZE(GNAME(call_into_lisp))
 480 \f
 481 /* support for saving and restoring the NPX state from C */
 482         .text
 483         .globl  GNAME(fpu_save)
 484         TYPE(GNAME(fpu_save))
 485         .align  align_4byte,0x90        /* 4-byte alignment on every target; a raw "2" meant 2 bytes where .align counts bytes */
 486 GNAME(fpu_save):
 487         movl    4(%esp),%eax    /* eax = first stack argument: address of the save area */
 488         fnsave  (%eax)          # Save the NPX state. (resets NPX)
 489         ret
 490         SIZE(GNAME(fpu_save))
 491
 492         .globl  GNAME(fpu_restore)
 493         TYPE(GNAME(fpu_restore))
 494         .align  align_4byte,0x90        /* 4-byte alignment on every target; a raw "2" meant 2 bytes where .align counts bytes */
 495 GNAME(fpu_restore):
 496         movl    4(%esp),%eax    /* eax = first stack argument: address of a previously saved NPX image */
 497         frstor  (%eax)          # Restore the NPX state.
 498         ret
 499         SIZE(GNAME(fpu_restore))
 500 \f
 501 /*
 502  * the undefined-function trampoline
 503  */
 504         .text
 505         .align  align_16byte,0x90
 506         .globl GNAME(undefined_tramp)
 507         TYPE(GNAME(undefined_tramp))
 508         .byte   0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG      /* four bytes emitted just before the entry label; NOTE(review): presumably a fake function header word -- confirm */
 509 GNAME(undefined_tramp):
 510         pop     4(%ebp)         # Save return PC for backtrace.
 511         TRAP                    /* trap instruction followed by inline data bytes */
 512         .byte   trap_Error      /* trap kind */
 513         .byte   2               /* NOTE(review): presumably the length of the payload that follows -- confirm */
 514         .byte   UNDEFINED_FUN_ERROR
 515         .byte   sc_DescriptorReg # eax in the Descriptor-reg SC
 516         ret
 517         SIZE(GNAME(undefined_tramp))
 518
 519 /* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs
 520  * to know the name of the function immediately following the
 521  * undefined-function trampoline. */
 522
 523 /*
 524  * the closure trampoline
 525  */
 526         .text
 527         .align  align_16byte,0x90
 528         .globl GNAME(closure_tramp)
 529         TYPE(GNAME(closure_tramp))
 530         .byte   0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG      /* same four-byte prefix as undefined_tramp */
 531 GNAME(closure_tramp):
 532         movl    FDEFN_FUN_OFFSET(%eax),%eax     /* eax = word at FDEFN_FUN_OFFSET of the object eax pointed to */
 533         /* FIXME: The '*' after "jmp" in the next line is from PVE's
 534          * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
 535          * reasonable, and it certainly seems as though if CMU CL needs it,
 536          * SBCL needs it too, but I haven't actually verified that it's
 537          * right. It would be good to find a way to force the flow of
 538          * control through here to test it. */
 539         jmp     *CLOSURE_FUN_OFFSET(%eax)       /* indirect tail jump through the word at CLOSURE_FUN_OFFSET */
 540         SIZE(GNAME(closure_tramp))
 541
 542         .text
 543         .align  align_16byte,0x90
 544         .globl GNAME(funcallable_instance_tramp)
 545         TYPE(GNAME(funcallable_instance_tramp))
 546 GNAME(funcallable_instance_tramp):
 547         movl    FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%eax),%eax /* eax = the instance's function slot */
 548         /* KLUDGE: on this platform, whatever kind of function is in %eax
 549          * now, the first word of it contains the address to jump to. */
 550         jmp     *CLOSURE_FUN_OFFSET(%eax)       /* indirect tail jump, same form as closure_tramp */
 551         SIZE(GNAME(funcallable_instance_tramp))
 552
 553 /*
 554  * fun-end breakpoint magic
 555  */
 556
 557 /*
 558  * For an explanation of the magic involved in function-end
 559  * breakpoints, see the implementation in ppc-assem.S.
 560  */
 561
 562         .text
 563         .globl  GNAME(fun_end_breakpoint_guts)
 564         .align  align_16byte
 565 GNAME(fun_end_breakpoint_guts):
 566         /* Multiple Value return */
 567         jc      multiple_value_return   /* carry set: registers are already in multiple-value form */
 568         /* Single value return: The eventual return will now use the
 569            multiple values return convention but with a return values
 570            count of one. */
 571         movl    %esp,%ebx       # Setup ebx - the ofp.
 572         subl    $4,%esp         # Allocate one stack slot for the return value
 573         movl    $4,%ecx         # Setup ecx for one return value.
 574         movl    $(NIL),%edi     # default second value
 575         movl    $(NIL),%esi     # default third value
 576
 577 multiple_value_return:
 578
 579         .globl GNAME(fun_end_breakpoint_trap)
 580 GNAME(fun_end_breakpoint_trap): /* exported label on the trap instruction itself */
 581         TRAP
 582         .byte   trap_FunEndBreakpoint   /* trap kind byte following the trap instruction */
 583         hlt                     # We should never return here.
 584
 585         .globl GNAME(fun_end_breakpoint_end)
 586 GNAME(fun_end_breakpoint_end):  /* exported label just past the last instruction of the guts */
 587
 588 \f
 589         .globl  GNAME(do_pending_interrupt)
 590         TYPE(GNAME(do_pending_interrupt))
 591         .align  align_16byte,0x90
 592 GNAME(do_pending_interrupt):
 593         TRAP                    /* enter the trap handler */
 594         .byte   trap_PendingInterrupt   /* trap kind byte following the trap instruction */
 595         ret                     /* NOTE(review): presumably reached once the handler has skipped the kind byte -- confirm */
 596         SIZE(GNAME(do_pending_interrupt))
 597 \f
 598 /* Allocate bytes and return the start of the allocated space
 599  * in the specified destination register.
 600  *
 601  * In the general case the size will be in the destination register.
 602  *
 603  * All registers must be preserved except the destination.
 604  * The C conventions will preserve ebx, esi, edi, and ebp.
 605  * So only eax, ecx, and edx need special care here.
 606  *
 607  * ALLOC factors out the logic of calling alloc(): stack alignment, etc.
 608  *
 609  * DEFINE_ALLOC_TO_FOO defines an allocation routine.
 610  */
 611
 612 #ifdef LISP_FEATURE_DARWIN /* variant that aligns the stack to 16 bytes before the call */
 613 #define ALLOC(size)                                             \
 614         pushl   %ebp;              /* Save EBP               */ \
 615         movl    %esp,%ebp;         /* Save ESP to EBP        */ \
 616         pushl   $0;                /* Reserve space for arg  */ \
 617         andl    $0xfffffff0,%esp;  /* Align stack to 16bytes */ \
 618         movl    size, (%esp);      /* Argument to alloc      */ \
 619         call    GNAME(alloc);      /* Result in EAX          */ \
 620         movl    %ebp,%esp;         /* Restore ESP from EBP   */ \
 621         popl    %ebp;              /* Restore EBP            */
 622 #else /* plain variant: push the argument, call, pop it again */
 623 #define ALLOC(size)                                             \
 624         pushl   size;              /* Argument to alloc      */ \
 625         call    GNAME(alloc);      /* Result in EAX          */ \
 626         addl    $4,%esp;           /* Pop argument           */
 627 #endif
 628
 629 #define DEFINE_ALLOC_TO_EAX(name,size)                          \
 630         .globl  GNAME(name);                                    \
 631         TYPE(GNAME(name));                                      \
 632         .align  align_16byte,0x90;                              \
 633 GNAME(name):                                                    \
 634         pushl   %ecx;              /* Save ECX and EDX       */ \
 635         pushl   %edx;                                           \
 636         ALLOC(size)                /* EAX = alloc(size)      */ \
 637         popl    %edx;              /* Restore ECX and EDX    */ \
 638         popl    %ecx;                                           \
 639         ret;                                                    \
 640         SIZE(GNAME(name))
 641
 642 #define DEFINE_ALLOC_TO_ECX(name,size)                          \
 643         .globl  GNAME(name);                                    \
 644         TYPE(GNAME(name));                                      \
 645         .align  align_16byte,0x90;                              \
 646 GNAME(name):                                                    \
 647         pushl   %eax;              /* Save EAX and EDX       */ \
 648         pushl   %edx;                                           \
 649         ALLOC(size)                /* EAX = alloc(size)      */ \
 650         movl    %eax,%ecx;         /* Result to destination  */ \
 651         popl    %edx;              /* Restore EDX and EAX    */ \
 652         popl    %eax;                                           \
 653         ret;                                                    \
 654         SIZE(GNAME(name))
 655
 656 #define DEFINE_ALLOC_TO_EDX(name,size)                          \
 657         .globl  GNAME(name);                                    \
 658         TYPE(GNAME(name));                                      \
 659         .align  align_16byte,0x90;                              \
 660 GNAME(name):                                                    \
 661         pushl   %eax;               /* Save EAX and ECX      */ \
 662         pushl   %ecx;                                           \
 663         ALLOC(size)                 /* EAX = alloc(size)     */ \
 664         movl    %eax,%edx;          /* Result to destination */ \
 665         popl    %ecx;               /* Restore ECX and EAX   */ \
 666         popl    %eax;                                           \
 667         ret;                                                    \
 668         SIZE(GNAME(name))
 669
 670 #define DEFINE_ALLOC_TO_REG(name,reg,size)                      \
 671         .globl  GNAME(name);                                    \
 672         TYPE(GNAME(name));                                      \
 673         .align  align_16byte,0x90;                              \
 674 GNAME(name):                                                    \
 675         pushl   %eax;              /* Save EAX, ECX, and EDX */ \
 676         pushl   %ecx;                                           \
 677         pushl   %edx;                                           \
 678         ALLOC(size)                /* EAX = alloc(size)      */ \
 679         movl    %eax,reg;          /* Result to destination  */ \
 680         popl    %edx;              /* Restore EDX, ECX, EAX  */ \
 681         popl    %ecx;                                           \
 682         popl    %eax;                                           \
 683         ret;                                                    \
 684         SIZE(GNAME(name))
 685
 686 DEFINE_ALLOC_TO_EAX(alloc_to_eax,%eax)          /* size taken from eax, result in eax */
 687 DEFINE_ALLOC_TO_EAX(alloc_8_to_eax,$8)          /* constant size 8 */
 688 DEFINE_ALLOC_TO_EAX(alloc_16_to_eax,$16)        /* constant size 16 */
 689
 690 DEFINE_ALLOC_TO_ECX(alloc_to_ecx,%ecx)          /* size taken from ecx, result in ecx */
 691 DEFINE_ALLOC_TO_ECX(alloc_8_to_ecx,$8)
 692 DEFINE_ALLOC_TO_ECX(alloc_16_to_ecx,$16)
 693
 694 DEFINE_ALLOC_TO_EDX(alloc_to_edx,%edx)          /* size taken from edx, result in edx */
 695 DEFINE_ALLOC_TO_EDX(alloc_8_to_edx,$8)
 696 DEFINE_ALLOC_TO_EDX(alloc_16_to_edx,$16)
 697
 698 DEFINE_ALLOC_TO_REG(alloc_to_ebx,%ebx,%ebx)     /* size taken from ebx, result in ebx */
 699 DEFINE_ALLOC_TO_REG(alloc_8_to_ebx,%ebx,$8)
 700 DEFINE_ALLOC_TO_REG(alloc_16_to_ebx,%ebx,$16)
 701
 702 DEFINE_ALLOC_TO_REG(alloc_to_esi,%esi,%esi)     /* size taken from esi, result in esi */
 703 DEFINE_ALLOC_TO_REG(alloc_8_to_esi,%esi,$8)
 704 DEFINE_ALLOC_TO_REG(alloc_16_to_esi,%esi,$16)
 705
 706 DEFINE_ALLOC_TO_REG(alloc_to_edi,%edi,%edi)     /* size taken from edi, result in edi */
 707 DEFINE_ALLOC_TO_REG(alloc_8_to_edi,%edi,$8)
 708 DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16)
 709
 710 /* Called from lisp when an inline allocation overflows.
 711  * Every register except the result needs to be preserved.
 712  * We depend on C to preserve ebx, esi, edi, and ebp.
 713  * But where necessary must save eax, ecx, edx. */
 714
 715 #ifdef LISP_FEATURE_SB_THREAD
 716 #define START_REGION %fs:THREAD_ALLOC_REGION_OFFSET /* threaded builds: region word addressed through %fs */
 717 #else
 718 #define START_REGION GNAME(boxed_region) /* otherwise: the boxed_region symbol */
 719 #endif
 720
 721 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32)
 722 #define ALLOC_OVERFLOW(size,scratch)                            \
 723         movl SBCL_THREAD_BASE_EA, scratch; /* scratch = thread base */ \
 724         /* Calculate the size for the allocation. */            \
 725         subl THREAD_ALLOC_REGION_OFFSET(scratch),size;          \
 726         ALLOC(size) /* EAX = alloc(size); size register now holds the byte count */
 727 #else
 728 #define ALLOC_OVERFLOW(size,scratch)                    \
 729           /* Calculate the size for the allocation. */  \
 730           subl    START_REGION,size;                    \
 731           ALLOC(size) /* EAX = alloc(size); scratch is unused in this variant */
 732 #endif
 733
 734 /* This routine handles an overflow with eax=crfp+size. So the
 735    size=eax-crfp. */
 736         .align  align_16byte
 737         .globl  GNAME(alloc_overflow_eax)
 738         TYPE(GNAME(alloc_overflow_eax))
 739 GNAME(alloc_overflow_eax):
 740         pushl   %ecx            # Save ecx
 741         pushl   %edx            # Save edx
 742         ALLOC_OVERFLOW(%eax,%edx)       /* eax = alloc(eax - crfp); result is already in the destination */
 743         popl    %edx    # Restore edx.
 744         popl    %ecx    # Restore ecx.
 745         ret
 746         SIZE(GNAME(alloc_overflow_eax))
 747
        /* alloc_overflow_ecx: overflow handler for an inline allocation
         * whose overflowed pointer is in ECX.  ALLOC_OVERFLOW turns ECX
         * into the requested size and expands ALLOC; the value found in
         * EAX afterwards is copied into ECX as the result.  EAX and EDX
         * are saved and restored; EDX is the ALLOC_OVERFLOW scratch
         * register. */
        .align  align_16byte
        .globl  GNAME(alloc_overflow_ecx)
        TYPE(GNAME(alloc_overflow_ecx))
GNAME(alloc_overflow_ecx):
        pushl   %eax            # Save eax
        pushl   %edx            # Save edx
        ALLOC_OVERFLOW(%ecx,%edx)
        movl    %eax,%ecx       # setup the destination.
        popl    %edx    # Restore edx.
        popl    %eax    # Restore eax.
        ret
        SIZE(GNAME(alloc_overflow_ecx))
 760
        /* alloc_overflow_edx: overflow handler for an inline allocation
         * whose overflowed pointer is in EDX.  ALLOC_OVERFLOW turns EDX
         * into the requested size and expands ALLOC; the value found in
         * EAX afterwards is copied into EDX as the result.  EAX and ECX
         * are saved and restored; because EDX is the operand here, ECX
         * serves as the ALLOC_OVERFLOW scratch register. */
        .align  align_16byte
        .globl  GNAME(alloc_overflow_edx)
        TYPE(GNAME(alloc_overflow_edx))
GNAME(alloc_overflow_edx):
        pushl   %eax            # Save eax
        pushl   %ecx            # Save ecx
        ALLOC_OVERFLOW(%edx,%ecx)
        movl    %eax,%edx       # setup the destination.
        popl    %ecx    # Restore ecx.
        popl    %eax    # Restore eax.
        ret
        SIZE(GNAME(alloc_overflow_edx))
 773
/* alloc_overflow_ebx: overflow handler for an inline allocation whose
 * overflowed pointer is in EBX.  On entry ebx = crfp + size (crfp is
 * presumably the current region free pointer -- confirm), so
 * ALLOC_OVERFLOW recovers size = ebx - crfp and expands ALLOC on it.
 * The value found in EAX afterwards is copied into EBX as the result.
 * EAX, ECX and EDX are all saved and restored, since none of them is
 * the destination; EDX is the ALLOC_OVERFLOW scratch register. */
        .align  align_16byte
        .globl  GNAME(alloc_overflow_ebx)
        TYPE(GNAME(alloc_overflow_ebx))
GNAME(alloc_overflow_ebx):
        pushl   %eax            # Save eax
        pushl   %ecx            # Save ecx
        pushl   %edx            # Save edx
        ALLOC_OVERFLOW(%ebx,%edx)
        movl    %eax,%ebx       # setup the destination.
        popl    %edx    # Restore edx.
        popl    %ecx    # Restore ecx.
        popl    %eax    # Restore eax.
        ret
        SIZE(GNAME(alloc_overflow_ebx))
 790
/* alloc_overflow_esi: overflow handler for an inline allocation whose
 * overflowed pointer is in ESI.  On entry esi = crfp + size (crfp is
 * presumably the current region free pointer -- confirm), so
 * ALLOC_OVERFLOW recovers size = esi - crfp and expands ALLOC on it.
 * The value found in EAX afterwards is copied into ESI as the result.
 * EAX, ECX and EDX are all saved and restored; EDX is the
 * ALLOC_OVERFLOW scratch register. */
        .align  align_16byte
        .globl  GNAME(alloc_overflow_esi)
        TYPE(GNAME(alloc_overflow_esi))
GNAME(alloc_overflow_esi):
        pushl   %eax            # Save eax
        pushl   %ecx            # Save ecx
        pushl   %edx            # Save edx
        ALLOC_OVERFLOW(%esi,%edx)
        movl    %eax,%esi       # setup the destination.
        popl    %edx    # Restore edx.
        popl    %ecx    # Restore ecx.
        popl    %eax    # Restore eax.
        ret
        SIZE(GNAME(alloc_overflow_esi))
 807
        /* alloc_overflow_edi: overflow handler for an inline allocation
         * whose overflowed pointer is in EDI.  ALLOC_OVERFLOW turns EDI
         * into the requested size and expands ALLOC; the value found in
         * EAX afterwards is copied into EDI as the result.  EAX, ECX and
         * EDX are all saved and restored; EDX is the ALLOC_OVERFLOW
         * scratch register. */
        .align  align_16byte
        .globl  GNAME(alloc_overflow_edi)
        TYPE(GNAME(alloc_overflow_edi))
GNAME(alloc_overflow_edi):
        pushl   %eax            # Save eax
        pushl   %ecx            # Save ecx
        pushl   %edx            # Save edx
        ALLOC_OVERFLOW(%edi,%edx)
        movl    %eax,%edi       # setup the destination.
        popl    %edx    # Restore edx.
        popl    %ecx    # Restore ecx.
        popl    %eax    # Restore eax.
        ret
        SIZE(GNAME(alloc_overflow_edi))
 822
 823
#ifdef LISP_FEATURE_WIN32
        /* The guts of the exception-handling system doesn't use
         * frame pointers, which manages to throw off backtraces
         * rather badly.  So here we grab the (known-good) EBP
         * and EIP from the exception context and use it to fake
         * up a stack frame which will skip over the system SEH
         * code.
         *
         * The wrapper forwards the four dwords that sit directly
         * above its return address (its own stack arguments) to
         * handle_exception and returns whatever that call leaves in
         * EAX; the third of those dwords is used as the context
         * pointer. */
        .align  align_16byte
        .globl  GNAME(exception_handler_wrapper)
        TYPE(GNAME(exception_handler_wrapper))
GNAME(exception_handler_wrapper):
        /* Context layout is: */
        /* 7 dwords before FSA. (0x1c) */
        /* 8 dwords and 0x50 bytes in the FSA. (0x70/0x8c) */
        /* 4 dwords segregs. (0x10/0x9c) */
        /* 6 dwords non-stack GPRs. (0x18/0xb4) */
        /* EBP (at 0xb4) */
        /* EIP (at 0xb8) */
#define CONTEXT_EBP_OFFSET 0xb4
#define CONTEXT_EIP_OFFSET 0xb8
        /* some other stuff we don't care about. */
        pushl   %ebp                    /* save the incoming EBP */
        /* After the push the return address is at 4(%esp) and the
         * arguments start at 8(%esp), so 0x10(%esp) is the third one. */
        movl    0x10(%esp), %ebp        /* context */
        /* Fake frame: a return-address slot (context EIP) above a
         * saved-frame-pointer slot (context EBP). */
        pushl   CONTEXT_EIP_OFFSET(%ebp)
        pushl   CONTEXT_EBP_OFFSET(%ebp)
        movl    %esp, %ebp              /* EBP -> fake frame */
        /* The four incoming arguments now sit at 0x10..0x1c(%esp).
         * Every push lowers ESP by 4, so the same displacement picks
         * them up last-to-first, recreating the argument list. */
        pushl   0x1c(%esp)
        pushl   0x1c(%esp)
        pushl   0x1c(%esp)
        pushl   0x1c(%esp)
        call    GNAME(handle_exception)
        /* Drop the argument copies and the two fake-frame words:
         * 8(%ebp) is the slot holding the EBP saved on entry. */
        lea     8(%ebp), %esp
        popl    %ebp
        ret
        SIZE(GNAME(exception_handler_wrapper))
#endif
 860
 861 #ifdef LISP_FEATURE_DARWIN
 862         .align align_16byte
 863         .globl GNAME(call_into_lisp_tramp)
 864         TYPE(GNAME(call_into_lisp_tramp))
 865 GNAME(call_into_lisp_tramp):
 866         /* 1. build the stack frame from the block that's pointed to by ECX
 867            2. free the block
 868            3. set ECX to 0
 869            4. call the function via call_into_lisp
 870         */
 871         pushl   0(%ecx)          /* return address */
 872
 873         pushl   %ebp
 874         movl    %esp, %ebp
 875
 876         pushl   32(%ecx)         /* eflags */
 877         pushl   28(%ecx)         /* EAX */
 878         pushl   20(%ecx)         /* ECX */
 879         pushl   16(%ecx)         /* EDX */
 880         pushl   24(%ecx)         /* EBX */
 881         pushl   $0                /* popal is going to ignore esp */
 882         pushl   %ebp              /* is this right?? */
 883         pushl   12(%ecx)         /* ESI */
 884         pushl   8(%ecx)          /* EDI */
 885         pushl   $0                /* args for call_into_lisp */
 886         pushl   $0
 887         pushl   4(%ecx)          /* function to call */
 888
 889         /* free our save block */
 890         pushl   %ecx              /* reserve sufficient space on stack for args */
 891         pushl   %ecx
 892         andl    $0xfffffff0, %esp  /* align stack */
 893         movl    $0x40, 4(%esp)
 894         movl    %ecx, (%esp)
 895         call    GNAME(os_invalidate)
 896
 897         /* call call_into_lisp */
 898         leal    -48(%ebp), %esp
 899         call    GNAME(call_into_lisp)
 900
 901         /* Clean up our mess */
 902         leal    -36(%ebp), %esp
 903         popal
 904         popfl
 905         leave
 906         ret
 907
 908         SIZE(call_into_lisp_tramp)
 909 #endif
 910
        /* post_signal_tramp: unwinds a frame that was laid out by other
         * code (not visible in this section) and is entered by a return
         * rather than a call.  From ESP upward it expects: three
         * argument words, a popal register image, a saved eflags word,
         * two padding words on Darwin only, and then a saved EBP and
         * return address located where EBP points. */
        .align  align_16byte,0x90
        .globl  GNAME(post_signal_tramp)
        TYPE(GNAME(post_signal_tramp))
GNAME(post_signal_tramp):
        /* this is notionally the second half of a function whose first half
         * doesn't exist.  This is where call_into_lisp returns when called
         * using return_to_lisp_function */
        addl $12,%esp   /* clear call_into_lisp args from stack */
        popal           /* restore registers */
        popfl           /* restore flags */
#ifdef LISP_FEATURE_DARWIN
        /* skip two padding words */
        addl $8,%esp
#endif
        leave           /* ESP = EBP, then pop the saved EBP */
        ret
        SIZE(GNAME(post_signal_tramp))
 928
 929
 930         /* fast_bzero implementations and code to detect which implementation
 931          * to use.
 932          */
 933 \f
        /* fast_bzero_pointer: one 32-bit word in .data holding the
         * address of the zero-fill routine currently in use.
         * fast_bzero jumps through it, and fast_bzero_detect rewrites
         * it and then jumps through it. */
        .globl GNAME(fast_bzero_pointer)
        .data
        .align  align_16byte
GNAME(fast_bzero_pointer):
        /* Variable containing a pointer to the bzero function to use.
         * Initially points to a basic function.  Change this variable
         * to fast_bzero_detect if OS supports SSE.  */
        .long GNAME(fast_bzero_base)
 942 \f
        /* gc_safepoint_page: BACKEND_PAGE_BYTES zero-filled bytes in
         * .data, aligned to align_page.  Nothing in this section
         * references it; presumably the runtime changes the page's
         * protection to trap threads at GC safepoints -- confirm
         * against its users. */
        .globl GNAME(gc_safepoint_page)
        .data
        .align  align_page
GNAME(gc_safepoint_page):
        .fill BACKEND_PAGE_BYTES,1,0
 948 \f
        /* fast_bzero: public zero-fill entry point.  It tail-jumps to
         * the routine whose address is stored in fast_bzero_pointer, so
         * the stack (return address and arguments) reaches that routine
         * untouched and its `ret' returns straight to our caller. */
        .text
        .align  align_16byte,0x90
        .globl GNAME(fast_bzero)
        TYPE(GNAME(fast_bzero))
GNAME(fast_bzero):
        /* Indirect function call */
        jmp *GNAME(fast_bzero_pointer)
        SIZE(GNAME(fast_bzero))
 957
 958 \f
 959         .text
 960         .align  align_16byte,0x90
 961         .globl GNAME(fast_bzero_detect)
 962         TYPE(GNAME(fast_bzero_detect))
 963 GNAME(fast_bzero_detect):
 964         /* Decide whether to use SSE, MMX or REP version */
 965         push %eax /* CPUID uses EAX-EDX */
 966         push %ebx
 967         push %ecx
 968         push %edx
 969         mov $1, %eax
 970         cpuid
 971         test $0x04000000, %edx    /* SSE2 needed for MOVNTDQ */
 972         jnz Lsse2
 973         /* Originally there was another case here for using the
 974          * MOVNTQ instruction for processors that supported MMX but
 975          * not SSE2. This turned out to be a loss especially on
 976          * Athlons (where this instruction is apparently microcoded
 977          * somewhat slowly). So for simplicity revert to REP STOSL
 978          * for all non-SSE2 processors.
 979          */
 980 Lbase:
 981         movl $(GNAME(fast_bzero_base)), GNAME(fast_bzero_pointer)
 982         jmp Lrestore
 983 Lsse2:
 984         movl $(GNAME(fast_bzero_sse)), GNAME(fast_bzero_pointer)
 985         jmp Lrestore
 986
 987 Lrestore:
 988         pop %edx
 989         pop %ecx
 990         pop %ebx
 991         pop %eax
 992         jmp *GNAME(fast_bzero_pointer)
 993
 994         SIZE(GNAME(fast_bzero_detect))
 995
 996 \f
 997         .text
 998         .align  align_16byte,0x90
 999         .globl GNAME(fast_bzero_sse)
1000         TYPE(GNAME(fast_bzero_sse))
1001
1002 GNAME(fast_bzero_sse):
1003         /* A fast routine for zero-filling blocks of memory that are
1004          * guaranteed to start and end at a 4096-byte aligned address.
1005          */
1006         push %esi                 /* Save temporary registers */
1007         push %edi
1008         mov 16(%esp), %esi        /* Parameter: amount of bytes to fill */
1009         mov 12(%esp), %edi        /* Parameter: start address */
1010         shr $6, %esi              /* Amount of 64-byte blocks to copy */
1011         jz Lend_sse               /* If none, stop */
1012         movups %xmm7, -16(%esp)   /* Save XMM register */
1013         xorps  %xmm7, %xmm7       /* Zero the XMM register */
1014         jmp Lloop_sse
1015         .align align_16byte
1016 Lloop_sse:
1017
1018         /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
1019          * non-caching double-quadword moving variant, i.e. the memory areas
1020          * we're touching are not fetched into the L1 cache, since we're just
1021          * going to overwrite the memory soon anyway.
1022          */
1023         movntdq %xmm7, 0(%edi)
1024         movntdq %xmm7, 16(%edi)
1025         movntdq %xmm7, 32(%edi)
1026         movntdq %xmm7, 48(%edi)
1027
1028         add $64, %edi /* Advance pointer */
1029         dec %esi      /* Decrement 64-byte block count */
1030         jnz Lloop_sse
1031         movups -16(%esp), %xmm7 /* Restore the XMM register */
1032         sfence        /* Ensure that weakly ordered writes are flushed. */
1033 Lend_sse:
1034         mov 12(%esp), %esi      /* Parameter: start address */
1035         prefetcht0 0(%esi)      /* Prefetch the start of the block into cache,
1036                                  * since it's likely to be used immediately. */
1037         pop %edi      /* Restore temp registers */
1038         pop %esi
1039         ret
1040         SIZE(GNAME(fast_bzero_sse))
1041
1042 \f
1043         .text
1044         .align  align_16byte,0x90
1045         .globl GNAME(fast_bzero_base)
1046         TYPE(GNAME(fast_bzero_base))
1047
1048 GNAME(fast_bzero_base):
1049         /* A fast routine for zero-filling blocks of memory that are
1050          * guaranteed to start and end at a 4096-byte aligned address.
1051          */
1052         push %eax                 /* Save temporary registers */
1053         push %ecx
1054         push %edi
1055         mov 20(%esp), %ecx        /* Parameter: amount of bytes to fill */
1056         mov 16(%esp), %edi        /* Parameter: start address */
1057         xor %eax, %eax            /* Zero EAX */
1058         shr $2, %ecx              /* Amount of 4-byte blocks to copy */
1059         jz  Lend_base
1060
1061         rep
1062         stosl                     /* Store EAX to *EDI, ECX times, incrementing
1063                                    * EDI by 4 after each store */
1064
1065 Lend_base:
1066         pop %edi                  /* Restore temp registers */
1067         pop %ecx
1068         pop %eax
1069         ret
1070         SIZE(GNAME(fast_bzero_base))
1071
1072 \f
/* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub
 * the control stack from C, largely due to not knowing where the
 * active stack frame ends.  On such platforms, we reimplement the
 * core scrubbing logic in assembly, in this case here:
 */
        .text
        .align  align_16byte,0x90
        .globl GNAME(arch_scrub_control_stack)
        TYPE(GNAME(arch_scrub_control_stack))
GNAME(arch_scrub_control_stack):
        /* We are passed three parameters:
         * A (struct thread *) at [ESP+4],
         * the address of the guard page at [ESP+8], and
         * the address of the hard guard page at [ESP+12].
         * We may trash EAX, ECX, and EDX with impunity.
         * [ESP] is our return address, [ESP-4] is the first
         * stack slot to scrub.
         *
         * Working downwards from [ESP-4], zero the rest of the current
         * 4KiB page, then keep going for as long as the page below
         * still contains a non-zero word.  Stop without touching the
         * hard guard page, or the guard page while the thread has it
         * marked as protected.  A page occupies the half-open range
         * [page address, page address + os_vm_page_size). */

        /* We start by setting up our scrub pointer in EAX, our
         * guard page upper bound in ECX, and our hard guard
         * page upper bound in EDX. */
        lea     -4(%esp), %eax
        mov     GNAME(os_vm_page_size),%edx
        mov     %edx, %ecx
        add     8(%esp), %ecx
        add     12(%esp), %edx

        /* We need to do a memory operation relative to the
         * thread pointer, so put it in %ecx and our guard
         * page upper bound in 4(%esp). */
        xchg    4(%esp), %ecx

        /* Now we begin our main scrub loop. */
ascs_outer_loop:

        /* If we're about to scrub the hard guard page, exit.
         * The first word of the page (EAX == page address) belongs
         * to the page too, hence JAE rather than JA. */
        cmp     %edx, %eax
        jae     ascs_check_guard_page
        cmp     12(%esp), %eax
        jae     ascs_finished

ascs_check_guard_page:
        /* If we're about to scrub the guard page, and the guard
         * page is protected, exit.  Again the first word of the
         * page counts as inside it, hence JB rather than JBE. */
        cmp     4(%esp), %eax
        jae     ascs_clear_loop
        cmp     8(%esp), %eax
        jb      ascs_clear_loop
        cmpl    $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%ecx)
        jne     ascs_finished

        /* Clear memory backwards to the start of the (4KiB) page */
ascs_clear_loop:
        movl    $0, (%eax)
        test    $0xfff, %eax
        lea     -4(%eax), %eax          /* LEA leaves the flags from TEST alone */
        jnz     ascs_clear_loop

        /* EAX now addresses the top word of the next lower page.
         * If that page is the hard guard page (EAX below its upper
         * bound), exit.  This branch used to be JAE, which left the
         * routine whenever EAX was still above the hard guard page,
         * i.e. practically always, so only the remainder of the
         * first page was ever scrubbed. */
        cmp     %edx, %eax
        jb      ascs_finished

        /* Likewise do not read from the guard page while it is
         * protected; the outer loop would refuse to scrub it anyway. */
        cmp     4(%esp), %eax
        jae     ascs_check_loop
        cmp     8(%esp), %eax
        jb      ascs_check_loop
        cmpl    $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%ecx)
        jne     ascs_finished

        /* If the next lower 4KiB page contains a non-zero
         * word, continue scrubbing from that word. */
ascs_check_loop:
        testl   $-1, (%eax)
        jnz     ascs_outer_loop
        test    $0xfff, %eax
        lea     -4(%eax), %eax
        jnz     ascs_check_loop

        /* A whole page of zeroes: assume everything below is clean. */
ascs_finished:
        ret
        SIZE(GNAME(arch_scrub_control_stack))
1147 \f
1148         END()