2 * very-low-level utilities for runtime support
6 * This software is part of the SBCL system. See the README file for
9 * This software is derived from the CMU CL system, which was
10 * written at Carnegie Mellon University and released into the
11 * public domain. The software is in the public domain and is
12 * provided with absolutely no warranty. See the COPYING and CREDITS
13 * files for more information.
16 #define LANGUAGE_ASSEMBLY
17 #include "genesis/config.h"
20 #include "genesis/closure.h"
21 #include "genesis/funcallable-instance.h"
22 #include "genesis/fdefn.h"
23 #include "genesis/static-symbols.h"
24 #include "genesis/symbol.h"
25 #include "genesis/thread.h"
27 /* Minimize conditionalization for different OS naming schemes. */
28 #if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun || defined _WIN64
29 #define GNAME(var) var
31 #define GNAME(var) _##var
34 /* Get the right type of alignment. Linux, FreeBSD and OpenBSD
35 * want alignment in bytes. */
36 #if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun) || defined _WIN64
39 #define align_16byte 16
40 #define align_32byte 32
41 #define align_page 32768
45 #define align_16byte 4
50 * The assembler used for win32 doesn't like .type or .size directives,
51 * so we want to conditionally kill them out. So let's wrap them in macros
52 * that are defined to be no-ops on win32. Hopefully this still works on
55 #if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
56 #define TYPE(name) .type name,@function
57 #define SIZE(name) .size name,.-name
58 #define DOLLAR(name) $(name)
65 * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
66 * handlers (SIGTRAP or Mach exception handlers) for 0xCC, wo we have
67 * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
68 * 0F 0B in low-endian notation, that causes SIGILL to fire. We check
69 * for this instruction in the SIGILL handler and if we see it, we
70 * advance the EIP by two bytes to skip over ud2 instruction and
71 * call sigtrap_handler. */
72 #if defined(LISP_FEATURE_UD2_BREAKPOINTS)
79 * More Apple assembler hacks
82 #if defined(LISP_FEATURE_DARWIN)
83 /* global symbol x86-64 sym(%rip) hack:*/
84 #define GSYM(name) name(%rip)
87 #define GSYM(name) $name
93 .globl GNAME(all_threads)
97 /* From lower to higher-numbered addresses, the stack contains
98 * return address, arg 0, arg 1, arg 2 ...
99 * rax contains the address of the function to call
100 * Lisp expects return value in rax, whic is already consistent with C
101 * XXXX correct floating point handling is unimplemented so far
102 * Based on comments cleaned from x86-assem.S, we believe that
103 * Lisp is expecting us to preserve rsi, rdi, rsp (no idea about r8-15)
106 .align align_16byte,0x90
107 .globl GNAME(call_into_c)
108 TYPE(GNAME(call_into_c))
110 push %rbp # Save old frame pointer.
111 mov %rsp,%rbp # Establish new frame.
113 push %rsi # args are going in here
126 SIZE(GNAME(call_into_c))
130 .globl GNAME(call_into_lisp_first_time)
131 TYPE(GNAME(call_into_lisp_first_time))
133 /* We don't worry too much about saving registers
134 * here, because we never expect to return from the initial call to lisp
137 .align align_16byte,0x90
138 GNAME(call_into_lisp_first_time):
139 push %rbp # Save old frame pointer.
140 mov %rsp,%rbp # Establish new frame.
141 #if defined(LISP_FEATURE_DARWIN)
142 movq GSYM(GNAME(all_threads)),%rax
144 movq GNAME(all_threads),%rax
146 mov THREAD_CONTROL_STACK_END_OFFSET(%rax) ,%rsp
150 .globl GNAME(call_into_lisp)
151 TYPE(GNAME(call_into_lisp))
154 * amd64 calling convention: C expects that
155 * arguments go in rdi rsi rdx rcx r8 r9
156 * return values in rax rdx
157 * callee saves rbp rbx r12-15 if it uses them
159 #ifdef LISP_FEATURE_WIN32
160 # define SUPPORT_FOMIT_FRAME_POINTER
162 .align align_16byte,0x90
163 GNAME(call_into_lisp):
164 #ifdef SUPPORT_FOMIT_FRAME_POINTER
167 push %rbp # Save old frame pointer.
168 mov %rsp,%rbp # Establish new frame.
170 #ifdef SUPPORT_FOMIT_FRAME_POINTER
171 /* If called through call_into_lisp_first_time, %r15 becomes invalid
172 * here, but we will not return in that case. */
176 /* FIXME x86 saves FPU state here */
177 push %rbx # these regs are callee-saved according to C
178 push %r12 # so must be preserved and restored when
179 push %r13 # the lisp function returns
183 mov %rsp,%rbx # remember current stack
184 push %rbx # Save entry stack on (maybe) new stack.
186 push %rdi # args from C
189 #ifdef LISP_FEATURE_SB_THREAD
190 # ifdef SUPPORT_FOMIT_FRAME_POINTER
193 call GNAME(carry_frame_pointer)
197 #ifdef LISP_FEATURE_GCC_TLS
199 movq GNAME(current_thread)@TPOFF(%rax), %r12
201 #ifdef LISP_FEATURE_DARWIN
202 mov GSYM(GNAME(specials)),%rdi
206 call GNAME(pthread_getspecific)
211 pop %rbx # arg vector
212 pop %rax # function ptr/lexenv
214 xor %rdx,%rdx # clear any descriptor registers
215 xor %rdi,%rdi # that we can't be sure we'll
216 xor %rsi,%rsi # initialise properly. XX do r8-r15 too?
217 shl $(N_FIXNUM_TAG_BITS),%rcx # (fixnumize num-args)
220 mov 0(%rbx),%rdx # arg0
223 mov 8(%rbx),%rdi # arg1
226 mov 16(%rbx),%rsi # arg2
228 /* Registers rax, rcx, rdx, rdi, and rsi are now live. */
229 xor %rbx,%rbx # available
231 /* Alloc new frame. */
232 push %rbp # Dummy for return address
233 push %rbp # fp in save location S1
234 mov %rsp,%rbp # The current sp marks start of new frame.
235 sub $8,%rsp # Ensure 3 slots are allocated, two above.
238 call *CLOSURE_FUN_OFFSET(%rax)
240 /* If the function returned multiple values, it will return to
241 this point. Lose them */
246 /* Restore the stack, in case there was a stack change. */
256 /* FIXME Restore the NPX state. */
258 mov %rdx,%rax # c-val
259 #ifdef SUPPORT_FOMIT_FRAME_POINTER
260 mov %r15,%rbp # orig rbp
262 add $8,%rsp # no need for saved (overridden) rbp
267 SIZE(GNAME(call_into_lisp))
269 /* support for saving and restoring the NPX state from C */
271 .globl GNAME(fpu_save)
272 TYPE(GNAME(fpu_save))
273 .align align_16byte,0x90
275 fnsave (%rdi) # Save the NPX state. (resets NPX)
277 SIZE(GNAME(fpu_save))
279 .globl GNAME(fpu_restore)
280 TYPE(GNAME(fpu_restore))
281 .align align_16byte,0x90
283 frstor (%rdi) # Restore the NPX state.
285 SIZE(GNAME(fpu_restore))
288 * the undefined-function trampoline
291 .align align_16byte,0x90
292 .globl GNAME(undefined_tramp)
293 TYPE(GNAME(undefined_tramp))
294 GNAME(undefined_tramp):
295 pop 8(%rbp) # Save return PC for backtrace.
299 .byte UNDEFINED_FUN_ERROR
300 .byte sc_DescriptorReg # eax in the Descriptor-reg SC
302 SIZE(GNAME(undefined_tramp))
305 .align align_16byte,0x90
306 .globl GNAME(undefined_alien_function)
307 TYPE(GNAME(undefined_alien_function))
308 GNAME(undefined_alien_function):
309 pop 8(%rbp) # Save return PC for backtrace.
313 .byte UNDEFINED_ALIEN_FUN_ERROR
315 FIXME: make independt of the encoding changes. */
320 SIZE(GNAME(undefined_alien_function))
322 /* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs
323 * to know the name of the function immediately following the
324 * undefined-function trampoline. */
326 /* Our call-site does not take care of caller-saved xmm registers, so it
327 * falls to us spill them beforing hopping into C.
329 * We simply save all of them.
331 * (But for the sake of completeness, here is my understanding of the specs:)
333 * argument passing xmm0-7 xmm0-3
334 * caller-saved xmm8-15 xmm4-5
335 * callee-saved - xmm6-15
339 #define stkxmmsave(n) movaps %xmm##n, n*16(%rsp)
340 #define stkxmmload(n) movaps n*16(%rsp), %xmm##n
341 #define map_all_xmm(op) \
342 op(0);op(1);op(2);op(3);op(4);op(5);op(6);op(7); \
343 op(8);op(9);op(10);op(11);op(12);op(13);op(14);op(15);
346 .align align_16byte,0x90
347 .globl GNAME(alloc_tramp)
348 TYPE(GNAME(alloc_tramp))
351 push %rbp # Save old frame pointer.
352 mov %rsp,%rbp # Establish new frame.
355 map_all_xmm(stkxmmsave)
379 map_all_xmm(stkxmmload)
383 SIZE(GNAME(alloc_tramp))
387 * the closure trampoline
390 .align align_16byte,0x90
391 .globl GNAME(closure_tramp)
392 TYPE(GNAME(closure_tramp))
393 GNAME(closure_tramp):
394 mov FDEFN_FUN_OFFSET(%rax),%rax
395 /* FIXME: The '*' after "jmp" in the next line is from PVE's
396 * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
397 * reasonable, and it certainly seems as though if CMU CL needs it,
398 * SBCL needs it too, but I haven't actually verified that it's
399 * right. It would be good to find a way to force the flow of
400 * control through here to test it. */
401 jmp *CLOSURE_FUN_OFFSET(%rax)
402 SIZE(GNAME(closure_tramp))
405 .align align_16byte,0x90
406 .globl GNAME(funcallable_instance_tramp)
407 #if !defined(LISP_FEATURE_DARWIN)
408 TYPE(GNAME(funcallable_instance_tramp))
410 GNAME(funcallable_instance_tramp):
411 mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax
412 /* KLUDGE: on this platform, whatever kind of function is in %rax
413 * now, the first word of it contains the address to jump to. */
414 jmp *CLOSURE_FUN_OFFSET(%rax)
415 #if !defined(LISP_FEATURE_DARWIN)
416 SIZE(GNAME(funcallable_instance_tramp))
419 * fun-end breakpoint magic
423 * For an explanation of the magic involved in function-end
424 * breakpoints, see the implementation in ppc-assem.S.
428 .globl GNAME(fun_end_breakpoint_guts)
430 GNAME(fun_end_breakpoint_guts):
431 /* Multiple Value return */
432 jc multiple_value_return
433 /* Single value return: The eventual return will now use the
434 multiple values return convention but with a return values
436 mov %rsp,%rbx # Setup ebx - the ofp.
437 sub $8,%rsp # Allocate one stack slot for the return value
438 mov $8,%rcx # Setup ecx for one return value.
439 #if defined(LISP_FEATURE_DARWIN)
440 mov GSYM(NIL),%rdi # default second value
441 mov GSYM(NIL),%rsi # default third value
443 mov $NIL,%rdi # default second value
444 mov $NIL,%rsi # default third value
446 multiple_value_return:
448 .globl GNAME(fun_end_breakpoint_trap)
449 .align align_16byte,0x90
450 GNAME(fun_end_breakpoint_trap):
452 .byte trap_FunEndBreakpoint
453 hlt # We should never return here.
455 .globl GNAME(fun_end_breakpoint_end)
456 GNAME(fun_end_breakpoint_end):
459 .globl GNAME(do_pending_interrupt)
460 TYPE(GNAME(do_pending_interrupt))
461 .align align_16byte,0x90
462 GNAME(do_pending_interrupt):
464 .byte trap_PendingInterrupt
466 SIZE(GNAME(do_pending_interrupt))
468 .globl GNAME(post_signal_tramp)
469 TYPE(GNAME(post_signal_tramp))
470 .align align_16byte,0x90
471 GNAME(post_signal_tramp):
472 /* this is notionally the second half of a function whose first half
473 * doesn't exist. This is where call_into_lisp returns when called
474 * using return_to_lisp_function */
485 /* skip RBP and RSP */
493 SIZE(GNAME(post_signal_tramp))
496 .align align_16byte,0x90
497 .globl GNAME(fast_bzero)
498 TYPE(GNAME(fast_bzero))
500 #ifdef LISP_FEATURE_WIN32
502 #define redsave(reg,off) movups reg,-off(%rsp)
503 #define redrestore(reg,off) movups -off(%rsp),reg
506 #define redsave(reg,off)
507 #define redrestore(reg,off)
511 /* A fast routine for zero-filling blocks of memory that are
512 * guaranteed to start and end at a 4096-byte aligned address.
514 shr $6, %rsi /* Amount of 64-byte blocks to copy */
515 jz Lend /* If none, stop */
516 mov %rsi, %rcx /* Save start address */
518 xorps %xmmreg, %xmmreg /* Zero the XMM register */
523 /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
524 * non-caching double-quadword moving variant, i.e. the memory areas
525 * we're touching are not fetched into the L1 cache, since we're just
526 * going to overwrite the memory soon anyway.
528 movntdq %xmmreg, 0(%rdi)
529 movntdq %xmmreg, 16(%rdi)
530 movntdq %xmmreg, 32(%rdi)
531 movntdq %xmmreg, 48(%rdi)
533 add $64, %rdi /* Advance pointer */
534 dec %rsi /* Decrement 64-byte block count */
536 mfence /* Ensure that the writes are globally visible, since
537 * MOVNTDQ is weakly ordered */
538 redrestore(%xmmreg,16)
539 prefetcht0 0(%rcx) /* Prefetch the start of the block into cache,
540 * since it's likely to be used immediately. */
543 SIZE(GNAME(fast_bzero))
546 /* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub
547 * the control stack from C, largely due to not knowing where the
548 * active stack frame ends. On such platforms, we reimplement the
549 * core scrubbing logic in assembly, in this case here:
552 .align align_16byte,0x90
553 .globl GNAME(arch_scrub_control_stack)
554 TYPE(GNAME(arch_scrub_control_stack))
555 GNAME(arch_scrub_control_stack):
556 /* We are passed three parameters:
557 * A (struct thread *) in RDI,
558 * the address of the guard page in RSI, and
559 * the address of the hard guard page in RDX.
560 * We may trash RAX, RCX, and R8-R11 with impunity.
561 * [RSP] is our return address, [RSP-8] is the first
562 * stack slot to scrub. */
564 /* We start by setting up our scrub pointer in RAX, our
565 * guard page upper bound in R8, and our hard guard
566 * page upper bound in R9. */
568 #ifdef LISP_FEATURE_DARWIN
569 mov GSYM(GNAME(os_vm_page_size)),%r9
571 mov os_vm_page_size,%r9
576 /* Now we begin our main scrub loop. */
579 /* If we're about to scrub the hard guard page, exit. */
581 jae ascs_check_guard_page
585 ascs_check_guard_page:
586 /* If we're about to scrub the guard page, and the guard
587 * page is protected, exit. */
592 cmpq $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%rdi)
595 /* Clear memory backwards to the start of the (4KiB) page */
602 /* If we're about to hit the hard guard page, exit. */
606 /* If the next (previous?) 4KiB page contains a non-zero
607 * word, continue scrubbing. */
617 SIZE(GNAME(arch_scrub_control_stack))