2 * very-low-level utilities for runtime support
6 * This software is part of the SBCL system. See the README file for
9 * This software is derived from the CMU CL system, which was
10 * written at Carnegie Mellon University and released into the
11 * public domain. The software is in the public domain and is
12 * provided with absolutely no warranty. See the COPYING and CREDITS
13 * files for more information.
16 #define LANGUAGE_ASSEMBLY
17 #include "genesis/config.h"
20 #include "genesis/closure.h"
21 #include "genesis/funcallable-instance.h"
22 #include "genesis/fdefn.h"
23 #include "genesis/static-symbols.h"
24 #include "genesis/symbol.h"
25 #include "genesis/thread.h"
/* Platform-dependent helper macros: symbol naming (GNAME), operands for
 * .align (align_16byte / align_32byte), and wrappers around the .type and
 * .size directives (TYPE / SIZE). */
27 /* Minimize conditionalization for different OS naming schemes. */
28 #if defined __linux__ || defined __FreeBSD__ /* (but *not* OpenBSD) */
/* Linux/FreeBSD: GNAME(var) expands to the bare symbol name. */
29 #define GNAME(var) var
/* Alternative definition: GNAME(var) pastes a leading underscore onto the
 * symbol name. */
31 #define GNAME(var) _##var
34 /* Get the right type of alignment. Linux and FreeBSD (but not OpenBSD)
35 * want alignment in bytes. */
36 #if defined(__linux__) || defined(__FreeBSD__)
/* Alignment expressed as byte counts. */
39 #define align_16byte 16
40 #define align_32byte 32
/* Alternative value 4 -- presumably log2(16), for assemblers whose .align
 * operand is a power-of-two exponent; TODO confirm. */
44 #define align_16byte 4
48 * The assembler used for win32 doesn't like .type or .size directives,
49 * so we want to conditionally kill them out. So let's wrap them in macros
50 * that are defined to be no-ops on win32. Hopefully this still works on
53 #if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
/* TYPE tags a symbol as a function; SIZE records the symbol's size as the
 * distance from the symbol to the current location; DOLLAR wraps its
 * argument as $(name). */
54 #define TYPE(name) .type name,@function
55 #define SIZE(name) .size name,.-name
56 #define DOLLAR(name) $(name)
63 * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
64 * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
65 * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
66 * 0F 0B in little-endian notation, that causes SIGILL to fire. We check
67 * for this instruction in the SIGILL handler and if we see it, we
68 * advance the EIP by two bytes to skip over the ud2 instruction and
69 * call sigtrap_handler. */
70 #if defined(LISP_FEATURE_DARWIN)
77 * More Apple assembler hacks
80 #if defined(LISP_FEATURE_DARWIN)
81 /* global symbol x86-64 sym(%rip) hack:*/
/* Darwin: GSYM(name) expands to a RIP-relative memory operand. */
82 #define GSYM(name) name(%rip)
/* Alternative definition: GSYM(name) expands to the immediate $name. */
85 #define GSYM(name) $name
/* Declare all_threads as a global symbol; its value is loaded by
 * call_into_lisp_first_time. */
91 .globl GNAME(all_threads)
95 /* From lower to higher-numbered addresses, the stack contains
96 * return address, arg 0, arg 1, arg 2 ...
97 * rax contains the address of the function to call
98 * Lisp expects return value in rax, which is already consistent with C
99 * XXXX correct floating point handling is unimplemented so far
100 * Based on comments cleaned from x86-assem.S, we believe that
101 * Lisp is expecting us to preserve rsi, rdi, rsp (no idea about r8-15)
/* call_into_c entry: padded to an align_16byte boundary with 0x90 filler
 * bytes, declared global, and tagged as a function through TYPE. */
104 .align align_16byte,0x90
105 .globl GNAME(call_into_c)
106 TYPE(GNAME(call_into_c))
/* Prologue: build a conventional %rbp-based frame. */
108 push %rbp # Save old frame pointer.
109 mov %rsp,%rbp # Establish new frame.
/* Preserve %rsi on the stack; per the note on the instruction it is about
 * to be used for arguments. */
111 push %rsi # args are going in here
/* SIZE records the symbol size where the .size directive is supported. */
124 SIZE(GNAME(call_into_c))
/* call_into_lisp_first_time: used for the initial call into Lisp.
 * Steps performed by the instructions below:
 *   - build an %rbp frame;
 *   - store the current %rsp at ALIEN_STACK + SYMBOL_VALUE_OFFSET;
 *   - load the value of all_threads into %rax (via GSYM on Darwin);
 *   - load %rsp from THREAD_CONTROL_STACK_START_OFFSET(%rax) and then add
 *     THREAD_CONTROL_STACK_SIZE - 16 to it.
 * NOTE(review): presumably this switches to the high end of the first
 * thread's control stack -- confirm against the thread structure layout. */
128 .globl GNAME(call_into_lisp_first_time)
129 TYPE(GNAME(call_into_lisp_first_time))
131 /* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
132 * the stack changes. We don't worry too much about saving registers
133 * here, because we never expect to return from the initial call to lisp
136 .align align_16byte,0x90
137 GNAME(call_into_lisp_first_time):
138 push %rbp # Save old frame pointer.
139 mov %rsp,%rbp # Establish new frame.
140 mov %rsp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
141 #if defined(LISP_FEATURE_DARWIN)
142 movq GSYM(GNAME(all_threads)),%rax
144 movq GNAME(all_threads),%rax
146 mov THREAD_CONTROL_STACK_START_OFFSET(%rax) ,%rsp
147 /* don't think too hard about what happens if we get interrupted
149 add $(THREAD_CONTROL_STACK_SIZE)-16,%rsp
/* call_into_lisp: enter a Lisp function from C.
 * Register roles, as used by the instructions below:
 *   %rdi - pushed on entry ("args from C")
 *   %rcx - argument count, shifted left by 3 ("fixnumize")
 *   %rbx - popped as the argument vector; arg0..arg2 are loaded from
 *          offsets 0, 8 and 16 into %rdx, %rdi and %rsi
 *   %rax - popped as the function object; the call goes through the
 *          address stored at CLOSURE_FUN_OFFSET(%rax), and %rax also
 *          carries the return value
 *   %r12 - receives current_thread in the GCC_TLS configuration
 * C callee-saved registers are pushed before Lisp is entered. */
153 .globl GNAME(call_into_lisp)
154 TYPE(GNAME(call_into_lisp))
157 * amd64 calling convention: C expects that
158 * arguments go in rdi rsi rdx rcx r8 r9
159 * return values in rax rdx
160 * callee saves rbp rbx r12-15 if it uses them
163 .align align_16byte,0x90
164 GNAME(call_into_lisp):
165 push %rbp # Save old frame pointer.
166 mov %rsp,%rbp # Establish new frame.
168 /* FIXME x86 saves FPU state here */
169 push %rbx # these regs are callee-saved according to C
170 push %r12 # so must be preserved and restored when
171 push %r13 # the lisp function returns
/* Keep the entry %rsp in %rbx and push it, so the stack can be restored
 * after the Lisp call. */
175 mov %rsp,%rbx # remember current stack
176 push %rbx # Save entry stack on (maybe) new stack.
178 push %rdi # args from C
/* Threaded builds: obtain the current thread, either from thread-local
 * storage (GCC_TLS) or by calling pthread_getspecific with specials. */
181 #ifdef LISP_FEATURE_SB_THREAD
182 #ifdef LISP_FEATURE_GCC_TLS
184 movq GNAME(current_thread)@TPOFF(%rax), %r12
186 #ifdef LISP_FEATURE_DARWIN
187 mov GSYM(GNAME(specials)),%rdi
191 call GNAME(pthread_getspecific)
196 pop %rbx # arg vector
197 pop %rax # function ptr/lexenv
199 xor %rdx,%rdx # clear any descriptor registers
200 xor %rdi,%rdi # that we can't be sure we'll
201 xor %rsi,%rsi # initialise properly. XX do r8-r15 too?
/* Shift left by 3, i.e. multiply the argument count by 8. */
202 shl $3,%rcx # (fixnumize num-args)
205 mov 0(%rbx),%rdx # arg0
208 mov 8(%rbx),%rdi # arg1
211 mov 16(%rbx),%rsi # arg2
213 /* Registers rax, rcx, rdx, rdi, and rsi are now live. */
214 xor %rbx,%rbx # available
216 /* Alloc new frame. */
/* %rbx captures %rsp as the base of the new frame; the old %rbp is pushed,
 * 16 more bytes (two 8-byte slots) are reserved, then %rbp takes the new
 * base. */
217 mov %rsp,%rbx # The current sp marks start of new frame.
218 push %rbp # fp in save location S0
219 sub $16,%rsp # Ensure 3 slots are allocated, one above.
220 mov %rbx,%rbp # Switch to new frame.
/* Indirect call through the code address stored in the function object. */
223 call *CLOSURE_FUN_OFFSET(%rax)
225 /* If the function returned multiple values, it will return to
226 this point. Lose them */
231 /* Restore the stack, in case there was a stack change. */
241 /* FIXME Restore the NPX state. */
243 /* return value is already in rax where lisp expects it */
246 SIZE(GNAME(call_into_lisp))
248 /* support for saving and restoring the NPX state from C */
/* fpu_save: write the x87 (NPX) state to the buffer addressed by %rax
 * with fnsave, which also reinitialises the FPU.
 * NOTE(review): confirm that %rax holds the caller's buffer pointer at
 * this point; the amd64 C convention passes the first argument in %rdi. */
250 .globl GNAME(fpu_save)
251 TYPE(GNAME(fpu_save))
255 fnsave (%rax) # Save the NPX state. (resets NPX)
257 SIZE(GNAME(fpu_save))
/* fpu_restore: reload the x87 (NPX) state from the buffer addressed by
 * %rax with frstor.
 * NOTE(review): confirm that %rax holds the caller's buffer pointer at
 * this point; the amd64 C convention passes the first argument in %rdi. */
259 .globl GNAME(fpu_restore)
260 TYPE(GNAME(fpu_restore))
264 frstor (%rax) # Restore the NPX state.
266 SIZE(GNAME(fpu_restore))
269 * the undefined-function trampoline
/* undefined_tramp: exported trampoline placed on an align_16byte boundary.
 * The two .byte directives emit inline data rather than instructions: the
 * UNDEFINED_FUN_ERROR code followed by the sc_DescriptorReg storage-class
 * byte.
 * NOTE(review): presumably these bytes are decoded by the trap handler as
 * the error number and its register argument -- confirm. */
272 .align align_16byte,0x90
273 .globl GNAME(undefined_tramp)
274 TYPE(GNAME(undefined_tramp))
275 GNAME(undefined_tramp):
279 .byte UNDEFINED_FUN_ERROR
280 .byte sc_DescriptorReg # eax in the Descriptor-reg SC
282 SIZE(GNAME(undefined_tramp))
/* alloc_tramp: exported routine placed on an align_16byte boundary.  It
 * opens with a conventional %rbp frame set-up. */
286 .align align_16byte,0x90
287 .globl GNAME(alloc_tramp)
288 TYPE(GNAME(alloc_tramp))
290 push %rbp # Save old frame pointer.
291 mov %rsp,%rbp # Establish new frame.
315 SIZE(GNAME(alloc_tramp))
319 * the closure trampoline
/* closure_tramp: replace %rax with the word stored at FDEFN_FUN_OFFSET
 * from it (presumably %rax addresses an fdefn on entry -- confirm), then
 * jump, without pushing a return address, to the code address stored at
 * CLOSURE_FUN_OFFSET of the loaded object. */
322 .align align_16byte,0x90
323 .globl GNAME(closure_tramp)
324 TYPE(GNAME(closure_tramp))
325 GNAME(closure_tramp):
326 mov FDEFN_FUN_OFFSET(%rax),%rax
327 /* FIXME: The '*' after "jmp" in the next line is from PVE's
328 * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
329 * reasonable, and it certainly seems as though if CMU CL needs it,
330 * SBCL needs it too, but I haven't actually verified that it's
331 * right. It would be good to find a way to force the flow of
332 * control through here to test it. */
333 jmp *CLOSURE_FUN_OFFSET(%rax)
334 SIZE(GNAME(closure_tramp))
/* funcallable_instance_tramp: replace %rax with the word stored at
 * FUNCALLABLE_INSTANCE_FUNCTION_OFFSET from it, then jump to the code
 * address stored at CLOSURE_FUN_OFFSET of the loaded object.
 * NOTE(review): .type/.size are written out here under
 * !LISP_FEATURE_DARWIN only, whereas the TYPE/SIZE macros are also
 * disabled for LISP_FEATURE_WIN32 -- consider using the macros for
 * consistency with the other routines. */
337 .align align_16byte,0x90
338 .globl GNAME(funcallable_instance_tramp)
339 #if !defined(LISP_FEATURE_DARWIN)
340 .type GNAME(funcallable_instance_tramp),@function
342 GNAME(funcallable_instance_tramp):
343 mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax
344 /* KLUDGE: on this platform, whatever kind of function is in %rax
345 * now, the first word of it contains the address to jump to. */
346 jmp *CLOSURE_FUN_OFFSET(%rax)
347 #if !defined(LISP_FEATURE_DARWIN)
348 .size GNAME(funcallable_instance_tramp), .-GNAME(funcallable_instance_tramp)
351 * fun-end breakpoint magic
/* Function-end breakpoint code sequence, bracketed by three exported
 * labels: fun_end_breakpoint_guts (start), fun_end_breakpoint_trap and
 * fun_end_breakpoint_end (end).
 * On entry the carry flag selects the path: when set, control jumps
 * straight to multiple_value_return.  Otherwise the single value is put
 * into multiple-value form: %rbx takes the current %rsp, one 8-byte stack
 * slot is reserved, %rcx is set to 8 (noted as "one return value"), and
 * %rdi / %rsi receive NIL as the default second and third values.
 * After the trap label the byte trap_FunEndBreakpoint is emitted as inline
 * data, followed by hlt because control should never come back here.
 * NOTE(review): the Darwin branch loads NIL through GSYM(NIL), which is a
 * RIP-relative memory operand, while the other branch uses the immediate
 * $NIL -- confirm the two are meant to be equivalent. */
354 .globl GNAME(fun_end_breakpoint_guts)
356 GNAME(fun_end_breakpoint_guts):
357 /* Multiple Value return */
358 jc multiple_value_return
359 /* Single value return: The eventual return will now use the
360 multiple values return convention but with a return values
362 mov %rsp,%rbx # Setup ebx - the ofp.
363 sub $8,%rsp # Allocate one stack slot for the return value
364 mov $8,%rcx # Setup ecx for one return value.
365 #if defined(LISP_FEATURE_DARWIN)
366 mov GSYM(NIL),%rdi # default second value
367 mov GSYM(NIL),%rsi # default third value
369 mov $NIL,%rdi # default second value
370 mov $NIL,%rsi # default third value
372 multiple_value_return:
374 .globl GNAME(fun_end_breakpoint_trap)
375 GNAME(fun_end_breakpoint_trap):
377 .byte trap_FunEndBreakpoint
378 hlt # We should never return here.
380 .globl GNAME(fun_end_breakpoint_end)
381 GNAME(fun_end_breakpoint_end):
/* do_pending_interrupt: exported routine on an align_16byte boundary.  The
 * .byte directive emits trap_PendingInterrupt as inline data.
 * NOTE(review): presumably the byte is the trap code read by the signal
 * handler after a trap instruction -- confirm. */
384 .globl GNAME(do_pending_interrupt)
385 TYPE(GNAME(do_pending_interrupt))
386 .align align_16byte,0x90
387 GNAME(do_pending_interrupt):
389 .byte trap_PendingInterrupt
391 SIZE(GNAME(do_pending_interrupt))
/* post_signal_tramp: exported label on an align_16byte boundary.  It is
 * reached by a return rather than by a call, as described in the note
 * after the label. */
393 .globl GNAME(post_signal_tramp)
394 TYPE(GNAME(post_signal_tramp))
395 .align align_16byte,0x90
396 GNAME(post_signal_tramp):
397 /* this is notionally the second half of a function whose first half
398 * doesn't exist. This is where call_into_lisp returns when called
399 * using return_to_lisp_function */
410 /* skip RBP and RSP */
418 SIZE(GNAME(post_signal_tramp))
/* fast_bzero: zero-fill a block of memory using non-temporal stores.
 *   In:   %rdi = start address (each iteration stores to 0..48(%rdi))
 *         %rsi = length in bytes; shifted right by 6 to give the number
 *                of 64-byte blocks
 *   Uses: %rcx  = copy of the start address, kept for the final prefetch
 *         %xmm7 = all-zero source; its previous contents are parked at
 *                 -16(%rsp), below the stack pointer, and restored
 *                 before returning
 *   Clobbers: %rcx, %rsi, %rdi, flags.
 * %rcx has to be loaded from %rdi, not %rsi: once the shift has been done
 * %rsi holds the block count, and the closing prefetcht0 is meant to touch
 * the start of the freshly zeroed block. */
421 .align align_16byte,0x90
422 .globl GNAME(fast_bzero)
423 TYPE(GNAME(fast_bzero))
426 /* A fast routine for zero-filling blocks of memory that are
427 * guaranteed to start and end at a 4096-byte aligned address.
429 shr $6, %rsi /* Amount of 64-byte blocks to copy */
430 jz Lend /* If none, stop */
431 mov %rdi, %rcx /* Save start address */
432 movups %xmm7, -16(%rsp) /* Save XMM register */
433 xorps %xmm7, %xmm7 /* Zero the XMM register */
438 /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
439 * non-caching double-quadword moving variant, i.e. the memory areas
440 * we're touching are not fetched into the L1 cache, since we're just
441 * going to overwrite the memory soon anyway.
443 movntdq %xmm7, 0(%rdi)
444 movntdq %xmm7, 16(%rdi)
445 movntdq %xmm7, 32(%rdi)
446 movntdq %xmm7, 48(%rdi)
448 add $64, %rdi /* Advance pointer */
449 dec %rsi /* Decrement 64-byte block count */
451 mfence /* Ensure that the writes are globally visible, since
452 * MOVNTDQ is weakly ordered */
453 movups -16(%rsp), %xmm7 /* Restore the XMM register */
454 prefetcht0 0(%rcx) /* Prefetch the start of the block into cache,
455 * since it's likely to be used immediately. */
458 SIZE(GNAME(fast_bzero))