2 * very-low-level utilities for runtime support
6 * This software is part of the SBCL system. See the README file for
9 * This software is derived from the CMU CL system, which was
10 * written at Carnegie Mellon University and released into the
11 * public domain. The software is in the public domain and is
12 * provided with absolutely no warranty. See the COPYING and CREDITS
13 * files for more information.
16 #define LANGUAGE_ASSEMBLY
19 #include "genesis/closure.h"
20 #include "genesis/funcallable-instance.h"
21 #include "genesis/fdefn.h"
22 #include "genesis/static-symbols.h"
23 #include "genesis/symbol.h"
24 #include "genesis/thread.h"
26 /* Minimize conditionalization for different OS naming schemes. */
/* GNAME(var) spells the assembler-level name of the C symbol `var`:
 * the name unchanged in the Linux/FreeBSD definition, the name with a
 * leading underscore in the other definition.
 * NOTE(review): no #else/#endif is visible around the two GNAME
 * definitions in this excerpt -- confirm against the complete file. */
27 #if defined __linux__ || defined __FreeBSD__ /* (but *not* OpenBSD) */
28 #define GNAME(var) var
30 #define GNAME(var) _##var
33 /* Get the right type of alignment. Linux and FreeBSD (but not OpenBSD)
34 * want alignment in bytes. */
/* The align_Nbyte macros are the operands passed to .align to request an
 * N-byte boundary.  In the Linux/FreeBSD branch they are byte counts
 * (16, 32).  The second align_16byte value, 4, is log2(16) -- presumably
 * for assemblers whose .align takes a power-of-two exponent; confirm.
 * NOTE(review): align_8byte is used by later .align directives, but its
 * definition (and the #else/#endif of this conditional) is not visible
 * in this excerpt. */
35 #if defined(__linux__) || defined(__FreeBSD__)
38 #define align_16byte 16
39 #define align_32byte 32
43 #define align_16byte 4
/* Declare these two symbols global.  all_threads is read by
 * call_into_lisp_first_time below; neither symbol is defined in the
 * part of the file shown here. */
47 .global GNAME(foreign_function_call_active)
48 .global GNAME(all_threads)
51 /* From lower to higher-numbered addresses, the stack contains
52 * return address, arg 0, arg 1, arg 2 ...
53 * rax contains the address of the function to call
54 * Lisp expects return value in rax, which is already consistent with C
55 * XXXX correct floating point handling is unimplemented so far
56 * Based on comments cleaned from x86-assem.S, we believe that
57 * Lisp is expecting us to preserve rsi, rdi, rsp (no idea about r8-15)
 */
/* call_into_c: trampoline used to call a C function.
 * The visible part sets up a standard %rbp frame and saves %rsi before
 * it is reused ("args are going in here").
 * NOTE(review): the entry label and every instruction between the %rsi
 * push and the .size directive are not visible in this excerpt. */
60 .align align_16byte,0x90
61 .global GNAME(call_into_c)
62 .type GNAME(call_into_c),@function
64 /* ABI requires that the direction flag be clear on function
 * entry and exit. */
68 push %rbp # Save old frame pointer.
69 mov %rsp,%rbp # Establish new frame.
71 push %rsi # args are going in here
/* .size records the byte length of the function: current location minus
 * the address of its label. */
84 .size GNAME(call_into_c), . - GNAME(call_into_c)
/* call_into_lisp_first_time: entry used for the initial call into Lisp.
 * It records the current stack pointer as the value of ALIEN_STACK and
 * then moves %rsp onto the control stack of the thread structure that
 * all_threads points to.
 * NOTE(review): nothing after the final `add` is visible in this excerpt,
 * so how control continues from here cannot be seen. */
88 .global GNAME(call_into_lisp_first_time)
89 .type GNAME(call_into_lisp_first_time),@function
91 /* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
92 * the stack changes. We don't worry too much about saving registers
93 * here, because we never expect to return from the initial call to lisp
 */
96 .align align_16byte,0x90
97 GNAME(call_into_lisp_first_time):
98 push %rbp # Save old frame pointer.
99 mov %rsp,%rbp # Establish new frame.
/* Store the current stack pointer into the value slot of the ALIEN_STACK
 * symbol (symbol address plus SYMBOL_VALUE_OFFSET). */
100 mov %rsp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
/* rax = contents of all_threads; rsp = the control-stack start field of
 * the structure rax points to. */
101 mov GNAME(all_threads),%rax
102 mov THREAD_CONTROL_STACK_START_OFFSET(%rax) ,%rsp
103 /* don't think too hard about what happens if we get interrupted
 * here. */
/* rsp = control stack start + THREAD_CONTROL_STACK_SIZE - 16, i.e. 16
 * bytes below the high end of the control stack. */
105 add $THREAD_CONTROL_STACK_SIZE-16,%rsp
/* call_into_lisp: C-callable entry that invokes a Lisp function.
 *
 * Visible flow:
 *   1. build a %rbp frame and save C callee-saved registers;
 *   2. remember the entry stack pointer and push the incoming C arguments;
 *   3. pop them back into the registers Lisp uses: %rax = function
 *      ptr/lexenv, %rbx = argument vector, %rcx = argument count,
 *      converted with a left shift by 3;
 *   4. load up to three arguments from the vector into %rdx, %rdi, %rsi;
 *   5. allocate a new frame and call through CLOSURE_FUN_OFFSET(%rax).
 *
 * NOTE(review): this excerpt omits lines throughout the function (for
 * example further pushes/pops, the argument-count checks between the
 * three argument loads, and the whole epilogue), so the list above is
 * only what can be read from the lines shown.
 */
109 .global GNAME(call_into_lisp)
110 .type GNAME(call_into_lisp),@function
/*
113 * amd64 calling convention: C expects that
114 * arguments go in rdi rsi rdx rcx r8 r9
115 * return values in rax rdx
116 * callee saves rbp rbx r12-15 if it uses them
 */
119 .align align_16byte,0x90
120 GNAME(call_into_lisp):
121 push %rbp # Save old frame pointer.
122 mov %rsp,%rbp # Establish new frame.
124 /* FIXME x86 saves FPU state here */
125 push %rbx # these regs are callee-saved according to C
126 push %r12 # so must be preserved and restored when
127 push %r13 # the lisp function returns
/* Keep the stack pointer as it is at this point, so it can be restored
 * after the Lisp call even if the stack was switched. */
131 mov %rsp,%rbx # remember current stack
132 push %rbx # Save entry stack on (maybe) new stack.
/* The C arguments are parked on the stack and popped back below into
 * different registers. */
134 push %rdi # args from C
/* Thread-enabled builds call pthread_getspecific here.
 * NOTE(review): the argument setup, the use of the result and the
 * matching #endif are not visible in this excerpt. */
137 #ifdef LISP_FEATURE_SB_THREAD
139 call pthread_getspecific
143 pop %rbx # arg vector
144 pop %rax # function ptr/lexenv
146 xor %rdx,%rdx # clear any descriptor registers
147 xor %rdi,%rdi # that we can't be sure we'll
148 xor %rsi,%rsi # initialise properly. XX do r8-r15 too?
/* Shifting left by 3 multiplies the count by 8; the original comment
 * calls this fixnumizing. */
149 shl $3,%rcx # (fixnumize num-args)
/* The first three arguments are read from consecutive 8-byte slots of
 * the argument vector. */
152 mov 0(%rbx),%rdx # arg0
155 mov 8(%rbx),%rdi # arg1
158 mov 16(%rbx),%rsi # arg2
160 /* Registers rax, rcx, rdx, rdi, and rsi are now live. */
161 xor %rbx,%rbx # available
163 /* Alloc new frame. */
164 mov %rsp,%rbx # The current sp marks start of new frame.
165 push %rbp # fp in save location S0
166 sub $16,%rsp # Ensure 3 slots are allocated, one above.
167 mov %rbx,%rbp # Switch to new frame.
/* Indirect call: the target address is read from the word at
 * CLOSURE_FUN_OFFSET from the object in %rax. */
170 call *CLOSURE_FUN_OFFSET(%rax)
172 /* If the function returned multiple values, it will return to
173 this point. Lose them */
178 /* Restore the stack, in case there was a stack change. */
188 /* ABI requires that the direction flag be clear on function
 * entry and exit. */
192 /* FIXME Restore the NPX state. */
194 /* return value is already in rax where lisp expects it */
197 .size GNAME(call_into_lisp), . - GNAME(call_into_lisp)
199 /* support for saving and restoring the NPX state from C */
/* fpu_save: store the NPX (x87 FPU) state into the memory that %rax
 * points to.  As the original comment notes, FNSAVE also resets the NPX
 * after storing its state.
 * NOTE(review): the entry label, the instruction that loads %rax with
 * the save-area pointer, and the return are not visible in this
 * excerpt. */
201 .global GNAME(fpu_save)
202 .type GNAME(fpu_save),@function
206 fnsave (%rax) # Save the NPX state. (resets NPX)
208 .size GNAME(fpu_save),.-GNAME(fpu_save)
/* fpu_restore: reload the NPX (x87 FPU) state from the memory that %rax
 * points to; the counterpart of fpu_save.
 * NOTE(review): the entry label, the instruction that loads %rax with
 * the save-area pointer, and the return are not visible in this
 * excerpt. */
210 .global GNAME(fpu_restore)
211 .type GNAME(fpu_restore),@function
215 frstor (%rax) # Restore the NPX state.
217 .size GNAME(fpu_restore),.-GNAME(fpu_restore)
220 * the undefined-function trampoline
/* undefined_tramp: the undefined-function trampoline.
 * The visible body consists of data bytes placed directly in the code
 * stream: the error number UNDEFINED_FUN_ERROR followed by a
 * storage-class byte.
 * NOTE(review): the lines between the label and the first .byte, and
 * anything after the last .byte, are not visible in this excerpt;
 * presumably these bytes are operands of a trap -- confirm. */
223 .align align_8byte,0x90
224 .global GNAME(undefined_tramp)
225 .type GNAME(undefined_tramp),@function
226 GNAME(undefined_tramp):
230 .byte UNDEFINED_FUN_ERROR
231 .byte sc_DescriptorReg # eax in the Descriptor-reg SC
233 .size GNAME(undefined_tramp), .-GNAME(undefined_tramp)
/* alloc_tramp: only the frame setup of this routine is visible here.
 * NOTE(review): the entry label and everything between the prologue and
 * the .size directive are missing from this excerpt, so what the routine
 * does beyond establishing a frame cannot be stated from these lines. */
237 .align align_8byte,0x90
238 .global GNAME(alloc_tramp)
239 .type GNAME(alloc_tramp),@function
241 push %rbp # Save old frame pointer.
242 mov %rsp,%rbp # Establish new frame.
266 .size GNAME(alloc_tramp),.-GNAME(alloc_tramp)
270 * the closure trampoline
/* closure_tramp: replace %rax with the function stored at
 * FDEFN_FUN_OFFSET in the object it refers to (an fdefn, going by the
 * offset name), then jump to the address held at CLOSURE_FUN_OFFSET in
 * that function.  No frame is built and nothing is pushed, so the
 * target sees the stack exactly as this trampoline received it. */
273 .align align_8byte,0x90
274 .globl GNAME(closure_tramp)
275 .type GNAME(closure_tramp),@function
276 GNAME(closure_tramp):
277 movq FDEFN_FUN_OFFSET(%rax), %rax /* rax = function held by the object */
278 /* FIXME: The '*' on the jump below (indirect through memory) comes
279 * from PVE's patch posted to the CMU CL mailing list on Oct 6, 1999.
280 * It looks reasonable, and if CMU CL needs it then SBCL surely does
281 * too, but it has not actually been verified to be right.  It would
282 * be good to find a way to force control flow through this
283 * trampoline in order to test it. */
284 jmpq *CLOSURE_FUN_OFFSET(%rax)
285 .size GNAME(closure_tramp), . - GNAME(closure_tramp)
/* funcallable_instance_tramp: replace %rax with the function stored at
 * FUNCALLABLE_INSTANCE_FUNCTION_OFFSET in the object it refers to (a
 * funcallable instance, going by the offset name), then jump to the
 * address held at CLOSURE_FUN_OFFSET in that function.  The stack is
 * left untouched. */
288 .align align_8byte,0x90
289 .globl GNAME(funcallable_instance_tramp)
290 .type GNAME(funcallable_instance_tramp),@function
291 GNAME(funcallable_instance_tramp):
292 movq FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax), %rax
293 /* KLUDGE: whatever kind of function %rax refers to now, on this
294 * platform its first word contains the address to jump to. */
295 jmpq *CLOSURE_FUN_OFFSET(%rax)
296 .size GNAME(funcallable_instance_tramp), . - GNAME(funcallable_instance_tramp)
299 * fun-end breakpoint magic
/* fun_end_breakpoint_guts .. fun_end_breakpoint_end: code sequence for
 * function-end breakpoints, delimited by three global labels (guts =
 * start, trap = location of the trap bytes, end = first address past the
 * sequence).
 *
 * On entry the carry flag selects the return convention: if it is set
 * the values are already in multiple-value form and the conversion is
 * skipped.  Otherwise the single value is turned into a one-value
 * multiple-value return before reaching the trap.
 * NOTE(review): some lines of the sequence (for example between the trap
 * label and the .byte) are not visible in this excerpt. */
302 .global GNAME(fun_end_breakpoint_guts)
304 GNAME(fun_end_breakpoint_guts):
305 /* Multiple Value return */
306 jc multiple_value_return
307 /* Single value return: The eventual return will now use the
308 multiple values return convention but with a return values
 count of one. */
/* The comments below say ebx/ecx; the instructions use the 64-bit
 * registers rbx/rcx.  The count is written as 8, i.e. 1 shifted left by
 * 3, the same encoding call_into_lisp applies to its argument count. */
310 mov %rsp,%rbx # Setup ebx - the ofp.
311 sub $8,%rsp # Allocate one stack slot for the return value
312 mov $8,%rcx # Setup ecx for one return value.
313 mov $NIL,%rdi # default second value
314 mov $NIL,%rsi # default third value
316 multiple_value_return:
318 .global GNAME(fun_end_breakpoint_trap)
319 GNAME(fun_end_breakpoint_trap):
/* Trap code byte emitted inline; presumably the operand of a trap
 * instruction on a line not shown -- confirm. */
321 .byte trap_FunEndBreakpoint
322 hlt # We should never return here.
324 .global GNAME(fun_end_breakpoint_end)
325 GNAME(fun_end_breakpoint_end):
/* do_pending_interrupt: the visible body is a single inline data byte,
 * trap_PendingInterrupt.
 * NOTE(review): the lines directly before and after the .byte are not
 * visible in this excerpt; presumably the byte is the operand of a trap
 * instruction and is followed by a return -- confirm. */
328 .global GNAME(do_pending_interrupt)
329 .type GNAME(do_pending_interrupt),@function
330 .align align_8byte,0x90
331 GNAME(do_pending_interrupt):
333 .byte trap_PendingInterrupt
335 .size GNAME(do_pending_interrupt),.-GNAME(do_pending_interrupt)
/* post_signal_tramp: return target described by the comment below.
 * NOTE(review): none of the instructions of this routine are visible in
 * this excerpt; only the "skip RBP and RSP" remark survives, which
 * suggests a sequence of register restores that leaves those two
 * registers out -- confirm against the complete file. */
337 .globl GNAME(post_signal_tramp)
338 .type GNAME(post_signal_tramp),@function
339 .align align_8byte,0x90
340 GNAME(post_signal_tramp):
341 /* this is notionally the second half of a function whose first half
342 * doesn't exist. This is where call_into_lisp returns when called
343 * using return_to_lisp_function */
354 /* skip RBP and RSP */
362 .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp)
/* fast_bzero: zero-fill a block of memory using non-temporal stores.
 *
 * In:   %rdi = start address of the block
 *       %rsi = length in bytes; it is divided by 64 and only whole
 *              64-byte chunks are cleared
 * Uses: %rcx (copy of the start address), %rdi and %rsi (advanced and
 *       counted down), flags.  %xmm7 is used as the zero source; its
 *       old value is kept in the 16 bytes just below %rsp and restored
 *       before the routine finishes.
 * NOTE(review): the entry label, the loop label and its backward branch,
 * the Lend label and the final return are not visible in this excerpt.
 */
365 .align align_8byte,0x90
366 .global GNAME(fast_bzero)
367 .type GNAME(fast_bzero),@function
370 /* A fast routine for zero-filling blocks of memory that are
371 * guaranteed to start and end at a 4096-byte aligned address.
 */
373 shr $6, %rsi /* Amount of 64-byte blocks to copy */
374 jz Lend /* If none, stop */
/* The copy must be taken from %rdi, the pointer: %rdi is advanced by the
 * loop, and the prefetch after the loop needs the beginning of the
 * block.  %rsi holds the block count at this point, not an address. */
375 mov %rdi, %rcx /* Save start address */
376 movups %xmm7, -16(%rsp) /* Save XMM register */
377 xorps %xmm7, %xmm7 /* Zero the XMM register */
382 /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
383 * non-caching double-quadword moving variant, i.e. the memory areas
384 * we're touching are not fetched into the L1 cache, since we're just
385 * going to overwrite the memory soon anyway.
 */
387 movntdq %xmm7, 0(%rdi)
388 movntdq %xmm7, 16(%rdi)
389 movntdq %xmm7, 32(%rdi)
390 movntdq %xmm7, 48(%rdi)
392 add $64, %rdi /* Advance pointer */
393 dec %rsi /* Decrement 64-byte block count */
395 mfence /* Ensure that the writes are globally visible, since
396 * MOVNTDQ is weakly ordered */
397 movups -16(%rsp), %xmm7 /* Restore the XMM register */
398 prefetcht0 0(%rcx) /* Prefetch the start of the block into cache,
399 * since it's likely to be used immediately. */
402 .size GNAME(fast_bzero), .-GNAME(fast_bzero)