2 * very-low-level utilities for runtime support
6 * This software is part of the SBCL system. See the README file for
9 * This software is derived from the CMU CL system, which was
10 * written at Carnegie Mellon University and released into the
11 * public domain. The software is in the public domain and is
12 * provided with absolutely no warranty. See the COPYING and CREDITS
13 * files for more information.
16 #define LANGUAGE_ASSEMBLY
19 #include "genesis/closure.h"
20 #include "genesis/fdefn.h"
21 #include "genesis/static-symbols.h"
22 #include "genesis/symbol.h"
23 #include "genesis/thread.h"
25 /* Minimize conditionalization for different OS naming schemes. */
26 #if defined __linux__ || defined __FreeBSD__ /* (but *not* OpenBSD) */
27 #define GNAME(var) var
29 #define GNAME(var) _##var
32 /* Get the right type of alignment. Linux and FreeBSD (but not OpenBSD)
33 * want alignment in bytes. */
34 #if defined(__linux__) || defined(__FreeBSD__)
37 #define align_16byte 16
38 #define align_32byte 32
42 #define align_16byte 4
46 .global GNAME(foreign_function_call_active)
47 .global GNAME(all_threads)
50 /* From lower to higher-numbered addresses, the stack contains
51 * return address, arg 0, arg 1, arg 2 ...
52 * rax contains the address of the function to call
53 * Lisp expects return value in rax, which is already consistent with C
54 * XXXX correct floating point handling is unimplemented so far
55 * Based on comments cleaned from x86-assem.S, we believe that
56 * Lisp is expecting us to preserve rsi, rdi, rsp (no idea about r8-15)
/*
 * call_into_c: exported symbol, typed as a function for the linker and
 * aligned with align_16byte using 0x90 fill bytes.
 * The visible prologue saves the caller's %rbp, makes %rsp the new frame
 * base, and pushes %rsi.
 * NOTE(review): the entry label and the instructions between the prologue
 * and .size are not visible here -- confirm the argument handling and the
 * call through %rax against the full source.
 */
59 .align align_16byte,0x90
60 .global GNAME(call_into_c)
61 .type GNAME(call_into_c),@function
63 /* ABI requires that the direction flag be clear on function
67 push %rbp # Save old frame pointer.
68 mov %rsp,%rbp # Establish new frame.
70 push %rsi # args are going in here
83 .size GNAME(call_into_c), . - GNAME(call_into_c)
/*
 * call_into_lisp_first_time: exported, function-typed entry point.
 * Visible steps, in order:
 *   1. save the caller's %rbp and establish a frame;
 *   2. store the incoming %rsp into the value slot of ALIEN_STACK
 *      (ALIEN_STACK + SYMBOL_VALUE_OFFSET);
 *   3. load the all_threads pointer into %rax;
 *   4. set %rsp to that thread's control stack start plus
 *      THREAD_CONTROL_STACK_SIZE-16, i.e. switch stacks.
 * NOTE(review): no ret or jump is visible after the final add --
 * presumably control continues into call_into_lisp; confirm.
 */
87 .global GNAME(call_into_lisp_first_time)
88 .type GNAME(call_into_lisp_first_time),@function
90 /* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
91 * the stack changes. We don't worry too much about saving registers
92 * here, because we never expect to return from the initial call to lisp
95 .align align_16byte,0x90
96 GNAME(call_into_lisp_first_time):
97 push %rbp # Save old frame pointer.
98 mov %rsp,%rbp # Establish new frame.
99 mov %rsp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
100 mov GNAME(all_threads),%rax
101 mov THREAD_CONTROL_STACK_START_OFFSET(%rax) ,%rsp
102 /* don't think too hard about what happens if we get interrupted
104 add $THREAD_CONTROL_STACK_SIZE-16,%rsp
/*
 * call_into_lisp: exported, function-typed entry point called from C.
 * Visible flow:
 *   - establish an %rbp frame and push the C callee-saved registers
 *     %rbx, %r12, %r13;
 *   - remember the entry %rsp in %rbx and push it, then push %rdi;
 *   - with LISP_FEATURE_SB_THREAD, call pthread_getspecific;
 *   - pop the arg vector into %rbx and the function ptr/lexenv into %rax;
 *   - zero %rdx, %rdi, %rsi, and shift %rcx left by 3 (fixnumize num-args);
 *   - load arg0/arg1/arg2 from 0/8/16(%rbx) into %rdx/%rdi/%rsi;
 *   - build a new frame (old %rbp pushed, 16 more bytes reserved) and call
 *     indirectly through CLOSURE_FUN_OFFSET(%rax).
 * NOTE(review): several lines of this routine (further pushes/pops, the
 * argument-count tests, the restore sequence and ret) are not visible
 * here -- confirm details against the full source before relying on this.
 */
108 .global GNAME(call_into_lisp)
109 .type GNAME(call_into_lisp),@function
112 * amd64 calling convention: C expects that
113 * arguments go in rdi rsi rdx rcx r8 r9
114 * return values in rax rdx
115 * callee saves rbp rbx r12-15 if it uses them
118 .align align_16byte,0x90
119 GNAME(call_into_lisp):
120 push %rbp # Save old frame pointer.
121 mov %rsp,%rbp # Establish new frame.
123 /* FIXME x86 saves FPU state here */
124 push %rbx # these regs are callee-saved according to C
125 push %r12 # so must be preserved and restored when
126 push %r13 # the lisp function returns
130 mov %rsp,%rbx # remember current stack
131 push %rbx # Save entry stack on (maybe) new stack.
133 push %rdi # args from C
136 #ifdef LISP_FEATURE_SB_THREAD
138 call pthread_getspecific
142 pop %rbx # arg vector
143 pop %rax # function ptr/lexenv
145 xor %rdx,%rdx # clear any descriptor registers
146 xor %rdi,%rdi # that we can't be sure we'll
147 xor %rsi,%rsi # initialise properly. XX do r8-r15 too?
148 shl $3,%rcx # (fixnumize num-args)
151 mov 0(%rbx),%rdx # arg0
154 mov 8(%rbx),%rdi # arg1
157 mov 16(%rbx),%rsi # arg2
159 /* Registers rax, rcx, rdx, rdi, and rsi are now live. */
160 xor %rbx,%rbx # available
162 /* Alloc new frame. */
163 mov %rsp,%rbx # The current sp marks start of new frame.
164 push %rbp # fp in save location S0
165 sub $16,%rsp # Ensure 3 slots are allocated, one above.
166 mov %rbx,%rbp # Switch to new frame.
169 call *CLOSURE_FUN_OFFSET(%rax)
171 /* If the function returned multiple values, it will return to
172 this point. Lose them */
174 /* A single-value function returns here */
176 /* Restore the stack, in case there was a stack change. */
186 /* ABI requires that the direction flag be clear on function
190 /* FIXME Restore the NPX state. */
192 /* return value is already in rax, where C expects it */
195 .size GNAME(call_into_lisp), . - GNAME(call_into_lisp)
197 /* support for saving and restoring the NPX state from C */
/*
 * fpu_save: exported, function-typed symbol.
 * fnsave writes the x87 (NPX) state to the memory addressed by %rax; as
 * the inline comment notes, the instruction also resets the NPX.
 * NOTE(review): the entry label, the load of %rax and the return are not
 * visible here -- confirm where the buffer address comes from.
 */
199 .global GNAME(fpu_save)
200 .type GNAME(fpu_save),@function
204 fnsave (%rax) # Save the NPX state. (resets NPX)
206 .size GNAME(fpu_save),.-GNAME(fpu_save)
/*
 * fpu_restore: exported, function-typed symbol.
 * frstor reloads the x87 (NPX) state from the memory addressed by %rax.
 * NOTE(review): the entry label, the load of %rax and the return are not
 * visible here -- confirm where the buffer address comes from.
 */
208 .global GNAME(fpu_restore)
209 .type GNAME(fpu_restore),@function
213 frstor (%rax) # Restore the NPX state.
215 .size GNAME(fpu_restore),.-GNAME(fpu_restore)
218 * the undefined-function trampoline
/*
 * undefined_tramp: exported, function-typed label for the
 * undefined-function trampoline.
 * The visible body is raw data emitted into the instruction stream:
 * the byte UNDEFINED_FUN_ERROR followed by the byte sc_DescriptorReg.
 * NOTE(review): presumably these bytes are operands consumed by a trap
 * handler after a trap instruction that is not visible here -- confirm.
 */
221 .align align_8byte,0x90
222 .global GNAME(undefined_tramp)
223 .type GNAME(undefined_tramp),@function
224 GNAME(undefined_tramp):
228 .byte UNDEFINED_FUN_ERROR
229 .byte sc_DescriptorReg # eax in the Descriptor-reg SC
231 .size GNAME(undefined_tramp), .-GNAME(undefined_tramp)
/*
 * alloc_tramp: exported, function-typed symbol.
 * The visible prologue saves the caller's %rbp and makes %rsp the new
 * frame base.
 * NOTE(review): the entry label and everything between the prologue and
 * .size are not visible here -- confirm the body against the full source.
 */
235 .align align_8byte,0x90
236 .global GNAME(alloc_tramp)
237 .type GNAME(alloc_tramp),@function
239 push %rbp # Save old frame pointer.
240 mov %rsp,%rbp # Establish new frame.
264 .size GNAME(alloc_tramp),.-GNAME(alloc_tramp)
268 * the closure trampoline
271 .align align_8byte,0x90
272 .global GNAME(closure_tramp)
273 .type GNAME(closure_tramp),@function
274 GNAME(closure_tramp):
275 mov FDEFN_FUN_OFFSET(%rax),%rax
276 /* FIXME: The '*' after "jmp" in the next line is from PVE's
277 * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
278 * reasonable, and it certainly seems as though if CMU CL needs it,
279 * SBCL needs it too, but I haven't actually verified that it's
280 * right. It would be good to find a way to force the flow of
281 * control through here to test it. */
282 jmp *CLOSURE_FUN_OFFSET(%rax)
283 .size GNAME(closure_tramp), .-GNAME(closure_tramp)
286 * fun-end breakpoint magic
/*
 * fun_end_breakpoint_guts / fun_end_breakpoint_trap / fun_end_breakpoint_end:
 * three exported labels delimiting one code sequence.
 *   - Multiple-value entry (at the guts label): jumps straight to
 *     multiple_value_return.
 *   - Single-value entry (after the padded jmp): copies %rsp to %rbx,
 *     reserves one 8-byte stack slot, sets %rcx to 8 (one return value per
 *     the inline comment) and loads NIL into %rdi and %rsi, then falls
 *     into multiple_value_return.
 *   - fun_end_breakpoint_trap marks the byte trap_FunEndBreakpoint, which
 *     is followed by hlt; fun_end_breakpoint_end marks the end.
 * NOTE(review): the padding nop and the trap instruction referred to by
 * the comments are not visible here -- confirm byte offsets before editing.
 */
289 .global GNAME(fun_end_breakpoint_guts)
291 GNAME(fun_end_breakpoint_guts):
292 /* Multiple Value return */
293 jmp multiple_value_return
294 /* the above jmp is only 2 bytes long, we need to add a nop for
295 * padding since the single value return convention jumps to original
296 * return address + 3 bytes */
298 /* Single value return: The eventual return will now use the
299 multiple values return convention but with a return values
301 mov %rsp,%rbx # Set up rbx - the ofp.
302 sub $8,%rsp # Allocate one stack slot for the return value
303 mov $8,%rcx # Set up rcx for one return value.
304 mov $NIL,%rdi # default second value
305 mov $NIL,%rsi # default third value
307 multiple_value_return:
309 .global GNAME(fun_end_breakpoint_trap)
310 GNAME(fun_end_breakpoint_trap):
312 .byte trap_FunEndBreakpoint
313 hlt # We should never return here.
315 .global GNAME(fun_end_breakpoint_end)
316 GNAME(fun_end_breakpoint_end):
/*
 * do_pending_interrupt: exported, function-typed label.
 * The visible body emits the single data byte trap_PendingInterrupt into
 * the instruction stream.
 * NOTE(review): presumably this byte is the code for a trap instruction
 * that is not visible here, followed by a return -- confirm.
 */
319 .global GNAME(do_pending_interrupt)
320 .type GNAME(do_pending_interrupt),@function
321 .align align_8byte,0x90
322 GNAME(do_pending_interrupt):
324 .byte trap_PendingInterrupt
326 .size GNAME(do_pending_interrupt),.-GNAME(do_pending_interrupt)
/*
 * post_signal_tramp: exported, function-typed label.
 * NOTE(review): the instruction body is not visible here; only the
 * remarks below describe it.  Confirm the register restore order and the
 * handling of the saved RBP/RSP slots against the full source.
 */
328 .globl GNAME(post_signal_tramp)
329 .type GNAME(post_signal_tramp),@function
330 .align align_8byte,0x90
331 GNAME(post_signal_tramp):
332 /* this is notionally the second half of a function whose first half
333 * doesn't exist. This is where call_into_lisp returns when called
334 * using return_to_lisp_function */
345 /* skip RBP and RSP */
353 .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp)
/*
 * fast_bzero -- zero-fill a block of memory with non-temporal stores.
 *
 * ABI:    amd64 C calling convention (leaf routine, no frame).
 * In:     %rdi = start address of the block
 *         %rsi = length of the block in bytes
 * Out:    nothing
 * Clobb:  %rcx, %rdi, %rsi, flags.  %xmm7 is saved and restored.
 * Stack:  16 bytes just below %rsp are used as scratch for %xmm7; %rsp
 *         itself is never adjusted.
 *
 * The block is guaranteed by callers to start and end at a 4096-byte
 * aligned address, which satisfies the 16-byte alignment MOVNTDQ needs.
 * The length is processed in whole 64-byte blocks (length >> 6); a length
 * below 64 does nothing.
 */
	.align	align_8byte,0x90
	.global	GNAME(fast_bzero)
	.type	GNAME(fast_bzero),@function
GNAME(fast_bzero):
	shr	$6, %rsi		/* rsi = number of 64-byte blocks to zero */
	jz	Lend			/* If none, stop */
	mov	%rdi, %rcx		/* Save start address: rdi is advanced by
					 * the loop, and the start is needed for
					 * the final prefetch */
	movups	%xmm7, -16(%rsp)	/* Save XMM register */
	xorps	%xmm7, %xmm7		/* Zero the XMM register */
Lloop:
	/* Store the 16 zero bytes from xmm7 to memory, 4 times.  MOVNTDQ is
	 * the non-caching double-quadword moving variant, i.e. the memory
	 * areas we're touching are not fetched into the L1 cache, since
	 * we're just going to overwrite the memory anyway.
	 */
	movntdq	%xmm7, 0(%rdi)
	movntdq	%xmm7, 16(%rdi)
	movntdq	%xmm7, 32(%rdi)
	movntdq	%xmm7, 48(%rdi)

	add	$64, %rdi		/* Advance pointer */
	dec	%rsi			/* Decrement 64-byte block count */
	jnz	Lloop			/* More blocks left */
	mfence				/* Ensure that the writes are globally
					 * visible, since MOVNTDQ is weakly
					 * ordered */
	movups	-16(%rsp), %xmm7	/* Restore the XMM register */
	prefetcht0 0(%rcx)		/* Prefetch the start of the block into
					 * cache, since it's likely to be used
					 * immediately. */
Lend:
	ret
	.size	GNAME(fast_bzero), .-GNAME(fast_bzero)