*/
\f
#define LANGUAGE_ASSEMBLY
#include "genesis/config.h"
#include "validate.h"
#include "sbcl.h"
#include "genesis/closure.h"
#include "genesis/thread.h"

/* Minimize conditionalization for different OS naming schemes. */
#if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun || defined _WIN64
#define GNAME(var) var
#else
#define GNAME(var) _##var
#endif

/* Get the right type of alignment. Linux, FreeBSD and OpenBSD
 * want alignment in bytes.
 * (Elsewhere .align takes a power-of-two exponent instead.) */
#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun) || defined _WIN64
#define align_4byte     4
#define align_8byte     8
#define align_16byte    16
#define align_32byte    32
#define align_page      32768
#else
#define align_4byte     2
#define align_8byte     3
#define align_16byte    4
#define align_page      15
#endif

/*
 * The assembler used for win32 doesn't like .type or .size directives,
 * so we want to conditionally kill them out. So let's wrap them in macros
 * that are defined to be no-ops on win32. Hopefully this still works on
 * other platforms.
 */
#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
#define TYPE(name) .type name,@function
#define SIZE(name) .size name,.-name
#define DOLLAR(name) $(name)
#else
#define TYPE(name)
#define SIZE(name)
#endif

/*
 * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
 * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
 * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
 * 0F 0B in little-endian notation, that causes SIGILL to fire. We check
 * for this instruction in the SIGILL handler and if we see it, we
 * advance the EIP by two bytes to skip over the ud2 instruction and
 * call sigtrap_handler. */
#if defined(LISP_FEATURE_UD2_BREAKPOINTS)
#define TRAP ud2
#else
#define TRAP int3
#endif

/*
 * More Apple assembler hacks
 */

#if defined(LISP_FEATURE_DARWIN)
/* global symbol x86-64 sym(%rip) hack:*/
#define GSYM(name) name(%rip)
#define END()
#else
#define GSYM(name) $name
#define END() .end
#endif


	.text
	.globl	GNAME(all_threads)

\f
/* From lower to higher-numbered addresses, the stack contains
 */
	.text
	.align	align_16byte,0x90
	.globl	GNAME(call_into_c)
	TYPE(GNAME(call_into_c))
GNAME(call_into_c):
	push	%rbp		# Save old frame pointer.
	mov	%rsp,%rbp	# Establish new frame.
	/* NOTE(review): the body of this routine appears elided in this
	 * view -- confirm the save/call/restore sequence against the
	 * full file. */
	mov	%rbp,%rsp
	pop	%rbp
	ret
	SIZE(GNAME(call_into_c))
\f
.text
- .global GNAME(call_into_lisp_first_time)
- .type GNAME(call_into_lisp_first_time),@function
+ .globl GNAME(call_into_lisp_first_time)
+ TYPE(GNAME(call_into_lisp_first_time))
-/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
- * the stack changes. We don't worry too much about saving registers
+/* We don't worry too much about saving registers
* here, because we never expect to return from the initial call to lisp
* anyway */
GNAME(call_into_lisp_first_time):
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
- mov %rsp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
- mov GNAME(all_threads),%rax
- mov THREAD_CONTROL_STACK_START_OFFSET(%rax) ,%rsp
- /* don't think too hard about what happens if we get interrupted
- * here */
- add $THREAD_CONTROL_STACK_SIZE-16,%rsp
+#if defined(LISP_FEATURE_DARWIN)
+ movq GSYM(GNAME(all_threads)),%rax
+#else
+ movq GNAME(all_threads),%rax
+#endif
+ mov THREAD_CONTROL_STACK_END_OFFSET(%rax) ,%rsp
jmp Lstack
\f
.text
- .global GNAME(call_into_lisp)
- .type GNAME(call_into_lisp),@function
+ .globl GNAME(call_into_lisp)
+ TYPE(GNAME(call_into_lisp))
/*
* amd64 calling convention: C expects that
* return values in rax rdx
* callee saves rbp rbx r12-15 if it uses them
*/
-
+#ifdef LISP_FEATURE_WIN32
+# define SUPPORT_FOMIT_FRAME_POINTER
+#endif
.align align_16byte,0x90
GNAME(call_into_lisp):
+#ifdef SUPPORT_FOMIT_FRAME_POINTER
+ mov %rbp,%rax
+#endif
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
Lstack:
+#ifdef SUPPORT_FOMIT_FRAME_POINTER
+ /* If called through call_into_lisp_first_time, %r15 becomes invalid
+ * here, but we will not return in that case. */
+ push %r15
+ mov %rax,%r15
+#endif
/* FIXME x86 saves FPU state here */
push %rbx # these regs are callee-saved according to C
push %r12 # so must be preserved and restored when
push %rsi #
push %rdx #
#ifdef LISP_FEATURE_SB_THREAD
+# ifdef SUPPORT_FOMIT_FRAME_POINTER
+ mov (%rbp),%rcx
+ sub $32,%rsp
+ call GNAME(carry_frame_pointer)
+ add $32,%rsp
+ mov %rax,(%rbp)
+# endif
+#ifdef LISP_FEATURE_GCC_TLS
+ movq %fs:0, %rax
+ movq GNAME(current_thread)@TPOFF(%rax), %r12
+#else
+#ifdef LISP_FEATURE_DARWIN
+ mov GSYM(GNAME(specials)),%rdi
+#else
mov specials,%rdi
- call pthread_getspecific
+#endif
+ call GNAME(pthread_getspecific)
mov %rax,%r12
#endif
+#endif
pop %rcx # num args
pop %rbx # arg vector
pop %rax # function ptr/lexenv
xor %rdx,%rdx # clear any descriptor registers
xor %rdi,%rdi # that we can't be sure we'll
xor %rsi,%rsi # initialise properly. XX do r8-r15 too?
- shl $3,%rcx # (fixnumize num-args)
+ shl $(N_FIXNUM_TAG_BITS),%rcx # (fixnumize num-args)
cmp $0,%rcx
je Ldone
mov 0(%rbx),%rdx # arg0
xor %rbx,%rbx # available
/* Alloc new frame. */
- mov %rsp,%rbx # The current sp marks start of new frame.
- push %rbp # fp in save location S0
- sub $16,%rsp # Ensure 3 slots are allocated, one above.
- mov %rbx,%rbp # Switch to new frame.
+ push %rbp # Dummy for return address
+ push %rbp # fp in save location S1
+ mov %rsp,%rbp # The current sp marks start of new frame.
+ sub $8,%rsp # Ensure 3 slots are allocated, two above.
Lcall:
call *CLOSURE_FUN_OFFSET(%rax)
pop %r12
pop %rbx
- /* ABI requires that the direction flag be clear on function
- * entry and exit. */
- cld
-
/* FIXME Restore the NPX state. */
- /* return value is already in rax where lisp expects it */
+ mov %rdx,%rax # c-val
+#ifdef SUPPORT_FOMIT_FRAME_POINTER
+ mov %r15,%rbp # orig rbp
+ pop %r15 # orig r15
+ add $8,%rsp # no need for saved (overridden) rbp
+#else
leave
+#endif
ret
- .size GNAME(call_into_lisp), . - GNAME(call_into_lisp)
+ SIZE(GNAME(call_into_lisp))
\f
/* support for saving and restoring the NPX state from C */
	.text
	.globl	GNAME(fpu_save)
	TYPE(GNAME(fpu_save))
	.align	align_16byte,0x90
GNAME(fpu_save):
	/* void fpu_save(void *state) -- state pointer arrives in %rdi
	 * per the SysV AMD64 convention (the old 4(%rsp) fetch was the
	 * 32-bit stack-args convention). */
	fnsave	(%rdi)		# Save the NPX state. (resets NPX)
	ret
	SIZE(GNAME(fpu_save))
- .global GNAME(fpu_restore)
- .type GNAME(fpu_restore),@function
- .align 2,0x90
+ .globl GNAME(fpu_restore)
+ TYPE(GNAME(fpu_restore))
+ .align align_16byte,0x90
GNAME(fpu_restore):
- mov 4(%rsp),%rax
- frstor (%rax) # Restore the NPX state.
+ frstor (%rdi) # Restore the NPX state.
ret
- .size GNAME(fpu_restore),.-GNAME(fpu_restore)
+ SIZE(GNAME(fpu_restore))
\f
/*
 * the undefined-function trampoline
 */
	.text
	.align	align_16byte,0x90
	.globl	GNAME(undefined_tramp)
	TYPE(GNAME(undefined_tramp))
GNAME(undefined_tramp):
	pop	8(%rbp)		# Save return PC for backtrace.
	TRAP			# signal the error to the runtime
	.byte	trap_Error
	.byte	2
	.byte	UNDEFINED_FUN_ERROR
	.byte	sc_DescriptorReg # eax in the Descriptor-reg SC
	ret
	SIZE(GNAME(undefined_tramp))
+
+ .text
+ .align align_16byte,0x90
+ .globl GNAME(undefined_alien_function)
+ TYPE(GNAME(undefined_alien_function))
+GNAME(undefined_alien_function):
+ pop 8(%rbp) # Save return PC for backtrace.
+ TRAP
+ .byte trap_Error
+ .byte 4
+ .byte UNDEFINED_ALIEN_FUN_ERROR
+ /* Encode RBX
+ FIXME: make independt of the encoding changes. */
+ .byte 0xFE
+ .byte 0x9F
+ .byte 0x01
+ ret
+ SIZE(GNAME(undefined_alien_function))
+
+/* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs
+ * to know the name of the function immediately following the
+ * undefined-function trampoline. */
+
/* Our call-site does not take care of caller-saved xmm registers, so it
 * falls to us to spill them before hopping into C.
 *
 * We simply save all of them.
 *
 * (But for the sake of completeness, here is my understanding of the specs:)
 * System V Microsoft
 * argument passing xmm0-7 xmm0-3
 * caller-saved xmm8-15 xmm4-5
 * callee-saved - xmm6-15
 *
 * --DFL */
#define stkxmmsave(n) movaps %xmm##n, n*16(%rsp)
#define stkxmmload(n) movaps n*16(%rsp), %xmm##n
#define map_all_xmm(op) \
	op(0);op(1);op(2);op(3);op(4);op(5);op(6);op(7); \
	op(8);op(9);op(10);op(11);op(12);op(13);op(14);op(15);

	.text
	.align	align_16byte,0x90
	.globl	GNAME(alloc_tramp)
	TYPE(GNAME(alloc_tramp))
GNAME(alloc_tramp):
	cld			# C code expects the direction flag clear
	push	%rbp		# Save old frame pointer.
	mov	%rsp,%rbp	# Establish new frame.
	and	$-32,%rsp	# align %rsp so movaps spills are legal
	sub	$16*16,%rsp	# room for all 16 xmm registers
	map_all_xmm(stkxmmsave)
	/* Save the caller-saved integer registers: the Lisp call site
	 * expects every register except the result slot to survive.
	 * The pushes must mirror the pops below exactly, or %rsp is
	 * corrupted on return. */
	push	%rax
	push	%rcx
	push	%rdx
	push	%rsi
	push	%rdi
	push	%r8
	push	%r9
	push	%r10
	push	%r11
	push	%r11		# pushed twice to keep %rsp 16-byte aligned
	mov	16(%rbp),%rdi	# request size, stored above the return PC
	call	GNAME(alloc)
	mov	%rax,16(%rbp)	# hand the result back in the same stack slot
	pop	%r11
	pop	%r11
	pop	%r10
	pop	%r9
	pop	%r8
	pop	%rdi
	pop	%rsi
	pop	%rdx
	pop	%rcx
	pop	%rax
	map_all_xmm(stkxmmload)
	mov	%rbp,%rsp
	pop	%rbp
	ret
	SIZE(GNAME(alloc_tramp))
/*
 * the closure trampoline
 */
	.text
	.align	align_16byte,0x90
	.globl	GNAME(closure_tramp)
	TYPE(GNAME(closure_tramp))
GNAME(closure_tramp):
	mov	FDEFN_FUN_OFFSET(%rax),%rax
	/* FIXME: The '*' after "jmp" in the next line is from PVE's
	 * right. It would be good to find a way to force the flow of
	 * control through here to test it. */
	jmp	*CLOSURE_FUN_OFFSET(%rax)
	SIZE(GNAME(closure_tramp))
.text
- .align align_8byte,0x90
- .global GNAME(funcallable_instance_tramp)
- .type GNAME(funcallable_instance_tramp),@function
-GNAME(funcallable_instance_tramp):
+ .align align_16byte,0x90
+ .globl GNAME(funcallable_instance_tramp)
+#if !defined(LISP_FEATURE_DARWIN)
+ TYPE(GNAME(funcallable_instance_tramp))
+#endif
+ GNAME(funcallable_instance_tramp):
mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax
/* KLUDGE: on this platform, whatever kind of function is in %rax
* now, the first word of it contains the address to jump to. */
jmp *CLOSURE_FUN_OFFSET(%rax)
- .size GNAME(funcallable_instance_tramp), .-GNAME(funcallable_instance_tramp)
-
+#if !defined(LISP_FEATURE_DARWIN)
+ SIZE(GNAME(funcallable_instance_tramp))
+#endif
/*
 * fun-end breakpoint magic
 */

/*
 * For an explanation of the magic involved in function-end
 * breakpoints, see the implementation in ppc-assem.S.
 */

	.text
	.globl	GNAME(fun_end_breakpoint_guts)
	.align	align_16byte
GNAME(fun_end_breakpoint_guts):
	/* Multiple Value return */
	jc	multiple_value_return
	/* Single value return: fake up a multiple-values frame holding
	 * exactly one value. */
	mov	%rsp,%rbx	# Setup ebx - the ofp.
	sub	$8,%rsp		# Allocate one stack slot for the return value
	mov	$8,%rcx		# Setup ecx for one return value.
#if defined(LISP_FEATURE_DARWIN)
	mov	GSYM(NIL),%rdi	# default second value
	mov	GSYM(NIL),%rsi	# default third value
#else
	mov	$NIL,%rdi	# default second value
	mov	$NIL,%rsi	# default third value
#endif
multiple_value_return:

	.globl	GNAME(fun_end_breakpoint_trap)
	.align	align_16byte,0x90
GNAME(fun_end_breakpoint_trap):
	TRAP
	.byte	trap_FunEndBreakpoint
	hlt			# We should never return here.

	.globl	GNAME(fun_end_breakpoint_end)
GNAME(fun_end_breakpoint_end):
\f
- .global GNAME(do_pending_interrupt)
- .type GNAME(do_pending_interrupt),@function
- .align align_8byte,0x90
+ .globl GNAME(do_pending_interrupt)
+ TYPE(GNAME(do_pending_interrupt))
+ .align align_16byte,0x90
GNAME(do_pending_interrupt):
- int3
+ TRAP
.byte trap_PendingInterrupt
ret
- .size GNAME(do_pending_interrupt),.-GNAME(do_pending_interrupt)
+ SIZE(GNAME(do_pending_interrupt))
\f
.globl GNAME(post_signal_tramp)
- .type GNAME(post_signal_tramp),@function
- .align align_8byte,0x90
+ TYPE(GNAME(post_signal_tramp))
+ .align align_16byte,0x90
GNAME(post_signal_tramp):
/* this is notionally the second half of a function whose first half
* doesn't exist. This is where call_into_lisp returns when called
popfq
leave
ret
- .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp)
+ SIZE(GNAME(post_signal_tramp))
\f
.text
- .align align_8byte,0x90
- .global GNAME(fast_bzero)
- .type GNAME(fast_bzero),@function
-
+ .align align_16byte,0x90
+ .globl GNAME(fast_bzero)
+ TYPE(GNAME(fast_bzero))
+
+ #ifdef LISP_FEATURE_WIN32
+ #define xmmreg xmm7
+ #define redsave(reg,off) movups reg,-off(%rsp)
+ #define redrestore(reg,off) movups -off(%rsp),reg
+ #else
+ #define xmmreg xmm0
+ #define redsave(reg,off)
+ #define redrestore(reg,off)
+ #endif
+
GNAME(fast_bzero):
/* A fast routine for zero-filling blocks of memory that are
* guaranteed to start and end at a 4096-byte aligned address.
shr $6, %rsi /* Amount of 64-byte blocks to copy */
jz Lend /* If none, stop */
mov %rsi, %rcx /* Save start address */
- movups %xmm7, -16(%rsp) /* Save XMM register */
- xorps %xmm7, %xmm7 /* Zero the XMM register */
+ redsave(%xmmreg,16)
+ xorps %xmmreg, %xmmreg /* Zero the XMM register */
jmp Lloop
- .align 16
+ .align align_16byte
Lloop:
/* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
* we're touching are not fetched into the L1 cache, since we're just
* going to overwrite the memory soon anyway.
*/
- movntdq %xmm7, 0(%rdi)
- movntdq %xmm7, 16(%rdi)
- movntdq %xmm7, 32(%rdi)
- movntdq %xmm7, 48(%rdi)
+ movntdq %xmmreg, 0(%rdi)
+ movntdq %xmmreg, 16(%rdi)
+ movntdq %xmmreg, 32(%rdi)
+ movntdq %xmmreg, 48(%rdi)
add $64, %rdi /* Advance pointer */
dec %rsi /* Decrement 64-byte block count */
jnz Lloop
mfence /* Ensure that the writes are globally visible, since
* MOVNTDQ is weakly ordered */
- movups -16(%rsp), %xmm7 /* Restore the XMM register */
+ redrestore(%xmmreg,16)
prefetcht0 0(%rcx) /* Prefetch the start of the block into cache,
* since it's likely to be used immediately. */
Lend:
ret
- .size GNAME(fast_bzero), .-GNAME(fast_bzero)
+ SIZE(GNAME(fast_bzero))
- .end
+\f
+/* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub
+ * the control stack from C, largely due to not knowing where the
+ * active stack frame ends. On such platforms, we reimplement the
+ * core scrubbing logic in assembly, in this case here:
+ */
+ .text
+ .align align_16byte,0x90
+ .globl GNAME(arch_scrub_control_stack)
+ TYPE(GNAME(arch_scrub_control_stack))
+GNAME(arch_scrub_control_stack):
+ /* We are passed three parameters:
+ * A (struct thread *) in RDI,
+ * the address of the guard page in RSI, and
+ * the address of the hard guard page in RDX.
+ * We may trash RAX, RCX, and R8-R11 with impunity.
+ * [RSP] is our return address, [RSP-8] is the first
+ * stack slot to scrub. */
+
+ /* We start by setting up our scrub pointer in RAX, our
+ * guard page upper bound in R8, and our hard guard
+ * page upper bound in R9. */
+ lea -8(%rsp), %rax
+#ifdef LISP_FEATURE_DARWIN
+ mov GSYM(GNAME(os_vm_page_size)),%r9
+#else
+ mov os_vm_page_size,%r9
+#endif
+ lea (%rsi,%r9), %r8
+ lea (%rdx,%r9), %r9
+
+ /* Now we begin our main scrub loop. */
+ascs_outer_loop:
+
+ /* If we're about to scrub the hard guard page, exit. */
+ cmp %r9, %rax
+ jae ascs_check_guard_page
+ cmp %rax, %rdx
+ jbe ascs_finished
+
+ascs_check_guard_page:
+ /* If we're about to scrub the guard page, and the guard
+ * page is protected, exit. */
+ cmp %r8, %rax
+ jae ascs_clear_loop
+ cmp %rax, %rsi
+ ja ascs_clear_loop
+ cmpq $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%rdi)
+ jne ascs_finished
+
+ /* Clear memory backwards to the start of the (4KiB) page */
+ascs_clear_loop:
+ movq $0, (%rax)
+ test $0xfff, %rax
+ lea -8(%rax), %rax
+ jnz ascs_clear_loop
+
+ /* If we're about to hit the hard guard page, exit. */
+ cmp %r9, %rax
+ jae ascs_finished
+
+ /* If the next (previous?) 4KiB page contains a non-zero
+ * word, continue scrubbing. */
+ascs_check_loop:
+ testq $-1, (%rax)
+ jnz ascs_outer_loop
+ test $0xfff, %rax
+ lea -8(%rax), %rax
+ jnz ascs_check_loop
+
+ascs_finished:
+ ret
+ SIZE(GNAME(arch_scrub_control_stack))
+\f
+ END()