*/
\f
#define LANGUAGE_ASSEMBLY
+#include "genesis/config.h"
#include "validate.h"
#include "sbcl.h"
#include "genesis/closure.h"
+#include "genesis/funcallable-instance.h"
#include "genesis/fdefn.h"
#include "genesis/static-symbols.h"
#include "genesis/symbol.h"
#define align_16byte 4
#endif
+/*
+ * The assembler used for win32 doesn't like .type or .size directives,
+ * so we want to conditionally compile them out. So let's wrap them in
+ * macros that are defined to be no-ops on win32. Hopefully this still
+ * works on other platforms.
+ */
+#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
+#define TYPE(name) .type name,@function
+#define SIZE(name) .size name,.-name
+#define DOLLAR(name) $(name)
+#else
+#define TYPE(name)
+#define SIZE(name)
+/* Immediates still use the '$' prefix on the win32 and Darwin
+ * assemblers -- only .type/.size are unsupported -- so DOLLAR keeps
+ * the same expansion here.  (It was previously left undefined on this
+ * branch, which would have broken any use of DOLLAR() on these
+ * platforms.) */
+#define DOLLAR(name) $(name)
+#endif
+
+/*
+ * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
+ * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
+ * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
+ * 0F 0B in little-endian byte order, that causes SIGILL to fire. We
+ * check for this instruction in the SIGILL handler and if we see it,
+ * we advance the EIP by two bytes to skip over the ud2 instruction
+ * and call sigtrap_handler. */
+#if defined(LISP_FEATURE_DARWIN)
+#define TRAP ud2
+#else
+#define TRAP int3
+#endif
+
+/*
+ * More Apple assembler hacks
+ */
+
+#if defined(LISP_FEATURE_DARWIN)
+/* global symbol x86-64 sym(%rip) hack:*/
+#define GSYM(name) name(%rip)
+#define END()
+#else
+#define GSYM(name) $name
+#define END() .end
+#endif
+
+
.text
- .global GNAME(foreign_function_call_active)
- .global GNAME(all_threads)
+ .globl GNAME(all_threads)
+
\f
/* From lower to higher-numbered addresses, the stack contains
*/
.text
.align align_16byte,0x90
- .global GNAME(call_into_c)
- .type GNAME(call_into_c),@function
+ .globl GNAME(call_into_c)
+ TYPE(GNAME(call_into_c))
GNAME(call_into_c):
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
mov %rbp,%rsp
pop %rbp
ret
- .size GNAME(call_into_c), . - GNAME(call_into_c)
+ SIZE(GNAME(call_into_c))
\f
.text
- .global GNAME(call_into_lisp_first_time)
- .type GNAME(call_into_lisp_first_time),@function
+ .globl GNAME(call_into_lisp_first_time)
+ TYPE(GNAME(call_into_lisp_first_time))
/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
* the stack changes. We don't worry too much about saving registers
GNAME(call_into_lisp_first_time):
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
- mov %rsp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
- mov GNAME(all_threads),%rax
- mov THREAD_CONTROL_STACK_START_OFFSET(%rax) ,%rsp
- /* don't think too hard about what happens if we get interrupted
- * here */
- add $THREAD_CONTROL_STACK_SIZE-16,%rsp
+ mov %rsp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
+#if defined(LISP_FEATURE_DARWIN)
+ movq GSYM(GNAME(all_threads)),%rax
+#else
+ movq GNAME(all_threads),%rax
+#endif
+ mov THREAD_CONTROL_STACK_END_OFFSET(%rax) ,%rsp
jmp Lstack
\f
.text
- .global GNAME(call_into_lisp)
- .type GNAME(call_into_lisp),@function
+ .globl GNAME(call_into_lisp)
+ TYPE(GNAME(call_into_lisp))
/*
* amd64 calling convention: C expects that
mov %rsp,%rbp # Establish new frame.
Lstack:
/* FIXME x86 saves FPU state here */
- push %rbx
- push %r12
- push %r13
- push %r14
- push %r15
-
+ push %rbx # these regs are callee-saved according to C
+ push %r12 # so must be preserved and restored when
+ push %r13 # the lisp function returns
+ push %r14 #
+ push %r15 #
mov %rsp,%rbx # remember current stack
push %rbx # Save entry stack on (maybe) new stack.
- /* Establish Lisp args. */
- mov %rdi,%rax # lexenv?
- mov %rsi,%rbx # address of arg vec
- mov %rdx,%rcx # num args
+ push %rdi # args from C
+ push %rsi #
+ push %rdx #
+#ifdef LISP_FEATURE_SB_THREAD
+#ifdef LISP_FEATURE_GCC_TLS
+ movq %fs:0, %rax
+ movq GNAME(current_thread)@TPOFF(%rax), %r12
+#else
+#ifdef LISP_FEATURE_DARWIN
+ mov GSYM(GNAME(specials)),%rdi
+#else
+ mov specials,%rdi
+#endif
+ call GNAME(pthread_getspecific)
+ mov %rax,%r12
+#endif
+#endif
+ pop %rcx # num args
+ pop %rbx # arg vector
+ pop %rax # function ptr/lexenv
xor %rdx,%rdx # clear any descriptor registers
xor %rdi,%rdi # that we can't be sure we'll
/* If the function returned multiple values, it will return to
this point. Lose them */
+ jnc LsingleValue
mov %rbx, %rsp
- /* A singled value function returns here */
+LsingleValue:
/* Restore the stack, in case there was a stack change. */
pop %rsp # c-sp
/* return value is already in rax where lisp expects it */
leave
ret
- .size GNAME(call_into_lisp), . - GNAME(call_into_lisp)
+ SIZE(GNAME(call_into_lisp))
\f
/* support for saving and restoring the NPX state from C */
	.text
-	.global	GNAME(fpu_save)
-	.type	GNAME(fpu_save),@function
+	.globl	GNAME(fpu_save)
+	TYPE(GNAME(fpu_save))
	.align	2,0x90
GNAME(fpu_save):
	/* void fpu_save(void *state)
	 * SysV AMD64 passes the save-area pointer in %rdi.  The previous
	 * code fetched it from 4(%rsp) -- the 32-bit cdecl stack-argument
	 * slot -- which reads garbage under this ABI. */
	mov	%rdi,%rax
	fnsave	(%rax)		# Save the NPX state. (resets NPX)
	ret
-	.size	GNAME(fpu_save),.-GNAME(fpu_save)
+	SIZE(GNAME(fpu_save))
-	.global	GNAME(fpu_restore)
-	.type	GNAME(fpu_restore),@function
+	.globl	GNAME(fpu_restore)
+	TYPE(GNAME(fpu_restore))
	.align	2,0x90
GNAME(fpu_restore):
	/* void fpu_restore(void *state)
	 * SysV AMD64 passes the saved-state pointer in %rdi.  The previous
	 * code fetched it from 4(%rsp) -- the 32-bit cdecl stack-argument
	 * slot -- which reads garbage under this ABI. */
	mov	%rdi,%rax
	frstor	(%rax)		# Restore the NPX state.
	ret
-	.size	GNAME(fpu_restore),.-GNAME(fpu_restore)
+	SIZE(GNAME(fpu_restore))
\f
/*
 * the undefined-function trampoline
 */
	.text
-	.align	align_8byte,0x90
-	.global	GNAME(undefined_tramp)
-	.type	GNAME(undefined_tramp),@function
+	.align	align_16byte,0x90
+	.globl	GNAME(undefined_tramp)
+	TYPE(GNAME(undefined_tramp))
GNAME(undefined_tramp):
	/* Jumped to when an undefined function is called.  TRAP raises a
	 * signal, and the runtime's trap handler decodes the bytes that
	 * follow as an error-trap payload (trap_Error, then the error
	 * details).  NOTE(review): the '2' is presumably the count of
	 * payload bytes that follow it -- confirm against the trap
	 * decoder in the runtime. */
-	int3
+	TRAP
	.byte	trap_Error
	.byte	2
	.byte	UNDEFINED_FUN_ERROR
	.byte	sc_DescriptorReg # eax in the Descriptor-reg SC
	ret
-	.size	GNAME(undefined_tramp), .-GNAME(undefined_tramp)
+	SIZE(GNAME(undefined_tramp))
.text
- .align align_8byte,0x90
- .global GNAME(alloc_tramp)
- .type GNAME(alloc_tramp),@function
+ .align align_16byte,0x90
+ .globl GNAME(alloc_tramp)
+ TYPE(GNAME(alloc_tramp))
GNAME(alloc_tramp):
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
push %r10
push %r11
mov 16(%rbp),%rdi
- call alloc
+ call GNAME(alloc)
mov %rax,16(%rbp)
pop %r11
pop %r10
pop %rax
pop %rbp
ret
- .size GNAME(alloc_tramp),.-GNAME(alloc_tramp)
+ SIZE(GNAME(alloc_tramp))
/*
 * the closure trampoline
 */
	.text
-	.align	align_8byte,0x90
-	.global	GNAME(closure_tramp)
-	.type	GNAME(closure_tramp),@function
+	.align	align_16byte,0x90
+	.globl	GNAME(closure_tramp)
+	TYPE(GNAME(closure_tramp))
GNAME(closure_tramp):
	/* NOTE(review): presumably entered with %rax holding an fdefn
	 * whose raw-addr points here; loads the fdefn's function object
	 * and tail-jumps through its entry-point word -- confirm against
	 * the layouts in genesis/fdefn.h and genesis/closure.h. */
	mov	FDEFN_FUN_OFFSET(%rax),%rax
	/* FIXME: The '*' after "jmp" in the next line is from PVE's
	 * right. It would be good to find a way to force the flow of
	 * control through here to test it. */
	jmp	*CLOSURE_FUN_OFFSET(%rax)
-	.size	GNAME(closure_tramp), .-GNAME(closure_tramp)
+	SIZE(GNAME(closure_tramp))
+	.text
+	.align	align_16byte,0x90
+	.globl	GNAME(funcallable_instance_tramp)
+	TYPE(GNAME(funcallable_instance_tramp))
+GNAME(funcallable_instance_tramp):
+	/* Entry point for funcallable instances: %rax holds the
+	 * funcallable instance; fetch its function slot and tail-jump
+	 * to it.  Uses the TYPE()/SIZE() wrappers like the other
+	 * trampolines, so the directives are also suppressed on win32,
+	 * whose assembler rejects raw .type/.size (the previous
+	 * !DARWIN-only guard missed that case). */
+	mov	FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax
+	/* KLUDGE: on this platform, whatever kind of function is in %rax
+	 * now, the first word of it contains the address to jump to. */
+	jmp	*CLOSURE_FUN_OFFSET(%rax)
+	SIZE(GNAME(funcallable_instance_tramp))
/*
* fun-end breakpoint magic
*/
.text
- .global GNAME(fun_end_breakpoint_guts)
- .align align_8byte
+ .globl GNAME(fun_end_breakpoint_guts)
+ .align align_16byte
GNAME(fun_end_breakpoint_guts):
/* Multiple Value return */
- jmp multiple_value_return
- /* the above jmp is only 2 bytes long, we need to add a nop for
- * padding since the single value return convention jumps to original
- * return address + 3 bytes */
- nop
+ jc multiple_value_return
/* Single value return: The eventual return will now use the
multiple values return convention but with a return values
count of one. */
mov %rsp,%rbx # Setup ebx - the ofp.
sub $8,%rsp # Allocate one stack slot for the return value
mov $8,%rcx # Setup ecx for one return value.
+#if defined(LISP_FEATURE_DARWIN)
+ mov GSYM(NIL),%rdi # default second value
+ mov GSYM(NIL),%rsi # default third value
+#else
mov $NIL,%rdi # default second value
mov $NIL,%rsi # default third value
-
+#endif
multiple_value_return:
- .global GNAME(fun_end_breakpoint_trap)
+ .globl GNAME(fun_end_breakpoint_trap)
GNAME(fun_end_breakpoint_trap):
- int3
+ TRAP
.byte trap_FunEndBreakpoint
hlt # We should never return here.
- .global GNAME(fun_end_breakpoint_end)
+ .globl GNAME(fun_end_breakpoint_end)
GNAME(fun_end_breakpoint_end):
\f
-	.global	GNAME(do_pending_interrupt)
-	.type	GNAME(do_pending_interrupt),@function
-	.align	align_8byte,0x90
+	.globl	GNAME(do_pending_interrupt)
+	TYPE(GNAME(do_pending_interrupt))
+	.align	align_16byte,0x90
GNAME(do_pending_interrupt):
	/* Delivers a deferred interrupt: TRAP raises a signal and the
	 * trap handler dispatches on the trap_PendingInterrupt byte that
	 * follows.  NOTE(review): the handler is expected to resume
	 * execution past the payload byte so that the ret executes --
	 * confirm against the sigtrap handler. */
-	int3
+	TRAP
	.byte	trap_PendingInterrupt
	ret
-	.size	GNAME(do_pending_interrupt),.-GNAME(do_pending_interrupt)
+	SIZE(GNAME(do_pending_interrupt))
\f
.globl GNAME(post_signal_tramp)
- .type GNAME(post_signal_tramp),@function
- .align align_8byte,0x90
+ TYPE(GNAME(post_signal_tramp))
+ .align align_16byte,0x90
GNAME(post_signal_tramp):
/* this is notionally the second half of a function whose first half
* doesn't exist. This is where call_into_lisp returns when called
popq %r8
popq %rdi
popq %rsi
- addq $8, %rsp
- popq %rsp
- popq %rdx
+ /* skip RBP and RSP */
popq %rbx
+ popq %rdx
popq %rcx
popq %rax
- popfl
+ popfq
leave
ret
- .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp)
+ SIZE(GNAME(post_signal_tramp))
\f
- .end
+	.text
+	.align	align_16byte,0x90
+	.globl	GNAME(fast_bzero)
+	TYPE(GNAME(fast_bzero))
+
+GNAME(fast_bzero):
+	/* A fast routine for zero-filling blocks of memory that are
+	 * guaranteed to start and end at a 4096-byte aligned address.
+	 * SysV AMD64: %rdi = start address, %rsi = length in bytes.
+	 * Leaf function; %xmm7 is preserved via the red zone.
+	 */
+	shr $6, %rsi              /* Number of 64-byte blocks to zero */
+	jz Lend                   /* If none, stop */
+	mov %rdi, %rcx            /* Save the START ADDRESS for the final
+	                           * prefetch.  (Previously saved %rsi, the
+	                           * block count, which made the prefetcht0
+	                           * below target a bogus -- harmless but
+	                           * useless -- address.) */
+	movups %xmm7, -16(%rsp)   /* Save XMM register in the red zone */
+	xorps %xmm7, %xmm7        /* Zero the XMM register */
+	jmp Lloop
+	.align align_16byte
+Lloop:
+
+	/* Store the 16 zero bytes of xmm7 to memory, 4 times. MOVNTDQ is the
+	 * non-caching double-quadword store variant, i.e. the memory areas
+	 * we're touching are not fetched into the L1 cache, since we're just
+	 * going to overwrite the memory soon anyway.
+	 */
+	movntdq %xmm7, 0(%rdi)
+	movntdq %xmm7, 16(%rdi)
+	movntdq %xmm7, 32(%rdi)
+	movntdq %xmm7, 48(%rdi)
+
+	add $64, %rdi             /* Advance pointer */
+	dec %rsi                  /* Decrement 64-byte block count */
+	jnz Lloop
+	mfence                    /* Ensure that the writes are globally visible,
+	                           * since MOVNTDQ is weakly ordered */
+	movups -16(%rsp), %xmm7   /* Restore the XMM register */
+	prefetcht0 0(%rcx)        /* Prefetch the start of the block into cache,
+	                           * since it's likely to be used immediately. */
+Lend:
+	ret
+	SIZE(GNAME(fast_bzero))
+
+	END()