.global GNAME(call_into_c)
.type GNAME(call_into_c),@function
GNAME(call_into_c):
+ /* ABI requires that the direction flag be clear on function
+ * entry and exit. */
+ cld
+
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
mov %rsp,%rbp # NOTE(review): repeats the instruction above verbatim; looks redundant - confirm.
Lstack:
/* FIXME x86 saves FPU state here */
- push %rbx
- push %r12
- push %r13
- push %r14
- push %r15
-
+ push %rbx # these regs are callee-saved according to C
+ push %r12 # so must be preserved and restored when
+ push %r13 # the lisp function returns
+ push %r14 #
+ push %r15 #
mov %rsp,%rbx # remember current stack
push %rbx # Save entry stack on (maybe) new stack.
- /* Establish Lisp args. */
- mov %rdi,%rax # lexenv?
- mov %rsi,%rbx # address of arg vec
- mov %rdx,%rcx # num args
+ push %rdi # args from C: parked on the stack because %rdi is
+ push %rsi # reused as the pthread_getspecific argument in the
+ push %rdx # LISP_FEATURE_SB_THREAD block below
+#ifdef LISP_FEATURE_SB_THREAD
+ mov specials,%rdi # argument for pthread_getspecific
+ call pthread_getspecific # NOTE(review): counting the 10 pushes above, %rsp looks 8 mod 16 here - confirm call-site alignment
+ mov %rax,%r12 # keep the value returned in %rax in %r12
+#endif
+ pop %rcx # num args (the value pushed from %rdx)
+ pop %rbx # arg vector (the value pushed from %rsi)
+ pop %rax # function ptr/lexenv (the value pushed from %rdi)
xor %rdx,%rdx # clear any descriptor registers
xor %rdi,%rdi # that we can't be sure we'll
/* If the function returned multiple values, it will return to
this point. Lose them */
+ jnc LsingleValue # carry clear: skip the %rsp reset on the next line
mov %rbx, %rsp
- /* A singled value function returns here */
+LsingleValue:
/* Restore the stack, in case there was a stack change. */
pop %rsp # c-sp
pop %r12
pop %rbx
+ /* ABI requires that the direction flag be clear on function
+ * entry and exit. */
+ cld
+
/* FIXME Restore the NPX state. */
/* return value is already in rax where lisp expects it */
.align align_8byte
GNAME(fun_end_breakpoint_guts):
/* Multiple Value return */
- jmp multiple_value_return
- /* the above jmp is only 2 bytes long, we need to add a nop for
- * padding since the single value return convention jumps to original
- * return address + 3 bytes */
- nop
+ jc multiple_value_return # taken only when the carry flag is set; otherwise fall through
/* Single value return: The eventual return will now use the
multiple values return convention but with a return values
count of one. */
popq %r8
popq %rdi
popq %rsi
- addq $8, %rsp
- popq %rsp
- popq %rdx
+ /* skip RBP and RSP */
popq %rbx
+ popq %rdx
popq %rcx
popq %rax
- popfl
+ popfq
leave
ret
.size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp)
\f
- .end
+	.text
+	.align	align_8byte,0x90
+	.global	GNAME(fast_bzero)
+	.type	GNAME(fast_bzero),@function
+
+	/*
+	 * fast_bzero: zero-fill a block of memory using non-temporal stores.
+	 *
+	 * The block is guaranteed by the caller to start and end at a
+	 * 4096-byte aligned address, which also satisfies the 16-byte
+	 * alignment MOVNTDQ requires of its memory operand.
+	 *
+	 * In:    %rdi = start address of the block
+	 *        %rsi = length of the block in bytes; only whole 64-byte
+	 *               units are zeroed (the length is shifted right by 6)
+	 * Out:   nothing
+	 * Clobb: %rcx, %rsi, %rdi, flags.  %xmm7 is saved and restored.
+	 * Stack: the 16 bytes just below %rsp hold the saved %xmm7; %rsp
+	 *        itself is never adjusted (leaf routine, no calls).
+	 */
+GNAME(fast_bzero):
+	shr	$6, %rsi		/* rsi = number of 64-byte blocks to zero */
+	jz	Lend			/* Fewer than 64 bytes: nothing to do */
+	mov	%rdi, %rcx		/* Save start address (rdi is advanced by
+					 * the loop; rcx feeds the final prefetch) */
+	movups	%xmm7, -16(%rsp)	/* Save XMM register */
+	xorps	%xmm7, %xmm7		/* Zero the XMM register */
+	jmp	Lloop
+	.align	16
+Lloop:
+
+	/* Store the 16 zero bytes held in xmm7 to memory, 4 times (64 bytes
+	 * per iteration).  MOVNTDQ is the non-temporal double-quadword
+	 * store, i.e. the memory we are touching is not pulled into the
+	 * cache just to be overwritten.
+	 */
+	movntdq	%xmm7, 0(%rdi)
+	movntdq	%xmm7, 16(%rdi)
+	movntdq	%xmm7, 32(%rdi)
+	movntdq	%xmm7, 48(%rdi)
+
+	add	$64, %rdi		/* Advance pointer */
+	dec	%rsi			/* Decrement 64-byte block count */
+	jnz	Lloop
+	mfence				/* Ensure that the writes are globally visible,
+					 * since MOVNTDQ is weakly ordered */
+	movups	-16(%rsp), %xmm7	/* Restore the XMM register */
+	prefetcht0 0(%rcx)		/* Prefetch the start of the block into cache,
+					 * since it's likely to be used immediately. */
+Lend:
+	ret
+	.size	GNAME(fast_bzero), .-GNAME(fast_bzero)
+
+ .end