#include "genesis/thread.h"
/* Minimize conditionalization for different OS naming schemes. */
-#if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun
+#if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun || defined _WIN64
#define GNAME(var) var
#else
#define GNAME(var) _##var
/* Get the right type of alignment. Linux, FreeBSD and OpenBSD
* want alignment in bytes. */
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun)
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun) || defined _WIN64
#define align_4byte 4
#define align_8byte 8
#define align_16byte 16
* return values in rax rdx
* callee saves rbp rbx r12-15 if it uses them
*/
-
+#ifdef LISP_FEATURE_WIN32
+# define SUPPORT_FOMIT_FRAME_POINTER
+#endif
.align align_16byte,0x90
GNAME(call_into_lisp):
+#ifdef SUPPORT_FOMIT_FRAME_POINTER
+ mov %rbp,%rax
+#endif
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
Lstack:
+#ifdef SUPPORT_FOMIT_FRAME_POINTER
+ /* If called through call_into_lisp_first_time, %r15 becomes invalid
+ * here, but we will not return in that case. */
+ push %r15
+ mov %rax,%r15
+#endif
/* FIXME x86 saves FPU state here */
push %rbx # these regs are callee-saved according to C
push %r12 # so must be preserved and restored when
push %rsi #
push %rdx #
#ifdef LISP_FEATURE_SB_THREAD
+# ifdef SUPPORT_FOMIT_FRAME_POINTER
+ mov (%rbp),%rcx
+ sub $32,%rsp
+ call GNAME(carry_frame_pointer)
+ add $32,%rsp
+ mov %rax,(%rbp)
+# endif
#ifdef LISP_FEATURE_GCC_TLS
movq %fs:0, %rax
movq GNAME(current_thread)@TPOFF(%rax), %r12
/* FIXME Restore the NPX state. */
mov %rdx,%rax # c-val
+#ifdef SUPPORT_FOMIT_FRAME_POINTER
+ mov %r15,%rbp # orig rbp
+ pop %r15 # orig r15
+ add $8,%rsp # no need for saved (overridden) rbp
+#else
leave
+#endif
ret
SIZE(GNAME(call_into_lisp))
\f
ret
SIZE(GNAME(undefined_tramp))
+ .text
+ .align align_16byte,0x90
+ .globl GNAME(undefined_alien_function)
+ TYPE(GNAME(undefined_alien_function))
+GNAME(undefined_alien_function):
+ pop 8(%rbp) # Save return PC for backtrace.
+ TRAP
+ .byte trap_Error
+ .byte 4
+ .byte UNDEFINED_ALIEN_FUN_ERROR
+ /* Encode RBX
+	   FIXME: make independent of the encoding changes. */
+ .byte 0xFE
+ .byte 0x9F
+ .byte 0x01
+ ret
+ SIZE(GNAME(undefined_alien_function))
+
/* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs
* to know the name of the function immediately following the
* undefined-function trampoline. */
+/* Our call-site does not take care of caller-saved xmm registers, so it
+ * falls to us to spill them before hopping into C.
+ *
+ * We simply save all of them.
+ *
+ * (But for the sake of completeness, here is my understanding of the specs:)
+ * System V Microsoft
+ * argument passing xmm0-7 xmm0-3
+ * caller-saved xmm8-15 xmm4-5
+ * callee-saved - xmm6-15
+ *
+ * --DFL */
+
+#define stkxmmsave(n) movaps %xmm##n, n*16(%rsp)
+#define stkxmmload(n) movaps n*16(%rsp), %xmm##n
+#define map_all_xmm(op) \
+ op(0);op(1);op(2);op(3);op(4);op(5);op(6);op(7); \
+ op(8);op(9);op(10);op(11);op(12);op(13);op(14);op(15);
+
.text
.align align_16byte,0x90
.globl GNAME(alloc_tramp)
TYPE(GNAME(alloc_tramp))
GNAME(alloc_tramp):
+ cld
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
+ and $-32,%rsp
+ sub $16*16,%rsp
+ map_all_xmm(stkxmmsave)
push %rax
push %rcx
push %rdx
push %r9
push %r10
push %r11
- mov 16(%rbp),%rdi
+ push %r11
+ mov 16(%rbp),%rdi
call GNAME(alloc)
mov %rax,16(%rbp)
pop %r11
+ pop %r11
pop %r10
pop %r9
pop %r8
pop %rdx
pop %rcx
pop %rax
+ map_all_xmm(stkxmmload)
+ mov %rbp,%rsp
pop %rbp
ret
SIZE(GNAME(alloc_tramp))
.align align_16byte,0x90
.globl GNAME(funcallable_instance_tramp)
#if !defined(LISP_FEATURE_DARWIN)
- .type GNAME(funcallable_instance_tramp),@function
+ TYPE(GNAME(funcallable_instance_tramp))
#endif
GNAME(funcallable_instance_tramp):
mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax
* now, the first word of it contains the address to jump to. */
jmp *CLOSURE_FUN_OFFSET(%rax)
#if !defined(LISP_FEATURE_DARWIN)
- .size GNAME(funcallable_instance_tramp), .-GNAME(funcallable_instance_tramp)
+ SIZE(GNAME(funcallable_instance_tramp))
#endif
/*
* fun-end breakpoint magic
.align align_16byte,0x90
.globl GNAME(fast_bzero)
TYPE(GNAME(fast_bzero))
-
+
+ #ifdef LISP_FEATURE_WIN32
+ #define xmmreg xmm7
+ #define redsave(reg,off) movups reg,-off(%rsp)
+ #define redrestore(reg,off) movups -off(%rsp),reg
+ #else
+ #define xmmreg xmm0
+ #define redsave(reg,off)
+ #define redrestore(reg,off)
+ #endif
+
GNAME(fast_bzero):
/* A fast routine for zero-filling blocks of memory that are
* guaranteed to start and end at a 4096-byte aligned address.
shr $6, %rsi /* Amount of 64-byte blocks to copy */
jz Lend /* If none, stop */
mov %rsi, %rcx /* Save start address */
- movups %xmm7, -16(%rsp) /* Save XMM register */
- xorps %xmm7, %xmm7 /* Zero the XMM register */
+ redsave(%xmmreg,16)
+ xorps %xmmreg, %xmmreg /* Zero the XMM register */
jmp Lloop
- .align align_16byte
+ .align align_16byte
Lloop:
/* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
* we're touching are not fetched into the L1 cache, since we're just
* going to overwrite the memory soon anyway.
*/
- movntdq %xmm7, 0(%rdi)
- movntdq %xmm7, 16(%rdi)
- movntdq %xmm7, 32(%rdi)
- movntdq %xmm7, 48(%rdi)
+ movntdq %xmmreg, 0(%rdi)
+ movntdq %xmmreg, 16(%rdi)
+ movntdq %xmmreg, 32(%rdi)
+ movntdq %xmmreg, 48(%rdi)
add $64, %rdi /* Advance pointer */
dec %rsi /* Decrement 64-byte block count */
jnz Lloop
mfence /* Ensure that the writes are globally visible, since
* MOVNTDQ is weakly ordered */
- movups -16(%rsp), %xmm7 /* Restore the XMM register */
+ redrestore(%xmmreg,16)
prefetcht0 0(%rcx) /* Prefetch the start of the block into cache,
* since it's likely to be used immediately. */
Lend: