X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-64-assem.S;h=d8084769be9adf2f06904184354b23a5851cb926;hb=0e3c4b4db102bd204a30402d7e5a0de44aea57ce;hp=053167e076520dcb3b02a1e691ad06d9b2ae59c6;hpb=3ea89bab9e4600ba80b6bc931481100fca74fa9d;p=sbcl.git diff --git a/src/runtime/x86-64-assem.S b/src/runtime/x86-64-assem.S index 053167e..d808476 100644 --- a/src/runtime/x86-64-assem.S +++ b/src/runtime/x86-64-assem.S @@ -25,7 +25,7 @@ #include "genesis/thread.h" /* Minimize conditionalization for different OS naming schemes. */ -#if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun +#if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun || defined _WIN64 #define GNAME(var) var #else #define GNAME(var) _##var @@ -33,7 +33,7 @@ /* Get the right type of alignment. Linux, FreeBSD and OpenBSD * want alignment in bytes. */ -#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun) +#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun) || defined _WIN64 #define align_4byte 4 #define align_8byte 8 #define align_16byte 16 @@ -156,12 +156,23 @@ GNAME(call_into_lisp_first_time): * return values in rax rdx * callee saves rbp rbx r12-15 if it uses them */ - +#ifdef LISP_FEATURE_WIN32 +# define SUPPORT_FOMIT_FRAME_POINTER +#endif .align align_16byte,0x90 GNAME(call_into_lisp): +#ifdef SUPPORT_FOMIT_FRAME_POINTER + mov %rbp,%rax +#endif push %rbp # Save old frame pointer. mov %rsp,%rbp # Establish new frame. Lstack: +#ifdef SUPPORT_FOMIT_FRAME_POINTER + /* If called through call_into_lisp_first_time, %r15 becomes invalid + * here, but we will not return in that case. 
*/ + push %r15 + mov %rax,%r15 +#endif /* FIXME x86 saves FPU state here */ push %rbx # these regs are callee-saved according to C push %r12 # so must be preserved and restored when @@ -176,6 +187,13 @@ Lstack: push %rsi # push %rdx # #ifdef LISP_FEATURE_SB_THREAD +# ifdef SUPPORT_FOMIT_FRAME_POINTER + mov (%rbp),%rcx + sub $32,%rsp + call GNAME(carry_frame_pointer) + add $32,%rsp + mov %rax,(%rbp) +# endif #ifdef LISP_FEATURE_GCC_TLS movq %fs:0, %rax movq GNAME(current_thread)@TPOFF(%rax), %r12 @@ -238,7 +256,13 @@ LsingleValue: /* FIXME Restore the NPX state. */ mov %rdx,%rax # c-val +#ifdef SUPPORT_FOMIT_FRAME_POINTER + mov %r15,%rbp # orig rbp + pop %r15 # orig r15 + add $8,%rsp # no need for saved (overridden) rbp +#else leave +#endif ret SIZE(GNAME(call_into_lisp)) @@ -277,17 +301,58 @@ GNAME(undefined_tramp): ret SIZE(GNAME(undefined_tramp)) + .text + .align align_16byte,0x90 + .globl GNAME(undefined_alien_function) + TYPE(GNAME(undefined_alien_function)) +GNAME(undefined_alien_function): + pop 8(%rbp) # Save return PC for backtrace. + TRAP + .byte trap_Error + .byte 4 + .byte UNDEFINED_ALIEN_FUN_ERROR + /* Encode RBX + FIXME: make independent of the encoding changes. */ + .byte 0xFE + .byte 0x9F + .byte 0x01 + ret + SIZE(GNAME(undefined_alien_function)) + /* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs * to know the name of the function immediately following the * undefined-function trampoline. */ +/* Our call-site does not take care of caller-saved xmm registers, so it + * falls to us to spill them before hopping into C. + * + * We simply save all of them. 
+ * + * (But for the sake of completeness, here is my understanding of the specs:) + * System V Microsoft + * argument passing xmm0-7 xmm0-3 + * caller-saved xmm8-15 xmm4-5 + * callee-saved - xmm6-15 + * + * --DFL */ + +#define stkxmmsave(n) movaps %xmm##n, n*16(%rsp) +#define stkxmmload(n) movaps n*16(%rsp), %xmm##n +#define map_all_xmm(op) \ + op(0);op(1);op(2);op(3);op(4);op(5);op(6);op(7); \ + op(8);op(9);op(10);op(11);op(12);op(13);op(14);op(15); + .text .align align_16byte,0x90 .globl GNAME(alloc_tramp) TYPE(GNAME(alloc_tramp)) GNAME(alloc_tramp): + cld push %rbp # Save old frame pointer. mov %rsp,%rbp # Establish new frame. + and $-32,%rsp + sub $16*16,%rsp + map_all_xmm(stkxmmsave) push %rax push %rcx push %rdx @@ -297,10 +362,12 @@ GNAME(alloc_tramp): push %r9 push %r10 push %r11 - mov 16(%rbp),%rdi + push %r11 + mov 16(%rbp),%rdi call GNAME(alloc) mov %rax,16(%rbp) pop %r11 + pop %r11 pop %r10 pop %r9 pop %r8 @@ -309,6 +376,8 @@ GNAME(alloc_tramp): pop %rdx pop %rcx pop %rax + map_all_xmm(stkxmmload) + mov %rbp,%rsp pop %rbp ret SIZE(GNAME(alloc_tramp)) @@ -336,7 +405,7 @@ GNAME(closure_tramp): .align align_16byte,0x90 .globl GNAME(funcallable_instance_tramp) #if !defined(LISP_FEATURE_DARWIN) - .type GNAME(funcallable_instance_tramp),@function + TYPE(GNAME(funcallable_instance_tramp)) #endif GNAME(funcallable_instance_tramp): mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax @@ -344,7 +413,7 @@ GNAME(closure_tramp): * now, the first word of it contains the address to jump to. 
*/ jmp *CLOSURE_FUN_OFFSET(%rax) #if !defined(LISP_FEATURE_DARWIN) - .size GNAME(funcallable_instance_tramp), .-GNAME(funcallable_instance_tramp) + SIZE(GNAME(funcallable_instance_tramp)) #endif /* * fun-end breakpoint magic @@ -427,7 +496,17 @@ GNAME(post_signal_tramp): .align align_16byte,0x90 .globl GNAME(fast_bzero) TYPE(GNAME(fast_bzero)) - + + #ifdef LISP_FEATURE_WIN32 + #define xmmreg xmm7 + #define redsave(reg,off) movups reg,-off(%rsp) + #define redrestore(reg,off) movups -off(%rsp),reg + #else + #define xmmreg xmm0 + #define redsave(reg,off) + #define redrestore(reg,off) + #endif + GNAME(fast_bzero): /* A fast routine for zero-filling blocks of memory that are * guaranteed to start and end at a 4096-byte aligned address. @@ -435,10 +514,10 @@ GNAME(fast_bzero): shr $6, %rsi /* Amount of 64-byte blocks to copy */ jz Lend /* If none, stop */ mov %rsi, %rcx /* Save start address */ - movups %xmm7, -16(%rsp) /* Save XMM register */ - xorps %xmm7, %xmm7 /* Zero the XMM register */ + redsave(%xmmreg,16) + xorps %xmmreg, %xmmreg /* Zero the XMM register */ jmp Lloop - .align align_16byte + .align align_16byte Lloop: /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the @@ -446,17 +525,17 @@ Lloop: * we're touching are not fetched into the L1 cache, since we're just * going to overwrite the memory soon anyway. */ - movntdq %xmm7, 0(%rdi) - movntdq %xmm7, 16(%rdi) - movntdq %xmm7, 32(%rdi) - movntdq %xmm7, 48(%rdi) + movntdq %xmmreg, 0(%rdi) + movntdq %xmmreg, 16(%rdi) + movntdq %xmmreg, 32(%rdi) + movntdq %xmmreg, 48(%rdi) add $64, %rdi /* Advance pointer */ dec %rsi /* Decrement 64-byte block count */ jnz Lloop mfence /* Ensure that the writes are globally visible, since * MOVNTDQ is weakly ordered */ - movups -16(%rsp), %xmm7 /* Restore the XMM register */ + redrestore(%xmmreg,16) prefetcht0 0(%rcx) /* Prefetch the start of the block into cache, * since it's likely to be used immediately. */ Lend: