X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-64-assem.S;h=6ea7d2153562b86e8bbcb7e397a75642ff438dca;hb=2b90fd1dbad23322258222a2ef4cef7f6a00831d;hp=4f693725ad69aa6782d27bc2e2f1127460e49938;hpb=c3334d2307b721cfcea29e6abcd33e48487fb1ea;p=sbcl.git diff --git a/src/runtime/x86-64-assem.S b/src/runtime/x86-64-assem.S index 4f69372..6ea7d21 100644 --- a/src/runtime/x86-64-assem.S +++ b/src/runtime/x86-64-assem.S @@ -17,6 +17,7 @@ #include "validate.h" #include "sbcl.h" #include "genesis/closure.h" +#include "genesis/funcallable-instance.h" #include "genesis/fdefn.h" #include "genesis/static-symbols.h" #include "genesis/symbol.h" @@ -60,6 +61,10 @@ .global GNAME(call_into_c) .type GNAME(call_into_c),@function GNAME(call_into_c): + /* ABI requires that the direction flag be clear on function + * entry and exit. */ + cld + push %rbp # Save old frame pointer. mov %rsp,%rbp # Establish new frame. @@ -97,7 +102,7 @@ GNAME(call_into_lisp_first_time): mov THREAD_CONTROL_STACK_START_OFFSET(%rax) ,%rsp /* don't think too hard about what happens if we get interrupted * here */ - add $THREAD_CONTROL_STACK_SIZE-8,%rsp + add $THREAD_CONTROL_STACK_SIZE-16,%rsp jmp Lstack .text @@ -117,20 +122,26 @@ GNAME(call_into_lisp): mov %rsp,%rbp # Establish new frame. Lstack: /* FIXME x86 saves FPU state here */ - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - + push %rbx # these regs are callee-saved according to C + push %r12 # so must be preserved and restored when + push %r13 # the lisp function returns + push %r14 # + push %r15 # mov %rsp,%rbx # remember current stack push %rbx # Save entry stack on (maybe) new stack. - /* Establish Lisp args. */ - mov %rdi,%rax # lexenv? 
- mov %rsi,%rbx # address of arg vec - mov %rdx,%rcx # num args + push %rdi # args from C + push %rsi # + push %rdx # +#ifdef LISP_FEATURE_SB_THREAD + mov specials,%rdi + call pthread_getspecific + mov %rax,%r12 +#endif + pop %rcx # num args + pop %rbx # arg vector + pop %rax # function ptr/lexenv xor %rdx,%rdx # clear any descriptor registers xor %rdi,%rdi # that we can't be sure we'll @@ -160,8 +171,9 @@ Lcall: /* If the function returned multiple values, it will return to this point. Lose them */ + jnc LsingleValue mov %rbx, %rsp - /* A singled value function returns here */ +LsingleValue: /* Restore the stack, in case there was a stack change. */ pop %rsp # c-sp @@ -173,9 +185,14 @@ Lcall: pop %r12 pop %rbx + /* ABI requires that the direction flag be clear on function + * entry and exit. */ + cld + /* FIXME Restore the NPX state. */ - pop %rbp # c-sp + /* return value is already in rax where lisp expects it */ + leave ret .size GNAME(call_into_lisp), . - GNAME(call_into_lisp) @@ -203,7 +220,7 @@ GNAME(fpu_restore): * the undefined-function trampoline */ .text - .align align_4byte,0x90 + .align align_8byte,0x90 .global GNAME(undefined_tramp) .type GNAME(undefined_tramp),@function GNAME(undefined_tramp): @@ -217,9 +234,9 @@ GNAME(undefined_tramp): .text - .align align_4byte,0x90 + .align align_8byte,0x90 .global GNAME(alloc_tramp) - .type GNAME(alooc_tramp),@function + .type GNAME(alloc_tramp),@function GNAME(alloc_tramp): push %rbp # Save old frame pointer. mov %rsp,%rbp # Establish new frame. 
@@ -253,7 +270,7 @@ GNAME(alloc_tramp): * the closure trampoline */ .text - .align align_4byte,0x90 + .align align_8byte,0x90 .global GNAME(closure_tramp) .type GNAME(closure_tramp),@function GNAME(closure_tramp): @@ -267,21 +284,32 @@ GNAME(closure_tramp): jmp *CLOSURE_FUN_OFFSET(%rax) .size GNAME(closure_tramp), .-GNAME(closure_tramp) + .text + .align align_8byte,0x90 + .global GNAME(funcallable_instance_tramp) + .type GNAME(funcallable_instance_tramp),@function +GNAME(funcallable_instance_tramp): + mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax /* %rax := word stored at FUNCALLABLE_INSTANCE_FUNCTION_OFFSET from the object in %rax */ + /* KLUDGE: on this platform, whatever kind of function is in %rax + * now, the first word of it contains the address to jump to. */ + jmp *CLOSURE_FUN_OFFSET(%rax) /* indirect jump (not a call) through the word at CLOSURE_FUN_OFFSET(%rax) */ + .size GNAME(funcallable_instance_tramp), .-GNAME(funcallable_instance_tramp) + /* * fun-end breakpoint magic */ .text .global GNAME(fun_end_breakpoint_guts) - .align align_4byte + .align align_8byte GNAME(fun_end_breakpoint_guts): /* Multiple Value return */ - jmp multiple_value_return + jc multiple_value_return /* Single value return: The eventual return will now use the multiple values return convention but with a return values count of one. */ mov %rsp,%rbx # Setup ebx - the ofp. - sub $4,%rsp # Allocate one stack slot for the return value - mov $4,%rcx # Setup ecx for one return value. 
mov $NIL,%rdi # default second value mov $NIL,%rsi # default third value @@ -299,7 +327,7 @@ GNAME(fun_end_breakpoint_end): .global GNAME(do_pending_interrupt) .type GNAME(do_pending_interrupt),@function - .align align_4byte,0x90 + .align align_8byte,0x90 GNAME(do_pending_interrupt): int3 .byte trap_PendingInterrupt @@ -308,12 +336,11 @@ GNAME(do_pending_interrupt): .globl GNAME(post_signal_tramp) .type GNAME(post_signal_tramp),@function - .align align_4byte,0x90 + .align align_8byte,0x90 GNAME(post_signal_tramp): /* this is notionally the second half of a function whose first half * doesn't exist. This is where call_into_lisp returns when called * using return_to_lisp_function */ - addq $24,%rsp /* clear call_into_lisp args from stack */ popq %r15 popq %r14 popq %r13 @@ -324,14 +351,54 @@ GNAME(post_signal_tramp): popq %r8 popq %rdi popq %rsi - popq %rbp - popq %rsp - popq %rdx + /* skip RBP and RSP */ popq %rbx + popq %rdx popq %rcx popq %rax + popfq leave ret .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp) - .end + .text + .align align_8byte,0x90 + .global GNAME(fast_bzero) + .type GNAME(fast_bzero),@function + +GNAME(fast_bzero): + /* A fast routine for zero-filling blocks of memory that are + * guaranteed to start and end at a 4096-byte aligned address. + * In: %rdi = start address, %rsi = length in bytes. Modifies %rcx, %rsi, %rdi and flags. */ + shr $6, %rsi /* Number of 64-byte blocks to zero */ + jz Lend /* If none, stop */ + mov %rdi, %rcx /* Save start address for the final prefetch */ + movups %xmm7, -16(%rsp) /* Save XMM register just below %rsp (no stack adjustment) */ + xorps %xmm7, %xmm7 /* Zero the XMM register */ + jmp Lloop + .align 16 +Lloop: + + /* Store the 16 zero bytes from xmm7 to memory, 4 times. MOVNTDQ is the + * non-caching double-quadword moving variant, i.e. the memory areas + * we're touching are not fetched into the L1 cache, since we're just + * going to overwrite the memory soon anyway. 
+ */ + movntdq %xmm7, 0(%rdi) + movntdq %xmm7, 16(%rdi) + movntdq %xmm7, 32(%rdi) + movntdq %xmm7, 48(%rdi) + + add $64, %rdi /* Advance pointer */ + dec %rsi /* Decrement 64-byte block count */ + jnz Lloop + mfence /* Ensure that the writes are globally visible, since + * MOVNTDQ is weakly ordered */ + movups -16(%rsp), %xmm7 /* Restore the XMM register */ + prefetcht0 0(%rcx) /* Prefetch the start of the block (saved in %rcx) into cache, + * since it's likely to be used immediately. */ +Lend: + ret + .size GNAME(fast_bzero), .-GNAME(fast_bzero) + + .end