X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fruntime%2Fx86-64-assem.S;h=92e07164e7bcaaef94c28990131128a08db362c2;hb=6cb4f9ea3f4e35a5a8e75922833e14575ae92180;hp=c654861cd30be40ff01372b9376b363b3c6fa61e;hpb=1c6e1e0ccbad4cefe1984f4a1a45d81181718f65;p=sbcl.git diff --git a/src/runtime/x86-64-assem.S b/src/runtime/x86-64-assem.S index c654861..92e0716 100644 --- a/src/runtime/x86-64-assem.S +++ b/src/runtime/x86-64-assem.S @@ -60,6 +60,10 @@ .global GNAME(call_into_c) .type GNAME(call_into_c),@function GNAME(call_into_c): + /* ABI requires that the direction flag be clear on function + * entry and exit. */ + cld + push %rbp # Save old frame pointer. mov %rsp,%rbp # Establish new frame. @@ -166,8 +170,9 @@ Lcall: /* If the function returned multiple values, it will return to this point. Lose them */ + jnc LsingleValue mov %rbx, %rsp - /* A singled value function returns here */ +LsingleValue: /* Restore the stack, in case there was a stack change. */ pop %rsp # c-sp @@ -179,6 +184,10 @@ Lcall: pop %r12 pop %rbx + /* ABI requires that the direction flag be clear on function + * entry and exit. */ + cld + /* FIXME Restore the NPX state. */ /* return value is already in rax where lisp expects it */ @@ -282,11 +291,7 @@ GNAME(closure_tramp): .align align_8byte GNAME(fun_end_breakpoint_guts): /* Multiple Value return */ - jmp multiple_value_return - /* the above jmp is only 2 bytes long, we need to add a nop for - * padding since the single value return convention jumps to original - * return address + 3 bytes */ - nop + jc multiple_value_return /* Single value return: The eventual return will now use the multiple values return convention but with a return values count of one. 
*/
@@ -344,4 +349,56 @@ GNAME(post_signal_tramp):
 	ret
 	.size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp)
 
-	.end
+        .text
+        .align  align_8byte,0x90
+        .global GNAME(fast_bzero)
+        .type   GNAME(fast_bzero),@function
+
+/*
+ * fast_bzero -- zero-fill a block of memory with non-temporal stores.
+ *
+ * In:       %rdi = start address of the block
+ *           %rsi = length of the block in bytes
+ * Clobbers: %rcx, %rsi, %rdi, flags.  %xmm7 is used as the zero source
+ *           but is saved and restored around the loop.
+ *
+ * The block is guaranteed to start and end at a 4096-byte aligned
+ * address.  Only whole 64-byte units are written: a length below 64
+ * returns immediately and any remainder modulo 64 is left untouched.
+ */
+GNAME(fast_bzero):
+        shr     $6, %rsi                /* Number of 64-byte blocks to zero */
+        jz      Lend                    /* If none, stop */
+        mov     %rdi, %rcx              /* Save start address for the final
+                                         * prefetch; %rdi is advanced below */
+        movups  %xmm7, -16(%rsp)        /* Save XMM register just below %rsp */
+        xorps   %xmm7, %xmm7            /* Zero the XMM register */
+        jmp     Lloop
+        .align  16
+Lloop:
+
+        /* Store the 16 zero bytes from xmm7 to memory, 4 times.  MOVNTDQ is
+         * the non-caching double-quadword moving variant, i.e. the memory
+         * areas we're touching are not fetched into the L1 cache, since
+         * we're just going to overwrite the memory soon anyway.
+         */
+        movntdq %xmm7, 0(%rdi)
+        movntdq %xmm7, 16(%rdi)
+        movntdq %xmm7, 32(%rdi)
+        movntdq %xmm7, 48(%rdi)
+
+        add     $64, %rdi               /* Advance pointer */
+        dec     %rsi                    /* Decrement 64-byte block count */
+        jnz     Lloop
+        mfence                          /* Ensure that the writes are globally
+                                         * visible, since MOVNTDQ is weakly
+                                         * ordered */
+        movups  -16(%rsp), %xmm7        /* Restore the XMM register */
+        prefetcht0 0(%rcx)              /* Prefetch the start of the block into
+                                         * cache, since it's likely to be used
+                                         * immediately. */
+Lend:
+        ret
+        .size   GNAME(fast_bzero), .-GNAME(fast_bzero)
+
+        .end