From 45d50c67c7f3d81b3357c785d7226df22421a261 Mon Sep 17 00:00:00 2001 From: Stas Boukarev Date: Tue, 1 Oct 2013 20:17:00 +0400 Subject: [PATCH] Optimize RETURN-MULTIPLE on x86-64. Replace REP MOVS with an explicit loop of simple MOV instructions. RETURN-MULTIPLE is used to copy multiple values down the stack. The following code becomes around 5 times faster on a modern CPU; passing more values is faster as well, but not by as much. (defun m () (declare (optimize speed (safety 0))) (values 1 2 3 4)) (defun b () (declare (optimize speed (safety 0))) (let (*) (m))) --- src/assembly/x86-64/assem-rtns.lisp | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/assembly/x86-64/assem-rtns.lisp b/src/assembly/x86-64/assem-rtns.lisp index 125c15f..b41c86f 100644 --- a/src/assembly/x86-64/assem-rtns.lisp +++ b/src/assembly/x86-64/assem-rtns.lisp @@ -28,7 +28,9 @@ (:temp eax unsigned-reg rax-offset) (:temp ebx unsigned-reg rbx-offset) (:temp edx unsigned-reg rdx-offset) - (:temp edi unsigned-reg rdi-offset)) + (:temp edi unsigned-reg rdi-offset) + (:temp temp unsigned-reg r8-offset) + (:temp loop-index unsigned-reg r9-offset)) ;; Pick off the cases where everything fits in register args. (inst jrcxz ZERO-VALUES) @@ -55,21 +57,23 @@ ;; we have to be careful not to clobber values before we've read ;; them. Because the stack builds down, we are copying to a larger ;; address. Therefore, we need to iterate from larger addresses to - ;; smaller addresses. pfw-this says copy ecx words from esi to edi - ;; counting down. - (inst shr ecx n-fixnum-tag-bits) - (inst std) ; count down - (inst sub esi n-word-bytes) - (inst lea edi (make-ea :qword :base ebx :disp (- n-word-bytes))) - (inst rep) - (inst movs :qword) - (inst cld) - - ;; Restore the count. - (inst mov ecx edx) + ;; smaller addresses. 
+ (zeroize loop-index) + LOOP + (inst sub loop-index n-word-bytes) + (inst mov temp + (make-ea :qword :base esi + :index loop-index)) + (inst mov + (make-ea :qword :base ebx + :index loop-index) + temp) + + (inst sub edx (fixnumize 1)) + (inst jmp :nz LOOP) ;; Set the stack top to the last result. - (inst lea rsp-tn (make-ea :qword :base edi :disp n-word-bytes)) + (inst lea rsp-tn (make-ea :qword :base ebx :index loop-index)) ;; Load the register args. (loadw edx ebx -1) -- 1.7.10.4