From 076d38141d1d2689a1040dc8af71bd7fbf2b54a4 Mon Sep 17 00:00:00 2001 From: Stas Boukarev Date: Thu, 15 Aug 2013 17:21:04 +0400 Subject: [PATCH] Optimize calling asm routines and static foreign functions on x86-64. Instead of loading the address using LEA REG, [#xADDRESS] use MOV REG, #xADDRESS Which saves 2 bytes. --- src/assembly/x86-64/arith.lisp | 9 ++---- src/assembly/x86-64/support.lisp | 9 ++---- src/compiler/x86-64/alloc.lisp | 2 +- src/compiler/x86-64/call.lisp | 8 ++--- src/compiler/x86-64/cell.lisp | 35 ++++++++++---------- src/compiler/x86-64/insts.lisp | 33 ++++++++++++++++--- src/compiler/x86-64/macros.lisp | 3 +- src/compiler/x86-64/move.lisp | 66 ++++++++++++++++++-------------------- src/compiler/x86-64/nlx.lisp | 2 +- src/compiler/x86-64/show.lisp | 6 ++-- 10 files changed, 91 insertions(+), 82 deletions(-) diff --git a/src/assembly/x86-64/arith.lisp b/src/assembly/x86-64/arith.lisp index c816bdf..c0c773f 100644 --- a/src/assembly/x86-64/arith.lisp +++ b/src/assembly/x86-64/arith.lisp @@ -224,8 +224,7 @@ (:generator 10 (move rdx x) (move rdi y) - (inst lea rcx (make-ea :qword - :disp (make-fixup ',name :assembly-routine))) + (inst mov rcx (make-fixup ',name :assembly-routine)) (inst call rcx))))) (define-cond-assem-rtn generic-< < two-arg-< :l) @@ -293,8 +292,7 @@ (:generator 10 (move rdx x) (move rdi y) - (inst lea rcx (make-ea :qword - :disp (make-fixup 'generic-eql :assembly-routine))) + (inst mov rcx (make-fixup 'generic-eql :assembly-routine)) (inst call rcx))) #+sb-assembling @@ -359,6 +357,5 @@ (:generator 10 (move rdx x) (move rdi y) - (inst lea rcx (make-ea :qword - :disp (make-fixup 'generic-= :assembly-routine))) + (inst mov rcx (make-fixup 'generic-= :assembly-routine)) (inst call rcx))) diff --git a/src/assembly/x86-64/support.lisp b/src/assembly/x86-64/support.lisp index c56bf76..1bd5887 100644 --- a/src/assembly/x86-64/support.lisp +++ b/src/assembly/x86-64/support.lisp @@ -13,23 +13,20 @@ (ecase style (:raw (values - `((inst lea temp-reg-tn - (make-ea :qword :disp (make-fixup ',name :assembly-routine))) + `((inst mov temp-reg-tn (make-fixup ',name :assembly-routine)) (inst call temp-reg-tn)) nil)) (:full-call (values `((note-this-location ,vop :call-site) - (inst lea temp-reg-tn - (make-ea :qword :disp (make-fixup ',name :assembly-routine))) + (inst mov temp-reg-tn (make-fixup ',name :assembly-routine)) (inst call temp-reg-tn) (note-this-location ,vop :single-value-return) (inst cmov :c rsp-tn rbx-tn)) '((:save-p :compute-only)))) (:none (values - `((inst lea temp-reg-tn - (make-ea :qword :disp (make-fixup ',name :assembly-routine))) + `((inst mov temp-reg-tn (make-fixup ',name :assembly-routine)) (inst jmp temp-reg-tn)) nil)))) diff --git a/src/compiler/x86-64/alloc.lisp b/src/compiler/x86-64/alloc.lisp index 02b3cc4..74c49e8 100644 --- a/src/compiler/x86-64/alloc.lisp +++ b/src/compiler/x86-64/alloc.lisp @@ -177,7 +177,7 @@ (:args) (:results (result :scs (any-reg))) (:generator 1 - (inst lea result (make-fixup "funcallable_instance_tramp" :foreign)))) + (inst mov result (make-fixup "funcallable_instance_tramp" :foreign)))) (define-vop (fixed-alloc) (:args) diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp index 2b96e42..9684aa5 100644 --- a/src/compiler/x86-64/call.lisp +++ b/src/compiler/x86-64/call.lisp @@ -941,9 +941,7 @@ (move rsi args) (move rax function) ;; And jump to the assembly routine. - (inst lea call-target - (make-ea :qword - :disp (make-fixup 'tail-call-variable :assembly-routine))) + (inst mov call-target (make-fixup 'tail-call-variable :assembly-routine)) (inst jmp call-target))) ;;;; unknown values return @@ -1089,9 +1087,7 @@ (emit-label not-single))) (move rsi vals) (move rcx nvals) - (inst lea return-asm - (make-ea :qword :disp (make-fixup 'return-multiple - :assembly-routine))) + (inst mov return-asm (make-fixup 'return-multiple :assembly-routine)) (inst jmp return-asm) (trace-table-entry trace-table-normal))) diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp index 0263435..e728c1b 100644 --- a/src/compiler/x86-64/cell.lisp +++ b/src/compiler/x86-64/cell.lisp @@ -273,7 +273,7 @@ fun-pointer-lowtag))) (inst cmp (reg-in-size type :byte) simple-fun-header-widetag) (inst jmp :e NORMAL-FUN) - (inst lea raw (make-fixup "closure_tramp" :foreign)) + (inst mov raw (make-fixup "closure_tramp" :foreign)) NORMAL-FUN (storew function fdefn fdefn-fun-slot other-pointer-lowtag) (storew raw fdefn fdefn-raw-addr-slot other-pointer-lowtag) @@ -310,23 +310,22 @@ (inst test tls-index tls-index) (inst jmp :ne tls-index-valid) (inst mov tls-index symbol) - (inst lea temp-reg-tn - (make-ea :qword :disp - (make-fixup (ecase (tn-offset tls-index) - (#.rax-offset 'alloc-tls-index-in-rax) - (#.rcx-offset 'alloc-tls-index-in-rcx) - (#.rdx-offset 'alloc-tls-index-in-rdx) - (#.rbx-offset 'alloc-tls-index-in-rbx) - (#.rsi-offset 'alloc-tls-index-in-rsi) - (#.rdi-offset 'alloc-tls-index-in-rdi) - (#.r8-offset 'alloc-tls-index-in-r8) - (#.r9-offset 'alloc-tls-index-in-r9) - (#.r10-offset 'alloc-tls-index-in-r10) - (#.r12-offset 'alloc-tls-index-in-r12) - (#.r13-offset 'alloc-tls-index-in-r13) - (#.r14-offset 'alloc-tls-index-in-r14) - (#.r15-offset 'alloc-tls-index-in-r15)) - :assembly-routine))) + (inst mov temp-reg-tn + (make-fixup (ecase (tn-offset tls-index) + (#.rax-offset 'alloc-tls-index-in-rax) + (#.rcx-offset 'alloc-tls-index-in-rcx) + (#.rdx-offset 'alloc-tls-index-in-rdx) + (#.rbx-offset 'alloc-tls-index-in-rbx) + (#.rsi-offset 'alloc-tls-index-in-rsi) + (#.rdi-offset 'alloc-tls-index-in-rdi) + (#.r8-offset 'alloc-tls-index-in-r8) + (#.r9-offset 'alloc-tls-index-in-r9) + (#.r10-offset 'alloc-tls-index-in-r10) + (#.r12-offset 'alloc-tls-index-in-r12) + (#.r13-offset 'alloc-tls-index-in-r13) + (#.r14-offset 'alloc-tls-index-in-r14) + (#.r15-offset 'alloc-tls-index-in-r15)) + :assembly-routine)) (inst call temp-reg-tn) (emit-label tls-index-valid) (inst push (make-ea :qword :base thread-base-tn :scale 1 :index tls-index)) diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp index 7488457..442c485 100644 --- a/src/compiler/x86-64/insts.lisp +++ b/src/compiler/x86-64/insts.lisp @@ -156,7 +156,7 @@ (type sb!disassem:disassem-state dstate)) (if (typep value 'full-reg) (print-reg-with-width value width stream dstate) - (print-mem-access value width sized-p stream dstate))) + (print-mem-access value width sized-p stream dstate))) ;;; Print a register or a memory reference. The width is determined by ;;; calling INST-OPERAND-SIZE. @@ -343,6 +343,9 @@ (:dword 32) (:qword 64))) +(defun print-imm/asm-routine (value stream dstate) + (sb!disassem:maybe-note-assembler-routine value nil dstate) + (princ value stream)) ) ; EVAL-WHEN ;;;; disassembler argument types @@ -403,6 +406,10 @@ (setf width 32)) (sb!disassem:read-signed-suffix width dstate)))) +(sb!disassem:define-arg-type signed-imm-data/asm-routine + :type 'signed-imm-data + :printer #'print-imm/asm-routine) + ;;; Used by the variant of the MOV instruction with opcode B8 which can ;;; move immediates of all sizes (i.e. including :qword) into a ;;; register. @@ -413,6 +420,11 @@ (width-bits (inst-operand-size dstate)) dstate))) +(sb!disassem:define-arg-type signed-imm-data-upto-qword/asm-routine + :type 'signed-imm-data-upto-qword + :printer #'print-imm/asm-routine) + + ;;; Used by those instructions that have a default operand size of ;;; :qword. Nevertheless the immediate is at most of size :dword. ;;; The only instruction of this kind having a variant with an immediate @@ -688,6 +700,13 @@ (reg/mem :type 'sized-reg/mem) (imm :type 'signed-imm-data)) +(sb!disassem:define-instruction-format (reg/mem-imm/asm-routine 16 + :include 'reg/mem-imm + :default-printer + '(:name :tab reg/mem ", " imm)) + (reg/mem :type 'sized-reg/mem) + (imm :type 'signed-imm-data/asm-routine)) + ;;; Same as reg/mem, but with using the accumulator in the default printer (sb!disassem:define-instruction-format (accum-reg/mem 16 @@ -1737,9 +1756,9 @@ (define-instruction mov (segment dst src) ;; immediate to register - (:printer reg ((op #b1011) (imm nil :type 'signed-imm-data)) + (:printer reg ((op #b1011) (imm nil :type 'signed-imm-data/asm-routine)) '(:name :tab reg ", " imm)) - (:printer rex-reg ((op #b1011) (imm nil :type 'signed-imm-data-upto-qword)) + (:printer rex-reg ((op #b1011) (imm nil :type 'signed-imm-data-upto-qword/asm-routine)) '(:name :tab reg ", " imm)) ;; absolute mem to/from accumulator (:printer simple-dir ((op #b101000) (imm nil :type 'imm-addr)) @@ -1747,7 +1766,7 @@ ;; register to/from register/memory (:printer reg-reg/mem-dir ((op #b100010))) ;; immediate to register/memory - (:printer reg/mem-imm ((op '(#b1100011 #b000)))) + (:printer reg/mem-imm/asm-routine ((op '(#b1100011 #b000)))) (:emitter (let ((size (matching-operand-size dst src))) @@ -1765,6 +1784,12 @@ #b10111) (reg-tn-encoding dst)) (emit-sized-immediate segment size src)))) + ((and (fixup-p src) + (or (eq (fixup-flavor src) :foreign) + (eq (fixup-flavor src) :assembly-routine))) + (maybe-emit-rex-prefix segment :dword nil nil dst) + (emit-byte-with-reg segment #b10111 (reg-tn-encoding dst)) + (emit-absolute-fixup segment src)) (t (maybe-emit-rex-for-ea segment src dst) (emit-byte segment diff --git a/src/compiler/x86-64/macros.lisp b/src/compiler/x86-64/macros.lisp index 2883d28..fe896aa 100644 --- a/src/compiler/x86-64/macros.lisp +++ b/src/compiler/x86-64/macros.lisp @@ -168,8 +168,7 @@ ;;; object. (defun allocation-tramp (alloc-tn size lowtag) (inst push size) - (inst lea temp-reg-tn (make-ea :qword - :disp (make-fixup "alloc_tramp" :foreign))) + (inst mov temp-reg-tn (make-fixup "alloc_tramp" :foreign)) (inst call temp-reg-tn) (inst pop alloc-tn) (when lowtag diff --git a/src/compiler/x86-64/move.lisp b/src/compiler/x86-64/move.lisp index 7544b8b..dbf58ed 100644 --- a/src/compiler/x86-64/move.lisp +++ b/src/compiler/x86-64/move.lisp @@ -346,23 +346,22 @@ (inst imul y x #.(ash 1 n-fixnum-tag-bits)) (inst jmp :no DONE) (inst mov y x))) - (inst lea temp-reg-tn - (make-ea :qword :disp - (make-fixup (ecase (tn-offset y) - (#.rax-offset 'alloc-signed-bignum-in-rax) - (#.rcx-offset 'alloc-signed-bignum-in-rcx) - (#.rdx-offset 'alloc-signed-bignum-in-rdx) - (#.rbx-offset 'alloc-signed-bignum-in-rbx) - (#.rsi-offset 'alloc-signed-bignum-in-rsi) - (#.rdi-offset 'alloc-signed-bignum-in-rdi) - (#.r8-offset 'alloc-signed-bignum-in-r8) - (#.r9-offset 'alloc-signed-bignum-in-r9) - (#.r10-offset 'alloc-signed-bignum-in-r10) - (#.r12-offset 'alloc-signed-bignum-in-r12) - (#.r13-offset 'alloc-signed-bignum-in-r13) - (#.r14-offset 'alloc-signed-bignum-in-r14) - (#.r15-offset 'alloc-signed-bignum-in-r15)) - :assembly-routine))) + (inst mov temp-reg-tn + (make-fixup (ecase (tn-offset y) + (#.rax-offset 'alloc-signed-bignum-in-rax) + (#.rcx-offset 'alloc-signed-bignum-in-rcx) + (#.rdx-offset 'alloc-signed-bignum-in-rdx) + (#.rbx-offset 'alloc-signed-bignum-in-rbx) + (#.rsi-offset 'alloc-signed-bignum-in-rsi) + (#.rdi-offset 'alloc-signed-bignum-in-rdi) + (#.r8-offset 'alloc-signed-bignum-in-r8) + (#.r9-offset 'alloc-signed-bignum-in-r9) + (#.r10-offset 'alloc-signed-bignum-in-r10) + (#.r12-offset 'alloc-signed-bignum-in-r12) + (#.r13-offset 'alloc-signed-bignum-in-r13) + (#.r14-offset 'alloc-signed-bignum-in-r14) + (#.r15-offset 'alloc-signed-bignum-in-r15)) + :assembly-routine)) (inst call temp-reg-tn) DONE)) (define-move-vop move-from-signed :move @@ -395,23 +394,22 @@ :scale (ash 1 n-fixnum-tag-bits)))) (inst jmp :z done) (inst mov y x) - (inst lea temp-reg-tn - (make-ea :qword :disp - (make-fixup (ecase (tn-offset y) - (#.rax-offset 'alloc-unsigned-bignum-in-rax) - (#.rcx-offset 'alloc-unsigned-bignum-in-rcx) - (#.rdx-offset 'alloc-unsigned-bignum-in-rdx) - (#.rbx-offset 'alloc-unsigned-bignum-in-rbx) - (#.rsi-offset 'alloc-unsigned-bignum-in-rsi) - (#.rdi-offset 'alloc-unsigned-bignum-in-rdi) - (#.r8-offset 'alloc-unsigned-bignum-in-r8) - (#.r9-offset 'alloc-unsigned-bignum-in-r9) - (#.r10-offset 'alloc-unsigned-bignum-in-r10) - (#.r12-offset 'alloc-unsigned-bignum-in-r12) - (#.r13-offset 'alloc-unsigned-bignum-in-r13) - (#.r14-offset 'alloc-unsigned-bignum-in-r14) - (#.r15-offset 'alloc-unsigned-bignum-in-r15)) - :assembly-routine))) + (inst mov temp-reg-tn + (make-fixup (ecase (tn-offset y) + (#.rax-offset 'alloc-unsigned-bignum-in-rax) + (#.rcx-offset 'alloc-unsigned-bignum-in-rcx) + (#.rdx-offset 'alloc-unsigned-bignum-in-rdx) + (#.rbx-offset 'alloc-unsigned-bignum-in-rbx) + (#.rsi-offset 'alloc-unsigned-bignum-in-rsi) + (#.rdi-offset 'alloc-unsigned-bignum-in-rdi) + (#.r8-offset 'alloc-unsigned-bignum-in-r8) + (#.r9-offset 'alloc-unsigned-bignum-in-r9) + (#.r10-offset 'alloc-unsigned-bignum-in-r10) + (#.r12-offset 'alloc-unsigned-bignum-in-r12) + (#.r13-offset 'alloc-unsigned-bignum-in-r13) + (#.r14-offset 'alloc-unsigned-bignum-in-r14) + (#.r15-offset 'alloc-unsigned-bignum-in-r15)) + :assembly-routine)) (inst call temp-reg-tn) (emit-label done)))) (define-move-vop move-from-unsigned :move diff --git a/src/compiler/x86-64/nlx.lisp b/src/compiler/x86-64/nlx.lisp index 5816030..523ef31 100644 --- a/src/compiler/x86-64/nlx.lisp +++ b/src/compiler/x86-64/nlx.lisp @@ -266,7 +266,7 @@ catch-block-entry-pc-slot) ;; Run any required UWPs. - (inst lea temp-reg-tn (make-fixup 'unwind :assembly-routine)) + (inst mov temp-reg-tn (make-fixup 'unwind :assembly-routine)) (inst jmp temp-reg-tn) ENTRY-LABEL diff --git a/src/compiler/x86-64/show.lisp b/src/compiler/x86-64/show.lisp index b94e4bf..a23882e 100644 --- a/src/compiler/x86-64/show.lisp +++ b/src/compiler/x86-64/show.lisp @@ -32,10 +32,8 @@ (inst push rbp-tn) (inst and rsp-tn -16) (storew rax rsp-tn) - (inst lea rax (make-fixup "debug_print" :foreign)) - (inst lea call-target - (make-ea :qword - :disp (make-fixup "call_into_c" :foreign))) + (inst mov rax (make-fixup "debug_print" :foreign)) + (inst mov call-target (make-fixup "call_into_c" :foreign)) (inst call call-target) (inst mov rsp-tn rbp-tn) (inst pop rbp-tn) -- 1.7.10.4