From 7fccd3e92289bf3a7cd04bbbb92657aa3ad4808f Mon Sep 17 00:00:00 2001 From: Lutz Euler Date: Fri, 8 Jun 2012 17:30:15 +0200 Subject: [PATCH] Micro-optimization: Avoid byte register writes on x86-64 in LOAD-TYPE. The optimization guide for AMD's x86-64 processors recommends against writing a partial register and instead using MOVZX to write the corresponding 32/64-bit register. Otherwise, the instruction would have an unnecessary dependency on the most recent write to the register, reducing the available instruction-level parallelism. On Intel's processors this is not necessary but doesn't hurt. To follow this recommendation, modify LOAD-TYPE to use MOVZX instead of a byte MOV and adapt the VOPs that call it: FUN-SUBTYPE, SET-FDEFN-FUN, and WIDETAG-OF. This additionally saves a temporary register in FUN-SUBTYPE and allows shortening all paths through WIDETAG-OF by one instruction. The effect on code size is small and mixed. --- src/compiler/x86-64/cell.lisp | 4 ++-- src/compiler/x86-64/macros.lisp | 4 ++-- src/compiler/x86-64/system.lisp | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp index 8744c26..0263435 100644 --- a/src/compiler/x86-64/cell.lisp +++ b/src/compiler/x86-64/cell.lisp @@ -263,7 +263,7 @@ (:args (function :scs (descriptor-reg) :target result) (fdefn :scs (descriptor-reg))) (:temporary (:sc unsigned-reg) raw) - (:temporary (:sc byte-reg) type) + (:temporary (:sc unsigned-reg) type) (:results (result :scs (descriptor-reg))) (:generator 38 (load-type type function (- fun-pointer-lowtag)) @@ -271,7 +271,7 @@ (make-ea :byte :base function :disp (- (* simple-fun-code-offset n-word-bytes) fun-pointer-lowtag))) - (inst cmp type simple-fun-header-widetag) + (inst cmp (reg-in-size type :byte) simple-fun-header-widetag) (inst jmp :e NORMAL-FUN) (inst lea raw (make-fixup "closure_tramp" :foreign)) NORMAL-FUN diff --git a/src/compiler/x86-64/macros.lisp 
b/src/compiler/x86-64/macros.lisp index 801c905..13deab4 100644 --- a/src/compiler/x86-64/macros.lisp +++ b/src/compiler/x86-64/macros.lisp @@ -120,10 +120,10 @@ (n-offset offset)) (ecase *backend-byte-order* (:little-endian - `(inst mov ,n-target + `(inst movzx ,n-target (make-ea :byte :base ,n-source :disp ,n-offset))) (:big-endian - `(inst mov ,n-target + `(inst movzx ,n-target (make-ea :byte :base ,n-source :disp (+ ,n-offset (1- n-word-bytes)))))))) diff --git a/src/compiler/x86-64/system.lisp b/src/compiler/x86-64/system.lisp index 7cb54a4..ae57c4c 100644 --- a/src/compiler/x86-64/system.lisp +++ b/src/compiler/x86-64/system.lisp @@ -28,11 +28,12 @@ (:translate widetag-of) (:policy :fast-safe) (:args (object :scs (descriptor-reg))) - (:temporary (:sc unsigned-reg :offset eax-offset :to (:result 0)) rax) + (:temporary (:sc unsigned-reg :offset rax-offset :target result + :to (:result 0)) rax) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 6 - (inst mov rax object) + (inst movzx rax (reg-in-size object :byte)) (inst and al-tn lowtag-mask) (inst cmp al-tn other-pointer-lowtag) (inst jmp :e OTHER-PTR) @@ -48,29 +49,27 @@ (inst jmp :ne DONE) ;; must be an other immediate - (inst mov rax object) + (inst movzx rax (reg-in-size object :byte)) (inst jmp DONE) FUNCTION-PTR - (load-type al-tn object (- fun-pointer-lowtag)) + (load-type rax object (- fun-pointer-lowtag)) (inst jmp DONE) OTHER-PTR - (load-type al-tn object (- other-pointer-lowtag)) + (load-type rax object (- other-pointer-lowtag)) DONE - (inst movzx result al-tn))) + (move result rax))) (define-vop (fun-subtype) (:translate fun-subtype) (:policy :fast-safe) (:args (function :scs (descriptor-reg))) - (:temporary (:sc byte-reg :from (:eval 0) :to (:eval 1)) temp) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 6 - (load-type temp function (- fun-pointer-lowtag)) - (inst movzx result temp))) + (load-type result function (- 
fun-pointer-lowtag)))) (define-vop (set-fun-subtype) (:translate (setf fun-subtype)) -- 1.7.10.4