From: Nathan Froyd Date: Sat, 15 Jul 2006 04:26:24 +0000 (+0000) Subject: 0.9.14.13: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=25a64b8df600187eea7100bdb89375ff531d7e83;p=sbcl.git 0.9.14.13: Micro-optimize bit-vector accesses on x86 and x86-64: ... processor does the necessary masking for us, so we can eliminate an AND instruction; ... in the process, remove dodgy interior pointer usage lurking inside the small data-vector-ref VOPs. --- diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp index a144c56..be4823d 100644 --- a/src/compiler/x86-64/array.lisp +++ b/src/compiler/x86-64/array.lisp @@ -180,9 +180,14 @@ :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) (move ecx index) - (inst and ecx ,(1- elements-per-word)) + ;; We used to mask ECX for all values of BITS, but since + ;; Intel's documentation says that the chip will mask shift + ;; and rotate counts by 63 automatically, we can safely move + ;; the masking operation under the protection of this UNLESS + ;; in the bit-vector case. 
--njf, 2006-07-14 ,@(unless (= bits 1) - `((inst shl ecx ,(1- (integer-length bits))))) + `((inst and ecx ,(1- elements-per-word)) + (inst shl ecx ,(1- (integer-length bits))))) (inst shr result :cl) (inst and result ,(1- (ash 1 bits))))) (define-vop (,(symbolicate 'data-vector-ref-c/ type)) @@ -205,29 +210,32 @@ (:note "inline array store") (:translate data-vector-set) (:policy :fast-safe) - (:args (object :scs (descriptor-reg) :target ptr) + (:args (object :scs (descriptor-reg)) (index :scs (unsigned-reg) :target ecx) (value :scs (unsigned-reg immediate) :target result)) (:arg-types ,type positive-fixnum positive-fixnum) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) (:temporary (:sc unsigned-reg) word-index) - (:temporary (:sc unsigned-reg :from (:argument 0)) ptr old) - (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) - ecx) + (:temporary (:sc unsigned-reg) old) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) (:generator 25 (move word-index index) (inst shr word-index ,bit-shift) - (inst lea ptr + (inst mov old (make-ea :qword :base object :index word-index :scale n-word-bytes :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (loadw old ptr) (move ecx index) - (inst and ecx ,(1- elements-per-word)) + ;; We used to mask ECX for all values of BITS, but since + ;; Intel's documentation says that the chip will mask shift + ;; and rotate counts by 63 automatically, we can safely move + ;; the masking operation under the protection of this UNLESS + ;; in the bit-vector case. 
--njf, 2006-07-14 ,@(unless (= bits 1) - `((inst shl ecx ,(1- (integer-length bits))))) + `((inst and ecx ,(1- elements-per-word)) + (inst shl ecx ,(1- (integer-length bits))))) (inst ror old :cl) (unless (and (sc-is value immediate) (= (tn-value value) ,(1- (ash 1 bits)))) @@ -239,7 +247,11 @@ (unsigned-reg (inst or old value))) (inst rol old :cl) - (storew old ptr) + (mov (make-ea :qword :base object :index word-index + :scale n-word-bytes + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + old) (sc-case value (immediate (inst mov result (tn-value value))) diff --git a/src/compiler/x86/array.lisp b/src/compiler/x86/array.lisp index 7e6b27c..40926d7 100644 --- a/src/compiler/x86/array.lisp +++ b/src/compiler/x86/array.lisp @@ -173,9 +173,14 @@ :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) (move ecx index) - (inst and ecx ,(1- elements-per-word)) - ,@(unless (= bits 1) - `((inst shl ecx ,(1- (integer-length bits))))) + ;; We used to mask ECX for all values of ELEMENTS-PER-WORD, + ;; but since Intel's documentation says that the chip will + ;; mask shift and rotate counts by 31 automatically, we can + ;; safely move the masking operation under the protection of + ;; this UNLESS in the bit-vector case. 
--njf, 2006-07-14 + ,@(unless (= elements-per-word n-word-bits) + `((inst and ecx ,(1- elements-per-word)) + (inst shl ecx ,(1- (integer-length bits))))) (inst shr result :cl) (inst and result ,(1- (ash 1 bits))))) (define-vop (,(symbolicate 'data-vector-ref-c/ type)) @@ -198,28 +203,31 @@ (:note "inline array store") (:translate data-vector-set) (:policy :fast-safe) - (:args (object :scs (descriptor-reg) :target ptr) + (:args (object :scs (descriptor-reg)) (index :scs (unsigned-reg) :target ecx) (value :scs (unsigned-reg immediate) :target result)) (:arg-types ,type positive-fixnum positive-fixnum) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) (:temporary (:sc unsigned-reg) word-index) - (:temporary (:sc unsigned-reg :from (:argument 0)) ptr old) - (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) - ecx) + (:temporary (:sc unsigned-reg) old) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) (:generator 25 (move word-index index) (inst shr word-index ,bit-shift) - (inst lea ptr + (inst mov old (make-ea :dword :base object :index word-index :scale 4 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (loadw old ptr) (move ecx index) - (inst and ecx ,(1- elements-per-word)) - ,@(unless (= bits 1) - `((inst shl ecx ,(1- (integer-length bits))))) + ;; We used to mask ECX for all values of ELEMENTS-PER-WORD, + ;; but since Intel's documentation says that the chip will + ;; mask shift and rotate counts by 31 automatically, we can + ;; safely move the masking operation under the protection of + ;; this UNLESS in the bit-vector case. 
--njf, 2006-07-14 + ,@(unless (= elements-per-word n-word-bits) + `((inst and ecx ,(1- elements-per-word)) + (inst shl ecx ,(1- (integer-length bits))))) (inst ror old :cl) (unless (and (sc-is value immediate) (= (tn-value value) ,(1- (ash 1 bits)))) @@ -231,7 +239,10 @@ (unsigned-reg (inst or old value))) (inst rol old :cl) - (storew old ptr) + (inst mov (make-ea :dword :base object :index word-index :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + old) (sc-case value (immediate (inst mov result (tn-value value))) diff --git a/version.lisp-expr b/version.lisp-expr index 7d1ce15..04f334c 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.14.12" +"0.9.14.13"