From: Paul Khuong Date: Wed, 22 May 2013 03:46:32 +0000 (-0400) Subject: Simpler word-sized variable right shifts on x86 and x86-64 X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=60deeb7616b22ae52cf1dd8bbc2904a1a0d80ffd;p=sbcl.git Simpler word-sized variable right shifts on x86 and x86-64 * Known negative shifts are converted to another function that only handles machine-friendly right shifts. * The transforms and VOPs are conditionalised on ash-right-vops, so other platforms aren't penalised. * The new transforms trigger a lot of notes; this is suboptimal, and one test had to be adjusted. --- diff --git a/NEWS b/NEWS index 0411d55..a2e9a56 100644 --- a/NEWS +++ b/NEWS @@ -100,6 +100,9 @@ changes relative to sbcl-1.1.7: * optimization: IF/IF conversion should reliably result in sane code when (some of) the results of the inner IF are always false or always true. + * optimization: On x86oids, variable right shifts of machine words are + compiled into straight SAR/SHR, instead of going through the generic + VOP. (lp#1066204) changes in sbcl-1.1.7 relative to sbcl-1.1.6: * enhancement: TRACE :PRINT-ALL handles multiple-valued forms. diff --git a/base-target-features.lisp-expr b/base-target-features.lisp-expr index 131f517..e469fda 100644 --- a/base-target-features.lisp-expr +++ b/base-target-features.lisp-expr @@ -353,6 +353,9 @@ ;; (Replaces use of SIGALRM.) ; :sb-wtimer + ;; This platform implements VOPs for %ash/right, variable-width shift right + ; :ash-right-vops + ;; ;; miscellaneous notes on other things which could have special significance ;; in the *FEATURES* list diff --git a/make-config.sh b/make-config.sh index 5bac433..4030506 100644 --- a/make-config.sh +++ b/make-config.sh @@ -585,7 +585,7 @@ if [ "$sbcl_arch" = "x86" ]; then printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf printf ' :stack-allocatable-lists :stack-allocatable-fixed-objects' >> $ltf printf ' :alien-callbacks :cycle-counter :inline-constants ' >> $ltf - printf ' :memory-barrier-vops :multiply-high-vops' >> $ltf + printf ' :memory-barrier-vops :multiply-high-vops :ash-right-vops' >> $ltf case "$sbcl_os" in linux | freebsd | netbsd | openbsd | sunos | darwin | win32) printf ' :linkage-table' >> $ltf @@ -606,7 +606,7 @@ elif [ "$sbcl_arch" = "x86-64" ]; then printf ' :stack-allocatable-lists :stack-allocatable-fixed-objects' >> $ltf printf ' :alien-callbacks :cycle-counter :complex-float-vops' >> $ltf printf ' :float-eql-vops :inline-constants :memory-barrier-vops' >> $ltf - printf ' :multiply-high-vops :sb-simd-pack' >> $ltf + printf ' :multiply-high-vops :sb-simd-pack :ash-right-vops' >> $ltf elif [ "$sbcl_arch" = "mips" ]; then printf ' :cheneygc :linkage-table' >> $ltf printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index abb77ec..4638b51 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1344,6 +1344,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%ARRAY-FILL-POINTER-P" "%ARRAY-RANK" #!+(or x86 x86-64 ppc) "%ARRAY-ATOMIC-INCF/WORD" + #!+ash-right-vops "%ASH/RIGHT" "%ASSOC" "%ASSOC-EQ" "%ASSOC-IF" diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index 358208f..1aef68d 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -367,6 +367,10 @@ (defknown logbitp (unsigned-byte integer) boolean (movable foldable flushable)) (defknown ash (integer integer) integer (movable foldable flushable explicit-check)) +#!+ash-right-vops +(defknown %ash/right ((or word sb!vm:signed-word) (mod #.sb!vm:n-word-bits)) + (or word sb!vm:signed-word) + (movable foldable flushable always-translatable)) (defknown (logcount integer-length) (integer) bit-index (movable foldable flushable explicit-check)) ;;; FIXME: According to the ANSI spec, it's legal to use any diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp index ff0a51c..90d1077 100644 --- a/src/compiler/srctran.lisp +++ b/src/compiler/srctran.lisp @@ -2776,7 +2776,87 @@ (specifier-type `(signed-byte ,size-high)) *universal-type*)) *universal-type*))) + +;;; Rightward ASH +#!+ash-right-vops +(progn + (defun %ash/right (integer amount) + (ash integer (- amount))) + + (deftransform ash ((integer amount) (sb!vm:signed-word (integer * 0))) + "Convert ASH of signed word to %ASH/RIGHT" + (when (constant-lvar-p amount) + (give-up-ir1-transform)) + (let ((use (lvar-uses amount))) + (cond ((and (combination-p use) + (eql '%negate (lvar-fun-name (combination-fun use)))) + (splice-fun-args amount '%negate 1) + `(lambda (integer amount) + (declare (type unsigned-byte amount)) + (%ash/right integer (if (>= amount ,sb!vm:n-word-bits) + ,(1- sb!vm:n-word-bits) + amount)))) + (t + `(%ash/right integer (if (<= amount ,(- sb!vm:n-word-bits)) + ,(1- sb!vm:n-word-bits) + (- amount))))))) + + (deftransform ash ((integer amount) (word (integer * 0))) + "Convert ASH of word to %ASH/RIGHT" + (when (constant-lvar-p amount) + (give-up-ir1-transform)) + (let ((use (lvar-uses amount))) + (cond ((and (combination-p use) + (eql '%negate (lvar-fun-name (combination-fun use)))) + (splice-fun-args amount '%negate 1) + `(lambda (integer amount) + (declare (type unsigned-byte amount)) + (if (>= amount ,sb!vm:n-word-bits) + 0 + (%ash/right integer amount)))) + (t + `(if (<= amount ,(- sb!vm:n-word-bits)) + 0 + (%ash/right integer (- amount))))))) + + (deftransform %ash/right ((integer amount) (integer (constant-arg unsigned-byte))) + "Convert %ASH/RIGHT by constant back to ASH" + `(ash integer ,(- (lvar-value amount)))) + + (deftransform %ash/right ((integer amount) * (member -1 0) :node node) + ;; constant-fold large shifts + (let ((return-type (single-value-type (node-derived-type node)))) + (cond ((type= return-type (specifier-type '(eql 0))) + 0) + ((type= return-type (specifier-type '(eql -1))) + -1) + (t + (aver (csubtypep (lvar-type integer) (specifier-type 'sb!vm:signed-word))) + `(ash integer ,(- 1 sb!vm:n-word-bits)))))) + + (defun %ash/right-derive-type-aux (n-type shift same-arg) + (declare (ignore same-arg)) + (or (and (or (csubtypep n-type (specifier-type 'sb!vm:signed-word)) + (csubtypep n-type (specifier-type 'word))) + (csubtypep shift (specifier-type `(mod ,sb!vm:n-word-bits))) + (let ((n-low (numeric-type-low n-type)) + (n-high (numeric-type-high n-type)) + (s-low (numeric-type-low shift)) + (s-high (numeric-type-high shift))) + (make-numeric-type :class 'integer :complexp :real + :low (when n-low + (if (minusp n-low) + (ash n-low (- s-low)) + (ash n-low (- s-high)))) + :high (when n-high + (if (minusp n-high) + (ash n-high (- s-high)) + (ash n-high (- s-low))))))) + *universal-type*)) + (defoptimizer (%ash/right derive-type) ((n shift)) + (two-arg-derive-type n shift #'%ash/right-derive-type-aux #'%ash/right)) + ) ;;; Modular functions diff --git a/src/compiler/x86-64/arith.lisp b/src/compiler/x86-64/arith.lisp index 516f2d0..311eb78 100644 --- a/src/compiler/x86-64/arith.lisp +++ b/src/compiler/x86-64/arith.lisp @@ -902,6 +902,52 @@ constant shift greater than word length"))) DONE)) +#!+ash-right-vops +(define-vop (fast-%ash/right/unsigned) + (:translate %ash/right) + (:policy :fast-safe) + (:args (number :scs (unsigned-reg) :target result) + (amount :scs (unsigned-reg) :target rcx)) + (:arg-types unsigned-num unsigned-num) + (:results (result :scs (unsigned-reg) :from (:argument 0))) + (:result-types unsigned-num) + (:temporary (:sc signed-reg :offset rcx-offset :from (:argument 1)) rcx) + (:generator 4 + (move result number) + (move rcx amount) + (inst shr result :cl))) + +#!+ash-right-vops +(define-vop (fast-%ash/right/signed) + (:translate %ash/right) + (:policy :fast-safe) + (:args (number :scs (signed-reg) :target result) + (amount :scs (unsigned-reg) :target rcx)) + (:arg-types signed-num unsigned-num) + (:results (result :scs (signed-reg) :from (:argument 0))) + (:result-types signed-num) + (:temporary (:sc signed-reg :offset rcx-offset :from (:argument 1)) rcx) + (:generator 4 + (move result number) + (move rcx amount) + (inst sar result :cl))) + +#!+ash-right-vops +(define-vop (fast-%ash/right/fixnum) + (:translate %ash/right) + (:policy :fast-safe) + (:args (number :scs (any-reg) :target result) + (amount :scs (unsigned-reg) :target rcx)) + (:arg-types tagged-num unsigned-num) + (:results (result :scs (any-reg) :from (:argument 0))) + (:result-types tagged-num) + (:temporary (:sc signed-reg :offset rcx-offset :from (:argument 1)) rcx) + (:generator 3 + (move result number) + (move rcx amount) + (inst sar result :cl) + (inst and result (lognot fixnum-tag-mask)))) + (in-package "SB!C") (defknown %lea (integer integer (member 1 2 4 8 16) (signed-byte 64)) diff --git a/src/compiler/x86/arith.lisp b/src/compiler/x86/arith.lisp index ab11950..79064d0 100644 --- a/src/compiler/x86/arith.lisp +++ b/src/compiler/x86/arith.lisp @@ -810,6 +810,52 @@ constant shift greater than word length"))) DONE)) +#!+ash-right-vops +(define-vop (fast-%ash/right/unsigned) + (:translate %ash/right) + (:policy :fast-safe) + (:args (number :scs (unsigned-reg) :target result) + (amount :scs (unsigned-reg) :target ecx)) + (:arg-types unsigned-num unsigned-num) + (:results (result :scs (unsigned-reg) :from (:argument 0))) + (:result-types unsigned-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:generator 4 + (move result number) + (move ecx amount) + (inst shr result :cl))) + +#!+ash-right-vops +(define-vop (fast-%ash/right/signed) + (:translate %ash/right) + (:policy :fast-safe) + (:args (number :scs (signed-reg) :target result) + (amount :scs (unsigned-reg) :target ecx)) + (:arg-types signed-num unsigned-num) + (:results (result :scs (signed-reg) :from (:argument 0))) + (:result-types signed-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:generator 4 + (move result number) + (move ecx amount) + (inst sar result :cl))) + +#!+ash-right-vops +(define-vop (fast-%ash/right/fixnum) + (:translate %ash/right) + (:policy :fast-safe) + (:args (number :scs (any-reg) :target result) + (amount :scs (unsigned-reg) :target ecx)) + (:arg-types tagged-num unsigned-num) + (:results (result :scs (any-reg) :from (:argument 0))) + (:result-types tagged-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:generator 3 + (move result number) + (move ecx amount) + (inst sar result :cl) + (inst and result (lognot fixnum-tag-mask)))) + (in-package "SB!C") (defknown %lea (integer integer (member 1 2 4 8) (signed-byte 32)) diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp index 555d3a2..e16ad55 100644 --- a/tests/compiler.pure.lisp +++ b/tests/compiler.pure.lisp @@ -1441,7 +1441,9 @@ (declare (type (alien (* (unsigned 8))) a) (type (unsigned-byte 32) i)) (deref a i)))) - (compiler-note () (error "The code is not optimized."))) + (compiler-note (c) + (unless (search "%ASH/RIGHT" (first (simple-condition-format-arguments c))) + (error "The code is not optimized.")))) (handler-case (compile nil '(lambda (x) @@ -4515,3 +4517,53 @@ (declare (type fixnum y z)) (catch (if x y z) (funcall f))))) (error "Where's my style-warning?"))) + +;; Smoke test for rightward shifts +(with-test (:name (:ash/right-signed)) + (let* ((f (compile nil `(lambda (x y) + (declare (type (mod ,(* 2 sb-vm:n-word-bits)) y) + (type sb-vm:signed-word x) + (optimize speed)) + (ash x (- y))))) + (max (ash most-positive-word -1)) + (min (- -1 max))) + (flet ((test (x y) + (assert (= (ash x (- y)) + (funcall f x y))))) + (dotimes (x 32) + (dotimes (y (* 2 sb-vm:n-word-bits)) + (test x y) + (test (- x) y) + (test (- max x) y) + (test (+ min x) y)))))) + +(with-test (:name (:ash/right-unsigned)) + (let ((f (compile nil `(lambda (x y) + (declare (type (mod ,(* 2 sb-vm:n-word-bits)) y) + (type word x) + (optimize speed)) + (ash x (- y))))) + (max most-positive-word)) + (flet ((test (x y) + (assert (= (ash x (- y)) + (funcall f x y))))) + (dotimes (x 32) + (dotimes (y (* 2 sb-vm:n-word-bits)) + (test x y) + (test (- max x) y)))))) + +(with-test (:name (:ash/right-fixnum)) + (let ((f (compile nil `(lambda (x y) + (declare (type (mod ,(* 2 sb-vm:n-word-bits)) y) + (type fixnum x) + (optimize speed)) + (ash x (- y)))))) + (flet ((test (x y) + (assert (= (ash x (- y)) + (funcall f x y))))) + (dotimes (x 32) + (dotimes (y (* 2 sb-vm:n-word-bits)) + (test x y) + (test (- x) y) + (test (- most-positive-fixnum x) y) + (test (+ most-negative-fixnum x) y))))))