From 2f25121d2f99afb4840138dd7e86807ceee20696 Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Thu, 4 Sep 2003 16:52:07 +0000 Subject: [PATCH] 0.8.3.37: Some more love and kisses to the ppc backend ... the strength reduction we perform on sparc multiplications is going to be common to most architectures, so ... factor out the reduction itself into a routine ... use it in a PPC deftransform for *, with suitable cutoffs ... some appropriate * vops, too Incidental cleanups in the ppc backend ... declare D-SI instructions' operands to have the appropriate type ... fix the shady dodgy dealing going on in the %LR macrofunction --- src/compiler/generic/vm-tran.lisp | 47 ++++++++++++++++++++ src/compiler/ppc/arith.lisp | 88 +++++++++++++++++++++++++++++++++++++ src/compiler/ppc/insts.lisp | 13 ++++-- src/compiler/sparc/arith.lisp | 74 ++++++------------------------- version.lisp-expr | 2 +- 5 files changed, 159 insertions(+), 65 deletions(-) diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp index f6b32a1..df2dcb7 100644 --- a/src/compiler/generic/vm-tran.lisp +++ b/src/compiler/generic/vm-tran.lisp @@ -461,3 +461,50 @@ (define-good-modular-fun logand) (define-good-modular-fun logior) + +;;; There are two different ways the multiplier can be recoded. The +;;; more obvious is to shift X by the correct amount for each bit set +;;; in Y and to sum the results. But if there is a string of bits that +;;; are all set, you can add X shifted by one more then the bit +;;; position of the first set bit and subtract X shifted by the bit +;;; position of the last set bit. We can't use this second method when +;;; the high order bit is bit 31 because shifting by 32 doesn't work +;;; too well. +(defun ub32-strength-reduce-constant-multiply (arg num) + (declare (type (unsigned-byte 32) numb)) + (let ((adds 0) (shifts 0) + (result nil) first-one) + (labels ((tub32 (x) `(truly-the (unsigned-byte 32) ,x)) + (add (next-factor) + (setf result + (tub32 + (if result + (progn (incf adds) `(+ ,result ,(tub32 next-factor))) + next-factor))))) + (declare (inline add)) + (dotimes (bitpos 32) + (if first-one + (when (not (logbitp bitpos num)) + (add (if (= (1+ first-one) bitpos) + ;; There is only a single bit in the string. + (progn (incf shifts) `(ash ,arg ,first-one)) + ;; There are at least two. + (progn + (incf adds) + (incf shifts 2) + `(- ,(tub32 `(ash ,arg ,bitpos)) + ,(tub32 `(ash ,arg ,first-one)))))) + (setf first-one nil)) + (when (logbitp bitpos num) + (setf first-one bitpos)))) + (when first-one + (cond ((= first-one 31)) + ((= first-one 30) (incf shifts) (add `(ash ,arg 30))) + (t + (incf shifts 2) + (incf adds) + (add `(- ,(tub32 `(ash ,arg 31)) + ,(tub32 `(ash ,arg ,first-one)))))) + (incf shifts) + (add `(ash ,arg 31)))) + (values result adds shifts))) diff --git a/src/compiler/ppc/arith.lisp b/src/compiler/ppc/arith.lisp index 3d59350..9cc48ee 100644 --- a/src/compiler/ppc/arith.lisp +++ b/src/compiler/ppc/arith.lisp @@ -244,9 +244,75 @@ fixnum-additive-overflow-trap)) (emit-label no-overflow)))) +(define-vop (fast-*/fixnum=>fixnum fast-fixnum-binop) + (:temporary (:scs (non-descriptor-reg)) temp) + (:translate *) + (:generator 2 + (inst srawi temp y 2) + (inst mullw r x temp))) + +(define-vop (fast-*-c/fixnum=>fixnum fast-fixnum-binop-c) + (:translate *) + (:arg-types tagged-num + (:constant (and (signed-byte 16) (not (integer 0 0))))) + (:generator 1 + (inst mulli r x y))) + +(define-vop (fast-*-bigc/fixnum=>fixnum fast-fixnum-binop-c) + (:translate *) + (:arg-types tagged-num + (:constant (and fixnum (not (signed-byte 16))))) + (:temporary (:scs (non-descriptor-reg)) temp) + (:generator 1 + (inst lr temp y) + (inst mullw r x temp))) + +(define-vop (fast-*/signed=>signed fast-signed-binop) + (:translate *) + (:generator 4 + (inst mullw r x y))) +(define-vop (fast-*-c/signed=>signed fast-signed-binop-c) + (:translate *) + (:generator 3 + (inst mulli r x y))) + +(define-vop (fast-*/unsigned=>unsigned fast-unsigned-binop) + (:translate *) + (:generator 4 + (inst mullw r x y))) + +(define-vop (fast-*-c/unsigned=>unsigned fast-unsigned-binop-c) + (:translate *) + (:generator 3 + (inst mulli r x y))) + ;;; Shifting +(macrolet ((def (name sc-type type result-type cost) + `(define-vop (,name) + (:note "inline ASH") + (:translate ash) + (:args (number :scs (,sc-type)) + (amount :scs (signed-reg unsigned-reg immediate))) + (:arg-types ,type positive-fixnum) + (:results (result :scs (,result-type))) + (:result-types ,type) + (:policy :fast-safe) + (:generator ,cost + (sc-case amount + ((signed-reg unsigned-reg) + (inst slw result number amount)) + (immediate + (let ((amount (tn-value amount))) + (aver (> amount 0)) + (inst slwi result number amount)))))))) + ;; FIXME: There's the opportunity for a sneaky optimization here, I + ;; think: a FAST-ASH-LEFT-C/FIXNUM=>SIGNED vop. -- CSR, 2003-09-03 + (def fast-ash-left/fixnum=>fixnum any-reg tagged-num any-reg 2) + (def fast-ash-left/signed=>signed signed-reg signed-num signed-reg 3) + (def fast-ash-left/unsigned=>unsigned unsigned-reg unsigned-num unsigned-reg 3)) + (define-vop (fast-ash/unsigned=>unsigned) (:note "inline ASH") (:args (number :scs (unsigned-reg) :to :save) @@ -913,3 +979,25 @@ (define-static-fun two-arg-and (x y) :translate logand) (define-static-fun two-arg-ior (x y) :translate logior) (define-static-fun two-arg-xor (x y) :translate logxor) + +(in-package "SB!C") + +(deftransform * ((x y) + ((unsigned-byte 32) (constant-arg (unsigned-byte 32))) + (unsigned-byte 32)) + "recode as shifts and adds" + (let ((y (continuation-value y))) + (multiple-value-bind (result adds shifts) + (ub32-strength-reduce-constant-multiply 'x y) + (cond + ((typep y '(signed-byte 16)) + ;; a mulli instruction has a latency of 5. + (when (> (+ adds shifts) 4) + (give-up-ir1-transform))) + (t + ;; a mullw instruction also has a latency of 5, plus two + ;; instructions (in general) to load the immediate into a + ;; register. + (when (> (+ adds shifts) 6) + (give-up-ir1-transform)))) + (or result 0)))) diff --git a/src/compiler/ppc/insts.lisp b/src/compiler/ppc/insts.lisp index f99ad4a..8c2203f 100644 --- a/src/compiler/ppc/insts.lisp +++ b/src/compiler/ppc/insts.lisp @@ -852,7 +852,8 @@ (define-d-si-instruction (name op &key (fixup nil) (cost 1) other-dependencies) (multiple-value-bind (other-reads other-writes) (classify-dependencies other-dependencies) `(define-instruction ,name (segment rt ra si) - (:declare (type (signed-byte 16))) + (:declare (type (or ,@(when fixup '(fixup)) + (signed-byte 16)) si)) (:printer d-si ((op ,op))) (:delay ,cost) (:cost ,cost) @@ -1968,10 +1969,14 @@ (let* ((high-half (ldb (byte 16 16) value)) (low-half (ldb (byte 16 0) value))) (declare (type (unsigned-byte 16) high-half low-half)) - (cond ((if (logbitp 15 low-half) (= high-half #xffff) (zerop high-half)) - (inst li reg low-half)) + (cond ((and (logbitp 15 low-half) (= high-half #xffff)) + (inst li reg (dpb low-half (byte 16 0) -1))) + ((and (not (logbitp 15 low-half)) (zerop high-half)) + (inst li reg low-half)) (t - (inst lis reg high-half) + (inst lis reg (if (logbitp 15 high-half) + (dpb high-half (byte 16 0) -1) + high-half)) (unless (zerop low-half) (inst ori reg reg low-half)))))) (fixup diff --git a/src/compiler/sparc/arith.lisp b/src/compiler/sparc/arith.lisp index 331a709..2669bf6 100644 --- a/src/compiler/sparc/arith.lisp +++ b/src/compiler/sparc/arith.lisp @@ -1252,70 +1252,24 @@ (in-package "SB!C") -;;; If both arguments and the result are (UNSIGNED-BYTE 32), try to -;;; come up with a ``better'' multiplication using multiplier -;;; recoding. There are two different ways the multiplier can be -;;; recoded. The more obvious is to shift X by the correct amount for -;;; each bit set in Y and to sum the results. But if there is a string -;;; of bits that are all set, you can add X shifted by one more then -;;; the bit position of the first set bit and subtract X shifted by -;;; the bit position of the last set bit. We can't use this second -;;; method when the high order bit is bit 31 because shifting by 32 -;;; doesn't work too well. (deftransform * ((x y) ((unsigned-byte 32) (constant-arg (unsigned-byte 32))) (unsigned-byte 32)) "recode as shifts and adds" - (let ((y (continuation-value y)) - (adds 0) - (shifts 0) - (result nil) - (first-one nil)) - (labels ((tub32 (x) `(truly-the (unsigned-byte 32) ,x)) - (add (next-factor) - (setf result - (tub32 - (if result - (progn (incf adds) `(+ ,result ,(tub32 next-factor))) - next-factor))))) - (declare (inline add)) - (dotimes (bitpos 32) - (if first-one - (when (not (logbitp bitpos y)) - (add (if (= (1+ first-one) bitpos) - ;; There is only a single bit in the string. - (progn (incf shifts) `(ash x ,first-one)) - ;; There are at least two. - (progn - (incf adds) - (incf shifts 2) - `(- ,(tub32 `(ash x ,bitpos)) - ,(tub32 `(ash x ,first-one)))))) - (setf first-one nil)) - (when (logbitp bitpos y) - (setf first-one bitpos)))) - (when first-one - (cond ((= first-one 31)) - ((= first-one 30) (incf shifts) (add '(ash x 30))) - (t - (incf shifts 2) - (incf adds) - (add `(- ,(tub32 '(ash x 31)) ,(tub32 `(ash x ,first-one)))))) - (incf shifts) - (add '(ash x 31)))) - - (cond - ;; we assume, perhaps foolishly, that good SPARCs don't have an - ;; issue with multiplications. (Remember that there's a - ;; different transform for converting x*2^k to a shift). - ((member :sparc-64 *backend-subfeatures*) (give-up-ir1-transform)) - ((or (member :sparc-v9 *backend-subfeatures*) - (member :sparc-v8 *backend-subfeatures*)) - ;; breakeven point as measured by Raymond Toy - (when (> (+ adds shifts) 9) - (give-up-ir1-transform)))) - - (or result 0))) + (let ((y (continuation-value y))) + (multiple-value-bind (result adds shifts) + (ub32-strength-reduce-constant-multiply 'x y) + (cond + ;; we assume, perhaps foolishly, that good SPARCs don't have an + ;; issue with multiplications. (Remember that there's a + ;; different transform for converting x*2^k to a shift). + ((member :sparc-64 *backend-subfeatures*) (give-up-ir1-transform)) + ((or (member :sparc-v9 *backend-subfeatures*) + (member :sparc-v8 *backend-subfeatures*)) + ;; breakeven point as measured by Raymond Toy + (when (> (+ adds shifts) 9) + (give-up-ir1-transform)))) + (or result 0)))) ;; If we can prove that we have a right shift, just do the right shift ;; instead of calling the inline ASH which has to check for the diff --git a/version.lisp-expr b/version.lisp-expr index 73b53e1..1696b3d 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.3.36" +"0.8.3.37" -- 1.7.10.4