(define-good-modular-fun logand)
(define-good-modular-fun logior)
+\f
+;;; There are two different ways the multiplier can be recoded. The
+;;; more obvious is to shift X by the correct amount for each bit set
+;;; in Y and to sum the results. But if there is a string of bits that
+;;; are all set, you can add X shifted by one more then the bit
+;;; position of the first set bit and subtract X shifted by the bit
+;;; position of the last set bit. We can't use this second method when
+;;; the high order bit is bit 31 because shifting by 32 doesn't work
+;;; too well.
+(defun ub32-strength-reduce-constant-multiply (arg num)
+ (declare (type (unsigned-byte 32) numb))
+ (let ((adds 0) (shifts 0)
+ (result nil) first-one)
+ (labels ((tub32 (x) `(truly-the (unsigned-byte 32) ,x))
+ (add (next-factor)
+ (setf result
+ (tub32
+ (if result
+ (progn (incf adds) `(+ ,result ,(tub32 next-factor)))
+ next-factor)))))
+ (declare (inline add))
+ (dotimes (bitpos 32)
+ (if first-one
+ (when (not (logbitp bitpos num))
+ (add (if (= (1+ first-one) bitpos)
+ ;; There is only a single bit in the string.
+ (progn (incf shifts) `(ash ,arg ,first-one))
+ ;; There are at least two.
+ (progn
+ (incf adds)
+ (incf shifts 2)
+ `(- ,(tub32 `(ash ,arg ,bitpos))
+ ,(tub32 `(ash ,arg ,first-one))))))
+ (setf first-one nil))
+ (when (logbitp bitpos num)
+ (setf first-one bitpos))))
+ (when first-one
+ (cond ((= first-one 31))
+ ((= first-one 30) (incf shifts) (add `(ash ,arg 30)))
+ (t
+ (incf shifts 2)
+ (incf adds)
+ (add `(- ,(tub32 `(ash ,arg 31))
+ ,(tub32 `(ash ,arg ,first-one))))))
+ (incf shifts)
+ (add `(ash ,arg 31))))
+ (values result adds shifts)))
fixnum-additive-overflow-trap))
(emit-label no-overflow))))
+(define-vop (fast-*/fixnum=>fixnum fast-fixnum-binop)
+ (:temporary (:scs (non-descriptor-reg)) temp)
+ (:translate *)
+ (:generator 2
+ (inst srawi temp y 2)
+ (inst mullw r x temp)))
+
+(define-vop (fast-*-c/fixnum=>fixnum fast-fixnum-binop-c)
+ (:translate *)
+ (:arg-types tagged-num
+ (:constant (and (signed-byte 16) (not (integer 0 0)))))
+ (:generator 1
+ (inst mulli r x y)))
+
+(define-vop (fast-*-bigc/fixnum=>fixnum fast-fixnum-binop-c)
+ (:translate *)
+ (:arg-types tagged-num
+ (:constant (and fixnum (not (signed-byte 16)))))
+ (:temporary (:scs (non-descriptor-reg)) temp)
+ (:generator 1
+ (inst lr temp y)
+ (inst mullw r x temp)))
+
+(define-vop (fast-*/signed=>signed fast-signed-binop)
+ (:translate *)
+ (:generator 4
+ (inst mullw r x y)))
+(define-vop (fast-*-c/signed=>signed fast-signed-binop-c)
+ (:translate *)
+ (:generator 3
+ (inst mulli r x y)))
+
+(define-vop (fast-*/unsigned=>unsigned fast-unsigned-binop)
+ (:translate *)
+ (:generator 4
+ (inst mullw r x y)))
+
+(define-vop (fast-*-c/unsigned=>unsigned fast-unsigned-binop-c)
+ (:translate *)
+ (:generator 3
+ (inst mulli r x y)))
+\f
;;; Shifting
+(macrolet ((def (name sc-type type result-type cost)
+ `(define-vop (,name)
+ (:note "inline ASH")
+ (:translate ash)
+ (:args (number :scs (,sc-type))
+ (amount :scs (signed-reg unsigned-reg immediate)))
+ (:arg-types ,type positive-fixnum)
+ (:results (result :scs (,result-type)))
+ (:result-types ,type)
+ (:policy :fast-safe)
+ (:generator ,cost
+ (sc-case amount
+ ((signed-reg unsigned-reg)
+ (inst slw result number amount))
+ (immediate
+ (let ((amount (tn-value amount)))
+ (aver (> amount 0))
+ (inst slwi result number amount))))))))
+ ;; FIXME: There's the opportunity for a sneaky optimization here, I
+ ;; think: a FAST-ASH-LEFT-C/FIXNUM=>SIGNED vop. -- CSR, 2003-09-03
+ (def fast-ash-left/fixnum=>fixnum any-reg tagged-num any-reg 2)
+ (def fast-ash-left/signed=>signed signed-reg signed-num signed-reg 3)
+ (def fast-ash-left/unsigned=>unsigned unsigned-reg unsigned-num unsigned-reg 3))
+
(define-vop (fast-ash/unsigned=>unsigned)
(:note "inline ASH")
(:args (number :scs (unsigned-reg) :to :save)
(define-static-fun two-arg-and (x y) :translate logand)
(define-static-fun two-arg-ior (x y) :translate logior)
(define-static-fun two-arg-xor (x y) :translate logxor)
+\f
+(in-package "SB!C")
+
+(deftransform * ((x y)
+ ((unsigned-byte 32) (constant-arg (unsigned-byte 32)))
+ (unsigned-byte 32))
+ "recode as shifts and adds"
+ (let ((y (continuation-value y)))
+ (multiple-value-bind (result adds shifts)
+ (ub32-strength-reduce-constant-multiply 'x y)
+ (cond
+ ((typep y '(signed-byte 16))
+ ;; a mulli instruction has a latency of 5.
+ (when (> (+ adds shifts) 4)
+ (give-up-ir1-transform)))
+ (t
+ ;; a mullw instruction also has a latency of 5, plus two
+ ;; instructions (in general) to load the immediate into a
+ ;; register.
+ (when (> (+ adds shifts) 6)
+ (give-up-ir1-transform))))
+ (or result 0))))
(define-d-si-instruction (name op &key (fixup nil) (cost 1) other-dependencies)
(multiple-value-bind (other-reads other-writes) (classify-dependencies other-dependencies)
`(define-instruction ,name (segment rt ra si)
- (:declare (type (signed-byte 16)))
+ (:declare (type (or ,@(when fixup '(fixup))
+ (signed-byte 16)) si))
(:printer d-si ((op ,op)))
(:delay ,cost)
(:cost ,cost)
(let* ((high-half (ldb (byte 16 16) value))
(low-half (ldb (byte 16 0) value)))
(declare (type (unsigned-byte 16) high-half low-half))
- (cond ((if (logbitp 15 low-half) (= high-half #xffff) (zerop high-half))
- (inst li reg low-half))
+ (cond ((and (logbitp 15 low-half) (= high-half #xffff))
+ (inst li reg (dpb low-half (byte 16 0) -1)))
+ ((and (not (logbitp 15 low-half)) (zerop high-half))
+ (inst li reg low-half))
(t
- (inst lis reg high-half)
+ (inst lis reg (if (logbitp 15 high-half)
+ (dpb high-half (byte 16 0) -1)
+ high-half))
(unless (zerop low-half)
(inst ori reg reg low-half))))))
(fixup
(in-package "SB!C")
-;;; If both arguments and the result are (UNSIGNED-BYTE 32), try to
-;;; come up with a ``better'' multiplication using multiplier
-;;; recoding. There are two different ways the multiplier can be
-;;; recoded. The more obvious is to shift X by the correct amount for
-;;; each bit set in Y and to sum the results. But if there is a string
-;;; of bits that are all set, you can add X shifted by one more then
-;;; the bit position of the first set bit and subtract X shifted by
-;;; the bit position of the last set bit. We can't use this second
-;;; method when the high order bit is bit 31 because shifting by 32
-;;; doesn't work too well.
(deftransform * ((x y)
((unsigned-byte 32) (constant-arg (unsigned-byte 32)))
(unsigned-byte 32))
"recode as shifts and adds"
- (let ((y (continuation-value y))
- (adds 0)
- (shifts 0)
- (result nil)
- (first-one nil))
- (labels ((tub32 (x) `(truly-the (unsigned-byte 32) ,x))
- (add (next-factor)
- (setf result
- (tub32
- (if result
- (progn (incf adds) `(+ ,result ,(tub32 next-factor)))
- next-factor)))))
- (declare (inline add))
- (dotimes (bitpos 32)
- (if first-one
- (when (not (logbitp bitpos y))
- (add (if (= (1+ first-one) bitpos)
- ;; There is only a single bit in the string.
- (progn (incf shifts) `(ash x ,first-one))
- ;; There are at least two.
- (progn
- (incf adds)
- (incf shifts 2)
- `(- ,(tub32 `(ash x ,bitpos))
- ,(tub32 `(ash x ,first-one))))))
- (setf first-one nil))
- (when (logbitp bitpos y)
- (setf first-one bitpos))))
- (when first-one
- (cond ((= first-one 31))
- ((= first-one 30) (incf shifts) (add '(ash x 30)))
- (t
- (incf shifts 2)
- (incf adds)
- (add `(- ,(tub32 '(ash x 31)) ,(tub32 `(ash x ,first-one))))))
- (incf shifts)
- (add '(ash x 31))))
-
- (cond
- ;; we assume, perhaps foolishly, that good SPARCs don't have an
- ;; issue with multiplications. (Remember that there's a
- ;; different transform for converting x*2^k to a shift).
- ((member :sparc-64 *backend-subfeatures*) (give-up-ir1-transform))
- ((or (member :sparc-v9 *backend-subfeatures*)
- (member :sparc-v8 *backend-subfeatures*))
- ;; breakeven point as measured by Raymond Toy
- (when (> (+ adds shifts) 9)
- (give-up-ir1-transform))))
-
- (or result 0)))
+ (let ((y (continuation-value y)))
+ (multiple-value-bind (result adds shifts)
+ (ub32-strength-reduce-constant-multiply 'x y)
+ (cond
+ ;; we assume, perhaps foolishly, that good SPARCs don't have an
+ ;; issue with multiplications. (Remember that there's a
+ ;; different transform for converting x*2^k to a shift).
+ ((member :sparc-64 *backend-subfeatures*) (give-up-ir1-transform))
+ ((or (member :sparc-v9 *backend-subfeatures*)
+ (member :sparc-v8 *backend-subfeatures*))
+ ;; breakeven point as measured by Raymond Toy
+ (when (> (+ adds shifts) 9)
+ (give-up-ir1-transform))))
+ (or result 0))))
;; If we can prove that we have a right shift, just do the right shift
;; instead of calling the inline ASH which has to check for the
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.8.3.36"
+"0.8.3.37"