(define-binop logior 2 or)
(define-binop logxor 2 xor))
-
;;; Special handling of add on the x86; can use lea to avoid a
;;; register load, otherwise it uses add.
(define-vop (fast-+/fixnum=>fixnum fast-safe-arith-op)
;; The result-type ensures us that this shift will not overflow.
(inst shl result :cl)))
-(define-vop (fast-ash-c)
+(define-vop (fast-ash-c/signed=>signed)
(:translate ash)
(:policy :fast-safe)
- (:args (number :scs (signed-reg unsigned-reg) :target result
- :load-if (not (and (sc-is number signed-stack unsigned-stack)
- (sc-is result signed-stack unsigned-stack)
+ (:args (number :scs (signed-reg) :target result
+ :load-if (not (and (sc-is number signed-stack)
+ (sc-is result signed-stack)
(location= number result)))))
(:info amount)
- (:arg-types (:or signed-num unsigned-num) (:constant integer))
- (:results (result :scs (signed-reg unsigned-reg)
- :load-if (not
- (and (sc-is number signed-stack unsigned-stack)
- (sc-is result signed-stack unsigned-stack)
- (location= number result)))))
- (:result-types (:or signed-num unsigned-num))
+ (:arg-types signed-num (:constant integer))
+ (:results (result :scs (signed-reg)
+ :load-if (not (and (sc-is number signed-stack)
+ (sc-is result signed-stack)
+ (location= number result)))))
+ (:result-types signed-num)
(:note "inline ASH")
(:generator 3
(cond ((and (= amount 1) (not (location= number result)))
(inst lea result (make-ea :dword :index number :scale 8)))
(t
(move result number)
- (cond ((plusp amount)
- ;; We don't have to worry about overflow because of the
- ;; result type restriction.
- (inst shl result amount))
- ((sc-is number signed-reg signed-stack)
- ;; If the amount is greater than 31, only shift by 31. We
- ;; have to do this because the shift instructions only look
- ;; at the low five bits of the result.
- (inst sar result (min 31 (- amount))))
- (t
- (inst shr result (min 31 (- amount)))))))))
+ (cond ((plusp amount) (inst shl result amount))
+ (t (inst sar result (min 31 (- amount)))))))))
-(define-vop (fast-ash-left)
+(define-vop (fast-ash-c/unsigned=>unsigned) ; ASH of an unsigned word by a constant AMOUNT
(:translate ash)
- (:args (number :scs (signed-reg unsigned-reg) :target result
- :load-if (not (and (sc-is number signed-stack unsigned-stack)
- (sc-is result signed-stack unsigned-stack)
+ (:policy :fast-safe)
+ (:args (number :scs (unsigned-reg) :target result
+ :load-if (not (and (sc-is number unsigned-stack) ; false only when NUMBER and RESULT are the same unsigned stack location
+ (sc-is result unsigned-stack)
+ (location= number result)))))
+ (:info amount) ; AMOUNT arrives as :info, matching the (:constant integer) arg type below
+ (:arg-types unsigned-num (:constant integer))
+ (:results (result :scs (unsigned-reg)
+ :load-if (not (and (sc-is number unsigned-stack)
+ (sc-is result unsigned-stack)
+ (location= number result)))))
+ (:result-types unsigned-num)
+ (:note "inline ASH")
+ (:generator 3
+ (cond ((and (= amount 1) (not (location= number result)))
+ (inst lea result (make-ea :dword :index number :scale 2))) ; result = number * 2
+ ((and (= amount 2) (not (location= number result)))
+ (inst lea result (make-ea :dword :index number :scale 4))) ; result = number * 4
+ ((and (= amount 3) (not (location= number result)))
+ (inst lea result (make-ea :dword :index number :scale 8))) ; result = number * 8
+ (t
+ (move result number)
+ (cond ((< -32 amount 32)
+ ;; this code is used both in ASH and ASH-MOD32, so
+ ;; be careful
+ (if (plusp amount)
+ (inst shl result amount) ; left shift by AMOUNT
+ (inst shr result (- amount)))) ; logical right shift by -AMOUNT (zero or negative AMOUNT)
+ (t (inst xor result result))))))) ; |amount| >= 32: clear RESULT instead of shifting
+
+(define-vop (fast-ash-left/signed=>signed) ; left-shift-only ASH on signed words, run-time AMOUNT
+ (:translate ash)
+ (:args (number :scs (signed-reg) :target result
+ :load-if (not (and (sc-is number signed-stack) ; false only when NUMBER and RESULT are the same signed stack location
+ (sc-is result signed-stack)
(location= number result))))
(amount :scs (unsigned-reg) :target ecx))
- (:arg-types (:or signed-num unsigned-num) positive-fixnum)
+ (:arg-types signed-num positive-fixnum) ; AMOUNT is typed positive-fixnum; the generator only emits SHL
(:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx)
- (:results (result :scs (signed-reg unsigned-reg) :from (:argument 0)
- :load-if (not
- (and (sc-is number signed-stack unsigned-stack)
- (sc-is result signed-stack unsigned-stack)
- (location= number result)))))
- (:result-types (:or signed-num unsigned-num))
+ (:results (result :scs (signed-reg) :from (:argument 0)
+ :load-if (not (and (sc-is number signed-stack)
+ (sc-is result signed-stack)
+ (location= number result)))))
+ (:result-types signed-num)
(:policy :fast-safe)
(:note "inline ASH")
(:generator 4
(move result number)
(move ecx amount)
- ;; The result-type ensures us that this shift will not overflow.
(inst shl result :cl)))
-(define-vop (fast-ash)
+(define-vop (fast-ash-left/unsigned=>unsigned) ; left-shift-only ASH on unsigned words, run-time AMOUNT
(:translate ash)
+ (:args (number :scs (unsigned-reg) :target result
+ :load-if (not (and (sc-is number unsigned-stack) ; false only when NUMBER and RESULT are the same unsigned stack location
+ (sc-is result unsigned-stack)
+ (location= number result))))
+ (amount :scs (unsigned-reg) :target ecx))
+ (:arg-types unsigned-num positive-fixnum) ; AMOUNT is typed positive-fixnum; the generator only emits SHL
+ (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx) ; temporary pinned to ECX, whose low byte CL carries the count
+ (:results (result :scs (unsigned-reg) :from (:argument 0)
+ :load-if (not (and (sc-is number unsigned-stack)
+ (sc-is result unsigned-stack)
+ (location= number result)))))
+ (:result-types unsigned-num)
(:policy :fast-safe)
- (:args (number :scs (signed-reg unsigned-reg) :target result)
+ (:note "inline ASH")
+ (:generator 4
+ (move result number)
+ (move ecx amount) ; the variable shift below takes its count from CL
+ (inst shl result :cl)))
+
+(define-vop (fast-ash/signed=>signed)
+ (:translate ash)
+ (:policy :fast-safe)
+ (:args (number :scs (signed-reg) :target result)
(amount :scs (signed-reg) :target ecx))
- (:arg-types (:or signed-num unsigned-num) signed-num)
- (:results (result :scs (signed-reg unsigned-reg) :from (:argument 0)))
- (:result-types (:or signed-num unsigned-num))
+ (:arg-types signed-num signed-num)
+ (:results (result :scs (signed-reg) :from (:argument 0)))
+ (:result-types signed-num)
(:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx)
(:note "inline ASH")
(:generator 5
(move result number)
- (move ecx amount)
+ (move ecx amount)
(inst or ecx ecx)
(inst jmp :ns positive)
(inst neg ecx)
(inst jmp :be okay)
(inst mov ecx 31)
OKAY
- (sc-case number
- (signed-reg (inst sar result :cl))
- (unsigned-reg (inst shr result :cl)))
+ (inst sar result :cl)
(inst jmp done)
POSITIVE
(inst shl result :cl)
DONE))
+
+(define-vop (fast-ash/unsigned=>unsigned) ; ASH on unsigned words with a signed run-time AMOUNT (either direction)
+ (:translate ash)
+ (:policy :fast-safe)
+ (:args (number :scs (unsigned-reg) :target result)
+ (amount :scs (signed-reg) :target ecx))
+ (:arg-types unsigned-num signed-num)
+ (:results (result :scs (unsigned-reg) :from (:argument 0)))
+ (:result-types unsigned-num)
+ (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) ; temporary pinned to ECX, whose low byte CL carries the count
+ (:note "inline ASH")
+ (:generator 5
+ (move result number)
+ (move ecx amount)
+ (inst or ecx ecx) ; set the flags from AMOUNT without changing it
+ (inst jmp :ns positive) ; sign clear: AMOUNT >= 0, shift left
+ (inst neg ecx) ; ecx = -AMOUNT, the right-shift distance
+ (inst cmp ecx 31)
+ (inst jmp :be okay) ; distance <= 31 (unsigned compare): SHR can take it directly
+ (inst xor result result) ; distance > 31: every bit is shifted out, so the result is 0
+ (inst jmp done)
+ OKAY
+ (inst shr result :cl) ; logical (zero-filling) right shift
+ (inst jmp done)
+
+ POSITIVE
+ ;; The result-type assures us that this shift will not overflow.
+ (inst shl result :cl)
+
+ DONE))
+
+;;; FIXME: before making knowledge of this too public, it needs to be
+;;; fixed so that it's actually _faster_ than the non-CMOV version; at
+;;; least on my Celeron-XXX laptop, this version is marginally slower
+;;; than the above version with branches. -- CSR, 2003-09-04
+(define-vop (fast-cmov-ash/unsigned=>unsigned) ; like fast-ash/unsigned=>unsigned, but picks the out-of-range result with CMOV
+ (:translate ash)
+ (:policy :fast-safe)
+ (:args (number :scs (unsigned-reg) :target result)
+ (amount :scs (signed-reg) :target ecx))
+ (:arg-types unsigned-num signed-num)
+ (:results (result :scs (unsigned-reg) :from (:argument 0)))
+ (:result-types unsigned-num)
+ (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) ; temporary pinned to ECX, whose low byte CL carries the count
+ (:temporary (:sc any-reg :from (:eval 0) :to (:eval 1)) zero) ; scratch register that holds 0 for the CMOV below
+ (:note "inline ASH")
+ (:guard (member :cmov *backend-subfeatures*)) ; guard is true only when :CMOV is among the backend subfeatures
+ (:generator 4
+ (move result number)
+ (move ecx amount)
+ (inst or ecx ecx) ; set the flags from AMOUNT without changing it
+ (inst jmp :ns positive) ; sign clear: AMOUNT >= 0, shift left
+ (inst neg ecx) ; ecx = -AMOUNT, the right-shift distance
+ (inst xor zero zero) ; zero = 0
+ (inst shr result :cl) ; tentative result, computed before the distance is range-checked
+ (inst cmp ecx 31)
+ (inst cmov :nbe result zero) ; distance > 31 (unsigned compare): replace the tentative result with 0
+ (inst jmp done)
+
+ POSITIVE
+ ;; The result-type assures us that this shift will not overflow.
+ (inst shl result :cl)
+
+ DONE))
\f
;;; Note: documentation for this function is wrong - rtfm
(define-vop (signed-byte-32-len)
(:translate +-mod32))
(define-vop (fast-+-mod32-c/unsigned=>unsigned fast-+-c/unsigned=>unsigned)
(:translate +-mod32))
+(define-modular-fun --mod32 (x y) - 32) ; declares --MOD32 (two arguments) from the function - with width 32
+(define-vop (fast---mod32/unsigned=>unsigned fast--/unsigned=>unsigned) ; based on fast--/unsigned=>unsigned; only :translate is given here
+ (:translate --mod32))
+(define-vop (fast---mod32-c/unsigned=>unsigned fast---c/unsigned=>unsigned) ; constant-operand counterpart, based on fast---c/unsigned=>unsigned
+ (:translate --mod32))
+
+(define-vop (fast-ash-left-mod32-c/unsigned=>unsigned ; based on the constant-amount unsigned ASH VOP named on the next line
+ fast-ash-c/unsigned=>unsigned)
+ (:translate ash-left-mod32))
;;; logical operations
(define-modular-fun lognot-mod32 (x) lognot 32)
(define-vop (fast-logxor-mod32-c/unsigned=>unsigned
fast-logxor-c/unsigned=>unsigned)
(:translate logxor-mod32))
+
+;;; Source transforms that rewrite the remaining two-operand boolean
+;;; functions (and n-ary LOGEQV) in terms of LOGAND, LOGIOR, LOGXOR
+;;; and LOGNOT.
+;;;
+;;; LOGEQV of an odd number of arguments equals their LOGXOR; for an
+;;; even number (including none) the LOGXOR has to be complemented.
+(define-source-transform logeqv (&rest args)
+  (let ((xor-form (cons 'logxor args)))
+    (if (evenp (length args))
+        (list 'lognot xor-form)
+        xor-form)))
+;;; AND with the first operand complemented.
+(define-source-transform logandc1 (x y)
+  (list 'logand (list 'lognot x) y))
+;;; AND with the second operand complemented.
+(define-source-transform logandc2 (x y)
+  (list 'logand x (list 'lognot y)))
+;;; OR with the first operand complemented.
+(define-source-transform logorc1 (x y)
+  (list 'logior (list 'lognot x) y))
+;;; OR with the second operand complemented.
+(define-source-transform logorc2 (x y)
+  (list 'logior x (list 'lognot y)))
+;;; Complement of the OR.
+(define-source-transform lognor (x y)
+  (list 'lognot (list 'logior x y)))
+;;; Complement of the AND.
+(define-source-transform lognand (x y)
+  (list 'lognot (list 'logand x y)))
\f
;;;; bignum stuff
(foldable flushable))
(defoptimizer (%lea derive-type) ((base index scale disp))
- (when (and (constant-continuation-p scale)
- (constant-continuation-p disp))
- (let ((scale (continuation-value scale))
- (disp (continuation-value disp))
- (base-type (continuation-type base))
- (index-type (continuation-type index)))
+ (when (and (constant-lvar-p scale)
+ (constant-lvar-p disp))
+ (let ((scale (lvar-value scale))
+ (disp (lvar-value disp))
+ (base-type (lvar-type base))
+ (index-type (lvar-type index)))
(when (and (numeric-type-p base-type)
(numeric-type-p index-type))
(let ((base-lo (numeric-type-low base-type))
(0
(let ((tmp (min 3 (aref condensed 1))))
(decf (aref condensed 1) tmp)
- `(truly-the (unsigned-byte 32)
+ `(logand #xffffffff
(%lea ,arg
,(decompose-multiplication
arg (ash (1- num) (- tmp)) (1- n-bits) (subseq condensed 1))
((1 2 3)
(let ((r0 (aref condensed 0)))
(incf (aref condensed 1) r0)
- `(truly-the (unsigned-byte 32)
+ `(logand #xffffffff
(%lea ,(decompose-multiplication
arg (- num (ash 1 r0)) (1- n-bits) (subseq condensed 1))
,arg
,(ash 1 r0) 0))))
(t (let ((r0 (aref condensed 0)))
(setf (aref condensed 0) 0)
- `(truly-the (unsigned-byte 32)
+ `(logand #xffffffff
(ash ,(decompose-multiplication
arg (ash num (- r0)) n-bits condensed)
,r0))))))
((= n-bits 0) 0)
((= num 1) arg)
((= n-bits 1)
- `(truly-the (unsigned-byte 32) (ash ,arg ,(1- (integer-length num)))))
+ `(logand #xffffffff (ash ,arg ,(1- (integer-length num)))))
((let ((max 0) (end 0))
(loop for i from 2 to (length condensed)
for j = (reduce #'+ (subseq condensed 0 i))
(let ((n2 (+ (ash 1 (1+ j))
(ash (ldb (byte (- 32 (1+ j)) (1+ j)) num) (1+ j))))
(n1 (1+ (ldb (byte (1+ j) 0) (lognot num)))))
- `(truly-the (unsigned-byte 32)
+ `(logand #xffffffff
(- ,(optimize-multiply arg n2) ,(optimize-multiply arg n1))))))))
((dolist (i '(9 5 3))
(when (integerp (/ num i))
(when (< (logcount (/ num i)) (logcount num))
(let ((x (gensym)))
(return `(let ((,x ,(optimize-multiply arg (/ num i))))
- (truly-the (unsigned-byte 32)
+ (logand #xffffffff
(%lea ,x ,x (1- ,i) 0)))))))))
(t (basic-decompose-multiplication arg num n-bits condensed))))
((unsigned-byte 32) (constant-arg (unsigned-byte 32)))
(unsigned-byte 32))
"recode as leas, shifts and adds"
- (let ((y (continuation-value y)))
+ (let ((y (lvar-value y)))
(cond
((= y (ash 1 (integer-length y)))
;; there's a generic transform for y = 2^k