(:note "inline ASH")
(:generator 2
(cond ((and (= amount 1) (not (location= number result)))
- (inst lea result (make-ea :dword :index number :scale 2)))
+ (inst lea result (make-ea :dword :base number :index number)))
((and (= amount 2) (not (location= number result)))
(inst lea result (make-ea :dword :index number :scale 4)))
((and (= amount 3) (not (location= number result)))
(inst lea result (make-ea :dword :index number :scale 8)))
(t
(move result number)
- (cond ((plusp amount)
- ;; We don't have to worry about overflow because of the
- ;; result type restriction.
- (inst shl result amount))
- (t
- ;; If the amount is greater than 31, only shift by 31. We
- ;; have to do this because the shift instructions only look
- ;; at the low five bits of the result.
- (inst sar result (min 31 (- amount)))
- ;; Fixnum correction.
- (inst and result #xfffffffc)))))))
+ (cond ((< -32 amount 32)
+ ;; this code is used both in ASH and ASH-SMOD30, so
+ ;; be careful
+ (if (plusp amount)
+ (inst shl result amount)
+ (progn
+ (inst sar result (- amount))
+ (inst and result (lognot fixnum-tag-mask)))))
+ ((plusp amount)
+ (if (sc-is result any-reg)
+ (inst xor result result)
+ (inst mov result 0)))
+ (t (inst sar result 31)
+ (inst and result (lognot fixnum-tag-mask))))))))
(define-vop (fast-ash-left/fixnum=>fixnum)
(:translate ash)
(:note "inline ASH")
(:generator 3
(cond ((and (= amount 1) (not (location= number result)))
- (inst lea result (make-ea :dword :index number :scale 2)))
+ (inst lea result (make-ea :dword :base number :index number)))
((and (= amount 2) (not (location= number result)))
(inst lea result (make-ea :dword :index number :scale 4)))
((and (= amount 3) (not (location= number result)))
(:note "inline ASH")
(:generator 3
(cond ((and (= amount 1) (not (location= number result)))
- (inst lea result (make-ea :dword :index number :scale 2)))
+ (inst lea result (make-ea :dword :base number :index number)))
((and (= amount 2) (not (location= number result)))
(inst lea result (make-ea :dword :index number :scale 4)))
((and (= amount 3) (not (location= number result)))
(:translate logcount)
(:note "inline (unsigned-byte 32) logcount")
(:policy :fast-safe)
- (:args (arg :scs (unsigned-reg)))
+ (:args (arg :scs (unsigned-reg) :target result))
(:arg-types unsigned-num)
(:results (result :scs (unsigned-reg)))
(:result-types positive-fixnum)
- (:temporary (:sc unsigned-reg :from (:argument 0)) temp)
- (:generator 30
+ (:temporary (:sc unsigned-reg) temp)
+ (:generator 14
+ ;; See the comments below for how the algorithm works. The tricks
+ ;; used can be found for example in AMD's software optimization
+ ;; guide or at "http://www.hackersdelight.org/HDcode/pop.cc" in the
+ ;; function "pop1".
+ ;; Calculate 2-bit sums. Note that the value of a two-digit binary
+ ;; number is the sum of the right digit and twice the left digit.
+ ;; Thus we can calculate the sum of the two digits by shifting the
+ ;; left digit to the right position and doing a two-bit subtraction.
+ ;; This subtraction will never create a borrow and thus can be made
+ ;; on all 16 2-digit numbers at once.
(move result arg)
-
- (inst mov temp result)
- (inst shr temp 1)
+ (move temp arg)
+ (inst shr result 1)
(inst and result #x55555555)
- (inst and temp #x55555555)
- (inst add result temp)
-
- (inst mov temp result)
+ (inst sub temp result)
+ ;; Calculate 4-bit sums by straightforward shift, mask and add.
+ ;; Note that we shift the source operand of the MOV and not its
+ ;; destination so that the SHR and the MOV can execute in the same
+ ;; clock cycle.
+ (inst mov result temp)
(inst shr temp 2)
(inst and result #x33333333)
(inst and temp #x33333333)
(inst add result temp)
-
+ ;; Calculate 8-bit sums. Since each sum is at most 8, which fits
+ ;; into 4 bits, we can apply the mask after the addition, saving one
+ ;; instruction.
(inst mov temp result)
- (inst shr temp 4)
- (inst and result #x0f0f0f0f)
- (inst and temp #x0f0f0f0f)
+ (inst shr result 4)
(inst add result temp)
-
+ (inst and result #x0f0f0f0f)
+ ;; Calculate the two 16-bit sums and the 32-bit sum. No masking is
+ ;; necessary inbetween since the final sum is at most 32 which fits
+ ;; into 6 bits.
(inst mov temp result)
- (inst shr temp 8)
- (inst and result #x00ff00ff)
- (inst and temp #x00ff00ff)
+ (inst shr result 8)
(inst add result temp)
-
(inst mov temp result)
- (inst shr temp 16)
- (inst and result #x0000ffff)
- (inst and temp #x0000ffff)
- (inst add result temp)))
+ (inst shr result 16)
+ (inst add result temp)
+ (inst and result #xff)))
\f
;;;; binary conditional VOPs
,(symbolicate "FAST-CONDITIONAL" suffix))
(:translate logtest)
(:generator ,cost
- (inst test x ,(if (eq suffix '-c/fixnum)
- '(fixnumize y)
- 'y))
+ (emit-optimized-test-inst x
+ ,(if (eq suffix '-c/fixnum)
+ '(fixnumize y)
+ 'y))
(inst jmp (if not-p :e :ne) target)))))))
(define-logtest-vops))
(defknown %logbitp (integer unsigned-byte) boolean
- (movable foldable flushable))
+ (movable foldable flushable always-translatable))
-(defun %logbitp (index integer)
+;;; only for constant folding within the compiler
+(defun %logbitp (integer index)
(logbitp index integer))
;;; too much work to do the non-constant case (maybe?)
(define-vop (fast-logbitp-c/fixnum fast-conditional-c/fixnum)
(:translate %logbitp)
+ (:arg-types tagged-num (:constant (integer 0 29)))
(:generator 4
- (aver (<= y 29))
(inst bt x (+ y n-fixnum-tag-bits))
(inst jmp (if not-p :nc :c) target)))
(define-vop (fast-logbitp/signed fast-conditional/signed)
+ (:args (x :scs (signed-reg signed-stack))
+ (y :scs (signed-reg)))
(:translate %logbitp)
(:generator 6
(inst bt x y)
(define-vop (fast-logbitp-c/signed fast-conditional-c/signed)
(:translate %logbitp)
+ (:arg-types signed-num (:constant (integer 0 31)))
(:generator 5
(inst bt x y)
(inst jmp (if not-p :nc :c) target)))
(define-vop (fast-logbitp/unsigned fast-conditional/unsigned)
+ (:args (x :scs (unsigned-reg unsigned-stack))
+ (y :scs (unsigned-reg)))
(:translate %logbitp)
(:generator 6
(inst bt x y)
(define-vop (fast-logbitp-c/unsigned fast-conditional-c/unsigned)
(:translate %logbitp)
+ (:arg-types unsigned-num (:constant (integer 0 31)))
(:generator 5
(inst bt x y)
(inst jmp (if not-p :nc :c) target)))
(define-full-reffer bignum-ref * bignum-digits-offset other-pointer-lowtag
(unsigned-reg) unsigned-num sb!bignum:%bignum-ref)
-
+(define-full-reffer+offset bignum-ref-with-offset *
+ bignum-digits-offset other-pointer-lowtag
+ (unsigned-reg) unsigned-num sb!bignum:%bignum-ref-with-offset)
(define-full-setter bignum-set * bignum-digits-offset other-pointer-lowtag
(unsigned-reg) unsigned-num sb!bignum:%bignum-set)
:load-if (not (and (sc-is result unsigned-stack)
(location= digit result)))))
(:result-types unsigned-num)
- (:generator 1
+ (:generator 2
(move result digit)
(move ecx count)
(inst sar result :cl)))
+(define-vop (digit-ashr/c)
+ (:translate sb!bignum:%ashr)
+ (:policy :fast-safe)
+ (:args (digit :scs (unsigned-reg unsigned-stack) :target result))
+ (:arg-types unsigned-num (:constant (integer 0 31)))
+ (:info count)
+ (:results (result :scs (unsigned-reg) :from (:argument 0)
+ :load-if (not (and (sc-is result unsigned-stack)
+ (location= digit result)))))
+ (:result-types unsigned-num)
+ (:generator 1
+ (move result digit)
+ (inst sar result count)))
+
(define-vop (digit-lshr digit-ashr)
(:translate sb!bignum:%digit-logical-shift-right)
(:generator 1
(:results (y :scs (unsigned-reg) :from (:eval 0)))
(:result-types unsigned-num)
(:generator 50
- (inst mov k (make-ea :dword :base state
- :disp (- (* (+ 2 vector-data-offset)
- n-word-bytes)
- other-pointer-lowtag)))
+ (loadw k state (+ 2 vector-data-offset) other-pointer-lowtag)
(inst cmp k 624)
(inst jmp :ne no-update)
(inst mov tmp state) ; The state is passed in EAX.
(inst xor k k)
NO-UPDATE
;; y = ptgfsr[k++];
- (inst mov y (make-ea :dword :base state :index k :scale 4
- :disp (- (* (+ 3 vector-data-offset)
- n-word-bytes)
- other-pointer-lowtag)))
+ (inst mov y (make-ea-for-vector-data state :index k :offset 3))
;; y ^= (y >> 11);
(inst shr y 11)
- (inst xor y (make-ea :dword :base state :index k :scale 4
- :disp (- (* (+ 3 vector-data-offset)
- n-word-bytes)
- other-pointer-lowtag)))
+ (inst xor y (make-ea-for-vector-data state :index k :offset 3))
;; y ^= (y << 7) & #x9d2c5680
(inst mov tmp y)
(inst inc k)
(inst shl tmp 7)
- (inst mov (make-ea :dword :base state
- :disp (- (* (+ 2 vector-data-offset)
- n-word-bytes)
- other-pointer-lowtag))
- k)
+ (storew k state (+ 2 vector-data-offset) other-pointer-lowtag)
(inst and tmp #x9d2c5680)
(inst xor y tmp)
;; y ^= (y << 15) & #xefc60000