+(deftransform count ((item sequence) (bit simple-bit-vector) *
+ :policy (>= speed space))
+ `(let ((length (length sequence)))
+ (if (zerop length)
+ 0
+ (do ((index sb!vm:vector-data-offset (1+ index))
+ (count 0)
+ (end-1 (+ sb!vm:vector-data-offset
+ (truncate (truly-the index (1- length))
+ sb!vm:n-word-bits))))
+ ((= index end-1)
+ (let* ((extra (1+ (mod (1- length) sb!vm:n-word-bits)))
+ (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
+ (- extra sb!vm:n-word-bits)))
+ (bits (logand (ash mask
+ ,(ecase sb!c:*backend-byte-order*
+ (:little-endian 0)
+ (:big-endian
+ '(- sb!vm:n-word-bits extra))))
+ (%raw-bits sequence index))))
+ (declare (type (integer 1 #.sb!vm:n-word-bits) extra))
+ (declare (type sb!vm:word mask bits))
+ ;; could consider LOGNOT for the zero case instead of
+ ;; doing the subtraction...
+ (incf count ,(if (constant-lvar-p item)
+ (if (zerop (lvar-value item))
+ '(- extra (logcount bits))
+ '(logcount bits))
+ '(if (zerop item)
+ (- extra (logcount bits))
+ (logcount bits))))))
+ (declare (type index index count end-1)
+ (optimize (speed 3) (safety 0)))
+ (incf count ,(if (constant-lvar-p item)
+ (if (zerop (lvar-value item))
+ '(- sb!vm:n-word-bits (logcount (%raw-bits sequence index)))
+ '(logcount (%raw-bits sequence index)))
+ '(if (zerop item)
+ (- sb!vm:n-word-bits (logcount (%raw-bits sequence index)))
+ (logcount (%raw-bits sequence index)))))))))
+
+(deftransform fill ((sequence item) (simple-bit-vector bit) *
+ :policy (>= speed space))
+ (let ((value (if (constant-lvar-p item)
+ (if (= (lvar-value item) 0)
+ 0
+ #.(1- (ash 1 sb!vm:n-word-bits)))
+ `(if (= item 0) 0 #.(1- (ash 1 sb!vm:n-word-bits))))))
+ `(let ((length (length sequence))
+ (value ,value))
+ (if (= length 0)
+ sequence
+ (do ((index sb!vm:vector-data-offset (1+ index))
+ (end-1 (+ sb!vm:vector-data-offset
+ ;; bit-vectors of length 1 to n-word-bits need
+ ;; precisely one (SETF %RAW-BITS), done here
+ ;; in the epilogue. - CSR, 2002-04-24
+ (truncate (truly-the index (1- length))
+ sb!vm:n-word-bits))))
+ ((= index end-1)
+ (setf (%raw-bits sequence index) value)
+ sequence)
+ (declare (optimize (speed 3) (safety 0))
+ (type index index end-1))
+ (setf (%raw-bits sequence index) value))))))
+
+(deftransform fill ((sequence item) (simple-base-string base-char) *
+ :policy (>= speed space))
+ (let ((value (if (constant-lvar-p item)
+ (let* ((char (lvar-value item))
+ (code (sb!xc:char-code char))
+ (accum 0))
+ (dotimes (i sb!vm:n-word-bytes accum)
+ (setf accum (logior accum (ash code (* 8 i))))))
+ `(let ((code (sb!xc:char-code item)))
+ (logior ,@(loop for i from 0 below sb!vm:n-word-bytes
+ collect `(ash code ,(* 8 i))))))))
+ `(let ((length (length sequence))
+ (value ,value))
+ (multiple-value-bind (times rem)
+ (truncate length sb!vm:n-word-bytes)
+ (do ((index sb!vm:vector-data-offset (1+ index))
+ (end (+ times sb!vm:vector-data-offset)))
+ ((= index end)
+ (let ((place (* times sb!vm:n-word-bytes)))
+ (declare (fixnum place))
+ (dotimes (j rem sequence)
+ (declare (index j))
+ (setf (schar sequence (the index (+ place j))) item))))
+ (declare (optimize (speed 3) (safety 0))
+ (type index index))
+ (setf (%raw-bits sequence index) value))))))
+\f
+;;;; %BYTE-BLT
+
+;;; FIXME: The old CMU CL code used various COPY-TO/FROM-SYSTEM-AREA
+;;; stuff (with all the associated bit-index cruft and overflow
+;;; issues) even for byte moves. In SBCL, we're converting to byte
+;;; moves as problems are discovered with the old code, and this is
+;;; currently (ca. sbcl-0.6.12.30) the main interface for code in
+;;; SB!KERNEL and SB!SYS (e.g. i/o code). It's not clear that it's the
+;;; ideal interface, though, and it probably deserves some thought.
+(deftransform %byte-blt ((src src-start dst dst-start dst-end)
+ ((or (simple-unboxed-array (*)) system-area-pointer)
+ index
+ (or (simple-unboxed-array (*)) system-area-pointer)
+ index
+ index))
+ ;; FIXME: CMU CL had a hairier implementation of this (back when it
+ ;; was still called (%PRIMITIVE BYTE-BLT). It had the small problem
+ ;; that it didn't work for large (>16M) values of SRC-START or
+ ;; DST-START. However, it might have been more efficient. In
+ ;; particular, I don't really know how much the foreign function
+ ;; call costs us here. My guess is that if the overhead is