1.0.4.55: Optimized REPLACE and UB*-BASH-COPY routines
[sbcl.git] / src / compiler / generic / vm-tran.lisp
index e5073ff..f3791f0 100644 (file)
 (define-source-transform long-float-p (x) `(double-float-p ,x))
 
 (define-source-transform compiled-function-p (x)
-  `(functionp ,x))
+  #!-sb-eval
+  `(functionp ,x)
+  #!+sb-eval
+  (once-only ((x x))
+    `(and (functionp ,x)
+          (not (sb!eval:interpreted-function-p ,x)))))
 
 (define-source-transform char-int (x)
   `(char-code ,x))
 (deftransform abs ((x) (rational))
   '(if (< x 0) (- x) x))
 
+;;; We don't want to clutter the bignum code.
+#!+x86
+(define-source-transform sb!bignum:%bignum-ref (bignum index)
+  ;; KLUDGE: We use TRULY-THE here because even though the bignum code
+  ;; is (currently) compiled with (SAFETY 0), the compiler insists on
+  ;; inserting CAST nodes to ensure that INDEX is of the correct type.
+  ;; These CAST nodes do not generate any type checks, but they do
+  ;; interfere with the operation of FOLD-INDEX-ADDRESSING, below.
+  ;; This scenario is a problem for the more user-visible case of
+  ;; folding as well.  --njf, 2006-12-01
+  `(sb!bignum:%bignum-ref-with-offset ,bignum
+                                      (truly-the bignum-index ,index) 0))
+
+#!+x86
+(defun fold-index-addressing (fun-name element-size lowtag data-offset
+                              index offset &optional setter-p)
+  (multiple-value-bind (func index-args) (extract-fun-args index '(+ -) 2)
+    (destructuring-bind (x constant) index-args
+      (declare (ignorable x))
+      (unless (constant-lvar-p constant)
+        (give-up-ir1-transform))
+      (let ((value (lvar-value constant)))
+        (unless (and (integerp value)
+                     (sb!vm::foldable-constant-offset-p
+                      element-size lowtag data-offset
+                      (funcall func value (lvar-value offset))))
+          (give-up-ir1-transform "constant is too large for inlining"))
+        (splice-fun-args index func 2)
+        `(lambda (thing index off1 off2 ,@(when setter-p
+                                            '(value)))
+           (,fun-name thing index (,func off2 off1) ,@(when setter-p
+                                                        '(value))))))))
+
+#!+x86
+(deftransform sb!bignum:%bignum-ref-with-offset
+    ((bignum index offset) * * :node node)
+  (fold-index-addressing 'sb!bignum:%bignum-ref-with-offset
+                         sb!vm:n-word-bits sb!vm:other-pointer-lowtag
+                         sb!vm:bignum-digits-offset
+                         index offset))
+
+#!+x86
+(progn
+(define-source-transform sb!kernel:%vector-raw-bits (thing index)
+  `(sb!kernel:%raw-bits-with-offset ,thing ,index 2))
+
+(define-source-transform sb!kernel:%raw-bits (thing index)
+  `(sb!kernel:%raw-bits-with-offset ,thing ,index 0))
+
+(define-source-transform sb!kernel:%set-vector-raw-bits (thing index value)
+  `(sb!kernel:%set-raw-bits-with-offset ,thing ,index 2 ,value))
+
+(define-source-transform sb!kernel:%set-raw-bits (thing index value)
+  `(sb!kernel:%set-raw-bits-with-offset ,thing ,index 0 ,value))
+
+(deftransform sb!kernel:%raw-bits-with-offset ((thing index offset) * * :node node)
+  (fold-index-addressing 'sb!kernel:%raw-bits-with-offset
+                         sb!vm:n-word-bits sb!vm:other-pointer-lowtag
+                         0 index offset))
+
+(deftransform sb!kernel:%set-raw-bits-with-offset ((thing index offset value) * *)
+  (fold-index-addressing 'sb!kernel:%set-raw-bits-with-offset
+                         sb!vm:n-word-bits sb!vm:other-pointer-lowtag
+                         0 index offset t))
+) ; PROGN
+
 ;;; The layout is stored in slot 0.
 (define-source-transform %instance-layout (x)
   `(truly-the layout (%instance-ref ,x 0)))
 (define-source-transform %set-instance-layout (x val)
   `(%instance-set ,x 0 (the layout ,val)))
+(define-source-transform %funcallable-instance-layout (x)
+  `(truly-the layout (%funcallable-instance-info ,x 0)))
+(define-source-transform %set-funcallable-instance-layout (x val)
+  `(setf (%funcallable-instance-info ,x 0) (the layout ,val)))
 \f
 ;;;; character support
 
 ;;; In our implementation there are really only BASE-CHARs.
+#+nil
 (define-source-transform characterp (obj)
   `(base-char-p ,obj))
 \f
 ;;;; simplifying HAIRY-DATA-VECTOR-REF and HAIRY-DATA-VECTOR-SET
 
 (deftransform hairy-data-vector-ref ((string index) (simple-string t))
-  (let ((ctype (continuation-type string)))
+  (let ((ctype (lvar-type string)))
     (if (array-type-p ctype)
-       ;; the other transform will kick in, so that's OK
-       (give-up-ir1-transform)
-       `(etypecase string
-         ((simple-array character (*)) (data-vector-ref string index))
-         ((simple-array nil (*)) (data-vector-ref string index))))))
+        ;; the other transform will kick in, so that's OK
+        (give-up-ir1-transform)
+        `(etypecase string
+          ((simple-array character (*))
+           (data-vector-ref string index))
+          #!+sb-unicode
+          ((simple-array base-char (*))
+           (data-vector-ref string index))
+          ((simple-array nil (*))
+           (data-vector-ref string index))))))
 
-(deftransform hairy-data-vector-ref ((array index) (array t) * :important t)
+(deftransform hairy-data-vector-ref ((array index) (array t) *)
   "avoid runtime dispatch on array element type"
   (let ((element-ctype (extract-upgraded-element-type array))
-       (declared-element-ctype (extract-declared-element-type array)))
+        (declared-element-ctype (extract-declared-element-type array)))
     (declare (type ctype element-ctype))
     (when (eq *wild-type* element-ctype)
       (give-up-ir1-transform
     ;; WITH-ARRAY-DATA. Since WITH-ARRAY-DATA is implemented as a
     ;; macro, and macros aren't expanded in transform output, we have
     ;; to hand-expand it ourselves.)
-    (let ((element-type-specifier (type-specifier element-ctype)))
+    (let* ((element-type-specifier (type-specifier element-ctype)))
       `(multiple-value-bind (array index)
-          (%data-vector-and-index array index)
-        (declare (type (simple-array ,element-type-specifier 1) array))
-        ,(let ((bare-form '(data-vector-ref array index)))
-           (if (type= element-ctype declared-element-ctype)
-               bare-form
-               `(the ,(type-specifier declared-element-ctype)
-                     ,bare-form)))))))
-
-(deftransform data-vector-ref ((array index)
-                               (simple-array t))
-  (let ((array-type (continuation-type array)))
+           (%data-vector-and-index array index)
+         (declare (type (simple-array ,element-type-specifier 1) array))
+         ,(let ((bare-form '(data-vector-ref array index)))
+            (if (type= element-ctype declared-element-ctype)
+                bare-form
+                `(the ,(type-specifier declared-element-ctype)
+                      ,bare-form)))))))
+
+;;; Transform multi-dimensional array to one dimensional data vector
+;;; access.
+(deftransform data-vector-ref ((array index) (simple-array t))
+  (let ((array-type (lvar-type array)))
     (unless (array-type-p array-type)
       (give-up-ir1-transform))
     (let ((dims (array-type-dimensions array-type)))
                                      (%array-data-vector array))
                           index)))))
 
+;;; Transform data vector access to a form that opens up optimization
+;;; opportunities.
+#!+x86
+(deftransform data-vector-ref ((array index) ((or (simple-unboxed-array (*))
+                                                  simple-vector)
+                                              t))
+  (let ((array-type (lvar-type array)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
+           (saetp (find-saetp element-type)))
+      (unless (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)
+        (give-up-ir1-transform))
+      `(data-vector-ref-with-offset array index 0))))
+
+#!+x86
+(deftransform data-vector-ref-with-offset ((array index offset)
+                                           ((or (simple-unboxed-array (*))
+                                                simple-vector)
+                                            t t))
+  (let ((array-type (lvar-type array)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
+           (saetp (find-saetp element-type)))
+      (aver (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits))
+      (fold-index-addressing 'data-vector-ref-with-offset
+                             (sb!vm:saetp-n-bits saetp)
+                             sb!vm:other-pointer-lowtag
+                             sb!vm:vector-data-offset
+                             index offset))))
+
 (deftransform hairy-data-vector-set ((string index new-value)
-                                    (simple-string t t))
-  (let ((ctype (continuation-type string)))
+                                     (simple-string t t))
+  (let ((ctype (lvar-type string)))
     (if (array-type-p ctype)
-       ;; the other transform will kick in, so that's OK
-       (give-up-ir1-transform)
-       `(etypecase string
-         ((simple-array character (*))
-          (data-vector-set string index new-value))
-         ((simple-array nil (*))
-          (data-vector-set string index new-value))))))
+        ;; the other transform will kick in, so that's OK
+        (give-up-ir1-transform)
+        `(etypecase string
+          ((simple-array character (*))
+           (data-vector-set string index new-value))
+          #!+sb-unicode
+          ((simple-array base-char (*))
+           (data-vector-set string index new-value))
+          ((simple-array nil (*))
+           (data-vector-set string index new-value))))))
 
 (deftransform hairy-data-vector-set ((array index new-value)
-                                    (array t t)
-                                    *
-                                    :important t)
+                                     (array t t)
+                                     *)
   "avoid runtime dispatch on array element type"
   (let ((element-ctype (extract-upgraded-element-type array))
-       (declared-element-ctype (extract-declared-element-type array)))
+        (declared-element-ctype (extract-declared-element-type array)))
     (declare (type ctype element-ctype))
     (when (eq *wild-type* element-ctype)
       (give-up-ir1-transform
        "Upgraded element type of array is not known at compile time."))
     (let ((element-type-specifier (type-specifier element-ctype)))
       `(multiple-value-bind (array index)
-          (%data-vector-and-index array index)
-        (declare (type (simple-array ,element-type-specifier 1) array)
-                 (type ,element-type-specifier new-value))
-        ,(if (type= element-ctype declared-element-ctype)
-             '(data-vector-set array index new-value)
-             `(truly-the ,(type-specifier declared-element-ctype)
-                (data-vector-set array index
-                 (the ,(type-specifier declared-element-ctype)
-                      new-value))))))))
+           (%data-vector-and-index array index)
+         (declare (type (simple-array ,element-type-specifier 1) array)
+                  (type ,element-type-specifier new-value))
+         ,(if (type= element-ctype declared-element-ctype)
+              '(data-vector-set array index new-value)
+              `(truly-the ,(type-specifier declared-element-ctype)
+                 (data-vector-set array index
+                  (the ,(type-specifier declared-element-ctype)
+                       new-value))))))))
 
+;;; Transform multi-dimensional array to one dimensional data vector
+;;; access.
 (deftransform data-vector-set ((array index new-value)
                                (simple-array t t))
-  (let ((array-type (continuation-type array)))
+  (let ((array-type (lvar-type array)))
     (unless (array-type-p array-type)
       (give-up-ir1-transform))
     (let ((dims (array-type-dimensions array-type)))
                           index
                           new-value)))))
 
+;;; Transform data vector access to a form that opens up optimization
+;;; opportunities.
+#!+x86
+(deftransform data-vector-set ((array index new-value)
+                               ((or (simple-unboxed-array (*)) simple-vector)
+                                t t))
+  (let ((array-type (lvar-type array)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
+           (saetp (find-saetp element-type)))
+      (unless (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)
+        (give-up-ir1-transform))
+      `(data-vector-set-with-offset array index 0 new-value))))
+
+#!+x86
+(deftransform data-vector-set-with-offset ((array index offset new-value)
+                                           ((or (simple-unboxed-array (*))
+                                                simple-vector)
+                                            t t t))
+  (let ((array-type (lvar-type array)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
+           (saetp (find-saetp element-type)))
+      (aver (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits))
+      (fold-index-addressing 'data-vector-set-with-offset
+                             (sb!vm:saetp-n-bits saetp)
+                             sb!vm:other-pointer-lowtag
+                             sb!vm:vector-data-offset
+                             index offset t))))
+
 (defoptimizer (%data-vector-and-index derive-type) ((array index))
-  (let ((atype (continuation-type array)))
+  (let ((atype (lvar-type array)))
     (when (array-type-p atype)
       (values-specifier-type
        `(values (simple-array ,(type-specifier
                 index)))))
 
 (deftransform %data-vector-and-index ((%array %index)
-                                     (simple-array t)
-                                     *
-                                     :important t)
+                                      (simple-array t)
+                                      *)
   ;; KLUDGE: why the percent signs?  Well, ARRAY and INDEX are
   ;; respectively exported from the CL and SB!INT packages, which
   ;; means that they're visible to all sorts of things.  If the
 (macrolet
     ((frob (type bits)
        (let ((elements-per-word (truncate sb!vm:n-word-bits bits)))
-        `(progn
-           (deftransform data-vector-ref ((vector index)
-                                          (,type *))
-             `(multiple-value-bind (word bit)
-                  (floor index ,',elements-per-word)
-                (ldb ,(ecase sb!vm:target-byte-order
-                        (:little-endian '(byte ,bits (* bit ,bits)))
-                        (:big-endian '(byte ,bits (- sb!vm:n-word-bits
-                                                     (* (1+ bit) ,bits)))))
-                     (%raw-bits vector (+ word sb!vm:vector-data-offset)))))
-           (deftransform data-vector-set ((vector index new-value)
-                                          (,type * *))
-             `(multiple-value-bind (word bit)
-                  (floor index ,',elements-per-word)
-                (setf (ldb ,(ecase sb!vm:target-byte-order
-                              (:little-endian '(byte ,bits (* bit ,bits)))
-                              (:big-endian
-                               '(byte ,bits (- sb!vm:n-word-bits
-                                               (* (1+ bit) ,bits)))))
-                           (%raw-bits vector (+ word sb!vm:vector-data-offset)))
-                      new-value)))))))
+         `(progn
+            (deftransform data-vector-ref ((vector index)
+                                           (,type *))
+              `(multiple-value-bind (word bit)
+                   (floor index ,',elements-per-word)
+                 (ldb ,(ecase sb!vm:target-byte-order
+                         (:little-endian '(byte ,bits (* bit ,bits)))
+                         (:big-endian '(byte ,bits (- sb!vm:n-word-bits
+                                                      (* (1+ bit) ,bits)))))
+                      (%raw-bits vector (+ word sb!vm:vector-data-offset)))))
+            (deftransform data-vector-set ((vector index new-value)
+                                           (,type * *))
+              `(multiple-value-bind (word bit)
+                   (floor index ,',elements-per-word)
+                 (setf (ldb ,(ecase sb!vm:target-byte-order
+                               (:little-endian '(byte ,bits (* bit ,bits)))
+                               (:big-endian
+                                '(byte ,bits (- sb!vm:n-word-bits
+                                                (* (1+ bit) ,bits)))))
+                            (%raw-bits vector (+ word sb!vm:vector-data-offset)))
+                       new-value)))))))
   (frob simple-bit-vector 1)
   (frob (simple-array (unsigned-byte 2) (*)) 2)
   (frob (simple-array (unsigned-byte 4) (*)) 4))
 (macrolet ((def (bitfun wordfun)
              `(deftransform ,bitfun ((bit-array-1 bit-array-2 result-bit-array)
                                      (simple-bit-vector
-                                     simple-bit-vector
-                                     simple-bit-vector)
-                                    *
+                                      simple-bit-vector
+                                      simple-bit-vector)
+                                     *
                                      :node node :policy (>= speed space))
                 `(progn
                    ,@(unless (policy node (zerop safety))
                              '((unless (= (length bit-array-1)
-                                         (length bit-array-2)
+                                          (length bit-array-2)
                                           (length result-bit-array))
                                  (error "Argument and/or result bit arrays are not the same length:~
-                        ~%  ~S~%  ~S  ~%  ~S"
+                         ~%  ~S~%  ~S  ~%  ~S"
                                         bit-array-1
-                                       bit-array-2
-                                       result-bit-array))))
-                 (let ((length (length result-bit-array)))
-                   (if (= length 0)
-                       ;; We avoid doing anything to 0-length
-                       ;; bit-vectors, or rather, the memory that
-                       ;; follows them. Other divisible-by-32 cases
-                       ;; are handled by the (1- length), below.
-                       ;; CSR, 2002-04-24
-                       result-bit-array
-                       (do ((index sb!vm:vector-data-offset (1+ index))
-                            (end-1 (+ sb!vm:vector-data-offset
-                                      ;; bit-vectors of length 1-32
-                                      ;; need precisely one (SETF
-                                      ;; %RAW-BITS), done here in the
-                                      ;; epilogue. - CSR, 2002-04-24
-                                      (truncate (truly-the index (1- length))
-                                                sb!vm:n-word-bits))))
-                           ((= index end-1)
-                            (setf (%raw-bits result-bit-array index)
-                                  (,',wordfun (%raw-bits bit-array-1 index)
-                                              (%raw-bits bit-array-2 index)))
-                            result-bit-array)
-                         (declare (optimize (speed 3) (safety 0))
-                                  (type index index end-1))
-                         (setf (%raw-bits result-bit-array index)
-                               (,',wordfun (%raw-bits bit-array-1 index)
-                                           (%raw-bits bit-array-2 index))))))))))
- (def bit-and 32bit-logical-and)
- (def bit-ior 32bit-logical-or)
- (def bit-xor 32bit-logical-xor)
- (def bit-eqv 32bit-logical-eqv)
- (def bit-nand 32bit-logical-nand)
- (def bit-nor 32bit-logical-nor)
- (def bit-andc1 32bit-logical-andc1)
- (def bit-andc2 32bit-logical-andc2)
- (def bit-orc1 32bit-logical-orc1)
- (def bit-orc2 32bit-logical-orc2))
+                                        bit-array-2
+                                        result-bit-array))))
+                  (let ((length (length result-bit-array)))
+                    (if (= length 0)
+                        ;; We avoid doing anything to 0-length
+                        ;; bit-vectors, or rather, the memory that
+                        ;; follows them. Other divisible-by-32 cases
+                        ;; are handled by the (1- length), below.
+                        ;; CSR, 2002-04-24
+                        result-bit-array
+                        (do ((index sb!vm:vector-data-offset (1+ index))
+                             (end-1 (+ sb!vm:vector-data-offset
+                                       ;; bit-vectors of length 1-32
+                                       ;; need precisely one (SETF
+                                       ;; %RAW-BITS), done here in the
+                                       ;; epilogue. - CSR, 2002-04-24
+                                       (truncate (truly-the index (1- length))
+                                                 sb!vm:n-word-bits))))
+                            ((>= index end-1)
+                             (setf (%raw-bits result-bit-array index)
+                                   (,',wordfun (%raw-bits bit-array-1 index)
+                                               (%raw-bits bit-array-2 index)))
+                             result-bit-array)
+                          (declare (optimize (speed 3) (safety 0))
+                                   (type index index end-1))
+                          (setf (%raw-bits result-bit-array index)
+                                (,',wordfun (%raw-bits bit-array-1 index)
+                                            (%raw-bits bit-array-2 index))))))))))
+ (def bit-and word-logical-and)
+ (def bit-ior word-logical-or)
+ (def bit-xor word-logical-xor)
+ (def bit-eqv word-logical-eqv)
+ (def bit-nand word-logical-nand)
+ (def bit-nor word-logical-nor)
+ (def bit-andc1 word-logical-andc1)
+ (def bit-andc2 word-logical-andc2)
+ (def bit-orc1 word-logical-orc1)
+ (def bit-orc2 word-logical-orc2))
 
 (deftransform bit-not
-             ((bit-array result-bit-array)
-              (simple-bit-vector simple-bit-vector) *
-              :node node :policy (>= speed space))
+              ((bit-array result-bit-array)
+               (simple-bit-vector simple-bit-vector) *
+               :node node :policy (>= speed space))
   `(progn
      ,@(unless (policy node (zerop safety))
-        '((unless (= (length bit-array)
-                     (length result-bit-array))
-            (error "Argument and result bit arrays are not the same length:~
-                    ~%  ~S~%  ~S"
-                   bit-array result-bit-array))))
+         '((unless (= (length bit-array)
+                      (length result-bit-array))
+             (error "Argument and result bit arrays are not the same length:~
+                     ~%  ~S~%  ~S"
+                    bit-array result-bit-array))))
     (let ((length (length result-bit-array)))
       (if (= length 0)
-         ;; We avoid doing anything to 0-length bit-vectors, or
-         ;; rather, the memory that follows them. Other
-         ;; divisible-by-32 cases are handled by the (1- length),
-         ;; below.  CSR, 2002-04-24
-         result-bit-array
-         (do ((index sb!vm:vector-data-offset (1+ index))
-              (end-1 (+ sb!vm:vector-data-offset
-                        ;; bit-vectors of length 1-32 need precisely
-                        ;; one (SETF %RAW-BITS), done here in the
-                        ;; epilogue. - CSR, 2002-04-24
-                        (truncate (truly-the index (1- length))
-                                  sb!vm:n-word-bits))))
-             ((= index end-1)
-              (setf (%raw-bits result-bit-array index)
-                    (32bit-logical-not (%raw-bits bit-array index)))
-              result-bit-array)
-           (declare (optimize (speed 3) (safety 0))
-                    (type index index end-1))
-           (setf (%raw-bits result-bit-array index)
-                 (32bit-logical-not (%raw-bits bit-array index))))))))
+          ;; We avoid doing anything to 0-length bit-vectors, or rather,
+          ;; the memory that follows them. Other divisible-by
+          ;; n-word-bits cases are handled by the (1- length), below.
+          ;; CSR, 2002-04-24
+          result-bit-array
+          (do ((index sb!vm:vector-data-offset (1+ index))
+               (end-1 (+ sb!vm:vector-data-offset
+                         ;; bit-vectors of length 1 to n-word-bits need
+                         ;; precisely one (SETF %RAW-BITS), done here in
+                         ;; the epilogue. - CSR, 2002-04-24
+                         (truncate (truly-the index (1- length))
+                                   sb!vm:n-word-bits))))
+              ((>= index end-1)
+               (setf (%raw-bits result-bit-array index)
+                     (word-logical-not (%raw-bits bit-array index)))
+               result-bit-array)
+            (declare (optimize (speed 3) (safety 0))
+                     (type index index end-1))
+            (setf (%raw-bits result-bit-array index)
+                  (word-logical-not (%raw-bits bit-array index))))))))
 
 (deftransform bit-vector-= ((x y) (simple-bit-vector simple-bit-vector))
   `(and (= (length x) (length y))
         (let ((length (length x)))
-         (or (= length 0)
-             (do* ((i sb!vm:vector-data-offset (+ i 1))
-                   (end-1 (+ sb!vm:vector-data-offset
-                             (floor (1- length) sb!vm:n-word-bits))))
-                  ((= i end-1)
-                   (let* ((extra (mod length sb!vm:n-word-bits))
-                          (mask (1- (ash 1 extra)))
-                          (numx
-                           (logand
-                            (ash mask
-                                 ,(ecase sb!c:*backend-byte-order*
-                                    (:little-endian 0)
-                                    (:big-endian
-                                     '(- sb!vm:n-word-bits extra))))
-                            (%raw-bits x i)))
-                          (numy
-                           (logand
-                            (ash mask
-                                 ,(ecase sb!c:*backend-byte-order*
-                                    (:little-endian 0)
-                                    (:big-endian
-                                     '(- sb!vm:n-word-bits extra))))
-                            (%raw-bits y i))))
-                     (declare (type (integer 0 31) extra)
-                              (type (unsigned-byte 32) mask numx numy))
-                     (= numx numy)))
-               (declare (type index i end-1))
-               (let ((numx (%raw-bits x i))
-                     (numy (%raw-bits y i)))
-                 (declare (type (unsigned-byte 32) numx numy))
-                 (unless (= numx numy)
-                   (return nil))))))))
+          (or (= length 0)
+              (do* ((i sb!vm:vector-data-offset (+ i 1))
+                    (end-1 (+ sb!vm:vector-data-offset
+                              (floor (1- length) sb!vm:n-word-bits))))
+                   ((>= i end-1)
+                    (let* ((extra (1+ (mod (1- length) sb!vm:n-word-bits)))
+                           (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
+                                      (- extra sb!vm:n-word-bits)))
+                           (numx
+                            (logand
+                             (ash mask
+                                  ,(ecase sb!c:*backend-byte-order*
+                                     (:little-endian 0)
+                                     (:big-endian
+                                      '(- sb!vm:n-word-bits extra))))
+                             (%raw-bits x i)))
+                           (numy
+                            (logand
+                             (ash mask
+                                  ,(ecase sb!c:*backend-byte-order*
+                                     (:little-endian 0)
+                                     (:big-endian
+                                      '(- sb!vm:n-word-bits extra))))
+                             (%raw-bits y i))))
+                      (declare (type (integer 1 #.sb!vm:n-word-bits) extra)
+                               (type sb!vm:word mask numx numy))
+                      (= numx numy)))
+                (declare (type index i end-1))
+                (let ((numx (%raw-bits x i))
+                      (numy (%raw-bits y i)))
+                  (declare (type sb!vm:word numx numy))
+                  (unless (= numx numy)
+                    (return nil))))))))
+
+(deftransform count ((item sequence) (bit simple-bit-vector) *
+                     :policy (>= speed space))
+  `(let ((length (length sequence)))
+    (if (zerop length)
+        0
+        (do ((index sb!vm:vector-data-offset (1+ index))
+             (count 0)
+             (end-1 (+ sb!vm:vector-data-offset
+                       (truncate (truly-the index (1- length))
+                                 sb!vm:n-word-bits))))
+            ((>= index end-1)
+             (let* ((extra (1+ (mod (1- length) sb!vm:n-word-bits)))
+                    (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
+                               (- extra sb!vm:n-word-bits)))
+                    (bits (logand (ash mask
+                                       ,(ecase sb!c:*backend-byte-order*
+                                               (:little-endian 0)
+                                               (:big-endian
+                                                '(- sb!vm:n-word-bits extra))))
+                                  (%raw-bits sequence index))))
+               (declare (type (integer 1 #.sb!vm:n-word-bits) extra))
+               (declare (type sb!vm:word mask bits))
+               (incf count (logcount bits))
+               ,(if (constant-lvar-p item)
+                    (if (zerop (lvar-value item))
+                        '(- length count)
+                        'count)
+                    '(if (zerop item)
+                         (- length count)
+                         count))))
+          (declare (type index index count end-1)
+                   (optimize (speed 3) (safety 0)))
+          (incf count (logcount (%raw-bits sequence index)))))))
 
 (deftransform fill ((sequence item) (simple-bit-vector bit) *
-                   :policy (>= speed space))
-  (let ((value (if (constant-continuation-p item)
-                  (if (= (continuation-value item) 0)
-                      0
-                      #.(1- (ash 1 32)))
-                  `(if (= item 0) 0 #.(1- (ash 1 32))))))
+                    :policy (>= speed space))
+  (let ((value (if (constant-lvar-p item)
+                   (if (= (lvar-value item) 0)
+                       0
+                       #.(1- (ash 1 sb!vm:n-word-bits)))
+                   `(if (= item 0) 0 #.(1- (ash 1 sb!vm:n-word-bits))))))
     `(let ((length (length sequence))
-          (value ,value))
+           (value ,value))
        (if (= length 0)
-          sequence
-          (do ((index sb!vm:vector-data-offset (1+ index))
-               (end-1 (+ sb!vm:vector-data-offset
-                         ;; bit-vectors of length 1-32 need precisely
-                         ;; one (SETF %RAW-BITS), done here in the
-                         ;; epilogue. - CSR, 2002-04-24
-                         (truncate (truly-the index (1- length))
-                                   sb!vm:n-word-bits))))
-              ((= index end-1)
-               (setf (%raw-bits sequence index) value)
-               sequence)
-            (declare (optimize (speed 3) (safety 0))
-                     (type index index end-1))
-            (setf (%raw-bits sequence index) value))))))
+           sequence
+           (do ((index sb!vm:vector-data-offset (1+ index))
+                (end-1 (+ sb!vm:vector-data-offset
+                          ;; bit-vectors of length 1 to n-word-bits need
+                          ;; precisely one (SETF %RAW-BITS), done here
+                          ;; in the epilogue. - CSR, 2002-04-24
+                          (truncate (truly-the index (1- length))
+                                    sb!vm:n-word-bits))))
+               ((>= index end-1)
+                (setf (%raw-bits sequence index) value)
+                sequence)
+             (declare (optimize (speed 3) (safety 0))
+                      (type index index end-1))
+             (setf (%raw-bits sequence index) value))))))
 
 (deftransform fill ((sequence item) (simple-base-string base-char) *
-                   :policy (>= speed space))
-  (let ((value (if (constant-continuation-p item)
-                  (let* ((char (continuation-value item))
-                         (code (sb!xc:char-code char)))
-                    (logior code (ash code 8) (ash code 16) (ash code 24)))
-                  `(let ((code (sb!xc:char-code item)))
-                    (logior code (ash code 8) (ash code 16) (ash code 24))))))
+                    :policy (>= speed space))
+  (let ((value (if (constant-lvar-p item)
+                   (let* ((char (lvar-value item))
+                          (code (sb!xc:char-code char))
+                          (accum 0))
+                     (dotimes (i sb!vm:n-word-bytes accum)
+                       (setf accum (logior accum (ash code (* 8 i))))))
+                   `(let ((code (sb!xc:char-code item)))
+                     (logior ,@(loop for i from 0 below sb!vm:n-word-bytes
+                                     collect `(ash code ,(* 8 i))))))))
     `(let ((length (length sequence))
-          (value ,value))
+           (value ,value))
       (multiple-value-bind (times rem)
-         (truncate length 4)
-       (do ((index sb!vm:vector-data-offset (1+ index))
-            (end (+ times sb!vm:vector-data-offset)))
-           ((= index end)
-            (let ((place (* times 4)))
-              (declare (fixnum place))
-              (dotimes (j rem sequence)
-                (declare (index j))
-                (setf (schar sequence (the index (+ place j))) item))))
-         (declare (optimize (speed 3) (safety 0))
-                  (type index index))
-         (setf (%raw-bits sequence index) value))))))
+          (truncate length sb!vm:n-word-bytes)
+        (do ((index sb!vm:vector-data-offset (1+ index))
+             (end (+ times sb!vm:vector-data-offset)))
+            ((>= index end)
+             (let ((place (* times sb!vm:n-word-bytes)))
+               (declare (fixnum place))
+               (dotimes (j rem sequence)
+                 (declare (index j))
+                 (setf (schar sequence (the index (+ place j))) item))))
+          (declare (optimize (speed 3) (safety 0))
+                   (type index index))
+          (setf (%raw-bits sequence index) value))))))
 \f
 ;;;; %BYTE-BLT
 
 ;;; SB!KERNEL and SB!SYS (e.g. i/o code). It's not clear that it's the
 ;;; ideal interface, though, and it probably deserves some thought.
 (deftransform %byte-blt ((src src-start dst dst-start dst-end)
-                        ((or (simple-unboxed-array (*)) system-area-pointer)
-                         index
-                         (or (simple-unboxed-array (*)) system-area-pointer)
-                         index
-                         index))
+                         ((or (simple-unboxed-array (*)) system-area-pointer)
+                          index
+                          (or (simple-unboxed-array (*)) system-area-pointer)
+                          index
+                          index))
   ;; FIXME: CMU CL had a hairier implementation of this (back when it
   ;; was still called (%PRIMITIVE BYTE-BLT). It had the small problem
   ;; that it didn't work for large (>16M) values of SRC-START or
   ;; acceptable for SQRT and COS, it's acceptable here, but this
   ;; should probably be checked. -- WHN
   '(flet ((sapify (thing)
-           (etypecase thing
-             (system-area-pointer thing)
-             ;; FIXME: The code here rather relies on the simple
-             ;; unboxed array here having byte-sized entries. That
-             ;; should be asserted explicitly, I just haven't found
-             ;; a concise way of doing it. (It would be nice to
-             ;; declare it in the DEFKNOWN too.)
-             ((simple-unboxed-array (*)) (vector-sap thing)))))
+            (etypecase thing
+              (system-area-pointer thing)
+              ;; FIXME: The code here rather relies on the simple
+              ;; unboxed array here having byte-sized entries. That
+              ;; should be asserted explicitly, I just haven't found
+              ;; a concise way of doing it. (It would be nice to
+              ;; declare it in the DEFKNOWN too.)
+              ((simple-unboxed-array (*)) (vector-sap thing)))))
      (declare (inline sapify))
      (without-gcing
       (memmove (sap+ (sapify dst) dst-start)
-              (sap+ (sapify src) src-start)
-              (- dst-end dst-start)))
+               (sap+ (sapify src) src-start)
+               (- dst-end dst-start)))
      (values)))
 \f
 ;;;; transforms for EQL of floating point values
 
 (deftransform eql ((x y) (double-float double-float))
   '(and (= (double-float-low-bits x) (double-float-low-bits y))
-       (= (double-float-high-bits x) (double-float-high-bits y))))
+        (= (double-float-high-bits x) (double-float-high-bits y))))
 
 \f
-;;;; 32-bit operations
-#!-x86 ; on X86 it is a modular function
-(deftransform lognot ((x) ((unsigned-byte 32)) *
-                      :node node
-                      :result result)
-  "32-bit implementation"
-  (let ((dest (continuation-dest result)))
-    (unless (and (combination-p dest)
-                 (eq (continuation-fun-name (combination-fun dest))
-                     'logand))
-      (give-up-ir1-transform))
-    (unless (some (lambda (arg)
-                    (csubtypep (continuation-type arg)
-                               (specifier-type '(unsigned-byte 32))))
-                  (combination-args dest))
-      (give-up-ir1-transform))
-    (setf (node-derived-type node)
-          (values-specifier-type '(values (unsigned-byte 32) &optional)))
-    '(32bit-logical-not x)))
+;;;; modular functions
+(define-good-modular-fun logand :unsigned)
+(define-good-modular-fun logior :unsigned)
+;;; FIXME: XOR? ANDC1, ANDC2?  -- CSR, 2003-09-16
+
+(macrolet
+    ((def (name class width)
+       (let ((type (ecase class
+                     (:unsigned 'unsigned-byte)
+                     (:signed 'signed-byte))))
+         `(progn
+            (defknown ,name (integer (integer 0)) (,type ,width)
+                      (foldable flushable movable))
+            (define-modular-fun-optimizer ash ((integer count) ,class :width width)
+              (when (and (<= width ,width)
+                         (or (and (constant-lvar-p count)
+                                  (plusp (lvar-value count)))
+                             (csubtypep (lvar-type count)
+                                        (specifier-type '(and unsigned-byte fixnum)))))
+                (cut-to-width integer ,class width)
+                ',name))
+            (setf (gethash ',name (modular-class-versions (find-modular-class ',class)))
+                  `(ash ,',width))))))
+  ;; This should really be dependent on SB!VM:N-WORD-BITS, but since we
+  ;; don't have a true Alpha64 port yet, we'll have to stick to
+  ;; SB!VM:N-MACHINE-WORD-BITS for the time being.  --njf, 2004-08-14
+  #!+#.(cl:if (cl:= 32 sb!vm:n-machine-word-bits) '(and) '(or))
+  (progn
+    #!+x86 (def sb!vm::ash-left-smod30 :signed 30)
+    (def sb!vm::ash-left-mod32 :unsigned 32))
+  #!+#.(cl:if (cl:= 64 sb!vm:n-machine-word-bits) '(and) '(or))
+  (progn
+    #!+x86-64 (def sb!vm::ash-left-smod61 :signed 61)
+    (def sb!vm::ash-left-mod64 :unsigned 64)))
+
+\f
+;;;; word-wise logical operations
+
+;;; These transforms assume the presence of modular arithmetic to
+;;; generate efficient code.
+
+(define-source-transform word-logical-not (x)
+  `(logand (lognot (the sb!vm:word ,x)) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-and ((x y))
+  '(logand x y))
+
+(deftransform word-logical-nand ((x y))
+  '(logand (lognand x y) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-or ((x y))
+  '(logior x y))
+
+(deftransform word-logical-nor ((x y))
+  '(logand (lognor x y) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-xor ((x y))
+  '(logxor x y))
+
+(deftransform word-logical-eqv ((x y))
+  '(logand (logeqv x y) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-orc1 ((x y))
+  '(logand (logorc1 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-orc2 ((x y))
+  '(logand (logorc2 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-andc1 ((x y))
+  '(logand (logandc1 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
+
+(deftransform word-logical-andc2 ((x y))
+  '(logand (logandc2 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
 
-(define-good-modular-fun logand)
-(define-good-modular-fun logior)
 \f
 ;;; There are two different ways the multiplier can be recoded. The
 ;;; more obvious is to shift X by the correct amount for each bit set
 (defun ub32-strength-reduce-constant-multiply (arg num)
   (declare (type (unsigned-byte 32) num))
   (let ((adds 0) (shifts 0)
-       (result nil) first-one)
-    (labels ((tub32 (x) `(truly-the (unsigned-byte 32) ,x))
-            (add (next-factor)
-              (setf result
-                    (tub32
-                     (if result
-                         (progn (incf adds) `(+ ,result ,(tub32 next-factor)))
-                         next-factor)))))
+        (result nil) first-one)
+    (labels ((add (next-factor)
+               (setf result
+                     (if result
+                         (progn (incf adds) `(+ ,result ,next-factor))
+                         next-factor))))
       (declare (inline add))
       (dotimes (bitpos 32)
-       (if first-one
-           (when (not (logbitp bitpos num))
-             (add (if (= (1+ first-one) bitpos)
-                      ;; There is only a single bit in the string.
-                      (progn (incf shifts) `(ash ,arg ,first-one))
-                      ;; There are at least two.
-                      (progn
-                        (incf adds)
-                        (incf shifts 2)
-                        `(- ,(tub32 `(ash ,arg ,bitpos))
-                            ,(tub32 `(ash ,arg ,first-one))))))
-             (setf first-one nil))
-           (when (logbitp bitpos num)
-             (setf first-one bitpos))))
+        (if first-one
+            (when (not (logbitp bitpos num))
+              (add (if (= (1+ first-one) bitpos)
+                       ;; There is only a single bit in the string.
+                       (progn (incf shifts) `(ash ,arg ,first-one))
+                       ;; There are at least two.
+                       (progn
+                         (incf adds)
+                         (incf shifts 2)
+                         `(- (ash ,arg ,bitpos)
+                             (ash ,arg ,first-one)))))
+              (setf first-one nil))
+            (when (logbitp bitpos num)
+              (setf first-one bitpos))))
       (when first-one
-       (cond ((= first-one 31))
-             ((= first-one 30) (incf shifts) (add `(ash ,arg 30)))
-             (t
-              (incf shifts 2)
-              (incf adds)
-              (add `(- ,(tub32 `(ash ,arg 31)) 
-                       ,(tub32 `(ash ,arg ,first-one))))))
-       (incf shifts)
-       (add `(ash ,arg 31))))
-    (values result adds shifts)))
+        (cond ((= first-one 31))
+              ((= first-one 30) (incf shifts) (add `(ash ,arg 30)))
+              (t
+               (incf shifts 2)
+               (incf adds)
+               (add `(- (ash ,arg 31)
+                        (ash ,arg ,first-one)))))
+        (incf shifts)
+        (add `(ash ,arg 31))))
+    (values (if (plusp adds)
+                `(logand ,result #.(1- (ash 1 32))) ; using modular arithmetic
+                result)
+            adds
+            shifts)))
+
+\f
+;;; Transform GET-LISP-OBJ-ADDRESS for constant immediates, since the normal
+;;; VOP can't handle them.
+
+(deftransform sb!vm::get-lisp-obj-address ((obj) ((constant-arg fixnum)))
+  (ash (lvar-value obj) sb!vm::n-fixnum-tag-bits))
+
+(deftransform sb!vm::get-lisp-obj-address ((obj) ((constant-arg character)))
+  (logior sb!vm::character-widetag
+          (ash (char-code (lvar-value obj)) sb!vm::n-widetag-bits)))