0.8.21.5:
authorNathan Froyd <froydnj@cs.rice.edu>
Mon, 28 Mar 2005 18:54:50 +0000 (18:54 +0000)
committerNathan Froyd <froydnj@cs.rice.edu>
Mon, 28 Mar 2005 18:54:50 +0000 (18:54 +0000)
Enable faster REPLACE on declared specialized arrays.

Overview of necessary machinery:
* New %VECTOR-RAW-BITS and %SET-VECTOR-RAW-BITS functions/VOPs
  which automatically take into account VECTOR-DATA-OFFSET
  (eliminates tedium associated with previous bit-bashing code
   and makes things slightly faster).  It's not clear if the
  old %RAW-BITS and %SET-RAW-BITS functions need to remain;
* Generalize the old bit-bashing code to generate bit-bashers
  for differently sized "bytes" (1-bit, 2-bit, 4-bit, etc.);
* Add REPLACE transforms for most specialized array types
  (those with elements not larger than the word size);
* Replace various incantations of COPY-FROM-SYSTEM-AREA,
  COPY-TO-SYSTEM-AREA, BIT-BASH-COPY, etc. with their new
  width-aware equivalents (this accounts for the bulk of the
  changed files, if not the changed lines);
* Add systematic tests for UB*-BASH-{FILL,COPY};
* Add generalized SUBSEQ and COPY-SEQ transforms while we're
  at it (FILL would be nice to have, but is a little bit
  trickier to do in the general case).

These changes also open up the possibility of removing %BYTE-BLT
  from the sources.  Benefits: decrease in the number of
  WITHOUT-GCING forms required, less calling out to C, more of
  the system in Lisp, etc.  %BYTE-BLT remains in this version,
  but may be removed if there is sufficient support for its
  removal.

38 files changed:
NEWS
contrib/sb-md5/md5.lisp
contrib/sb-simple-streams/internal.lisp
package-data-list.lisp-expr
src/code/alpha-vm.lisp
src/code/bit-bash.lisp
src/code/debug-int.lisp
src/code/defsetfs.lisp
src/code/fd-stream.lisp
src/code/host-alieneval.lisp
src/code/hppa-vm.lisp
src/code/kernel.lisp
src/code/mips-vm.lisp
src/code/ppc-vm.lisp
src/code/run-program.lisp
src/code/sparc-vm.lisp
src/code/stream.lisp
src/code/target-c-call.lisp
src/code/x86-64-vm.lisp
src/code/x86-vm.lisp
src/compiler/alpha/array.lisp
src/compiler/generic/target-core.lisp
src/compiler/generic/vm-fndb.lisp
src/compiler/hppa/array.lisp
src/compiler/hppa/insts.lisp
src/compiler/mips/array.lisp
src/compiler/mips/insts.lisp
src/compiler/ppc/array.lisp
src/compiler/ppc/insts.lisp
src/compiler/seqtran.lisp
src/compiler/sparc/array.lisp
src/compiler/sparc/insts.lisp
src/compiler/x86-64/array.lisp
src/compiler/x86-64/insts.lisp
src/compiler/x86/array.lisp
src/compiler/x86/insts.lisp
tests/seq.impure.lisp
version.lisp-expr

diff --git a/NEWS b/NEWS
index ea7f290..e18ea00 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,9 @@ changes in sbcl-0.8.22 relative to sbcl-0.8.21:
   * incompatible change: the --noprogrammer option, deprecated since
     version 0.7.5, has been removed.  Please use the equivalent
     --disable-debugger option instead.
+  * optimization: REPLACE on declared (UNSIGNED-BYTE 8) vectors, as well
+    as other specialized array types, is much faster.  SUBSEQ and
+    COPY-SEQ on such arrays have also been sped up.
   * fixed inference of the upper bound of an iteration variable.
     (reported by Rajat Datta).
   * fixed bug 376: CONJUGATE type deriver.
index 12515fb..eb7c7d5 100644 (file)
@@ -277,11 +277,7 @@ starting from offset into the given 16 word MD5 block."
    block (* vm:vector-data-offset vm:word-bits)
    (* 64 vm:byte-bits))
   #+(and :sbcl :little-endian)
-  (sb-kernel:bit-bash-copy
-   buffer (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits)
-            (* offset sb-vm:n-byte-bits))
-   block (* sb-vm:vector-data-offset sb-vm:n-word-bits)
-   (* 64 sb-vm:n-byte-bits))
+  (sb-kernel:ub8-bash-copy buffer offset block 0 64)
   #-(or (and :sbcl :little-endian) (and :cmu :little-endian))
   (loop for i of-type (integer 0 16) from 0
        for j of-type (integer 0 #.most-positive-fixnum)
@@ -306,11 +302,7 @@ offset into the given 16 word MD5 block."
    block (* vm:vector-data-offset vm:word-bits)
    (* 64 vm:byte-bits))
   #+(and :sbcl :little-endian)
-  (sb-kernel:bit-bash-copy
-   buffer (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits)
-            (* offset sb-vm:n-byte-bits))
-   block (* sb-vm:vector-data-offset sb-vm:n-word-bits)
-   (* 64 sb-vm:n-byte-bits))
+  (sb-kernel:ub8-bash-copy buffer offset block 0 64)
   #-(or (and :sbcl :little-endian) (and :cmu :little-endian))
   (loop for i of-type (integer 0 16) from 0
        for j of-type (integer 0 #.most-positive-fixnum)
@@ -380,12 +372,7 @@ starting at buffer-offset."
             (* buffer-offset vm:byte-bits))
    (* count vm:byte-bits))
   #+sbcl
-  (sb-kernel:bit-bash-copy
-   from (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits)
-          (* from-offset sb-vm:n-byte-bits))
-   buffer (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits)
-            (* buffer-offset sb-vm:n-byte-bits))
-   (* count sb-vm:n-byte-bits))
+  (sb-kernel:ub8-bash-copy from from-offset buffer buffer-offset count)
   #-(or cmu sbcl)
   (etypecase from
     (simple-string
index e5e926e..a74aabb 100644 (file)
@@ -43,9 +43,9 @@
   (declare (type simple-stream-buffer src dst)
           (type fixnum soff doff length))
   (sb-sys:without-gcing ;; is this necessary??
-   (sb-kernel:system-area-copy (buffer-sap src) (* soff 8)
-                               (buffer-sap dst) (* doff 8)
-                               (* length 8))))
+   (sb-kernel:system-area-ub8-copy (buffer-sap src) soff
+                                   (buffer-sap dst) doff
+                                   length)))
 
 (defun allocate-buffer (size)
   (if (= size sb-impl::bytes-per-buffer)
index 49b56e7..2f1ba5f 100644 (file)
@@ -1098,7 +1098,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%MAKE-RATIO" "%MAP" "%MAP-TO-SIMPLE-VECTOR-ARITY-1"
                "%MAP-TO-LIST-ARITY-1" "%MAP-TO-NIL-ON-SEQUENCE"
                "%MAP-TO-NIL-ON-SIMPLE-VECTOR" "%MAP-TO-NIL-ON-VECTOR"
-               "%MASK-FIELD" "%NEGATE" "%POW" "%PUTHASH" "%RAW-BITS"
+               "%MASK-FIELD" "%NEGATE" "%POW" "%PUTHASH"
+               "%RAW-BITS" "%VECTOR-RAW-BITS"
                "%RAW-REF-COMPLEX-DOUBLE" "%RAW-REF-COMPLEX-LONG"
                "%RAW-REF-COMPLEX-SINGLE" "%RAW-REF-DOUBLE"
                "%RAW-REF-LONG" "%RAW-REF-SINGLE"
@@ -1106,7 +1107,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%RAW-SET-COMPLEX-SINGLE" "%RAW-SET-DOUBLE"
                "%RAW-SET-LONG" "%RAW-SET-SINGLE" "%SCALB" "%SCALBN"
                "%SET-ARRAY-DIMENSION" "%SET-FUNCALLABLE-INSTANCE-FUN"
-               "%SET-FUNCALLABLE-INSTANCE-INFO" "%SET-RAW-BITS"
+               "%SET-FUNCALLABLE-INSTANCE-INFO"
+               "%SET-RAW-BITS" "%SET-VECTOR-RAW-BITS"
                "%SET-SAP-REF-16" "%SET-SAP-REF-32" "%SET-SAP-REF-64"
                "%SET-SAP-REF-WORD" "%SET-SAP-REF-8" "%SET-SAP-REF-DOUBLE"
                "%SET-SAP-REF-LONG" "%SET-SAP-REF-SAP"
@@ -1144,7 +1146,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "ASH-INDEX" "ASSERT-ERROR"
                #!+sb-unicode "BASE-CHAR-P"
                "BASE-STRING-P"
-               "BINDING-STACK-POINTER-SAP" "BIT-BASH-COPY" "BIT-INDEX"
+               "BINDING-STACK-POINTER-SAP" "BIT-INDEX"
                "BOGUS-ARG-TO-VALUES-LIST-ERROR" "BOOLE-CODE"
                "BOUNDING-INDICES-BAD-ERROR" "BYTE-SPECIFIER" "%BYTE-BLT"
                "CALLABLE" "CASE-BODY-ERROR"
@@ -1166,8 +1168,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "CONS-TYPE-CDR-TYPE" "CONS-TYPE-P" "CONSED-SEQUENCE"
                "CONSTANT" "CONSTANT-TYPE" "CONSTANT-TYPE-P"
                "CONSTANT-TYPE-TYPE" "CONTAINING-INTEGER-TYPE"
-               "CONTROL-STACK-POINTER-SAP" "COPY-FROM-SYSTEM-AREA"
-               "COPY-TO-SYSTEM-AREA" "COPY-BYTE-VECTOR-TO-SYSTEM-AREA"
+               "CONTROL-STACK-POINTER-SAP" "COPY-BYTE-VECTOR-TO-SYSTEM-AREA"
                "CSUBTYPEP" "CTYPE" "TYPE-HASH-VALUE" "CTYPE-OF"
                "CTYPE-P" "CTYPEP" "CURRENT-FP" "CURRENT-SP"
                "CURRENT-DYNAMIC-SPACE-START" "DATA-VECTOR-REF"
@@ -1389,7 +1390,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "STACK-REF" "STREAM-DESIGNATOR" "STRING-DESIGNATOR"
                "STRUCTURE-RAW-SLOT-TYPE-AND-SIZE" "SUB-GC"
                "SYMBOLS-DESIGNATOR" "%INSTANCE-LENGTH" "%INSTANCE-REF"
-               "%INSTANCE-SET" "SYSTEM-AREA-CLEAR" "SYSTEM-AREA-COPY"
+               "%INSTANCE-SET" "SYSTEM-AREA-CLEAR"
                "TWO-ARG-*" "TWO-ARG-+" "TWO-ARG--" "TWO-ARG-/"
                "TWO-ARG-/=" "TWO-ARG-<" "TWO-ARG-<=" "TWO-ARG-="
                "TWO-ARG->" "TWO-ARG->=" "TWO-ARG-AND" "TWO-ARG-EQV"
@@ -1417,6 +1418,31 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "VECTOR-OF-CHECKED-LENGTH-GIVEN-LENGTH" "WITH-ARRAY-DATA"
                "WRONG-NUMBER-OF-INDICES-ERROR"
 
+               ;; bit bash fillers (FIXME: 32/64-bit issues)
+               "UB1-BASH-FILL" "SYSTEM-AREA-UB1-FILL"
+               "UB2-BASH-FILL" "SYSTEM-AREA-UB2-FILL"
+               "UB4-BASH-FILL" "SYSTEM-AREA-UB4-FILL"
+               "UB8-BASH-FILL" "SYSTEM-AREA-UB8-FILL"
+               "UB16-BASH-FILL" "SYSTEM-AREA-UB16-FILL"
+               "UB32-BASH-FILL" "SYSTEM-AREA-UB32-FILL"
+               "UB64-BASH-FILL" "SYSTEM-AREA-UB64-FILL"
+
+               ;; bit bash copiers (FIXME: 32/64-bit issues)
+               "UB1-BASH-COPY" "SYSTEM-AREA-UB1-COPY"
+               "COPY-UB1-TO-SYSTEM-AREA" "COPY-UB1-FROM-SYSTEM-AREA"
+               "UB2-BASH-COPY" "SYSTEM-AREA-UB2-COPY"
+               "COPY-UB2-TO-SYSTEM-AREA" "COPY-UB2-FROM-SYSTEM-AREA"
+               "UB4-BASH-COPY" "SYSTEM-AREA-UB4-COPY"
+               "COPY-UB4-TO-SYSTEM-AREA" "COPY-UB4-FROM-SYSTEM-AREA"
+               "UB8-BASH-COPY" "SYSTEM-AREA-UB8-COPY"
+               "COPY-UB8-TO-SYSTEM-AREA" "COPY-UB8-FROM-SYSTEM-AREA"
+               "UB16-BASH-COPY" "SYSTEM-AREA-UB16-COPY"
+               "COPY-UB16-TO-SYSTEM-AREA" "COPY-UB16-FROM-SYSTEM-AREA"
+               "UB32-BASH-COPY" "SYSTEM-AREA-UB32-COPY"
+               "COPY-UB32-TO-SYSTEM-AREA" "COPY-UB32-FROM-SYSTEM-AREA"
+               "UB64-BASH-COPY" "SYSTEM-AREA-UB64-COPY"
+               "COPY-UB64-TO-SYSTEM-AREA" "COPY-UB64-FROM-SYSTEM-AREA"
+
                "FDEFN" "MAKE-FDEFN" "FDEFN-P" "FDEFN-NAME" "FDEFN-FUN"
                "FDEFN-MAKUNBOUND" "OUTER-FDEFN"
                "%COERCE-CALLABLE-TO-FUN" "FUN-SUBTYPE"
index fff421e..3b421b8 100644 (file)
            (vector (make-array length :element-type '(unsigned-byte 8))))
       (declare (type (unsigned-byte 8) length)
                (type (simple-array (unsigned-byte 8) (*)) vector))
-      (copy-from-system-area pc (* n-byte-bits 5)
-                             vector (* n-word-bits vector-data-offset)
-                             (* length n-byte-bits))
+      (copy-ub8-from-system-area pc 5 vector 0 length)
       (let* ((index 0)
              (error-number (sb!c:read-var-integer vector index)))
         (collect ((sc-offsets))
index 0ca7ed6..c2fb216 100644 (file)
 
 (in-package "SB!VM")
 \f
-;;;; constants and types
+;;;; types
 
-;;; the number of bits to process at a time
-(defconstant unit-bits n-word-bits)
+(deftype bit-offset () '(integer 0 (#.sb!vm:n-word-bits)))
 
-;;; the maximum number of bits that can be dealt with in a single call
-(defconstant max-bits (ash sb!xc:most-positive-fixnum -2))
-
-(deftype unit ()
-  `(unsigned-byte ,unit-bits))
-
-(deftype offset ()
-  `(integer 0 ,max-bits))
-
-(deftype bit-offset ()
-  `(integer 0 (,unit-bits)))
-
-(deftype bit-count ()
-  `(integer 1 (,unit-bits)))
-
-(deftype word-offset ()
-  `(integer 0 (,(ceiling max-bits unit-bits))))
-\f
 ;;;; support routines
 
 ;;; A particular implementation must offer either VOPs to translate
 ;;; machines this is a left-shift and on little-endian machines this
 ;;; is a right-shift.
 (defun shift-towards-start (number countoid)
-  (declare (type unit number) (fixnum countoid))
-  (let ((count (ldb (byte (1- (integer-length unit-bits)) 0) countoid)))
+  (declare (type sb!vm:word number) (fixnum countoid))
+  (let ((count (ldb (byte (1- (integer-length sb!vm:n-word-bits)) 0) countoid)))
     (declare (type bit-offset count))
     (if (zerop count)
        number
        (ecase sb!c:*backend-byte-order*
          (:big-endian
-          (ash (ldb (byte (- unit-bits count) 0) number) count))
+          (ash (ldb (byte (- sb!vm:n-word-bits count) 0) number) count))
          (:little-endian
           (ash number (- count)))))))
 
@@ -74,8 +55,8 @@
 ;;; removing bits from the "end". On big-endian machines this is a
 ;;; right-shift and on little-endian machines this is a left-shift.
 (defun shift-towards-end (number count)
-  (declare (type unit number) (fixnum count))
-  (let ((count (ldb (byte (1- (integer-length unit-bits)) 0) count)))
+  (declare (type sb!vm:word number) (fixnum count))
+  (let ((count (ldb (byte (1- (integer-length sb!vm:n-word-bits)) 0) count)))
     (declare (type bit-offset count))
     (if (zerop count)
        number
@@ -83,9 +64,9 @@
          (:big-endian
           (ash number (- count)))
          (:little-endian
-          (ash (ldb (byte (- unit-bits count) 0) number) count))))))
+          (ash (ldb (byte (- sb!vm:n-word-bits count) 0) number) count))))))
 
-#!-sb-fluid (declaim (inline start-mask end-mask fix-sap-and-offset))
+#!-sb-fluid (declaim (inline start-mask end-mask))
 
 ;;; Produce a mask that contains 1's for the COUNT "start" bits and
 ;;; 0's for the remaining "end" bits. Only the lower 5 bits of COUNT
@@ -93,7 +74,7 @@
 ;;; on 32-bit word size -- WHN 2001-03-19).
 (defun start-mask (count)
   (declare (fixnum count))
-  (shift-towards-start (1- (ash 1 unit-bits)) (- count)))
+  (shift-towards-start (1- (ash 1 sb!vm:n-word-bits)) (- count)))
 
 ;;; Produce a mask that contains 1's for the COUNT "end" bits and 0's
 ;;; for the remaining "start" bits. Only the lower 5 bits of COUNT are
 ;;; 32-bit word size -- WHN 2001-03-19).
 (defun end-mask (count)
   (declare (fixnum count))
-  (shift-towards-end (1- (ash 1 unit-bits)) (- count)))
-
-;;; Align the SAP to a word boundary, and update the offset accordingly.
-(defun fix-sap-and-offset (sap offset)
-  (declare (type system-area-pointer sap)
-          (type index offset)
-          (values system-area-pointer index))
-  (let ((address (sap-int sap)))
-    (values (int-sap #!-alpha (word-logical-andc2 address
-                                                 sb!vm::fixnum-tag-mask)
-                    #!+alpha (ash (ash address -2) 2))
-           (+ (* (logand address sb!vm::fixnum-tag-mask) n-byte-bits)
-              offset))))
+  (shift-towards-end (1- (ash 1 sb!vm:n-word-bits)) (- count)))
 
 #!-sb-fluid (declaim (inline word-sap-ref %set-word-sap-ref))
 (defun word-sap-ref (sap offset)
           (type index offset)
           (values sb!vm:word)
           (optimize (speed 3) (safety 0) #-sb-xc-host (inhibit-warnings 3)))
-  (sap-ref-word sap (the index (ash offset sb!vm::n-fixnum-tag-bits))))
+  (sap-ref-word sap (the index (ash offset sb!vm:n-fixnum-tag-bits))))
 (defun %set-word-sap-ref (sap offset value)
   (declare (type system-area-pointer sap)
           (type index offset)
           (type sb!vm:word value)
           (values sb!vm:word)
           (optimize (speed 3) (safety 0) (inhibit-warnings 3)))
-  (setf (sap-ref-word sap (the index (ash offset sb!vm::n-fixnum-tag-bits)))
+  (setf (sap-ref-word sap (the index (ash offset sb!vm:n-fixnum-tag-bits)))
        value))
-\f
-;;;; CONSTANT-BIT-BASH
 
-;;; Fill DST with VALUE starting at DST-OFFSET and continuing for
-;;; LENGTH bits.
-#!-sb-fluid (declaim (inline constant-bit-bash))
-(defun constant-bit-bash (dst dst-offset length value dst-ref-fn dst-set-fn)
-  (declare (type offset dst-offset) (type unit value)
-          (type function dst-ref-fn dst-set-fn))
-  (multiple-value-bind (dst-word-offset dst-bit-offset)
-      (floor dst-offset unit-bits)
-    (declare (type word-offset dst-word-offset)
-            (type bit-offset dst-bit-offset))
-    (multiple-value-bind (words final-bits)
-       (floor (+ dst-bit-offset length) unit-bits)
-      (declare (type word-offset words) (type bit-offset final-bits))
-      (if (zerop words)
-         (unless (zerop length)
-           (funcall dst-set-fn dst dst-word-offset
-                    (if (= length unit-bits)
-                        value
-                        (let ((mask (shift-towards-end (start-mask length)
-                                                       dst-bit-offset)))
-                          (declare (type unit mask))
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2
-                            (funcall dst-ref-fn dst dst-word-offset)
-                            mask))))))
-         (let ((interior (floor (- length final-bits) unit-bits)))
-           (unless (zerop dst-bit-offset)
-             (let ((mask (end-mask (- dst-bit-offset))))
-               (declare (type unit mask))
-               (funcall dst-set-fn dst dst-word-offset
-                        (word-logical-or
-                         (word-logical-and value mask)
-                         (word-logical-andc2
-                          (funcall dst-ref-fn dst dst-word-offset)
-                          mask))))
-             (incf dst-word-offset))
-           (dotimes (i interior)
-             (funcall dst-set-fn dst dst-word-offset value)
-             (incf dst-word-offset))
-           (unless (zerop final-bits)
-             (let ((mask (start-mask final-bits)))
-               (declare (type unit mask))
-               (funcall dst-set-fn dst dst-word-offset
-                        (word-logical-or
-                         (word-logical-and value mask)
-                         (word-logical-andc2
-                          (funcall dst-ref-fn dst dst-word-offset)
-                          mask)))))))))
-  (values))
 \f
-;;;; UNARY-BIT-BASH
+;;; the actual bashers and common uses of same
 
-#!-sb-fluid (declaim (inline unary-bit-bash))
-(defun unary-bit-bash (src src-offset dst dst-offset length
-                          dst-ref-fn dst-set-fn src-ref-fn)
-  ;; FIXME: Declaring these bit indices to be of type OFFSET, then
-  ;; using the inline expansion in SPEED 3 SAFETY 0 functions, is not
-  ;; a good thing. At the very least, we should make sure that the
-  ;; type (overflow) checks get done. Better would be to avoid
-  ;; using bit indices, and to use 32-bit unsigneds instead, and/or
-  ;; to call out to things like memmove(3) for big moves.
-  (declare (type offset src-offset dst-offset length)
-          (type function dst-ref-fn dst-set-fn src-ref-fn))
-  (multiple-value-bind (dst-word-offset dst-bit-offset)
-      (floor dst-offset unit-bits)
-    (declare (type word-offset dst-word-offset)
-            (type bit-offset dst-bit-offset))
-    (multiple-value-bind (src-word-offset src-bit-offset)
-       (floor src-offset unit-bits)
-      (declare (type word-offset src-word-offset)
-              (type bit-offset src-bit-offset))
-      (cond
-       ((<= (+ dst-bit-offset length) unit-bits)
-       ;; We are only writing one word, so it doesn't matter what
-       ;; order we do it in. But we might be reading from multiple
-       ;; words, so take care.
-       (cond
-        ((zerop length)
-         ;; Actually, we aren't even writing one word. This is really easy.
-         )
-        ((= length unit-bits)
-         ;; DST-BIT-OFFSET must be equal to zero, or we would be
-         ;; writing multiple words. If SRC-BIT-OFFSET is also zero,
-         ;; then we just transfer the single word. Otherwise we have
-         ;; to extract bits from two src words.
-         (funcall dst-set-fn dst dst-word-offset
-                  (if (zerop src-bit-offset)
-                      (funcall src-ref-fn src src-word-offset)
-                      (word-logical-or
-                       (shift-towards-start
-                        (funcall src-ref-fn src src-word-offset)
-                        src-bit-offset)
-                       (shift-towards-end
-                        (funcall src-ref-fn src (1+ src-word-offset))
-                        (- src-bit-offset))))))
-        (t
-         ;; We are only writing some portion of the dst word, so we
-         ;; need to preserve the extra bits. Also, we still don't
-         ;; know whether we need one or two source words.
-         (let ((mask (shift-towards-end (start-mask length) dst-bit-offset))
-               (orig (funcall dst-ref-fn dst dst-word-offset))
-               (value
-                (if (> src-bit-offset dst-bit-offset)
-                    ;; The source starts further into the word than
-                    ;; does the dst, so the source could extend into
-                    ;; the next word. If it does, we have to merge
-                    ;; the two words, and if not, we can just shift
-                    ;; the first word.
-                    (let ((src-bit-shift (- src-bit-offset dst-bit-offset)))
-                      (if (> (+ src-bit-offset length) unit-bits)
-                          (word-logical-or
-                           (shift-towards-start
-                            (funcall src-ref-fn src src-word-offset)
-                            src-bit-shift)
-                           (shift-towards-end
-                            (funcall src-ref-fn src (1+ src-word-offset))
-                            (- src-bit-shift)))
-                          (shift-towards-start
-                           (funcall src-ref-fn src src-word-offset)
-                           src-bit-shift)))
-                    ;; The dst starts further into the word than does
-                    ;; the source, so we know the source can not
-                    ;; extend into a second word (or else the dst
-                    ;; would too, and we wouldn't be in this branch.
-                    (shift-towards-end
-                     (funcall src-ref-fn src src-word-offset)
-                     (- dst-bit-offset src-bit-offset)))))
-           (declare (type unit mask orig value))
-           ;; Replace the dst word.
-           (funcall dst-set-fn dst dst-word-offset
-                    (word-logical-or
-                     (word-logical-and value mask)
-                     (word-logical-andc2 orig mask)))))))
-       ((= src-bit-offset dst-bit-offset)
-       ;; The source and dst are aligned, so we don't need to shift
-       ;; anything. But we have to pick the direction of the loop in
-       ;; case the source and dst are really the same thing.
-       (multiple-value-bind (words final-bits)
-           (floor (+ dst-bit-offset length) unit-bits)
-         (declare (type word-offset words) (type bit-offset final-bits))
-         (let ((interior (floor (- length final-bits) unit-bits)))
-           (declare (type word-offset interior))
-           (cond
-            ((<= dst-offset src-offset)
-             ;; We need to loop from left to right
-             (unless (zerop dst-bit-offset)
-               ;; We are only writing part of the first word, so mask
-               ;; off the bits we want to preserve.
-               (let ((mask (end-mask (- dst-bit-offset)))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or (word-logical-and value mask)
-                                            (word-logical-andc2 orig mask))))
-               (incf src-word-offset)
-               (incf dst-word-offset))
-             ;; Just copy the interior words.
-             (dotimes (i interior)
-               (funcall dst-set-fn dst dst-word-offset
-                        (funcall src-ref-fn src src-word-offset))
-               (incf src-word-offset)
-               (incf dst-word-offset))
-             (unless (zerop final-bits)
-               ;; We are only writing part of the last word.
-               (let ((mask (start-mask final-bits))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2 orig mask))))))
-            (t
-             ;; We need to loop from right to left.
-             (incf dst-word-offset words)
-             (incf src-word-offset words)
-             (unless (zerop final-bits)
-               (let ((mask (start-mask final-bits))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2 orig mask)))))
-             (dotimes (i interior)
-               (decf src-word-offset)
-               (decf dst-word-offset)
-               (funcall dst-set-fn dst dst-word-offset
-                        (funcall src-ref-fn src src-word-offset)))
-             (unless (zerop dst-bit-offset)
-               (decf src-word-offset)
-               (decf dst-word-offset)
-               (let ((mask (end-mask (- dst-bit-offset)))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2 orig mask))))))))))
-       (t
-       ;; They aren't aligned.
-       (multiple-value-bind (words final-bits)
-           (floor (+ dst-bit-offset length) unit-bits)
-         (declare (type word-offset words) (type bit-offset final-bits))
-         (let ((src-shift (mod (- src-bit-offset dst-bit-offset) unit-bits))
-               (interior (floor (- length final-bits) unit-bits)))
-           (declare (type bit-offset src-shift)
-                    (type word-offset interior))
-           (cond
-            ((<= dst-offset src-offset)
-             ;; We need to loop from left to right
-             (let ((prev 0)
-                   (next (funcall src-ref-fn src src-word-offset)))
-               (declare (type unit prev next))
-               (flet ((get-next-src ()
-                        (setf prev next)
-                        (setf next (funcall src-ref-fn src
-                                            (incf src-word-offset)))))
-                 (declare (inline get-next-src))
-                 (unless (zerop dst-bit-offset)
-                   (when (> src-bit-offset dst-bit-offset)
-                     (get-next-src))
-                   (let ((mask (end-mask (- dst-bit-offset)))
-                         (orig (funcall dst-ref-fn dst dst-word-offset))
-                         (value (word-logical-or
-                                 (shift-towards-start prev src-shift)
-                                 (shift-towards-end next (- src-shift)))))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask)))
-                     (incf dst-word-offset)))
-                 (dotimes (i interior)
-                   (get-next-src)
-                   (let ((value (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift))))
-                     (declare (type unit value))
-                     (funcall dst-set-fn dst dst-word-offset value)
-                     (incf dst-word-offset)))
-                 (unless (zerop final-bits)
-                   (let ((value
-                          (if (> (+ final-bits src-shift) unit-bits)
-                              (progn
-                                (get-next-src)
-                                (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift)))
-                              (shift-towards-start next src-shift)))
-                         (mask (start-mask final-bits))
-                         (orig (funcall dst-ref-fn dst dst-word-offset)))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask))))))))
-            (t
-             ;; We need to loop from right to left.
-             (incf dst-word-offset words)
-             (incf src-word-offset
-                   (1- (ceiling (+ src-bit-offset length) unit-bits)))
-             (let ((next 0)
-                   (prev (funcall src-ref-fn src src-word-offset)))
-               (declare (type unit prev next))
-               (flet ((get-next-src ()
-                        (setf next prev)
-                        (setf prev (funcall src-ref-fn src
-                                            (decf src-word-offset)))))
-                 (declare (inline get-next-src))
-                 (unless (zerop final-bits)
-                   (when (> final-bits (- unit-bits src-shift))
-                     (get-next-src))
-                   (let ((value (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift)))
-                         (mask (start-mask final-bits))
-                         (orig (funcall dst-ref-fn dst dst-word-offset)))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask)))))
-                 (decf dst-word-offset)
-                 (dotimes (i interior)
-                   (get-next-src)
-                   (let ((value (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift))))
-                     (declare (type unit value))
-                     (funcall dst-set-fn dst dst-word-offset value)
-                     (decf dst-word-offset)))
-                 (unless (zerop dst-bit-offset)
-                   (if (> src-bit-offset dst-bit-offset)
-                       (get-next-src)
-                       (setf next prev prev 0))
-                   (let ((mask (end-mask (- dst-bit-offset)))
-                         (orig (funcall dst-ref-fn dst dst-word-offset))
-                         (value (word-logical-or
-                                 (shift-towards-start prev src-shift)
-                                 (shift-towards-end next (- src-shift)))))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask)))))))))))))))
-  (values))
-\f
-;;;; the actual bashers
+;;; This is a little ugly.  Fixing bug 188 would bring the ability to
+;;; wrap a MACROLET or something similar around this whole thing would
+;;; make things significantly less ugly.  --njf, 2005-02-23
+(eval-when (:compile-toplevel :load-toplevel :execute)
 
-(defun bit-bash-fill (value dst dst-offset length)
-  (declare (type unit value) (type offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (constant-bit-bash dst dst-offset length value
-                     #'%raw-bits #'%set-raw-bits)))
+;;; Align the SAP to a word boundary, and update the offset accordingly.
+(defmacro !define-sap-fixer (bitsize)
+  (let ((name (intern (format nil "FIX-SAP-AND-OFFSET-UB~A" bitsize))))
+    `(progn
+      (declaim (inline ,name))
+      (defun ,name (sap offset)
+        (declare (type system-area-pointer sap)
+                 (type index offset)
+                 (values system-area-pointer index))
+        (let ((address (sap-int sap)))
+          (values (int-sap #!-alpha (word-logical-andc2 address
+                                                        sb!vm:fixnum-tag-mask)
+                           #!+alpha (ash (ash address -2) 2))
+                  (+ ,(ecase bitsize
+                       (1 '(* (logand address sb!vm:fixnum-tag-mask) n-byte-bits))
+                       (2 '(* (logand address sb!vm:fixnum-tag-mask) (/ n-byte-bits 2)))
+                       (4 '(* (logand address sb!vm:fixnum-tag-mask) (/ n-byte-bits 4)))
+                       ((8 16 32 64) '(logand address sb!vm:fixnum-tag-mask)))
+                     offset)))))))
+
+(defmacro !define-byte-bashers (bitsize)
+  (let* ((bytes-per-word (/ n-word-bits bitsize))
+         (byte-offset `(integer 0 (,bytes-per-word)))
+         (byte-count `(integer 1 (,bytes-per-word)))
+         (max-bytes (ash most-positive-fixnum
+                         ;; FIXME: this reflects code contained in the
+                         ;; original bit-bash.lisp, but seems very
+                         ;; nonsensical.  Why shouldn't we be able to
+                         ;; handle M-P-FIXNUM bits?  And if we can't,
+                         ;; are these other shift amounts bogus, too?
+                         (ecase bitsize
+                           (1 -2)
+                           (2 -1)
+                           (4  0)
+                           (8  0)
+                           (16 0)
+                           (32 0))))
+         (offset `(integer 0 ,max-bytes))
+         (max-word-offset (ceiling max-bytes bytes-per-word))
+         (word-offset `(integer 0 ,max-word-offset))
+         (fix-sap-and-offset-name (intern (format nil "FIX-SAP-AND-OFFSET-UB~A" bitsize)))
+         (constant-bash-name (intern (format nil "CONSTANT-UB~A-BASH" bitsize) (find-package "SB!KERNEL")))
+         (array-fill-name (intern (format nil "UB~A-BASH-FILL" bitsize) (find-package "SB!KERNEL")))
+         (system-area-fill-name (intern (format nil "SYSTEM-AREA-UB~A-FILL" bitsize) (find-package "SB!KERNEL")))
+         (unary-bash-name (intern (format nil "UNARY-UB~A-BASH" bitsize) (find-package "SB!KERNEL")))
+         (array-copy-name (intern (format nil "UB~A-BASH-COPY" bitsize) (find-package "SB!KERNEL")))
+         (system-area-copy-name (intern (format nil "SYSTEM-AREA-UB~A-COPY" bitsize) (find-package "SB!KERNEL")))
+         (array-copy-to-system-area-name
+          (intern (format nil "COPY-UB~A-TO-SYSTEM-AREA" bitsize) (find-package "SB!KERNEL")))
+         (system-area-copy-to-array-name
+          (intern (format nil "COPY-UB~A-FROM-SYSTEM-AREA" bitsize)
+                  (find-package "SB!KERNEL"))))
+    `(progn
+      (declaim (inline ,constant-bash-name ,unary-bash-name))
+      ;; Fill DST with VALUE starting at DST-OFFSET and continuing
+      ;; for LENGTH bytes (however bytes are defined).
+      (defun ,constant-bash-name (dst dst-offset length value
+                                      dst-ref-fn dst-set-fn)
+        (declare (type word value) (type index dst-offset length))
+        (declare (ignorable dst-ref-fn))
+        (multiple-value-bind (dst-word-offset dst-byte-offset)
+            (floor dst-offset ,bytes-per-word)
+          (declare (type ,word-offset dst-word-offset)
+                   (type ,byte-offset dst-byte-offset))
+          (multiple-value-bind (n-words final-bytes)
+              (floor (+ dst-byte-offset length) ,bytes-per-word)
+            (declare (type ,word-offset n-words)
+                     (type ,byte-offset final-bytes))
+            (if (zerop n-words)
+                ,(unless (= bytes-per-word 1)
+                  `(unless (zerop length)
+                    (locally (declare (type ,byte-count length))
+                      (funcall dst-set-fn dst dst-word-offset
+                               (if (= length ,bytes-per-word)
+                                   value
+                                   (let ((mask (shift-towards-end
+                                                (start-mask (* length ,bitsize))
+                                                (* dst-byte-offset ,bitsize))))
+                                     (word-logical-or (word-logical-and value mask)
+                                                      (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset)
+                                                                          mask))))))))
+                (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
+                  ,@(unless (= bytes-per-word 1)
+                     `((unless (zerop dst-byte-offset)
+                         (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))))
+                           (funcall dst-set-fn dst dst-word-offset
+                                    (word-logical-or (word-logical-and value mask)
+                                                     (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset)
+                                                                         mask))))
+                         (incf dst-word-offset))))
+                  (dotimes (i interior)
+                    (funcall dst-set-fn dst dst-word-offset value)
+                    (incf dst-word-offset))
+                  ,@(unless (= bytes-per-word 1)
+                     `((unless (zerop final-bytes)
+                         (let ((mask (start-mask (* final-bytes ,bitsize))))
+                           (funcall dst-set-fn dst dst-word-offset
+                                    (word-logical-or (word-logical-and value mask)
+                                                     (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset)
+                                                                         mask)))))))))))
+        (values))
 
-(defun system-area-fill (value dst dst-offset length)
-  (declare (type unit value) (type offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset)
-     (constant-bit-bash dst dst-offset length value
-                       #'word-sap-ref #'%set-word-sap-ref))))
+      ;; common uses for constant-byte-bashing
+      (defun ,array-fill-name (value dst dst-offset length)
+        (declare (type word value) (type ,offset dst-offset length))
+        (declare (optimize (speed 3) (safety 1)))
+        (,constant-bash-name dst dst-offset length value
+                             #'%vector-raw-bits #'%set-vector-raw-bits))
+      (defun ,system-area-fill-name (value dst dst-offset length)
+        (declare (type word value) (type ,offset dst-offset length))
+        (declare (optimize (speed 3) (safety 1)))
+        (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset)
+          (,constant-bash-name dst dst-offset length value
+                               #'word-sap-ref #'%set-word-sap-ref)))
 
-(defun bit-bash-copy (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0))
-           (inline unary-bit-bash))
-   (unary-bit-bash src src-offset dst dst-offset length
-                  #'%raw-bits #'%set-raw-bits #'%raw-bits)))
+         ;; unary byte bashing (copying)
+         (defun ,unary-bash-name (src src-offset dst dst-offset length
+                                      dst-ref-fn dst-set-fn src-ref-fn)
+           (declare (type index src-offset dst-offset length)
+                    (type function dst-ref-fn dst-set-fn src-ref-fn)
+                    (ignorable dst-ref-fn))
+           (multiple-value-bind (dst-word-offset dst-byte-offset)
+               (floor dst-offset ,bytes-per-word)
+             (declare (type ,word-offset dst-word-offset)
+                      (type ,byte-offset dst-byte-offset))
+             (multiple-value-bind (src-word-offset src-byte-offset)
+                 (floor src-offset ,bytes-per-word)
+               (declare (type ,word-offset src-word-offset)
+                        (type ,byte-offset src-byte-offset))
+               (cond
+                 ((<= (+ dst-byte-offset length) ,bytes-per-word)
+                  ;; We are only writing one word, so it doesn't matter what
+                  ;; order we do it in.  But we might be reading from
+                  ;; multiple words, so take care.
+                  (cond
+                    ((zerop length)
+                     ;; We're not writing anything.  This is really easy.
+                     )
+                    ((= length ,bytes-per-word)
+                     ;; DST-BYTE-OFFSET must be equal to zero, or we would be
+                     ;; writing multiple words.  If SRC-BYTE-OFFSET is also zero,
+                     ;; the we just transfer the single word.  Otherwise we have
+                     ;; to extract bytes from two source words.
+                     (funcall dst-set-fn dst dst-word-offset
+                             (cond
+                               ((zerop src-byte-offset)
+                                (funcall src-ref-fn src src-word-offset))
+                               ,@(unless (= bytes-per-word 1)
+                                  `((t (word-logical-or (shift-towards-start
+                                                         (funcall src-ref-fn src src-word-offset)
+                                                         (* src-byte-offset ,bitsize))
+                                        (shift-towards-end
+                                          (funcall src-ref-fn src (1+ src-word-offset))
+                                          (* (- src-byte-offset) ,bitsize)))))))))
+                    ,@(unless (= bytes-per-word 1)
+                       `((t
+                          ;; We are only writing some portion of the destination word.
+                          ;; We still don't know whether we need one or two source words.
+                          (locally (declare (type ,byte-count length))
+                            (let ((mask (shift-towards-end (start-mask (* length ,bitsize))
+                                                           (* dst-byte-offset ,bitsize)))
+                                  (orig (funcall dst-ref-fn dst dst-word-offset))
+                                  (value (if (> src-byte-offset dst-byte-offset)
+                                             ;; The source starts further
+                                             ;; into the word than does the
+                                             ;; destination, so the source
+                                             ;; could extend into the next
+                                             ;; word.  If it does, we have
+                                             ;; to merge the two words, and
+                                             ;; it not, we can just shift
+                                             ;; the first word.
+                                             (let ((src-byte-shift (- src-byte-offset
+                                                                      dst-byte-offset)))
+                                               (if (> (+ src-byte-offset length) ,bytes-per-word)
+                                                   (word-logical-or
+                                                    (shift-towards-start
+                                                     (funcall src-ref-fn src src-word-offset)
+                                                     (* src-byte-shift ,bitsize))
+                                                    (shift-towards-end
+                                                     (funcall src-ref-fn src (1+ src-word-offset))
+                                                     (* (- src-byte-shift) ,bitsize)))
+                                                   (shift-towards-start (funcall src-ref-fn src src-word-offset)
+                                                                        (* src-byte-shift ,bitsize))))
+                                             ;; The destination starts further
+                                             ;; into the word than does the
+                                             ;; source, so we know the source
+                                             ;; cannot extend into a second
+                                             ;; word (or else the destination
+                                             ;; would too, and we wouldn't be
+                                             ;; in this branch).
+                                             (shift-towards-end
+                                              (funcall src-ref-fn src src-word-offset)
+                                              (* (- dst-byte-offset src-byte-offset) ,bitsize)))))
+                              (declare (type word mask orig value))
+                              (funcall dst-set-fn dst dst-word-offset
+                                       (word-logical-or (word-logical-and value mask)
+                                                        (word-logical-andc2 orig mask))))))))))
+                 ((= src-byte-offset dst-byte-offset)
+                  ;; The source and destination are aligned, so shifting
+                  ;; is unnecessary.  But we have to pick the direction
+                  ;; of the copy in case the source and destination are
+                  ;; really the same object.
+                  (multiple-value-bind (words final-bytes)
+                      (floor (+ dst-byte-offset length) ,bytes-per-word)
+                    (declare (type ,word-offset words)
+                             (type ,byte-offset final-bytes))
+                    (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
+                      (declare (type ,word-offset interior))
+                      (cond
+                        ((<= dst-offset src-offset)
+                         ;; We need to loop from left to right.
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop dst-byte-offset)
+                                ;; We are only writing part of the first word, so mask
+                                ;; off the bytes we want to preserve.
+                                (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask))))
+                                (incf src-word-offset)
+                                (incf dst-word-offset))))
+                         ;; Copy the interior words.
+                         (dotimes (i interior)
+                           (funcall dst-set-fn dst dst-word-offset (funcall src-ref-fn src src-word-offset))
+                           (incf src-word-offset)
+                           (incf dst-word-offset))
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop final-bytes)
+                                ;; We are only writing part of the last word.
+                                (let ((mask (start-mask (* final-bytes ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask))))))))
+                        (t
+                         ;; We need to loop from right to left.
+                         (incf dst-word-offset words)
+                         (incf src-word-offset words)
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop final-bytes)
+                                (let ((mask (start-mask (* final-bytes ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask)))))))
+                         (dotimes (i interior)
+                           (decf src-word-offset)
+                           (decf dst-word-offset)
+                           (funcall dst-set-fn dst dst-word-offset (funcall src-ref-fn src src-word-offset)))
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop dst-byte-offset)
+                                ;; We are only writing part of the last word.
+                                (decf src-word-offset)
+                                (decf dst-word-offset)
+                                (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask))))))))))))
+                 (t
+                  ;; Source and destination are not aligned.
+                  (multiple-value-bind (words final-bytes)
+                      (floor (+ dst-byte-offset length) ,bytes-per-word)
+                    (declare (type ,word-offset words)
+                             (type ,byte-offset final-bytes))
+                    (let ((src-shift (mod (- src-byte-offset dst-byte-offset)
+                                          ,bytes-per-word))
+                          (interior (floor (- length final-bytes) ,bytes-per-word)))
+                      (declare (type ,word-offset interior)
+                               (type ,byte-offset src-shift))
+                      (cond
+                        ((<= dst-offset src-offset)
+                         ;; We need to loop from left to right.
+                         (let ((prev 0)
+                               (next (funcall src-ref-fn src src-word-offset)))
+                           (declare (type word prev next))
+                           (flet ((get-next-src ()
+                                    (setf prev next)
+                                    (setf next (funcall src-ref-fn src (incf src-word-offset)))))
+                             (declare (inline get-next-src))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop dst-byte-offset)
+                                    (when (> src-byte-offset dst-byte-offset)
+                                      (get-next-src))
+                                    (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset))
+                                          (value (word-logical-or (shift-towards-start prev (* src-shift ,bitsize))
+                                                                  (shift-towards-end next (* (- src-shift) ,bitsize)))))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                               (word-logical-or (word-logical-and value mask)
+                                                                (word-logical-andc2 orig mask))))
+                                    (incf dst-word-offset))))
+                             (dotimes (i interior)
+                               (get-next-src)
+                               (let ((value (word-logical-or
+                                             (shift-towards-end next (* (- src-shift) ,bitsize))
+                                             (shift-towards-start prev (* src-shift ,bitsize)))))
+                                 (declare (type word value))
+                                 (funcall dst-set-fn dst dst-word-offset value)
+                                 (incf dst-word-offset)))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop final-bytes)
+                                    (let ((value
+                                           (if (> (+ final-bytes src-shift) ,bytes-per-word)
+                                               (progn
+                                                 (get-next-src)
+                                                 (word-logical-or
+                                                  (shift-towards-end next (* (- src-shift) ,bitsize))
+                                                  (shift-towards-start prev (* src-shift ,bitsize))))
+                                               (shift-towards-start next (* src-shift ,bitsize))))
+                                          (mask (start-mask (* final-bytes ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset)))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                               (word-logical-or (word-logical-and value mask)
+                                                                (word-logical-andc2 orig mask))))))))))
+                        (t
+                         ;; We need to loop from right to left.
+                         (incf dst-word-offset words)
+                         (incf src-word-offset
+                               (1- (ceiling (+ src-byte-offset length) ,bytes-per-word)))
+                         (let ((next 0)
+                               (prev (funcall src-ref-fn src src-word-offset)))
+                           (declare (type word prev next))
+                           (flet ((get-next-src ()
+                                    (setf next prev)
+                                    (setf prev (funcall src-ref-fn src (decf src-word-offset)))))
+                             (declare (inline get-next-src))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop final-bytes)
+                                    (when (> final-bytes (- ,bytes-per-word src-shift))
+                                      (get-next-src))
+                                    (let ((value (word-logical-or
+                                                  (shift-towards-end next (* (- src-shift) ,bitsize))
+                                                  (shift-towards-start prev (* src-shift ,bitsize))))
+                                          (mask (start-mask (* final-bytes ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset)))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                               (word-logical-or (word-logical-and value mask)
+                                                                (word-logical-andc2 orig mask)))))))
+                             (decf dst-word-offset)
+                             (dotimes (i interior)
+                               (get-next-src)
+                               (let ((value (word-logical-or
+                                             (shift-towards-end next (* (- src-shift) ,bitsize))
+                                             (shift-towards-start prev (* src-shift ,bitsize)))))
+                                 (declare (type word value))
+                                 (funcall dst-set-fn dst dst-word-offset value)
+                                 (decf dst-word-offset)))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop dst-byte-offset)
+                                    (if (> src-byte-offset dst-byte-offset)
+                                        (get-next-src)
+                                        (setf next prev prev 0))
+                                    (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset))
+                                          (value (word-logical-or
+                                                  (shift-towards-start prev (* src-shift ,bitsize))
+                                                  (shift-towards-end next (* (- src-shift) ,bitsize)))))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                              (word-logical-or (word-logical-and value mask)
+                                                               (word-logical-andc2 orig mask)))))))))))))))))
+           (values))
 
-(defun system-area-copy (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (src src-offset) (fix-sap-and-offset src src-offset)
-     (declare (type system-area-pointer src))
-     (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset)
-       (declare (type system-area-pointer dst))
-       (unary-bit-bash src src-offset dst dst-offset length
-                      #'word-sap-ref #'%set-word-sap-ref
-                      #'word-sap-ref)))))
+         ;; common uses for unary-byte-bashing
+         (defun ,array-copy-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (,unary-bash-name src src-offset dst dst-offset length
+                               #'%vector-raw-bits
+                               #'%set-vector-raw-bits
+                               #'%vector-raw-bits)))
 
-(defun copy-to-system-area (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset)
-     (unary-bit-bash src src-offset dst dst-offset length
-                    #'word-sap-ref #'%set-word-sap-ref #'%raw-bits))))
+         (defun ,system-area-copy-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (multiple-value-bind (src src-offset) (,fix-sap-and-offset-name src src-offset)
+               (declare (type sb!sys:system-area-pointer src))
+               (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset)
+                 (declare (type sb!sys:system-area-pointer dst))
+                 (,unary-bash-name src src-offset dst dst-offset length
+                                   #'word-sap-ref #'%set-word-sap-ref
+                                   #'word-sap-ref)))))
 
-(defun copy-from-system-area (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (src src-offset) (fix-sap-and-offset src src-offset)
-     (unary-bit-bash src src-offset dst dst-offset length
-                    #'%raw-bits #'%set-raw-bits #'word-sap-ref))))
+         (defun ,array-copy-to-system-area-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name  dst dst-offset)
+               (,unary-bash-name src src-offset dst dst-offset length
+                                 #'word-sap-ref #'%set-word-sap-ref
+                                 #'%vector-raw-bits))))
 
+         (defun ,system-area-copy-to-array-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (multiple-value-bind (src src-offset) (,fix-sap-and-offset-name src src-offset)
+               (,unary-bash-name src src-offset dst dst-offset length
+                                 #'%vector-raw-bits
+                                 #'%set-vector-raw-bits
+                                 #'word-sap-ref)))))))
+) ; EVAL-WHEN
+
+;;; We would normally do this with a MACROLET, but then we run into
+;;; problems with the lexical environment being too hairy for the
+;;; cross-compiler and it cannot inline the basic basher functions.
+#.(loop for i = 1 then (* i 2)
+        collect `(!define-sap-fixer ,i) into fixers
+        collect `(!define-byte-bashers ,i) into bashers
+        until (= i sb!vm:n-word-bits)
+        ;; FIXERS must come first so their inline expansions are available
+        ;; for the bashers.
+        finally (return `(progn ,@fixers ,@bashers)))
+\f
 ;;; a common idiom for calling COPY-TO-SYSTEM-AREA
 ;;;
 ;;; Copy the entire contents of the vector V to memory starting at SAP+OFFSET.
   (declare (type (simple-array (unsigned-byte 8) 1) bv))
   (declare (type system-area-pointer sap))
   (declare (type fixnum offset))
-  ;; FIXME: Actually it looks as though this, and most other calls to
-  ;; COPY-TO-SYSTEM-AREA, could be written more concisely with
-  ;; %BYTE-BLT. Except that the DST-END-DST-START convention for the
-  ;; length is confusing. Perhaps I could rename %BYTE-BLT to
-  ;; %BYTE-BLIT (and correspondingly rename the corresponding VOP) and
-  ;; replace the DST-END argument with an N-BYTES argument?
-  (copy-to-system-area bv
-                      (* vector-data-offset n-word-bits)
-                      sap
-                      offset
-                      (* (length bv) n-byte-bits)))
+  (copy-ub8-to-system-area bv 0 sap offset (length bv)))
index cf5a506..a961e70 100644 (file)
@@ -3286,7 +3286,7 @@ register."
        (setf (code-header-ref code-object (1+ real-lra-slot)) offset))
      (setf (code-header-ref code-object known-return-p-slot)
           known-return-p)
-     (system-area-copy src-start 0 dst-start 0 (* length sb!vm:n-byte-bits))
+     (system-area-ub8-copy src-start 0 dst-start 0 length)
      (sb!vm:sanctify-for-execution code-object)
      #!+(or x86 x86-64)
      (values dst-start code-object (sap- trap-loc src-start))
index 002e818..000e8e6 100644 (file)
 #-sb-xc-host (defsetf sbit %sbitset)
 (defsetf %array-dimension %set-array-dimension)
 (defsetf sb!kernel:%raw-bits sb!kernel:%set-raw-bits)
+(defsetf sb!kernel:%vector-raw-bits sb!kernel:%set-vector-raw-bits)
 #-sb-xc-host (defsetf symbol-value set)
 #-sb-xc-host (defsetf symbol-plist %set-symbol-plist)
 #-sb-xc-host (defsetf nth %setnth)
index 089b759..152acf2 100644 (file)
            ((zerop bytes)) ; easy case
            ((<= bytes space)
             (if (system-area-pointer-p thing)
-                (system-area-copy thing
-                                  (* start sb!vm:n-byte-bits)
-                                  (fd-stream-obuf-sap fd-stream)
-                                  (* tail sb!vm:n-byte-bits)
-                                  (* bytes sb!vm:n-byte-bits))
+                (system-area-ub8-copy thing start
+                                       (fd-stream-obuf-sap fd-stream)
+                                       tail
+                                       bytes)
                 ;; FIXME: There should be some type checking somewhere to
                 ;; verify that THING here is a vector, not just <not a SAP>.
-                (copy-to-system-area thing
-                                     (+ (* start sb!vm:n-byte-bits)
-                                        (* sb!vm:vector-data-offset
-                                           sb!vm:n-word-bits))
-                                     (fd-stream-obuf-sap fd-stream)
-                                     (* tail sb!vm:n-byte-bits)
-                                     (* bytes sb!vm:n-byte-bits)))
+                (copy-ub8-to-system-area thing start
+                                          (fd-stream-obuf-sap fd-stream)
+                                          tail
+                                          bytes))
             (setf (fd-stream-obuf-tail fd-stream) newtail))
            ((<= bytes len)
             (flush-output-buffer fd-stream)
             (if (system-area-pointer-p thing)
-                (system-area-copy thing
-                                  (* start sb!vm:n-byte-bits)
-                                  (fd-stream-obuf-sap fd-stream)
-                                  0
-                                  (* bytes sb!vm:n-byte-bits))
+                (system-area-ub8-copy thing
+                                       start
+                                       (fd-stream-obuf-sap fd-stream)
+                                       0
+                                       bytes)
                 ;; FIXME: There should be some type checking somewhere to
                 ;; verify that THING here is a vector, not just <not a SAP>.
-                (copy-to-system-area thing
-                                     (+ (* start sb!vm:n-byte-bits)
-                                        (* sb!vm:vector-data-offset
-                                           sb!vm:n-word-bits))
-                                     (fd-stream-obuf-sap fd-stream)
-                                     0
-                                     (* bytes sb!vm:n-byte-bits)))
+                (copy-ub8-to-system-area thing
+                                          start
+                                          (fd-stream-obuf-sap fd-stream)
+                                          0
+                                          bytes))
             (setf (fd-stream-obuf-tail fd-stream) bytes))
            (t
             (flush-output-buffer fd-stream)
             (setf (fd-stream-ibuf-tail stream) 0))
            (t
             (decf tail head)
-            (system-area-copy ibuf-sap (* head sb!vm:n-byte-bits)
-                              ibuf-sap 0 (* tail sb!vm:n-byte-bits))
+            (system-area-ub8-copy ibuf-sap head
+                                   ibuf-sap 0 tail)
             (setf head 0)
             (setf (fd-stream-ibuf-head stream) 0)
             (setf (fd-stream-ibuf-tail stream) tail))))
   (declare (type index start end))
   (let* ((length (- end start))
         (string (make-string length)))
-    (copy-from-system-area sap (* start sb!vm:n-byte-bits)
-                          string (* sb!vm:vector-data-offset
-                                    sb!vm:n-word-bits)
-                          (* length sb!vm:n-byte-bits))
+    (copy-ub8-from-system-area sap start
+                               string 0
+                               length)
     string))
 
 ;;; the N-BIN method for FD-STREAMs
index e1c2488..9c343d5 100644 (file)
   `(sap+ ,sap (/ ,offset sb!vm:n-byte-bits)))
 
 (define-alien-type-method (mem-block :deposit-gen) (type sap offset value)
-  (let ((bits (alien-mem-block-type-bits type)))
-    (unless bits
+  (let ((bytes (truncate (alien-mem-block-type-bits type) sb!vm:n-byte-bits)))
+    (unless bytes
       (error "can't deposit aliens of type ~S (unknown size)" type))
-    `(sb!kernel:system-area-copy ,value 0 ,sap ,offset ',bits)))
+    `(sb!kernel:system-area-ub8-copy ,value 0 ,sap ,offset ',bytes)))
 \f
 ;;;; the ARRAY type
 
index 23a161f..62a2b5c 100644 (file)
           (vector (make-array length :element-type '(unsigned-byte 8))))
       (declare (type (unsigned-byte 8) length)
               (type (simple-array (unsigned-byte 8) (*)) vector))
-      (copy-from-system-area pc (* n-byte-bits 5)
-                            vector (* n-word-bits
-                                      vector-data-offset)
-                            (* length n-byte-bits))
+      (copy-ub8-from-system-area pc 5 vector 0 length)
       (let* ((index 0)
             (error-number (sb!c:read-var-integer vector index)))
        (collect ((sc-offsets))
index cfcf76b..1311ee3 100644 (file)
 
 (defun %set-raw-bits (object offset value)
   (declare (type index offset))
-  (declare (type (unsigned-byte #.sb!vm:n-word-bits) value))
+  (declare (type sb!vm:word value))
   (setf (sb!kernel:%raw-bits object offset) value))
 
+(defun %vector-raw-bits (object offset)
+  (declare (type index offset))
+  (sb!kernel:%vector-raw-bits object offset))
+
+(defun %set-vector-raw-bits (object offset value)
+  (declare (type index offset))
+  (declare (type sb!vm:word value))
+  (setf (sb!kernel:%vector-raw-bits object offset) value))
+
 (defun make-single-float (x) (make-single-float x))
 (defun make-double-float (hi lo) (make-double-float hi lo))
 
index 264e871..d9f94e3 100644 (file)
       (/show0 "LENGTH,VECTOR,ERROR-NUMBER=..")
       (/hexstr length)
       (/hexstr vector)
-      (copy-from-system-area pc (* n-byte-bits 5)
-                            vector (* n-word-bits
-                                      vector-data-offset)
-                            (* length n-byte-bits))
+      (copy-ub8-from-system-area pc 5 vector 0 length)
       (let* ((index 0)
             (error-number (sb!c:read-var-integer vector index)))
        (/hexstr error-number)
index 8788587..79ddf31 100644 (file)
     (declare (type system-area-pointer pc)
             (type (unsigned-byte 8) length)
             (type (simple-array (unsigned-byte 8) (*)) vector))
-    (copy-from-system-area pc (* sb!vm:n-byte-bits 5)
-                          vector (* sb!vm:n-word-bits
-                                    sb!vm:vector-data-offset)
-                          (* length sb!vm:n-byte-bits))
+    (copy-ub8-from-system-area pc 5 vector 0 length)
     (let* ((index 0)
           (error-number (sb!c:read-var-integer vector index)))
       (collect ((sc-offsets))
index 0808fdd..25c31c2 100644 (file)
        (declare (simple-string s))
        (let ((n (length s)))
          ;; Blast the string into place.
-         (sb-kernel:copy-to-system-area (the simple-base-string
-                                           ;; FIXME
-                                           (coerce s 'simple-base-string))
-                                        (* sb-vm:vector-data-offset
-                                           sb-vm:n-word-bits)
-                                        string-sap 0
-                                        (* (1+ n) sb-vm:n-byte-bits))
+         (sb-kernel:copy-ub8-to-system-area (the simple-base-string
+                                               ;; FIXME
+                                               (coerce s 'simple-base-string))
+                                             0
+                                             string-sap 0
+                                             (1+ n))
          ;; Blast the pointer to the string into place.
          (setf (sap-ref-sap vec-sap i) string-sap)
          (setf string-sap (sap+ string-sap (round-bytes-to-words (1+ n))))
                                      ~2I~_~A~:>"
                                 (strerror errno)))
                               (t
-                               (sb-kernel:copy-from-system-area
+                               (sb-kernel:copy-ub8-from-system-area
                                 (alien-sap buf) 0
-                                string (* sb-vm:vector-data-offset
-                                          sb-vm:n-word-bits)
-                                (* count sb-vm:n-byte-bits))
+                                string 0
+                                 count)
                                (write-string string stream
                                              :end count)))))))))))
 
index 1ef6009..16c1e81 100644 (file)
     (declare (type system-area-pointer pc)
             (type (unsigned-byte 8) length)
             (type (simple-array (unsigned-byte 8) (*)) vector))
-    (copy-from-system-area pc (* n-byte-bits 5)
-                          vector (* n-word-bits
-                                    vector-data-offset)
-                          (* length n-byte-bits))
+    (copy-ub8-from-system-area pc 5 vector 0 length)
     (let* ((index 0)
           (error-number (sb!c:read-var-integer vector index)))
       (collect ((sc-offsets))
index 869a0cc..4611a72 100644 (file)
               numbytes
               eof-error-p))
      ((<= numbytes num-buffered)
+      #+nil
+      (let ((copy-function (typecase buffer
+                             ((simple-array * (*)) #'ub8-bash-copy)
+                             (system-area-pointer #'copy-ub8-to-system-area))))
+        (funcall copy-function in-buffer index buffer start numbytes))
       (%byte-blt in-buffer index
                 buffer start (+ start numbytes))
       (setf (ansi-stream-in-index stream) (+ index numbytes))
       numbytes)
      (t
       (let ((end (+ start num-buffered)))
-       (%byte-blt in-buffer index buffer start end)
+       #+nil
+        (let ((copy-function (typecase buffer
+                             ((simple-array * (*)) #'ub8-bash-copy)
+                             (system-area-pointer #'copy-ub8-to-system-area))))
+          (funcall copy-function in-buffer index buffer start num-buffered))
+        (%byte-blt in-buffer index buffer start end)
        (setf (ansi-stream-in-index stream) +ansi-stream-in-buffer-length+)
        (+ (funcall (ansi-stream-n-bin stream)
                    stream
                          (- +ansi-stream-in-buffer-length+
                             +ansi-stream-in-buffer-extra+)
                          nil))
-         (start (- +ansi-stream-in-buffer-length+ count))
-         (n-character-array-bytes
-          #.(/ (sb!vm:saetp-n-bits
-                (find 'character
-                      sb!vm:*specialized-array-element-type-properties*
-                      :key #'sb!vm:saetp-specifier))
-               sb!vm:n-byte-bits)))
+         (start (- +ansi-stream-in-buffer-length+ count)))
     (declare (type index start count))
     (cond ((zerop count)
            (setf (ansi-stream-in-index stream)
            (funcall (ansi-stream-in stream) stream eof-error-p eof-value))
           (t
            (when (/= start +ansi-stream-in-buffer-extra+)
-             (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+
-                                       sb!vm:n-byte-bits
-                                       n-character-array-bytes)
-                                    (* sb!vm:vector-data-offset
-                                       sb!vm:n-word-bits))
-                            ibuf (+ (the index (* start
-                                                  sb!vm:n-byte-bits
-                                                  n-character-array-bytes))
-                                    (* sb!vm:vector-data-offset
-                                       sb!vm:n-word-bits))
-                            (* count
-                               sb!vm:n-byte-bits
-                               n-character-array-bytes)))
+             (#.(let* ((n-character-array-bits
+                        (sb!vm:saetp-n-bits
+                         (find 'character
+                               sb!vm:*specialized-array-element-type-properties*
+                               :key #'sb!vm:saetp-specifier)))
+                       (bash-function (intern (format nil "UB~A-BASH-COPY" n-character-array-bits)
+                                              (find-package "SB!KERNEL"))))
+                  bash-function)
+                ibuf +ansi-stream-in-buffer-extra+
+                ibuf start
+                count))
            (setf (ansi-stream-in-index stream) (1+ start))
            (aref ibuf start)))))
 
           (funcall (ansi-stream-bin stream) stream eof-error-p eof-value))
          (t
           (unless (zerop start)
-            (bit-bash-copy ibuf (* sb!vm:vector-data-offset sb!vm:n-word-bits)
-                           ibuf (+ (the index (* start sb!vm:n-byte-bits))
-                                   (* sb!vm:vector-data-offset
-                                      sb!vm:n-word-bits))
-                           (* count sb!vm:n-byte-bits)))
+             (ub8-bash-copy ibuf 0
+                            ibuf start 
+                            count))
           (setf (ansi-stream-in-index stream) (1+ start))
           (aref ibuf start)))))
 \f
     (when (plusp copy)
       (setf (string-input-stream-current stream)
            (truly-the index (+ index copy)))
+      ;; FIXME: why are we VECTOR-SAP'ing things here?  what's the point?
+      ;; and are there SB-UNICODE issues here as well?  --njf, 2005-03-24
       (sb!sys:without-gcing
-       (system-area-copy (vector-sap string)
-                        (* index sb!vm:n-byte-bits)
-                        (if (typep buffer 'system-area-pointer)
-                            buffer
-                            (vector-sap buffer))
-                        (* start sb!vm:n-byte-bits)
-                        (* copy sb!vm:n-byte-bits))))
+       (system-area-ub8-copy (vector-sap string)
+                             index
+                             (if (typep buffer 'system-area-pointer)
+                                 buffer
+                                 (vector-sap buffer))
+                             start
+                             copy)))
     (if (and (> requested copy) eof-error-p)
        (error 'end-of-file :stream stream)
        copy)))
index ccbdc53..4730dbd 100644 (file)
                         until (zerop (sap-ref-8 sap offset))
                         finally (return offset))))
       (let ((result (make-string length :element-type 'base-char)))
-       (sb!kernel:copy-from-system-area sap 0
-                                         result (* sb!vm:vector-data-offset
-                                                   sb!vm:n-word-bits)
-                                         (* length sb!vm:n-byte-bits))
+       (sb!kernel:copy-ub8-from-system-area sap 0 result 0 length)
        result))))
 
 (defun %naturalize-utf8-string (sap)
index 50cd4f8..f6fa3d3 100644 (file)
       (/show0 "LENGTH,VECTOR,ERROR-NUMBER=..")
       (/hexstr length)
       (/hexstr vector)
-      (copy-from-system-area pc (* n-byte-bits 2)
-                            vector (* n-word-bits vector-data-offset)
-                            (* length n-byte-bits))
+      (copy-ub8-from-system-area pc 2 vector 0 length)
       (let* ((index 0)
             (error-number (sb!c:read-var-integer vector index)))
        (/hexstr error-number)
index d49f487..21fd69c 100644 (file)
       (/show0 "LENGTH,VECTOR,ERROR-NUMBER=..")
       (/hexstr length)
       (/hexstr vector)
-      (copy-from-system-area pc (* n-byte-bits 2)
-                            vector (* n-word-bits vector-data-offset)
-                            (* length n-byte-bits))
+      (copy-ub8-from-system-area pc 2 vector 0 length)
       (let* ((index 0)
             (error-number (sb!c:read-var-integer vector index)))
        (/hexstr error-number)
index fa0674c..1c19682 100644 (file)
 (define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num
   %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
-  unsigned-num %set-raw-bits #+gengc nil)
+  unsigned-num %set-raw-bits)
+(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %vector-raw-bits)
+(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %set-vector-raw-bits)
 
 \f
 ;;;; misc. array VOPs
index 4168eba..5023124 100644 (file)
 
       (setf (code-header-ref code-obj sb!vm:code-trace-table-offset-slot)
            length)
-      (copy-to-system-area trace-table
-                          (* sb!vm:vector-data-offset sb!vm:n-word-bits)
-                          fill-ptr
-                          0
-                          trace-table-bits)
+      ;; KLUDGE: the "old" COPY-TO-SYSTEM-AREA automagically worked if
+      ;; somebody changed the number of bytes in a trace table entry.
+      ;; This version is a bit more fragile; if only there were some way
+      ;; to insulate ourselves against changes like that...
+      ;;
+      ;; Then again, PACK-TRACE-TABLE in src/compiler/trace-table.lisp
+      ;; doesn't appear to do anything interesting, returning a 0-length
+      ;; array.  So it seemingly doesn't matter what we do here.  Is this
+      ;; stale code?
+      ;;   --njf, 2005-03-23
+      (copy-ub16-to-system-area trace-table 0 fill-ptr 0 trace-table-len)
 
       (do ((index sb!vm:code-constants-offset (1+ index)))
          ((>= index (length constants)))
index a2c9d35..98bebdc 100644 (file)
   (foldable flushable))
 (defknown (%set-raw-bits) (t fixnum sb!vm:word) sb!vm:word
   (unsafe))
+;; These two are mostly used for bit-bashing operations.
+(defknown %vector-raw-bits (t fixnum) sb!vm:word
+  (foldable flushable))
+(defknown (%set-vector-raw-bits) (t fixnum sb!vm:word) sb!vm:word
+  (unsafe))
 
 
 (defknown allocate-vector ((unsigned-byte 8) index index) (simple-array * (*))
 \f
 ;;;; bit-bashing routines
 
-(defknown copy-to-system-area
-         ((simple-unboxed-array (*)) index system-area-pointer index index)
-  (values)
-  ())
-
-(defknown copy-from-system-area
-         (system-area-pointer index (simple-unboxed-array (*)) index index)
-  (values)
-  ())
-
-(defknown system-area-copy
-         (system-area-pointer index system-area-pointer index index)
-  (values)
-  ())
-
-(defknown bit-bash-copy
-         ((simple-unboxed-array (*)) index
-          (simple-unboxed-array (*)) index index)
-  (values)
-  ())
+;;; FIXME: there's some ugly duplication between the (INTERN (FORMAT ...))
+;;; magic here and the same magic in src/code/bit-bash.lisp.  I don't know
+;;; of any good way to clean it up, but it's definitely violating OAOO.
+(macrolet ((define-known-copiers ()
+            `(progn
+              ,@(loop for i = 1 then (* i 2)
+                      collect `(defknown ,(intern (format nil "UB~A-BASH-COPY" i)
+                                                  (find-package "SB!KERNEL"))
+                                ((simple-unboxed-array (*)) index (simple-unboxed-array (*)) index index)
+                                (values)
+                                ())
+                      collect `(defknown ,(intern (format nil "SYSTEM-AREA-UB~A-COPY" i)
+                                                  (find-package "SB!KERNEL"))
+                                (system-area-pointer index system-area-pointer index index)
+                                (values)
+                                ())
+                      collect `(defknown ,(intern (format nil "COPY-UB~A-TO-SYSTEM-AREA" i)
+                                                  (find-package "SB!KERNEL"))
+                                ((simple-unboxed-array (*)) index system-area-pointer index index)
+                                (values)
+                                ())
+                      collect `(defknown ,(intern (format nil "COPY-UB~A-FROM-SYSTEM-AREA" i)
+                                                  (find-package "SB!KERNEL"))
+                                (system-area-pointer index (simple-unboxed-array (*)) index index)
+                                (values)
+                                ())
+                      until (= i sb!vm:n-word-bits)))))
+  (define-known-copiers))
 
 ;;; (not really a bit-bashing routine, but starting to take over from
 ;;; bit-bashing routines in byte-sized copies as of sbcl-0.6.12.29:)
index 0aa28cb..68dad24 100644 (file)
   %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
   unsigned-num %set-raw-bits)
+(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %vector-raw-bits)
+(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %set-vector-raw-bits)
 \f
 ;;;; Misc. Array VOPs.
 (define-vop (get-vector-subtype get-header-data))
index e07d742..5cf0d93 100644 (file)
     (cond (length-only
            (values 0 (1+ length) nil nil))
           (t
-           (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset))
-                                         vector (* n-word-bits
-                                                   vector-data-offset)
-                                         (* length n-byte-bits))
+           (sb!kernel:copy-ub8-from-system-area sap (1+ offset)
+                                                vector 0 length)
            (collect ((sc-offsets)
                      (lengths))
              (lengths 1)                ; the length byte
index 73ce1c1..ae8c440 100644 (file)
   %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
   unsigned-num %set-raw-bits)
+(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %vector-raw-bits)
+(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %set-vector-raw-bits)
 \f
 ;;;; Misc. Array VOPs.
 (define-vop (get-vector-subtype get-header-data))
index 5e733fe..b19fc9b 100644 (file)
     (cond (length-only
            (values 0 (1+ length) nil nil))
           (t
-           (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset))
-                                         vector (* n-word-bits
-                                                   vector-data-offset)
-                                         (* length n-byte-bits))
+           (sb!kernel:copy-ub8-from-system-area sap (1+ offset)
+                                                vector 0 length)
            (collect ((sc-offsets)
                      (lengths))
              (lengths 1)                ; the length byte
index 6eaef08..a7dece0 100644 (file)
   (:result-types unsigned-num)
   (:variant 0 other-pointer-lowtag))
 
+(define-vop (vector-raw-bits word-index-ref)
+  (:note "vector-raw-bits VOP")
+  (:translate %vector-raw-bits)
+  (:results (value :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:variant vector-data-offset other-pointer-lowtag))
 
+(define-vop (set-vector-raw-bits word-index-set)
+  (:note "setf vector-raw-bits VOP")
+  (:translate %set-vector-raw-bits)
+  (:args (object :scs (descriptor-reg))
+        (index :scs (any-reg zero immediate))
+        (value :scs (unsigned-reg)))
+  (:arg-types * positive-fixnum unsigned-num)
+  (:results (result :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:variant vector-data-offset other-pointer-lowtag))
 \f
 ;;;; Misc. Array VOPs.
 
index bac1efa..b31d85c 100644 (file)
     (cond (length-only
            (values 0 (1+ length) nil nil))
           (t
-           (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset))
-                                         vector (* n-word-bits
-                                                   vector-data-offset)
-                                         (* length n-byte-bits))
+           (sb!kernel:copy-ub8-from-system-area sap (1+ offset)
+                                                vector 0 length)
            (collect ((sc-offsets)
                      (lengths))
              (lengths 1)                ; the length byte
index b20f4db..5694a74 100644 (file)
 ;;;; type dispatch in AREF. The exception is CONCATENATE, since
 ;;;; a full call to CONCATENATE would have to look up the sequence
 ;;;; type, which can be really slow.
-;;;;
-;;;; FIXME: It would be nicer for these transforms to work for any
-;;;; calls when all arguments are vectors with the same element type,
-;;;; rather than restricting them to STRINGs only.
 
 ;;; Moved here from generic/vm-tran.lisp to satisfy clisp
 ;;;
 (def!constant vector-data-bit-offset
   (* sb!vm:vector-data-offset sb!vm:n-word-bits))
 
-(deftransform replace ((string1 string2 &key (start1 0) (start2 0)
-                               end1 end2)
-                      (simple-base-string simple-base-string &rest t)
-                      *
-                      ;; FIXME: consider replacing this policy test
-                      ;; with some tests for the STARTx and ENDx
-                      ;; indices being valid, conditional on high
-                      ;; SAFETY code.
-                      ;;
-                      ;; FIXME: It turns out that this transform is
-                      ;; critical for the performance of string
-                      ;; streams.  Make this more explicit.
-                      :policy (< (max safety space) 3))
-  `(locally
-     (declare (optimize (safety 0)))
-     (bit-bash-copy string2
-                   (the index
-                        (+ (the index (* start2 sb!vm:n-byte-bits))
-                           ,vector-data-bit-offset))
-                   string1
-                   (the index
-                        (+ (the index (* start1 sb!vm:n-byte-bits))
-                           ,vector-data-bit-offset))
-                   (the index
-                        (* (min (the index (- (or end1 (length string1))
-                                              start1))
-                                (the index (- (or end2 (length string2))
-                                              start2)))
-                           sb!vm:n-byte-bits)))
-     string1))
-
-;;; KLUDGE: This isn't the nicest way of achieving efficient string
-;;; streams, but it does work; a more general framework for this kind
-;;; of optimization, as well as better handling of the possible
-;;; keyword arguments, would be nice.
-#!+sb-unicode
-(deftransform replace ((string1 string2 &key (start1 0) (start2 0)
-                               end1 end2)
-                      ((simple-array character (*))
-                        (simple-array character (*))
-                        &rest t)
-                      *
-                      ;; FIXME: consider replacing this policy test
-                      ;; with some tests for the STARTx and ENDx
-                      ;; indices being valid, conditional on high
-                      ;; SAFETY code.
-                      ;;
-                      ;; FIXME: It turns out that this transform is
-                      ;; critical for the performance of string
-                      ;; streams.  Make this more explicit.
-                      :policy (< (max safety space) 3))
-  `(sb!impl::simple-character-string-replace-from-simple-character-string*
-    string1 string2 start1 end1 start2 end2))
+(eval-when (:compile-toplevel)
+(defun valid-bit-bash-saetp-p (saetp)
+  ;; BIT-BASHing isn't allowed on simple vectors that contain pointers
+  (and (not (eq t (sb!vm:saetp-specifier saetp)))
+       ;; Due to limitations with the current BIT-BASHing code, we can't
+       ;; BIT-BASH reliably on arrays whose element types are larger
+       ;; than the word size.
+       (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits)))
+) ; EVAL-WHEN
+
+;; FIXME: It turns out that this transform (for SIMPLE-BASE-STRINGS)
+;; is critical for the performance of string streams.  Make this
+;; more explicit.
+(macrolet
+    ((define-replace-transforms ()
+       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
+             when (valid-bit-bash-saetp-p saetp)
+             collect
+             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
+                    (n-element-bits (sb!vm:saetp-n-bits saetp))
+                    (bash-function (intern (format nil "UB~A-BASH-COPY" n-element-bits)
+                                            (find-package "SB!KERNEL"))))
+               `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
+                                       (,sequence-type ,sequence-type &rest t)
+                                       ,sequence-type
+                                       :node node)
+                 `(let* ((len1 (length seq1))
+                         (len2 (length seq2))
+                         (end1 (or end1 len1))
+                         (end2 (or end2 len2))
+                         (replace-len1 (- end1 start1))
+                         (replace-len2 (- end2 start2)))
+                   ,(unless (policy node (= safety 0))
+                     `(progn
+                       (unless (<= 0 start1 end1 len1)
+                         (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
+                       (unless (<= 0 start2 end2 len2)
+                         (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
+                   (funcall (function ,',bash-function)
+                    seq2 start2
+                    seq1 start1
+                    (min replace-len1 replace-len2))
+                   seq1)))
+             into forms
+             finally (return `(progn ,@forms)))))
+  (define-replace-transforms))
+
+(macrolet
+    ((define-subseq-transforms ()
+       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
+             when (valid-bit-bash-saetp-p saetp)
+             collect
+             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
+                    (n-element-bits (sb!vm:saetp-n-bits saetp))
+                    (bash-function (intern (format nil "UB~A-BASH-COPY" n-element-bits)
+                                           (find-package "SB!KERNEL"))))
+               `(deftransform subseq ((seq start &optional end)
+                                      (,sequence-type t &optional t)
+                                      ,sequence-type :node node)
+                 `(let* ((length (length seq))
+                         (end (if end (min end length) length)))
+                   ,(unless (policy node (= safety 0))
+                     `(progn
+                       (unless (<= 0 start end length)
+                         (sb!impl::signal-bounding-indices-bad-error seq start end))))
+                   (let* ((size (- end start))
+                          (result (make-array size :element-type ',',(sb!vm:saetp-specifier saetp))))
+                     (funcall (function ,',bash-function)
+                              seq start result 0 size)
+                     result))))
+             into forms
+             finally (return `(progn ,@forms)))))
+  (define-subseq-transforms))
+
+(macrolet
+    ((define-copy-seq-transforms ()
+       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
+             when (valid-bit-bash-saetp-p saetp)
+             collect
+             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
+                    (n-element-bits (sb!vm:saetp-n-bits saetp))
+                    (bash-function (intern (format nil "UB~A-BASH-COPY" n-element-bits)
+                                           (find-package "SB!KERNEL"))))
+               `(deftransform copy-seq ((seq) (,sequence-type)
+                                        ,sequence-type)
+                 `(let* ((length (length seq))
+                         (result (make-array length :element-type ',',(sb!vm:saetp-specifier saetp))))
+                   (funcall (function ,',bash-function)
+                    seq 0 result 0 length)
+                   result)))
+             into forms
+             finally (return `(progn ,@forms)))))
+  (define-copy-seq-transforms))
 
 ;;; FIXME: this would be a valid transform for certain excluded cases:
 ;;;   * :TEST 'CHAR= or :TEST #'CHAR=
index 0d56f1a..67c59a7 100644 (file)
   (:results (result :scs (unsigned-reg)))
   (:result-types unsigned-num)
   (:variant 0 other-pointer-lowtag))
+
+(define-vop (vector-raw-bits word-index-ref)
+  (:note "vector-raw-bits VOP")
+  (:translate %vector-raw-bits)
+  (:results (value :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:variant vector-data-offset other-pointer-lowtag))
+
+(define-vop (set-vector-raw-bits word-index-set)
+  (:note "setf vector-raw-bits VOP")
+  (:translate %set-vector-raw-bits)
+  (:args (object :scs (descriptor-reg))
+        (index :scs (any-reg zero immediate))
+        (value :scs (unsigned-reg)))
+  (:arg-types * tagged-num unsigned-num)
+  (:results (result :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:variant vector-data-offset other-pointer-lowtag))
\ No newline at end of file
index 801be79..d0fedf8 100644 (file)
@@ -1183,10 +1183,8 @@ about function addresses and register values.")
     (cond (length-only
            (values 0 (1+ length) nil nil))
           (t
-           (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset))
-                                         vector (* n-word-bits
-                                                   vector-data-offset)
-                                         (* length n-byte-bits))
+           (sb!kernel:copy-ub8-from-system-area sap (1+ offset)
+                                                vector 0 length)
            (collect ((sc-offsets)
                      (lengths))
              (lengths 1)                ; the length byte
index 6a8aebd..e2d4ef6 100644 (file)
   unsigned-num %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
   unsigned-num %set-raw-bits)
+(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %vector-raw-bits)
+(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %set-vector-raw-bits)
 \f
 ;;;; miscellaneous array VOPs
 
index 3e0c6e2..17ad57d 100644 (file)
     (cond (length-only
           (values 0 (1+ length) nil nil))
          (t
-          (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset))
-                                           vector (* n-word-bits
-                                                     vector-data-offset)
-                                           (* length n-byte-bits))
+          (sb!kernel:copy-ub8-from-system-area sap (1+ offset)
+                                                vector 0 length)
           (collect ((sc-offsets)
                     (lengths))
             (lengths 1)                ; the length byte
index 493cd7d..f061798 100644 (file)
   unsigned-num %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
   unsigned-num %set-raw-bits)
+(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %vector-raw-bits)
+(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag
+  (unsigned-reg) unsigned-num %set-vector-raw-bits)
 \f
 ;;;; miscellaneous array VOPs
 
index fac5d41..fd398ba 100644 (file)
     (cond (length-only
           (values 0 (1+ length) nil nil))
          (t
-          (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset))
-                                           vector (* n-word-bits
-                                                     vector-data-offset)
-                                           (* length n-byte-bits))
+          (sb!kernel:copy-ub8-from-system-area sap (1+ offset)
+                                                vector 0 length)
           (collect ((sc-offsets)
                     (lengths))
             (lengths 1)                ; the length byte
index 33f128a..2570a73 100644 (file)
          (with-input-from-string (s string :start 6 :end 9)
            (read-char s)))))
 \f
+;;; testing bit-bashing according to _The Practice of Programming_
+(defun fill-bytes-for-testing (bitsize)
+  "Return a list of 'bytes' of type (MOD BITSIZE)."
+  (remove-duplicates (list 0
+                           (1- (ash 1 (1- bitsize)))
+                           (ash 1 (1- bitsize))
+                           (1- (ash 1 bitsize)))))
+
+(defun fill-with-known-value (value size &rest vectors)
+  (dolist (vec vectors)
+    (dotimes (i size)
+      (setf (aref vec i) value))))
+
+(defun collect-fill-amounts (n-power)
+  (remove-duplicates
+   (loop for i from 0 upto n-power
+         collect (1- (expt 2 i))
+         collect (expt 2 i)
+         collect (1+ (expt 2 i)))))
+
+(defun test-fill-bashing (bitsize padding-amount n-power)
+  (let* ((size (+ (* padding-amount 2) (expt 2 n-power) (* padding-amount 2)))
+         (standard (make-array size :element-type `(unsigned-byte ,bitsize)))
+         (bashed (make-array size :element-type `(unsigned-byte ,bitsize)))
+         (fill-amounts (collect-fill-amounts n-power))
+         (bash-function (intern (format nil "UB~A-BASH-FILL" bitsize)
+                                (find-package "SB-KERNEL"))))
+    (loop for offset from padding-amount below (* 2 padding-amount) do
+          (dolist (c (fill-bytes-for-testing bitsize))
+            (dolist (n fill-amounts)
+              (fill-with-known-value (mod (lognot c) (ash 1 bitsize)) n
+                                     standard bashed)
+              ;; fill vectors
+              ;; a) the standard slow way
+              (fill standard c :start offset :end (+ offset n))
+              ;; b) the blazingly fast way
+              (let ((value (loop for i from 0 by bitsize
+                                 until (= i sb-vm:n-word-bits)
+                                 sum (ash c i))))
+                (funcall bash-function value bashed offset n))
+              ;; check for errors
+              (when (mismatch standard bashed)
+                (format t "Test with offset ~A, fill ~A and length ~A failed.~%"
+                        offset c n)
+                (format t "Mismatch: ~A ~A~%"
+                        (subseq standard 0 (+ offset n 1))
+                        (subseq bashed 0 (+ offset n 1)))
+                (return-from test-fill-bashing nil))))
+          finally (return t))))
+
+(defun test-copy-bashing (bitsize padding-amount n-power)
+  (let* ((size (+ (* padding-amount 2) (expt 2 n-power) (* padding-amount 2)))
+         (standard-dst (make-array size :element-type `(unsigned-byte ,bitsize)))
+         (bashed-dst (make-array size :element-type `(unsigned-byte ,bitsize)))
+         (source (make-array size :element-type `(unsigned-byte ,bitsize)))
+         (fill-amounts (collect-fill-amounts n-power))
+         (bash-function (intern (format nil "UB~A-BASH-COPY" bitsize)
+                                (find-package "SB-KERNEL"))))
+    (do ((source-offset padding-amount (1+ source-offset)))
+        ((>= source-offset (* padding-amount 2))
+         ;; success!
+         t)
+     (do ((target-offset padding-amount (1+ target-offset)))
+         ((>= target-offset (* padding-amount 2)))
+       (dolist (c (fill-bytes-for-testing bitsize))
+         (dolist (n fill-amounts)
+           (fill-with-known-value (mod (lognot c) (ash 1 bitsize)) size
+                                  source standard-dst bashed-dst)
+           ;; fill with test data
+           (fill source c :start source-offset :end (+ source-offset n))
+           ;; copy filled test data to test vectors
+           ;; a) the slow way
+           (replace standard-dst source
+                    :start1 target-offset :end1 (+ target-offset n)
+                    :start2 source-offset :end2 (+ source-offset n))
+           ;; b) the blazingly fast way
+           (funcall bash-function source source-offset
+                    bashed-dst target-offset n)
+           ;; check for errors
+           (when (mismatch standard-dst bashed-dst)
+             (format t "Test with target-offset ~A, source-offset ~A, fill ~A, and length ~A failed.~%"
+                     target-offset source-offset c n)
+             (format t "Mismatch:~% correct ~A~% actual  ~A~%"
+                     standard-dst
+                     bashed-dst)
+             (return-from test-copy-bashing nil))))))))
+
+(loop for i = 1 then (* i 2) do
+     ;; the bare '32' here is fairly arbitrary; '8' provides a good
+     ;; range of lengths over which to fill and copy, which should tease
+     ;; out most errors in the code (if any exist).  (It also makes this
+     ;; part of the test suite finish reasonably quickly.)
+     (assert (test-fill-bashing i 32 8))
+     (assert (test-copy-bashing i 32 8))
+     until (= i sb-vm:n-word-bits))
+\f
 ;;; success
 (sb-ext:quit :unix-status 104)
index d58a62c..db33470 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.8.21.4"
+"0.8.21.5"