0.8.21.5:
[sbcl.git] / src / code / bit-bash.lisp
index 0ca7ed6..c2fb216 100644 (file)
 
 (in-package "SB!VM")
 \f
-;;;; constants and types
+;;;; types
 
-;;; the number of bits to process at a time
-(defconstant unit-bits n-word-bits)
+(deftype bit-offset () '(integer 0 (#.sb!vm:n-word-bits)))
 
-;;; the maximum number of bits that can be dealt with in a single call
-(defconstant max-bits (ash sb!xc:most-positive-fixnum -2))
-
-(deftype unit ()
-  `(unsigned-byte ,unit-bits))
-
-(deftype offset ()
-  `(integer 0 ,max-bits))
-
-(deftype bit-offset ()
-  `(integer 0 (,unit-bits)))
-
-(deftype bit-count ()
-  `(integer 1 (,unit-bits)))
-
-(deftype word-offset ()
-  `(integer 0 (,(ceiling max-bits unit-bits))))
-\f
 ;;;; support routines
 
 ;;; A particular implementation must offer either VOPs to translate
 ;;; machines this is a left-shift and on little-endian machines this
 ;;; is a right-shift.
 (defun shift-towards-start (number countoid)
-  (declare (type unit number) (fixnum countoid))
-  (let ((count (ldb (byte (1- (integer-length unit-bits)) 0) countoid)))
+  (declare (type sb!vm:word number) (fixnum countoid))
+  (let ((count (ldb (byte (1- (integer-length sb!vm:n-word-bits)) 0) countoid)))
     (declare (type bit-offset count))
     (if (zerop count)
        number
        (ecase sb!c:*backend-byte-order*
          (:big-endian
-          (ash (ldb (byte (- unit-bits count) 0) number) count))
+          (ash (ldb (byte (- sb!vm:n-word-bits count) 0) number) count))
          (:little-endian
           (ash number (- count)))))))
 
@@ -74,8 +55,8 @@
 ;;; removing bits from the "end". On big-endian machines this is a
 ;;; right-shift and on little-endian machines this is a left-shift.
 (defun shift-towards-end (number count)
-  (declare (type unit number) (fixnum count))
-  (let ((count (ldb (byte (1- (integer-length unit-bits)) 0) count)))
+  (declare (type sb!vm:word number) (fixnum count))
+  (let ((count (ldb (byte (1- (integer-length sb!vm:n-word-bits)) 0) count)))
     (declare (type bit-offset count))
     (if (zerop count)
        number
@@ -83,9 +64,9 @@
          (:big-endian
           (ash number (- count)))
          (:little-endian
-          (ash (ldb (byte (- unit-bits count) 0) number) count))))))
+          (ash (ldb (byte (- sb!vm:n-word-bits count) 0) number) count))))))
 
-#!-sb-fluid (declaim (inline start-mask end-mask fix-sap-and-offset))
+#!-sb-fluid (declaim (inline start-mask end-mask))
 
 ;;; Produce a mask that contains 1's for the COUNT "start" bits and
 ;;; 0's for the remaining "end" bits. Only the lower 5 bits of COUNT
@@ -93,7 +74,7 @@
 ;;; on 32-bit word size -- WHN 2001-03-19).
 (defun start-mask (count)
   (declare (fixnum count))
-  (shift-towards-start (1- (ash 1 unit-bits)) (- count)))
+  (shift-towards-start (1- (ash 1 sb!vm:n-word-bits)) (- count)))
 
 ;;; Produce a mask that contains 1's for the COUNT "end" bits and 0's
 ;;; for the remaining "start" bits. Only the lower 5 bits of COUNT are
 ;;; 32-bit word size -- WHN 2001-03-19).
 (defun end-mask (count)
   (declare (fixnum count))
-  (shift-towards-end (1- (ash 1 unit-bits)) (- count)))
-
-;;; Align the SAP to a word boundary, and update the offset accordingly.
-(defun fix-sap-and-offset (sap offset)
-  (declare (type system-area-pointer sap)
-          (type index offset)
-          (values system-area-pointer index))
-  (let ((address (sap-int sap)))
-    (values (int-sap #!-alpha (word-logical-andc2 address
-                                                 sb!vm::fixnum-tag-mask)
-                    #!+alpha (ash (ash address -2) 2))
-           (+ (* (logand address sb!vm::fixnum-tag-mask) n-byte-bits)
-              offset))))
+  (shift-towards-end (1- (ash 1 sb!vm:n-word-bits)) (- count)))
 
 #!-sb-fluid (declaim (inline word-sap-ref %set-word-sap-ref))
 (defun word-sap-ref (sap offset)
           (type index offset)
           (values sb!vm:word)
           (optimize (speed 3) (safety 0) #-sb-xc-host (inhibit-warnings 3)))
-  (sap-ref-word sap (the index (ash offset sb!vm::n-fixnum-tag-bits))))
+  (sap-ref-word sap (the index (ash offset sb!vm:n-fixnum-tag-bits))))
 (defun %set-word-sap-ref (sap offset value)
   (declare (type system-area-pointer sap)
           (type index offset)
           (type sb!vm:word value)
           (values sb!vm:word)
           (optimize (speed 3) (safety 0) (inhibit-warnings 3)))
-  (setf (sap-ref-word sap (the index (ash offset sb!vm::n-fixnum-tag-bits)))
+  (setf (sap-ref-word sap (the index (ash offset sb!vm:n-fixnum-tag-bits)))
        value))
-\f
-;;;; CONSTANT-BIT-BASH
 
-;;; Fill DST with VALUE starting at DST-OFFSET and continuing for
-;;; LENGTH bits.
-#!-sb-fluid (declaim (inline constant-bit-bash))
-(defun constant-bit-bash (dst dst-offset length value dst-ref-fn dst-set-fn)
-  (declare (type offset dst-offset) (type unit value)
-          (type function dst-ref-fn dst-set-fn))
-  (multiple-value-bind (dst-word-offset dst-bit-offset)
-      (floor dst-offset unit-bits)
-    (declare (type word-offset dst-word-offset)
-            (type bit-offset dst-bit-offset))
-    (multiple-value-bind (words final-bits)
-       (floor (+ dst-bit-offset length) unit-bits)
-      (declare (type word-offset words) (type bit-offset final-bits))
-      (if (zerop words)
-         (unless (zerop length)
-           (funcall dst-set-fn dst dst-word-offset
-                    (if (= length unit-bits)
-                        value
-                        (let ((mask (shift-towards-end (start-mask length)
-                                                       dst-bit-offset)))
-                          (declare (type unit mask))
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2
-                            (funcall dst-ref-fn dst dst-word-offset)
-                            mask))))))
-         (let ((interior (floor (- length final-bits) unit-bits)))
-           (unless (zerop dst-bit-offset)
-             (let ((mask (end-mask (- dst-bit-offset))))
-               (declare (type unit mask))
-               (funcall dst-set-fn dst dst-word-offset
-                        (word-logical-or
-                         (word-logical-and value mask)
-                         (word-logical-andc2
-                          (funcall dst-ref-fn dst dst-word-offset)
-                          mask))))
-             (incf dst-word-offset))
-           (dotimes (i interior)
-             (funcall dst-set-fn dst dst-word-offset value)
-             (incf dst-word-offset))
-           (unless (zerop final-bits)
-             (let ((mask (start-mask final-bits)))
-               (declare (type unit mask))
-               (funcall dst-set-fn dst dst-word-offset
-                        (word-logical-or
-                         (word-logical-and value mask)
-                         (word-logical-andc2
-                          (funcall dst-ref-fn dst dst-word-offset)
-                          mask)))))))))
-  (values))
 \f
-;;;; UNARY-BIT-BASH
+;;; the actual bashers and common uses of same
 
-#!-sb-fluid (declaim (inline unary-bit-bash))
-(defun unary-bit-bash (src src-offset dst dst-offset length
-                          dst-ref-fn dst-set-fn src-ref-fn)
-  ;; FIXME: Declaring these bit indices to be of type OFFSET, then
-  ;; using the inline expansion in SPEED 3 SAFETY 0 functions, is not
-  ;; a good thing. At the very least, we should make sure that the
-  ;; type (overflow) checks get done. Better would be to avoid
-  ;; using bit indices, and to use 32-bit unsigneds instead, and/or
-  ;; to call out to things like memmove(3) for big moves.
-  (declare (type offset src-offset dst-offset length)
-          (type function dst-ref-fn dst-set-fn src-ref-fn))
-  (multiple-value-bind (dst-word-offset dst-bit-offset)
-      (floor dst-offset unit-bits)
-    (declare (type word-offset dst-word-offset)
-            (type bit-offset dst-bit-offset))
-    (multiple-value-bind (src-word-offset src-bit-offset)
-       (floor src-offset unit-bits)
-      (declare (type word-offset src-word-offset)
-              (type bit-offset src-bit-offset))
-      (cond
-       ((<= (+ dst-bit-offset length) unit-bits)
-       ;; We are only writing one word, so it doesn't matter what
-       ;; order we do it in. But we might be reading from multiple
-       ;; words, so take care.
-       (cond
-        ((zerop length)
-         ;; Actually, we aren't even writing one word. This is really easy.
-         )
-        ((= length unit-bits)
-         ;; DST-BIT-OFFSET must be equal to zero, or we would be
-         ;; writing multiple words. If SRC-BIT-OFFSET is also zero,
-         ;; then we just transfer the single word. Otherwise we have
-         ;; to extract bits from two src words.
-         (funcall dst-set-fn dst dst-word-offset
-                  (if (zerop src-bit-offset)
-                      (funcall src-ref-fn src src-word-offset)
-                      (word-logical-or
-                       (shift-towards-start
-                        (funcall src-ref-fn src src-word-offset)
-                        src-bit-offset)
-                       (shift-towards-end
-                        (funcall src-ref-fn src (1+ src-word-offset))
-                        (- src-bit-offset))))))
-        (t
-         ;; We are only writing some portion of the dst word, so we
-         ;; need to preserve the extra bits. Also, we still don't
-         ;; know whether we need one or two source words.
-         (let ((mask (shift-towards-end (start-mask length) dst-bit-offset))
-               (orig (funcall dst-ref-fn dst dst-word-offset))
-               (value
-                (if (> src-bit-offset dst-bit-offset)
-                    ;; The source starts further into the word than
-                    ;; does the dst, so the source could extend into
-                    ;; the next word. If it does, we have to merge
-                    ;; the two words, and if not, we can just shift
-                    ;; the first word.
-                    (let ((src-bit-shift (- src-bit-offset dst-bit-offset)))
-                      (if (> (+ src-bit-offset length) unit-bits)
-                          (word-logical-or
-                           (shift-towards-start
-                            (funcall src-ref-fn src src-word-offset)
-                            src-bit-shift)
-                           (shift-towards-end
-                            (funcall src-ref-fn src (1+ src-word-offset))
-                            (- src-bit-shift)))
-                          (shift-towards-start
-                           (funcall src-ref-fn src src-word-offset)
-                           src-bit-shift)))
-                    ;; The dst starts further into the word than does
-                    ;; the source, so we know the source can not
-                    ;; extend into a second word (or else the dst
-                    ;; would too, and we wouldn't be in this branch.
-                    (shift-towards-end
-                     (funcall src-ref-fn src src-word-offset)
-                     (- dst-bit-offset src-bit-offset)))))
-           (declare (type unit mask orig value))
-           ;; Replace the dst word.
-           (funcall dst-set-fn dst dst-word-offset
-                    (word-logical-or
-                     (word-logical-and value mask)
-                     (word-logical-andc2 orig mask)))))))
-       ((= src-bit-offset dst-bit-offset)
-       ;; The source and dst are aligned, so we don't need to shift
-       ;; anything. But we have to pick the direction of the loop in
-       ;; case the source and dst are really the same thing.
-       (multiple-value-bind (words final-bits)
-           (floor (+ dst-bit-offset length) unit-bits)
-         (declare (type word-offset words) (type bit-offset final-bits))
-         (let ((interior (floor (- length final-bits) unit-bits)))
-           (declare (type word-offset interior))
-           (cond
-            ((<= dst-offset src-offset)
-             ;; We need to loop from left to right
-             (unless (zerop dst-bit-offset)
-               ;; We are only writing part of the first word, so mask
-               ;; off the bits we want to preserve.
-               (let ((mask (end-mask (- dst-bit-offset)))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or (word-logical-and value mask)
-                                            (word-logical-andc2 orig mask))))
-               (incf src-word-offset)
-               (incf dst-word-offset))
-             ;; Just copy the interior words.
-             (dotimes (i interior)
-               (funcall dst-set-fn dst dst-word-offset
-                        (funcall src-ref-fn src src-word-offset))
-               (incf src-word-offset)
-               (incf dst-word-offset))
-             (unless (zerop final-bits)
-               ;; We are only writing part of the last word.
-               (let ((mask (start-mask final-bits))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2 orig mask))))))
-            (t
-             ;; We need to loop from right to left.
-             (incf dst-word-offset words)
-             (incf src-word-offset words)
-             (unless (zerop final-bits)
-               (let ((mask (start-mask final-bits))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2 orig mask)))))
-             (dotimes (i interior)
-               (decf src-word-offset)
-               (decf dst-word-offset)
-               (funcall dst-set-fn dst dst-word-offset
-                        (funcall src-ref-fn src src-word-offset)))
-             (unless (zerop dst-bit-offset)
-               (decf src-word-offset)
-               (decf dst-word-offset)
-               (let ((mask (end-mask (- dst-bit-offset)))
-                     (orig (funcall dst-ref-fn dst dst-word-offset))
-                     (value (funcall src-ref-fn src src-word-offset)))
-                 (declare (type unit mask orig value))
-                 (funcall dst-set-fn dst dst-word-offset
-                          (word-logical-or
-                           (word-logical-and value mask)
-                           (word-logical-andc2 orig mask))))))))))
-       (t
-       ;; They aren't aligned.
-       (multiple-value-bind (words final-bits)
-           (floor (+ dst-bit-offset length) unit-bits)
-         (declare (type word-offset words) (type bit-offset final-bits))
-         (let ((src-shift (mod (- src-bit-offset dst-bit-offset) unit-bits))
-               (interior (floor (- length final-bits) unit-bits)))
-           (declare (type bit-offset src-shift)
-                    (type word-offset interior))
-           (cond
-            ((<= dst-offset src-offset)
-             ;; We need to loop from left to right
-             (let ((prev 0)
-                   (next (funcall src-ref-fn src src-word-offset)))
-               (declare (type unit prev next))
-               (flet ((get-next-src ()
-                        (setf prev next)
-                        (setf next (funcall src-ref-fn src
-                                            (incf src-word-offset)))))
-                 (declare (inline get-next-src))
-                 (unless (zerop dst-bit-offset)
-                   (when (> src-bit-offset dst-bit-offset)
-                     (get-next-src))
-                   (let ((mask (end-mask (- dst-bit-offset)))
-                         (orig (funcall dst-ref-fn dst dst-word-offset))
-                         (value (word-logical-or
-                                 (shift-towards-start prev src-shift)
-                                 (shift-towards-end next (- src-shift)))))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask)))
-                     (incf dst-word-offset)))
-                 (dotimes (i interior)
-                   (get-next-src)
-                   (let ((value (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift))))
-                     (declare (type unit value))
-                     (funcall dst-set-fn dst dst-word-offset value)
-                     (incf dst-word-offset)))
-                 (unless (zerop final-bits)
-                   (let ((value
-                          (if (> (+ final-bits src-shift) unit-bits)
-                              (progn
-                                (get-next-src)
-                                (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift)))
-                              (shift-towards-start next src-shift)))
-                         (mask (start-mask final-bits))
-                         (orig (funcall dst-ref-fn dst dst-word-offset)))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask))))))))
-            (t
-             ;; We need to loop from right to left.
-             (incf dst-word-offset words)
-             (incf src-word-offset
-                   (1- (ceiling (+ src-bit-offset length) unit-bits)))
-             (let ((next 0)
-                   (prev (funcall src-ref-fn src src-word-offset)))
-               (declare (type unit prev next))
-               (flet ((get-next-src ()
-                        (setf next prev)
-                        (setf prev (funcall src-ref-fn src
-                                            (decf src-word-offset)))))
-                 (declare (inline get-next-src))
-                 (unless (zerop final-bits)
-                   (when (> final-bits (- unit-bits src-shift))
-                     (get-next-src))
-                   (let ((value (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift)))
-                         (mask (start-mask final-bits))
-                         (orig (funcall dst-ref-fn dst dst-word-offset)))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask)))))
-                 (decf dst-word-offset)
-                 (dotimes (i interior)
-                   (get-next-src)
-                   (let ((value (word-logical-or
-                                 (shift-towards-end next (- src-shift))
-                                 (shift-towards-start prev src-shift))))
-                     (declare (type unit value))
-                     (funcall dst-set-fn dst dst-word-offset value)
-                     (decf dst-word-offset)))
-                 (unless (zerop dst-bit-offset)
-                   (if (> src-bit-offset dst-bit-offset)
-                       (get-next-src)
-                       (setf next prev prev 0))
-                   (let ((mask (end-mask (- dst-bit-offset)))
-                         (orig (funcall dst-ref-fn dst dst-word-offset))
-                         (value (word-logical-or
-                                 (shift-towards-start prev src-shift)
-                                 (shift-towards-end next (- src-shift)))))
-                     (declare (type unit mask orig value))
-                     (funcall dst-set-fn dst dst-word-offset
-                              (word-logical-or
-                               (word-logical-and value mask)
-                               (word-logical-andc2 orig mask)))))))))))))))
-  (values))
-\f
-;;;; the actual bashers
+;;; This is a little ugly.  Fixing bug 188 would bring the ability to
+;;; wrap a MACROLET or something similar around this whole thing would
+;;; make things significantly less ugly.  --njf, 2005-02-23
+(eval-when (:compile-toplevel :load-toplevel :execute)
 
-(defun bit-bash-fill (value dst dst-offset length)
-  (declare (type unit value) (type offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (constant-bit-bash dst dst-offset length value
-                     #'%raw-bits #'%set-raw-bits)))
+;;; Align the SAP to a word boundary, and update the offset accordingly.
+(defmacro !define-sap-fixer (bitsize)
+  (let ((name (intern (format nil "FIX-SAP-AND-OFFSET-UB~A" bitsize))))
+    `(progn
+      (declaim (inline ,name))
+      (defun ,name (sap offset)
+        (declare (type system-area-pointer sap)
+                 (type index offset)
+                 (values system-area-pointer index))
+        (let ((address (sap-int sap)))
+          (values (int-sap #!-alpha (word-logical-andc2 address
+                                                        sb!vm:fixnum-tag-mask)
+                           #!+alpha (ash (ash address -2) 2))
+                  (+ ,(ecase bitsize
+                       (1 '(* (logand address sb!vm:fixnum-tag-mask) n-byte-bits))
+                       (2 '(* (logand address sb!vm:fixnum-tag-mask) (/ n-byte-bits 2)))
+                       (4 '(* (logand address sb!vm:fixnum-tag-mask) (/ n-byte-bits 4)))
+                       ((8 16 32 64) '(logand address sb!vm:fixnum-tag-mask)))
+                     offset)))))))
+
+(defmacro !define-byte-bashers (bitsize)
+  (let* ((bytes-per-word (/ n-word-bits bitsize))
+         (byte-offset `(integer 0 (,bytes-per-word)))
+         (byte-count `(integer 1 (,bytes-per-word)))
+         (max-bytes (ash most-positive-fixnum
+                         ;; FIXME: this reflects code contained in the
+                         ;; original bit-bash.lisp, but seems very
+                         ;; nonsensical.  Why shouldn't we be able to
+                         ;; handle M-P-FIXNUM bits?  And if we can't,
+                         ;; are these other shift amounts bogus, too?
+                         (ecase bitsize
+                           (1 -2)
+                           (2 -1)
+                           (4  0)
+                           (8  0)
+                           (16 0)
+                           (32 0))))
+         (offset `(integer 0 ,max-bytes))
+         (max-word-offset (ceiling max-bytes bytes-per-word))
+         (word-offset `(integer 0 ,max-word-offset))
+         (fix-sap-and-offset-name (intern (format nil "FIX-SAP-AND-OFFSET-UB~A" bitsize)))
+         (constant-bash-name (intern (format nil "CONSTANT-UB~A-BASH" bitsize) (find-package "SB!KERNEL")))
+         (array-fill-name (intern (format nil "UB~A-BASH-FILL" bitsize) (find-package "SB!KERNEL")))
+         (system-area-fill-name (intern (format nil "SYSTEM-AREA-UB~A-FILL" bitsize) (find-package "SB!KERNEL")))
+         (unary-bash-name (intern (format nil "UNARY-UB~A-BASH" bitsize) (find-package "SB!KERNEL")))
+         (array-copy-name (intern (format nil "UB~A-BASH-COPY" bitsize) (find-package "SB!KERNEL")))
+         (system-area-copy-name (intern (format nil "SYSTEM-AREA-UB~A-COPY" bitsize) (find-package "SB!KERNEL")))
+         (array-copy-to-system-area-name
+          (intern (format nil "COPY-UB~A-TO-SYSTEM-AREA" bitsize) (find-package "SB!KERNEL")))
+         (system-area-copy-to-array-name
+          (intern (format nil "COPY-UB~A-FROM-SYSTEM-AREA" bitsize)
+                  (find-package "SB!KERNEL"))))
+    `(progn
+      (declaim (inline ,constant-bash-name ,unary-bash-name))
+      ;; Fill DST with VALUE starting at DST-OFFSET and continuing
+      ;; for LENGTH bytes (however bytes are defined).
+      (defun ,constant-bash-name (dst dst-offset length value
+                                      dst-ref-fn dst-set-fn)
+        (declare (type word value) (type index dst-offset length))
+        (declare (ignorable dst-ref-fn))
+        (multiple-value-bind (dst-word-offset dst-byte-offset)
+            (floor dst-offset ,bytes-per-word)
+          (declare (type ,word-offset dst-word-offset)
+                   (type ,byte-offset dst-byte-offset))
+          (multiple-value-bind (n-words final-bytes)
+              (floor (+ dst-byte-offset length) ,bytes-per-word)
+            (declare (type ,word-offset n-words)
+                     (type ,byte-offset final-bytes))
+            (if (zerop n-words)
+                ,(unless (= bytes-per-word 1)
+                  `(unless (zerop length)
+                    (locally (declare (type ,byte-count length))
+                      (funcall dst-set-fn dst dst-word-offset
+                               (if (= length ,bytes-per-word)
+                                   value
+                                   (let ((mask (shift-towards-end
+                                                (start-mask (* length ,bitsize))
+                                                (* dst-byte-offset ,bitsize))))
+                                     (word-logical-or (word-logical-and value mask)
+                                                      (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset)
+                                                                          mask))))))))
+                (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
+                  ,@(unless (= bytes-per-word 1)
+                     `((unless (zerop dst-byte-offset)
+                         (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))))
+                           (funcall dst-set-fn dst dst-word-offset
+                                    (word-logical-or (word-logical-and value mask)
+                                                     (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset)
+                                                                         mask))))
+                         (incf dst-word-offset))))
+                  (dotimes (i interior)
+                    (funcall dst-set-fn dst dst-word-offset value)
+                    (incf dst-word-offset))
+                  ,@(unless (= bytes-per-word 1)
+                     `((unless (zerop final-bytes)
+                         (let ((mask (start-mask (* final-bytes ,bitsize))))
+                           (funcall dst-set-fn dst dst-word-offset
+                                    (word-logical-or (word-logical-and value mask)
+                                                     (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset)
+                                                                         mask)))))))))))
+        (values))
 
-(defun system-area-fill (value dst dst-offset length)
-  (declare (type unit value) (type offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset)
-     (constant-bit-bash dst dst-offset length value
-                       #'word-sap-ref #'%set-word-sap-ref))))
+      ;; common uses for constant-byte-bashing
+      (defun ,array-fill-name (value dst dst-offset length)
+        (declare (type word value) (type ,offset dst-offset length))
+        (declare (optimize (speed 3) (safety 1)))
+        (,constant-bash-name dst dst-offset length value
+                             #'%vector-raw-bits #'%set-vector-raw-bits))
+      (defun ,system-area-fill-name (value dst dst-offset length)
+        (declare (type word value) (type ,offset dst-offset length))
+        (declare (optimize (speed 3) (safety 1)))
+        (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset)
+          (,constant-bash-name dst dst-offset length value
+                               #'word-sap-ref #'%set-word-sap-ref)))
 
-(defun bit-bash-copy (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0))
-           (inline unary-bit-bash))
-   (unary-bit-bash src src-offset dst dst-offset length
-                  #'%raw-bits #'%set-raw-bits #'%raw-bits)))
+         ;; unary byte bashing (copying)
+         (defun ,unary-bash-name (src src-offset dst dst-offset length
+                                      dst-ref-fn dst-set-fn src-ref-fn)
+           (declare (type index src-offset dst-offset length)
+                    (type function dst-ref-fn dst-set-fn src-ref-fn)
+                    (ignorable dst-ref-fn))
+           (multiple-value-bind (dst-word-offset dst-byte-offset)
+               (floor dst-offset ,bytes-per-word)
+             (declare (type ,word-offset dst-word-offset)
+                      (type ,byte-offset dst-byte-offset))
+             (multiple-value-bind (src-word-offset src-byte-offset)
+                 (floor src-offset ,bytes-per-word)
+               (declare (type ,word-offset src-word-offset)
+                        (type ,byte-offset src-byte-offset))
+               (cond
+                 ((<= (+ dst-byte-offset length) ,bytes-per-word)
+                  ;; We are only writing one word, so it doesn't matter what
+                  ;; order we do it in.  But we might be reading from
+                  ;; multiple words, so take care.
+                  (cond
+                    ((zerop length)
+                     ;; We're not writing anything.  This is really easy.
+                     )
+                    ((= length ,bytes-per-word)
+                     ;; DST-BYTE-OFFSET must be equal to zero, or we would be
+                     ;; writing multiple words.  If SRC-BYTE-OFFSET is also zero,
+                     ;; the we just transfer the single word.  Otherwise we have
+                     ;; to extract bytes from two source words.
+                     (funcall dst-set-fn dst dst-word-offset
+                             (cond
+                               ((zerop src-byte-offset)
+                                (funcall src-ref-fn src src-word-offset))
+                               ,@(unless (= bytes-per-word 1)
+                                  `((t (word-logical-or (shift-towards-start
+                                                         (funcall src-ref-fn src src-word-offset)
+                                                         (* src-byte-offset ,bitsize))
+                                        (shift-towards-end
+                                          (funcall src-ref-fn src (1+ src-word-offset))
+                                          (* (- src-byte-offset) ,bitsize)))))))))
+                    ,@(unless (= bytes-per-word 1)
+                       `((t
+                          ;; We are only writing some portion of the destination word.
+                          ;; We still don't know whether we need one or two source words.
+                          (locally (declare (type ,byte-count length))
+                            (let ((mask (shift-towards-end (start-mask (* length ,bitsize))
+                                                           (* dst-byte-offset ,bitsize)))
+                                  (orig (funcall dst-ref-fn dst dst-word-offset))
+                                  (value (if (> src-byte-offset dst-byte-offset)
+                                             ;; The source starts further
+                                             ;; into the word than does the
+                                             ;; destination, so the source
+                                             ;; could extend into the next
+                                             ;; word.  If it does, we have
+                                             ;; to merge the two words, and
+                                             ;; it not, we can just shift
+                                             ;; the first word.
+                                             (let ((src-byte-shift (- src-byte-offset
+                                                                      dst-byte-offset)))
+                                               (if (> (+ src-byte-offset length) ,bytes-per-word)
+                                                   (word-logical-or
+                                                    (shift-towards-start
+                                                     (funcall src-ref-fn src src-word-offset)
+                                                     (* src-byte-shift ,bitsize))
+                                                    (shift-towards-end
+                                                     (funcall src-ref-fn src (1+ src-word-offset))
+                                                     (* (- src-byte-shift) ,bitsize)))
+                                                   (shift-towards-start (funcall src-ref-fn src src-word-offset)
+                                                                        (* src-byte-shift ,bitsize))))
+                                             ;; The destination starts further
+                                             ;; into the word than does the
+                                             ;; source, so we know the source
+                                             ;; cannot extend into a second
+                                             ;; word (or else the destination
+                                             ;; would too, and we wouldn't be
+                                             ;; in this branch).
+                                             (shift-towards-end
+                                              (funcall src-ref-fn src src-word-offset)
+                                              (* (- dst-byte-offset src-byte-offset) ,bitsize)))))
+                              (declare (type word mask orig value))
+                              (funcall dst-set-fn dst dst-word-offset
+                                       (word-logical-or (word-logical-and value mask)
+                                                        (word-logical-andc2 orig mask))))))))))
+                 ((= src-byte-offset dst-byte-offset)
+                  ;; The source and destination are aligned, so shifting
+                  ;; is unnecessary.  But we have to pick the direction
+                  ;; of the copy in case the source and destination are
+                  ;; really the same object.
+                  (multiple-value-bind (words final-bytes)
+                      (floor (+ dst-byte-offset length) ,bytes-per-word)
+                    (declare (type ,word-offset words)
+                             (type ,byte-offset final-bytes))
+                    (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
+                      (declare (type ,word-offset interior))
+                      (cond
+                        ((<= dst-offset src-offset)
+                         ;; We need to loop from left to right.
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop dst-byte-offset)
+                                ;; We are only writing part of the first word, so mask
+                                ;; off the bytes we want to preserve.
+                                (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask))))
+                                (incf src-word-offset)
+                                (incf dst-word-offset))))
+                         ;; Copy the interior words.
+                         (dotimes (i interior)
+                           (funcall dst-set-fn dst dst-word-offset (funcall src-ref-fn src src-word-offset))
+                           (incf src-word-offset)
+                           (incf dst-word-offset))
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop final-bytes)
+                                ;; We are only writing part of the last word.
+                                (let ((mask (start-mask (* final-bytes ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask))))))))
+                        (t
+                         ;; We need to loop from right to left.
+                         (incf dst-word-offset words)
+                         (incf src-word-offset words)
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop final-bytes)
+                                (let ((mask (start-mask (* final-bytes ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask)))))))
+                         (dotimes (i interior)
+                           (decf src-word-offset)
+                           (decf dst-word-offset)
+                           (funcall dst-set-fn dst dst-word-offset (funcall src-ref-fn src src-word-offset)))
+                         ,@(unless (= bytes-per-word 1)
+                            `((unless (zerop dst-byte-offset)
+                                ;; We are only writing part of the last word.
+                                (decf src-word-offset)
+                                (decf dst-word-offset)
+                                (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                      (orig (funcall dst-ref-fn dst dst-word-offset))
+                                      (value (funcall src-ref-fn src src-word-offset)))
+                                  (declare (type word mask orig value))
+                                  (funcall dst-set-fn dst dst-word-offset
+                                           (word-logical-or (word-logical-and value mask)
+                                                            (word-logical-andc2 orig mask))))))))))))
+                 (t
+                  ;; Source and destination are not aligned.
+                  (multiple-value-bind (words final-bytes)
+                      (floor (+ dst-byte-offset length) ,bytes-per-word)
+                    (declare (type ,word-offset words)
+                             (type ,byte-offset final-bytes))
+                    (let ((src-shift (mod (- src-byte-offset dst-byte-offset)
+                                          ,bytes-per-word))
+                          (interior (floor (- length final-bytes) ,bytes-per-word)))
+                      (declare (type ,word-offset interior)
+                               (type ,byte-offset src-shift))
+                      (cond
+                        ((<= dst-offset src-offset)
+                         ;; We need to loop from left to right.
+                         (let ((prev 0)
+                               (next (funcall src-ref-fn src src-word-offset)))
+                           (declare (type word prev next))
+                           (flet ((get-next-src ()
+                                    (setf prev next)
+                                    (setf next (funcall src-ref-fn src (incf src-word-offset)))))
+                             (declare (inline get-next-src))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop dst-byte-offset)
+                                    (when (> src-byte-offset dst-byte-offset)
+                                      (get-next-src))
+                                    (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset))
+                                          (value (word-logical-or (shift-towards-start prev (* src-shift ,bitsize))
+                                                                  (shift-towards-end next (* (- src-shift) ,bitsize)))))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                               (word-logical-or (word-logical-and value mask)
+                                                                (word-logical-andc2 orig mask))))
+                                    (incf dst-word-offset))))
+                             (dotimes (i interior)
+                               (get-next-src)
+                               (let ((value (word-logical-or
+                                             (shift-towards-end next (* (- src-shift) ,bitsize))
+                                             (shift-towards-start prev (* src-shift ,bitsize)))))
+                                 (declare (type word value))
+                                 (funcall dst-set-fn dst dst-word-offset value)
+                                 (incf dst-word-offset)))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop final-bytes)
+                                    (let ((value
+                                           (if (> (+ final-bytes src-shift) ,bytes-per-word)
+                                               (progn
+                                                 (get-next-src)
+                                                 (word-logical-or
+                                                  (shift-towards-end next (* (- src-shift) ,bitsize))
+                                                  (shift-towards-start prev (* src-shift ,bitsize))))
+                                               (shift-towards-start next (* src-shift ,bitsize))))
+                                          (mask (start-mask (* final-bytes ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset)))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                               (word-logical-or (word-logical-and value mask)
+                                                                (word-logical-andc2 orig mask))))))))))
+                        (t
+                         ;; We need to loop from right to left.
+                         (incf dst-word-offset words)
+                         (incf src-word-offset
+                               (1- (ceiling (+ src-byte-offset length) ,bytes-per-word)))
+                         (let ((next 0)
+                               (prev (funcall src-ref-fn src src-word-offset)))
+                           (declare (type word prev next))
+                           (flet ((get-next-src ()
+                                    (setf next prev)
+                                    (setf prev (funcall src-ref-fn src (decf src-word-offset)))))
+                             (declare (inline get-next-src))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop final-bytes)
+                                    (when (> final-bytes (- ,bytes-per-word src-shift))
+                                      (get-next-src))
+                                    (let ((value (word-logical-or
+                                                  (shift-towards-end next (* (- src-shift) ,bitsize))
+                                                  (shift-towards-start prev (* src-shift ,bitsize))))
+                                          (mask (start-mask (* final-bytes ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset)))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                               (word-logical-or (word-logical-and value mask)
+                                                                (word-logical-andc2 orig mask)))))))
+                             (decf dst-word-offset)
+                             (dotimes (i interior)
+                               (get-next-src)
+                               (let ((value (word-logical-or
+                                             (shift-towards-end next (* (- src-shift) ,bitsize))
+                                             (shift-towards-start prev (* src-shift ,bitsize)))))
+                                 (declare (type word value))
+                                 (funcall dst-set-fn dst dst-word-offset value)
+                                 (decf dst-word-offset)))
+                             ,@(unless (= bytes-per-word 1)
+                                `((unless (zerop dst-byte-offset)
+                                    (if (> src-byte-offset dst-byte-offset)
+                                        (get-next-src)
+                                        (setf next prev prev 0))
+                                    (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+                                          (orig (funcall dst-ref-fn dst dst-word-offset))
+                                          (value (word-logical-or
+                                                  (shift-towards-start prev (* src-shift ,bitsize))
+                                                  (shift-towards-end next (* (- src-shift) ,bitsize)))))
+                                      (declare (type word mask orig value))
+                                      (funcall dst-set-fn dst dst-word-offset
+                                              (word-logical-or (word-logical-and value mask)
+                                                               (word-logical-andc2 orig mask)))))))))))))))))
+           (values))
 
-(defun system-area-copy (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (src src-offset) (fix-sap-and-offset src src-offset)
-     (declare (type system-area-pointer src))
-     (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset)
-       (declare (type system-area-pointer dst))
-       (unary-bit-bash src src-offset dst dst-offset length
-                      #'word-sap-ref #'%set-word-sap-ref
-                      #'word-sap-ref)))))
+         ;; common uses for unary-byte-bashing
+         (defun ,array-copy-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (,unary-bash-name src src-offset dst dst-offset length
+                               #'%vector-raw-bits
+                               #'%set-vector-raw-bits
+                               #'%vector-raw-bits)))
 
-(defun copy-to-system-area (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset)
-     (unary-bit-bash src src-offset dst dst-offset length
-                    #'word-sap-ref #'%set-word-sap-ref #'%raw-bits))))
+         (defun ,system-area-copy-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (multiple-value-bind (src src-offset) (,fix-sap-and-offset-name src src-offset)
+               (declare (type sb!sys:system-area-pointer src))
+               (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset)
+                 (declare (type sb!sys:system-area-pointer dst))
+                 (,unary-bash-name src src-offset dst dst-offset length
+                                   #'word-sap-ref #'%set-word-sap-ref
+                                   #'word-sap-ref)))))
 
-(defun copy-from-system-area (src src-offset dst dst-offset length)
-  (declare (type offset src-offset dst-offset length))
-  (locally
-   (declare (optimize (speed 3) (safety 0)))
-   (multiple-value-bind (src src-offset) (fix-sap-and-offset src src-offset)
-     (unary-bit-bash src src-offset dst dst-offset length
-                    #'%raw-bits #'%set-raw-bits #'word-sap-ref))))
+         (defun ,array-copy-to-system-area-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name  dst dst-offset)
+               (,unary-bash-name src src-offset dst dst-offset length
+                                 #'word-sap-ref #'%set-word-sap-ref
+                                 #'%vector-raw-bits))))
 
+         (defun ,system-area-copy-to-array-name (src src-offset dst dst-offset length)
+           (declare (type ,offset src-offset dst-offset length))
+           (locally (declare (optimize (speed 3) (safety 1)))
+             (multiple-value-bind (src src-offset) (,fix-sap-and-offset-name src src-offset)
+               (,unary-bash-name src src-offset dst dst-offset length
+                                 #'%vector-raw-bits
+                                 #'%set-vector-raw-bits
+                                 #'word-sap-ref)))))))
+) ; EVAL-WHEN
+
+;;; We would normally do this with a MACROLET, but then we run into
+;;; problems with the lexical environment being too hairy for the
+;;; cross-compiler and it cannot inline the basic basher functions.
+#.(loop for i = 1 then (* i 2)
+        collect `(!define-sap-fixer ,i) into fixers
+        collect `(!define-byte-bashers ,i) into bashers
+        until (= i sb!vm:n-word-bits)
+        ;; FIXERS must come first so their inline expansions are available
+        ;; for the bashers.
+        finally (return `(progn ,@fixers ,@bashers)))
+\f
 ;;; a common idiom for calling COPY-TO-SYSTEM-AREA
 ;;;
 ;;; Copy the entire contents of the vector V to memory starting at SAP+OFFSET.
   (declare (type (simple-array (unsigned-byte 8) 1) bv))
   (declare (type system-area-pointer sap))
   (declare (type fixnum offset))
-  ;; FIXME: Actually it looks as though this, and most other calls to
-  ;; COPY-TO-SYSTEM-AREA, could be written more concisely with
-  ;; %BYTE-BLT. Except that the DST-END-DST-START convention for the
-  ;; length is confusing. Perhaps I could rename %BYTE-BLT to
-  ;; %BYTE-BLIT (and correspondingly rename the corresponding VOP) and
-  ;; replace the DST-END argument with an N-BYTES argument?
-  (copy-to-system-area bv
-                      (* vector-data-offset n-word-bits)
-                      sap
-                      offset
-                      (* (length bv) n-byte-bits)))
+  (copy-ub8-to-system-area bv 0 sap offset (length bv)))