+ ;; unary byte bashing (copying)
+ (defun ,unary-bash-name (src src-offset dst dst-offset length
+ dst-ref-fn dst-set-fn src-ref-fn)
+ (declare (type index src-offset dst-offset length)
+ (type function dst-ref-fn dst-set-fn src-ref-fn)
+ (ignorable dst-ref-fn))
+ (multiple-value-bind (dst-word-offset dst-byte-offset)
+ (floor dst-offset ,bytes-per-word)
+ (declare (type ,word-offset dst-word-offset)
+ (type ,byte-offset dst-byte-offset))
+ (multiple-value-bind (src-word-offset src-byte-offset)
+ (floor src-offset ,bytes-per-word)
+ (declare (type ,word-offset src-word-offset)
+ (type ,byte-offset src-byte-offset))
+ (cond
+ ((<= (+ dst-byte-offset length) ,bytes-per-word)
+ ;; We are only writing one word, so it doesn't matter what
+ ;; order we do it in. But we might be reading from
+ ;; multiple words, so take care.
+ (cond
+ ((zerop length)
+ ;; We're not writing anything. This is really easy.
+ )
+ ((= length ,bytes-per-word)
+ ;; DST-BYTE-OFFSET must be equal to zero, or we would be
+ ;; writing multiple words. If SRC-BYTE-OFFSET is also zero,
+ ;; the we just transfer the single word. Otherwise we have
+ ;; to extract bytes from two source words.
+ (funcall dst-set-fn dst dst-word-offset
+ (cond
+ ((zerop src-byte-offset)
+ (funcall src-ref-fn src src-word-offset))
+ ,@(unless (= bytes-per-word 1)
+ `((t (word-logical-or (shift-towards-start
+ (funcall src-ref-fn src src-word-offset)
+ (* src-byte-offset ,bitsize))
+ (shift-towards-end
+ (funcall src-ref-fn src (1+ src-word-offset))
+ (* (- src-byte-offset) ,bitsize)))))))))
+ ,@(unless (= bytes-per-word 1)
+ `((t
+ ;; We are only writing some portion of the destination word.
+ ;; We still don't know whether we need one or two source words.
+ (locally (declare (type ,byte-count length))
+ (let ((mask (shift-towards-end (start-mask (* length ,bitsize))
+ (* dst-byte-offset ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (if (> src-byte-offset dst-byte-offset)
+ ;; The source starts further
+ ;; into the word than does the
+ ;; destination, so the source
+ ;; could extend into the next
+ ;; word. If it does, we have
+ ;; to merge the two words, and
+ ;; it not, we can just shift
+ ;; the first word.
+ (let ((src-byte-shift (- src-byte-offset
+ dst-byte-offset)))
+ (if (> (+ src-byte-offset length) ,bytes-per-word)
+ (word-logical-or
+ (shift-towards-start
+ (funcall src-ref-fn src src-word-offset)
+ (* src-byte-shift ,bitsize))
+ (shift-towards-end
+ (funcall src-ref-fn src (1+ src-word-offset))
+ (* (- src-byte-shift) ,bitsize)))
+ (shift-towards-start (funcall src-ref-fn src src-word-offset)
+ (* src-byte-shift ,bitsize))))
+ ;; The destination starts further
+ ;; into the word than does the
+ ;; source, so we know the source
+ ;; cannot extend into a second
+ ;; word (or else the destination
+ ;; would too, and we wouldn't be
+ ;; in this branch).
+ (shift-towards-end
+ (funcall src-ref-fn src src-word-offset)
+ (* (- dst-byte-offset src-byte-offset) ,bitsize)))))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask))))))))))
+ ((= src-byte-offset dst-byte-offset)
+ ;; The source and destination are aligned, so shifting
+ ;; is unnecessary. But we have to pick the direction
+ ;; of the copy in case the source and destination are
+ ;; really the same object.
+ (multiple-value-bind (words final-bytes)
+ (floor (+ dst-byte-offset length) ,bytes-per-word)
+ (declare (type ,word-offset words)
+ (type ,byte-offset final-bytes))
+ (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
+ (declare (type ,word-offset interior))
+ (cond
+ ((<= dst-offset src-offset)
+ ;; We need to loop from left to right.
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop dst-byte-offset)
+ ;; We are only writing part of the first word, so mask
+ ;; off the bytes we want to preserve.
+ (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (funcall src-ref-fn src src-word-offset)))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask))))
+ (incf src-word-offset)
+ (incf dst-word-offset))))
+ ;; Copy the interior words.
+ (let ((end ,(if (= bytes-per-word 1)
+ `(truly-the ,word-offset
+ (+ dst-word-offset interior))
+ `(+ dst-word-offset interior))))
+ (declare (type ,word-offset end))
+ (do ()
+ ((>= dst-word-offset end))
+ (funcall dst-set-fn dst dst-word-offset
+ (funcall src-ref-fn src src-word-offset))
+ ,(if (= bytes-per-word 1)
+ `(setf src-word-offset (truly-the ,word-offset (+ src-word-offset 1)))
+ `(incf src-word-offset))
+ (incf dst-word-offset)))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop final-bytes)
+ ;; We are only writing part of the last word.
+ (let ((mask (start-mask (* final-bytes ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (funcall src-ref-fn src src-word-offset)))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask))))))))
+ (t
+ ;; We need to loop from right to left.
+ ,(if (= bytes-per-word 1)
+ `(setf dst-word-offset (truly-the ,word-offset
+ (+ dst-word-offset words)))
+ `(incf dst-word-offset words))
+ ,(if (= bytes-per-word 1)
+ `(setf src-word-offset (truly-the ,word-offset
+ (+ src-word-offset words)))
+ `(incf src-word-offset words))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop final-bytes)
+ (let ((mask (start-mask (* final-bytes ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (funcall src-ref-fn src src-word-offset)))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask)))))))
+ (let ((end (- dst-word-offset interior)))
+ (do ()
+ ((<= dst-word-offset end))
+ (decf src-word-offset)
+ (decf dst-word-offset)
+ (funcall dst-set-fn dst dst-word-offset
+ (funcall src-ref-fn src src-word-offset))))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop dst-byte-offset)
+ ;; We are only writing part of the last word.
+ (decf src-word-offset)
+ (decf dst-word-offset)
+ (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (funcall src-ref-fn src src-word-offset)))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask))))))))))))
+ (t
+ ;; Source and destination are not aligned.
+ (multiple-value-bind (words final-bytes)
+ (floor (+ dst-byte-offset length) ,bytes-per-word)
+ (declare (type ,word-offset words)
+ (type ,byte-offset final-bytes))
+ (let ((src-shift (mod (- src-byte-offset dst-byte-offset)
+ ,bytes-per-word))
+ (interior (floor (- length final-bytes) ,bytes-per-word)))
+ (declare (type ,word-offset interior)
+ (type ,byte-offset src-shift))
+ (cond
+ ((<= dst-offset src-offset)
+ ;; We need to loop from left to right.
+ (let ((prev 0)
+ (next (funcall src-ref-fn src src-word-offset)))
+ (declare (type word prev next))
+ (flet ((get-next-src ()
+ (setf prev next)
+ (setf next (funcall src-ref-fn src
+ (incf src-word-offset)))))
+ (declare (inline get-next-src))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop dst-byte-offset)
+ (when (> src-byte-offset dst-byte-offset)
+ (get-next-src))
+ (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (word-logical-or (shift-towards-start prev (* src-shift ,bitsize))
+ (shift-towards-end next (* (- src-shift) ,bitsize)))))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask))))
+ (incf dst-word-offset))))
+ (let ((end (+ dst-word-offset interior)))
+ (declare (type ,word-offset end))
+ (do ()
+ ((>= dst-word-offset end))
+ (get-next-src)
+ (let ((value (word-logical-or
+ (shift-towards-end next (* (- src-shift) ,bitsize))
+ (shift-towards-start prev (* src-shift ,bitsize)))))
+ (declare (type word value))
+ (funcall dst-set-fn dst dst-word-offset value)
+ (incf dst-word-offset))))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop final-bytes)
+ (let ((value
+ (if (> (+ final-bytes src-shift) ,bytes-per-word)
+ (progn
+ (get-next-src)
+ (word-logical-or
+ (shift-towards-end next (* (- src-shift) ,bitsize))
+ (shift-towards-start prev (* src-shift ,bitsize))))
+ (shift-towards-start next (* src-shift ,bitsize))))
+ (mask (start-mask (* final-bytes ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset)))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask))))))))))
+ (t
+ ;; We need to loop from right to left.
+ (incf dst-word-offset words)
+ (incf src-word-offset (1- (ceiling (+ src-byte-offset length) ,bytes-per-word)))
+ (let ((next 0)
+ (prev (funcall src-ref-fn src src-word-offset)))
+ (declare (type word prev next))
+ (flet ((get-next-src ()
+ (setf next prev)
+ (setf prev (funcall src-ref-fn src (decf src-word-offset)))))
+ (declare (inline get-next-src))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop final-bytes)
+ (when (> final-bytes (- ,bytes-per-word src-shift))
+ (get-next-src))
+ (let ((value (word-logical-or
+ (shift-towards-end next (* (- src-shift) ,bitsize))
+ (shift-towards-start prev (* src-shift ,bitsize))))
+ (mask (start-mask (* final-bytes ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset)))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask)))))))
+ (decf dst-word-offset)
+ (let ((end (- dst-word-offset interior)))
+ (do ()
+ ((<= dst-word-offset end))
+ (get-next-src)
+ (let ((value (word-logical-or
+ (shift-towards-end next (* (- src-shift) ,bitsize))
+ (shift-towards-start prev (* src-shift ,bitsize)))))
+ (declare (type word value))
+ (funcall dst-set-fn dst dst-word-offset value)
+ (decf dst-word-offset))))
+ ,@(unless (= bytes-per-word 1)
+ `((unless (zerop dst-byte-offset)
+ (if (> src-byte-offset dst-byte-offset)
+ (get-next-src)
+ (setf next prev prev 0))
+ (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
+ (orig (funcall dst-ref-fn dst dst-word-offset))
+ (value (word-logical-or
+ (shift-towards-start prev (* src-shift ,bitsize))
+ (shift-towards-end next (* (- src-shift) ,bitsize)))))
+ (declare (type word mask orig value))
+ (funcall dst-set-fn dst dst-word-offset
+ (word-logical-or (word-logical-and value mask)
+ (word-logical-andc2 orig mask)))))))))))))))))
+ (values))