From 338732358d49ab202fe55c3581294597d63aec6b Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Mon, 28 Mar 2005 18:54:50 +0000 Subject: [PATCH] 0.8.21.5: Enable faster REPLACE on declared specialized arrays. Overview of necessary machinery: * New %VECTOR-RAW-BITS and %SET-VECTOR-RAW-BITS functions/VOPs which automatically take into account VECTOR-DATA-OFFSET (eliminates tedium associated with previous bit-bashing code and makes things slightly faster). It's not clear if the old %RAW-BITS and %SET-RAW-BITS functions need to remain; * Generalize the old bit-bashing code to generate bit-bashers for differently sized "bytes" (1-bit, 2-bit, 4-bit, etc.); * Add REPLACE transforms for most specialized array types (those with elements not larger than the word size); * Replace various incantations of COPY-FROM-SYSTEM-AREA, COPY-TO-SYSTEM-AREA, BIT-BASH-COPY, etc. with their new width-aware equivalents (this accounts for the bulk of the changed files, if not the changed lines); * Add systematic tests for UB*-BASH-{FILL,COPY}; * Add generalized SUBSEQ and COPY-SEQ transforms while we're at it (FILL would be nice to have, but is a little bit trickier to do in the general case). These changes also open up the possibility of removing %BYTE-BLT from the sources. Benefits: decrease in the number of WITHOUT-GCING forms required, less calling out to C, more of the system in Lisp, etc. %BYTE-BLT remains in this version, but may be removed if there is sufficient support for its removal. --- NEWS | 3 + contrib/sb-md5/md5.lisp | 19 +- contrib/sb-simple-streams/internal.lisp | 6 +- package-data-list.lisp-expr | 38 +- src/code/alpha-vm.lisp | 4 +- src/code/bit-bash.lisp | 851 ++++++++++++++++--------------- src/code/debug-int.lisp | 2 +- src/code/defsetfs.lisp | 1 + src/code/fd-stream.lisp | 53 +- src/code/host-alieneval.lisp | 6 +- src/code/hppa-vm.lisp | 5 +- src/code/kernel.lisp | 11 +- src/code/mips-vm.lisp | 5 +- src/code/ppc-vm.lisp | 5 +- src/code/run-program.lisp | 20 +- src/code/sparc-vm.lisp | 5 +- src/code/stream.lisp | 68 +-- src/code/target-c-call.lisp | 5 +- src/code/x86-64-vm.lisp | 4 +- src/code/x86-vm.lisp | 4 +- src/compiler/alpha/array.lisp | 6 +- src/compiler/generic/target-core.lisp | 16 +- src/compiler/generic/vm-fndb.lisp | 53 +- src/compiler/hppa/array.lisp | 4 + src/compiler/hppa/insts.lisp | 6 +- src/compiler/mips/array.lisp | 4 + src/compiler/mips/insts.lisp | 6 +- src/compiler/ppc/array.lisp | 16 + src/compiler/ppc/insts.lisp | 6 +- src/compiler/seqtran.lisp | 150 +++--- src/compiler/sparc/array.lisp | 18 + src/compiler/sparc/insts.lisp | 6 +- src/compiler/x86-64/array.lisp | 4 + src/compiler/x86-64/insts.lisp | 6 +- src/compiler/x86/array.lisp | 4 + src/compiler/x86/insts.lisp | 6 +- tests/seq.impure.lisp | 96 ++++ version.lisp-expr | 2 +- 38 files changed, 870 insertions(+), 654 deletions(-) diff --git a/NEWS b/NEWS index ea7f290..e18ea00 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ changes in sbcl-0.8.22 relative to sbcl-0.8.21: * incompatible change: the --noprogrammer option, deprecated since version 0.7.5, has been removed. Please use the equivalent --disable-debugger option instead. + * optimization: REPLACE on declared (UNSIGNED-BYTE 8) vectors, as well + as other specialized array types, is much faster. SUBSEQ and + COPY-SEQ on such arrays have also been sped up. * fixed inference of the upper bound of an iteration variable. (reported by Rajat Datta). * fixed bug 376: CONJUGATE type deriver. diff --git a/contrib/sb-md5/md5.lisp b/contrib/sb-md5/md5.lisp index 12515fb..eb7c7d5 100644 --- a/contrib/sb-md5/md5.lisp +++ b/contrib/sb-md5/md5.lisp @@ -277,11 +277,7 @@ starting from offset into the given 16 word MD5 block." block (* vm:vector-data-offset vm:word-bits) (* 64 vm:byte-bits)) #+(and :sbcl :little-endian) - (sb-kernel:bit-bash-copy - buffer (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits) - (* offset sb-vm:n-byte-bits)) - block (* sb-vm:vector-data-offset sb-vm:n-word-bits) - (* 64 sb-vm:n-byte-bits)) + (sb-kernel:ub8-bash-copy buffer offset block 0 64) #-(or (and :sbcl :little-endian) (and :cmu :little-endian)) (loop for i of-type (integer 0 16) from 0 for j of-type (integer 0 #.most-positive-fixnum) @@ -306,11 +302,7 @@ offset into the given 16 word MD5 block." block (* vm:vector-data-offset vm:word-bits) (* 64 vm:byte-bits)) #+(and :sbcl :little-endian) - (sb-kernel:bit-bash-copy - buffer (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits) - (* offset sb-vm:n-byte-bits)) - block (* sb-vm:vector-data-offset sb-vm:n-word-bits) - (* 64 sb-vm:n-byte-bits)) + (sb-kernel:ub8-bash-copy buffer offset block 0 64) #-(or (and :sbcl :little-endian) (and :cmu :little-endian)) (loop for i of-type (integer 0 16) from 0 for j of-type (integer 0 #.most-positive-fixnum) @@ -380,12 +372,7 @@ starting at buffer-offset." (* buffer-offset vm:byte-bits)) (* count vm:byte-bits)) #+sbcl - (sb-kernel:bit-bash-copy - from (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits) - (* from-offset sb-vm:n-byte-bits)) - buffer (+ (* sb-vm:vector-data-offset sb-vm:n-word-bits) - (* buffer-offset sb-vm:n-byte-bits)) - (* count sb-vm:n-byte-bits)) + (sb-kernel:ub8-bash-copy from from-offset buffer buffer-offset count) #-(or cmu sbcl) (etypecase from (simple-string diff --git a/contrib/sb-simple-streams/internal.lisp b/contrib/sb-simple-streams/internal.lisp index e5e926e..a74aabb 100644 --- a/contrib/sb-simple-streams/internal.lisp +++ b/contrib/sb-simple-streams/internal.lisp @@ -43,9 +43,9 @@ (declare (type simple-stream-buffer src dst) (type fixnum soff doff length)) (sb-sys:without-gcing ;; is this necessary?? - (sb-kernel:system-area-copy (buffer-sap src) (* soff 8) - (buffer-sap dst) (* doff 8) - (* length 8)))) + (sb-kernel:system-area-ub8-copy (buffer-sap src) soff + (buffer-sap dst) doff + length))) (defun allocate-buffer (size) (if (= size sb-impl::bytes-per-buffer) diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 49b56e7..2f1ba5f 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1098,7 +1098,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%MAKE-RATIO" "%MAP" "%MAP-TO-SIMPLE-VECTOR-ARITY-1" "%MAP-TO-LIST-ARITY-1" "%MAP-TO-NIL-ON-SEQUENCE" "%MAP-TO-NIL-ON-SIMPLE-VECTOR" "%MAP-TO-NIL-ON-VECTOR" - "%MASK-FIELD" "%NEGATE" "%POW" "%PUTHASH" "%RAW-BITS" + "%MASK-FIELD" "%NEGATE" "%POW" "%PUTHASH" + "%RAW-BITS" "%VECTOR-RAW-BITS" "%RAW-REF-COMPLEX-DOUBLE" "%RAW-REF-COMPLEX-LONG" "%RAW-REF-COMPLEX-SINGLE" "%RAW-REF-DOUBLE" "%RAW-REF-LONG" "%RAW-REF-SINGLE" @@ -1106,7 +1107,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%RAW-SET-COMPLEX-SINGLE" "%RAW-SET-DOUBLE" "%RAW-SET-LONG" "%RAW-SET-SINGLE" "%SCALB" "%SCALBN" "%SET-ARRAY-DIMENSION" "%SET-FUNCALLABLE-INSTANCE-FUN" - "%SET-FUNCALLABLE-INSTANCE-INFO" "%SET-RAW-BITS" + "%SET-FUNCALLABLE-INSTANCE-INFO" + "%SET-RAW-BITS" "%SET-VECTOR-RAW-BITS" "%SET-SAP-REF-16" "%SET-SAP-REF-32" "%SET-SAP-REF-64" "%SET-SAP-REF-WORD" "%SET-SAP-REF-8" "%SET-SAP-REF-DOUBLE" "%SET-SAP-REF-LONG" "%SET-SAP-REF-SAP" @@ -1144,7 +1146,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "ASH-INDEX" "ASSERT-ERROR" #!+sb-unicode "BASE-CHAR-P" "BASE-STRING-P" - "BINDING-STACK-POINTER-SAP" "BIT-BASH-COPY" "BIT-INDEX" + "BINDING-STACK-POINTER-SAP" "BIT-INDEX" "BOGUS-ARG-TO-VALUES-LIST-ERROR" "BOOLE-CODE" "BOUNDING-INDICES-BAD-ERROR" "BYTE-SPECIFIER" "%BYTE-BLT" "CALLABLE" "CASE-BODY-ERROR" @@ -1166,8 +1168,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "CONS-TYPE-CDR-TYPE" "CONS-TYPE-P" "CONSED-SEQUENCE" "CONSTANT" "CONSTANT-TYPE" "CONSTANT-TYPE-P" "CONSTANT-TYPE-TYPE" "CONTAINING-INTEGER-TYPE" - "CONTROL-STACK-POINTER-SAP" "COPY-FROM-SYSTEM-AREA" - "COPY-TO-SYSTEM-AREA" "COPY-BYTE-VECTOR-TO-SYSTEM-AREA" + "CONTROL-STACK-POINTER-SAP" "COPY-BYTE-VECTOR-TO-SYSTEM-AREA" "CSUBTYPEP" "CTYPE" "TYPE-HASH-VALUE" "CTYPE-OF" "CTYPE-P" "CTYPEP" "CURRENT-FP" "CURRENT-SP" "CURRENT-DYNAMIC-SPACE-START" "DATA-VECTOR-REF" @@ -1389,7 +1390,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "STACK-REF" "STREAM-DESIGNATOR" "STRING-DESIGNATOR" "STRUCTURE-RAW-SLOT-TYPE-AND-SIZE" "SUB-GC" "SYMBOLS-DESIGNATOR" "%INSTANCE-LENGTH" "%INSTANCE-REF" - "%INSTANCE-SET" "SYSTEM-AREA-CLEAR" "SYSTEM-AREA-COPY" + "%INSTANCE-SET" "SYSTEM-AREA-CLEAR" "TWO-ARG-*" "TWO-ARG-+" "TWO-ARG--" "TWO-ARG-/" "TWO-ARG-/=" "TWO-ARG-<" "TWO-ARG-<=" "TWO-ARG-=" "TWO-ARG->" "TWO-ARG->=" "TWO-ARG-AND" "TWO-ARG-EQV" @@ -1417,6 +1418,31 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "VECTOR-OF-CHECKED-LENGTH-GIVEN-LENGTH" "WITH-ARRAY-DATA" "WRONG-NUMBER-OF-INDICES-ERROR" + ;; bit bash fillers (FIXME: 32/64-bit issues) + "UB1-BASH-FILL" "SYSTEM-AREA-UB1-FILL" + "UB2-BASH-FILL" "SYSTEM-AREA-UB2-FILL" + "UB4-BASH-FILL" "SYSTEM-AREA-UB4-FILL" + "UB8-BASH-FILL" "SYSTEM-AREA-UB8-FILL" + "UB16-BASH-FILL" "SYSTEM-AREA-UB16-FILL" + "UB32-BASH-FILL" "SYSTEM-AREA-UB32-FILL" + "UB64-BASH-FILL" "SYSTEM-AREA-UB64-FILL" + + ;; bit bash copiers (FIXME: 32/64-bit issues) + "UB1-BASH-COPY" "SYSTEM-AREA-UB1-COPY" + "COPY-UB1-TO-SYSTEM-AREA" "COPY-UB1-FROM-SYSTEM-AREA" + "UB2-BASH-COPY" "SYSTEM-AREA-UB2-COPY" + "COPY-UB2-TO-SYSTEM-AREA" "COPY-UB2-FROM-SYSTEM-AREA" + "UB4-BASH-COPY" "SYSTEM-AREA-UB4-COPY" + "COPY-UB4-TO-SYSTEM-AREA" "COPY-UB4-FROM-SYSTEM-AREA" + "UB8-BASH-COPY" "SYSTEM-AREA-UB8-COPY" + "COPY-UB8-TO-SYSTEM-AREA" "COPY-UB8-FROM-SYSTEM-AREA" + "UB16-BASH-COPY" "SYSTEM-AREA-UB16-COPY" + "COPY-UB16-TO-SYSTEM-AREA" "COPY-UB16-FROM-SYSTEM-AREA" + "UB32-BASH-COPY" "SYSTEM-AREA-UB32-COPY" + "COPY-UB32-TO-SYSTEM-AREA" "COPY-UB32-FROM-SYSTEM-AREA" + "UB64-BASH-COPY" "SYSTEM-AREA-UB64-COPY" + "COPY-UB64-TO-SYSTEM-AREA" "COPY-UB64-FROM-SYSTEM-AREA" + "FDEFN" "MAKE-FDEFN" "FDEFN-P" "FDEFN-NAME" "FDEFN-FUN" "FDEFN-MAKUNBOUND" "OUTER-FDEFN" "%COERCE-CALLABLE-TO-FUN" "FUN-SUBTYPE" diff --git a/src/code/alpha-vm.lisp b/src/code/alpha-vm.lisp index fff421e..3b421b8 100644 --- a/src/code/alpha-vm.lisp +++ b/src/code/alpha-vm.lisp @@ -157,9 +157,7 @@ (vector (make-array length :element-type '(unsigned-byte 8)))) (declare (type (unsigned-byte 8) length) (type (simple-array (unsigned-byte 8) (*)) vector)) - (copy-from-system-area pc (* n-byte-bits 5) - vector (* n-word-bits vector-data-offset) - (* length n-byte-bits)) + (copy-ub8-from-system-area pc 5 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (collect ((sc-offsets)) diff --git a/src/code/bit-bash.lisp b/src/code/bit-bash.lisp index 0ca7ed6..c2fb216 100644 --- a/src/code/bit-bash.lisp +++ b/src/code/bit-bash.lisp @@ -11,29 +11,10 @@ (in-package "SB!VM") -;;;; constants and types +;;;; types -;;; the number of bits to process at a time -(defconstant unit-bits n-word-bits) +(deftype bit-offset () '(integer 0 (#.sb!vm:n-word-bits))) -;;; the maximum number of bits that can be dealt with in a single call -(defconstant max-bits (ash sb!xc:most-positive-fixnum -2)) - -(deftype unit () - `(unsigned-byte ,unit-bits)) - -(deftype offset () - `(integer 0 ,max-bits)) - -(deftype bit-offset () - `(integer 0 (,unit-bits))) - -(deftype bit-count () - `(integer 1 (,unit-bits))) - -(deftype word-offset () - `(integer 0 (,(ceiling max-bits unit-bits)))) - ;;;; support routines ;;; A particular implementation must offer either VOPs to translate @@ -59,14 +40,14 @@ ;;; machines this is a left-shift and on little-endian machines this ;;; is a right-shift. (defun shift-towards-start (number countoid) - (declare (type unit number) (fixnum countoid)) - (let ((count (ldb (byte (1- (integer-length unit-bits)) 0) countoid))) + (declare (type sb!vm:word number) (fixnum countoid)) + (let ((count (ldb (byte (1- (integer-length sb!vm:n-word-bits)) 0) countoid))) (declare (type bit-offset count)) (if (zerop count) number (ecase sb!c:*backend-byte-order* (:big-endian - (ash (ldb (byte (- unit-bits count) 0) number) count)) + (ash (ldb (byte (- sb!vm:n-word-bits count) 0) number) count)) (:little-endian (ash number (- count))))))) @@ -74,8 +55,8 @@ ;;; removing bits from the "end". On big-endian machines this is a ;;; right-shift and on little-endian machines this is a left-shift. (defun shift-towards-end (number count) - (declare (type unit number) (fixnum count)) - (let ((count (ldb (byte (1- (integer-length unit-bits)) 0) count))) + (declare (type sb!vm:word number) (fixnum count)) + (let ((count (ldb (byte (1- (integer-length sb!vm:n-word-bits)) 0) count))) (declare (type bit-offset count)) (if (zerop count) number @@ -83,9 +64,9 @@ (:big-endian (ash number (- count))) (:little-endian - (ash (ldb (byte (- unit-bits count) 0) number) count)))))) + (ash (ldb (byte (- sb!vm:n-word-bits count) 0) number) count)))))) -#!-sb-fluid (declaim (inline start-mask end-mask fix-sap-and-offset)) +#!-sb-fluid (declaim (inline start-mask end-mask)) ;;; Produce a mask that contains 1's for the COUNT "start" bits and ;;; 0's for the remaining "end" bits. Only the lower 5 bits of COUNT @@ -93,7 +74,7 @@ ;;; on 32-bit word size -- WHN 2001-03-19). (defun start-mask (count) (declare (fixnum count)) - (shift-towards-start (1- (ash 1 unit-bits)) (- count))) + (shift-towards-start (1- (ash 1 sb!vm:n-word-bits)) (- count))) ;;; Produce a mask that contains 1's for the COUNT "end" bits and 0's ;;; for the remaining "start" bits. Only the lower 5 bits of COUNT are @@ -101,19 +82,7 @@ ;;; 32-bit word size -- WHN 2001-03-19). (defun end-mask (count) (declare (fixnum count)) - (shift-towards-end (1- (ash 1 unit-bits)) (- count))) - -;;; Align the SAP to a word boundary, and update the offset accordingly. -(defun fix-sap-and-offset (sap offset) - (declare (type system-area-pointer sap) - (type index offset) - (values system-area-pointer index)) - (let ((address (sap-int sap))) - (values (int-sap #!-alpha (word-logical-andc2 address - sb!vm::fixnum-tag-mask) - #!+alpha (ash (ash address -2) 2)) - (+ (* (logand address sb!vm::fixnum-tag-mask) n-byte-bits) - offset)))) + (shift-towards-end (1- (ash 1 sb!vm:n-word-bits)) (- count))) #!-sb-fluid (declaim (inline word-sap-ref %set-word-sap-ref)) (defun word-sap-ref (sap offset) @@ -121,383 +90,449 @@ (type index offset) (values sb!vm:word) (optimize (speed 3) (safety 0) #-sb-xc-host (inhibit-warnings 3))) - (sap-ref-word sap (the index (ash offset sb!vm::n-fixnum-tag-bits)))) + (sap-ref-word sap (the index (ash offset sb!vm:n-fixnum-tag-bits)))) (defun %set-word-sap-ref (sap offset value) (declare (type system-area-pointer sap) (type index offset) (type sb!vm:word value) (values sb!vm:word) (optimize (speed 3) (safety 0) (inhibit-warnings 3))) - (setf (sap-ref-word sap (the index (ash offset sb!vm::n-fixnum-tag-bits))) + (setf (sap-ref-word sap (the index (ash offset sb!vm:n-fixnum-tag-bits))) value)) - -;;;; CONSTANT-BIT-BASH -;;; Fill DST with VALUE starting at DST-OFFSET and continuing for -;;; LENGTH bits. -#!-sb-fluid (declaim (inline constant-bit-bash)) -(defun constant-bit-bash (dst dst-offset length value dst-ref-fn dst-set-fn) - (declare (type offset dst-offset) (type unit value) - (type function dst-ref-fn dst-set-fn)) - (multiple-value-bind (dst-word-offset dst-bit-offset) - (floor dst-offset unit-bits) - (declare (type word-offset dst-word-offset) - (type bit-offset dst-bit-offset)) - (multiple-value-bind (words final-bits) - (floor (+ dst-bit-offset length) unit-bits) - (declare (type word-offset words) (type bit-offset final-bits)) - (if (zerop words) - (unless (zerop length) - (funcall dst-set-fn dst dst-word-offset - (if (= length unit-bits) - value - (let ((mask (shift-towards-end (start-mask length) - dst-bit-offset))) - (declare (type unit mask)) - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 - (funcall dst-ref-fn dst dst-word-offset) - mask)))))) - (let ((interior (floor (- length final-bits) unit-bits))) - (unless (zerop dst-bit-offset) - (let ((mask (end-mask (- dst-bit-offset)))) - (declare (type unit mask)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 - (funcall dst-ref-fn dst dst-word-offset) - mask)))) - (incf dst-word-offset)) - (dotimes (i interior) - (funcall dst-set-fn dst dst-word-offset value) - (incf dst-word-offset)) - (unless (zerop final-bits) - (let ((mask (start-mask final-bits))) - (declare (type unit mask)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 - (funcall dst-ref-fn dst dst-word-offset) - mask))))))))) - (values)) -;;;; UNARY-BIT-BASH +;;; the actual bashers and common uses of same -#!-sb-fluid (declaim (inline unary-bit-bash)) -(defun unary-bit-bash (src src-offset dst dst-offset length - dst-ref-fn dst-set-fn src-ref-fn) - ;; FIXME: Declaring these bit indices to be of type OFFSET, then - ;; using the inline expansion in SPEED 3 SAFETY 0 functions, is not - ;; a good thing. At the very least, we should make sure that the - ;; type (overflow) checks get done. Better would be to avoid - ;; using bit indices, and to use 32-bit unsigneds instead, and/or - ;; to call out to things like memmove(3) for big moves. - (declare (type offset src-offset dst-offset length) - (type function dst-ref-fn dst-set-fn src-ref-fn)) - (multiple-value-bind (dst-word-offset dst-bit-offset) - (floor dst-offset unit-bits) - (declare (type word-offset dst-word-offset) - (type bit-offset dst-bit-offset)) - (multiple-value-bind (src-word-offset src-bit-offset) - (floor src-offset unit-bits) - (declare (type word-offset src-word-offset) - (type bit-offset src-bit-offset)) - (cond - ((<= (+ dst-bit-offset length) unit-bits) - ;; We are only writing one word, so it doesn't matter what - ;; order we do it in. But we might be reading from multiple - ;; words, so take care. - (cond - ((zerop length) - ;; Actually, we aren't even writing one word. This is really easy. - ) - ((= length unit-bits) - ;; DST-BIT-OFFSET must be equal to zero, or we would be - ;; writing multiple words. If SRC-BIT-OFFSET is also zero, - ;; then we just transfer the single word. Otherwise we have - ;; to extract bits from two src words. - (funcall dst-set-fn dst dst-word-offset - (if (zerop src-bit-offset) - (funcall src-ref-fn src src-word-offset) - (word-logical-or - (shift-towards-start - (funcall src-ref-fn src src-word-offset) - src-bit-offset) - (shift-towards-end - (funcall src-ref-fn src (1+ src-word-offset)) - (- src-bit-offset)))))) - (t - ;; We are only writing some portion of the dst word, so we - ;; need to preserve the extra bits. Also, we still don't - ;; know whether we need one or two source words. - (let ((mask (shift-towards-end (start-mask length) dst-bit-offset)) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value - (if (> src-bit-offset dst-bit-offset) - ;; The source starts further into the word than - ;; does the dst, so the source could extend into - ;; the next word. If it does, we have to merge - ;; the two words, and if not, we can just shift - ;; the first word. - (let ((src-bit-shift (- src-bit-offset dst-bit-offset))) - (if (> (+ src-bit-offset length) unit-bits) - (word-logical-or - (shift-towards-start - (funcall src-ref-fn src src-word-offset) - src-bit-shift) - (shift-towards-end - (funcall src-ref-fn src (1+ src-word-offset)) - (- src-bit-shift))) - (shift-towards-start - (funcall src-ref-fn src src-word-offset) - src-bit-shift))) - ;; The dst starts further into the word than does - ;; the source, so we know the source can not - ;; extend into a second word (or else the dst - ;; would too, and we wouldn't be in this branch. - (shift-towards-end - (funcall src-ref-fn src src-word-offset) - (- dst-bit-offset src-bit-offset))))) - (declare (type unit mask orig value)) - ;; Replace the dst word. - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask))))))) - ((= src-bit-offset dst-bit-offset) - ;; The source and dst are aligned, so we don't need to shift - ;; anything. But we have to pick the direction of the loop in - ;; case the source and dst are really the same thing. - (multiple-value-bind (words final-bits) - (floor (+ dst-bit-offset length) unit-bits) - (declare (type word-offset words) (type bit-offset final-bits)) - (let ((interior (floor (- length final-bits) unit-bits))) - (declare (type word-offset interior)) - (cond - ((<= dst-offset src-offset) - ;; We need to loop from left to right - (unless (zerop dst-bit-offset) - ;; We are only writing part of the first word, so mask - ;; off the bits we want to preserve. - (let ((mask (end-mask (- dst-bit-offset))) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value (funcall src-ref-fn src src-word-offset))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or (word-logical-and value mask) - (word-logical-andc2 orig mask)))) - (incf src-word-offset) - (incf dst-word-offset)) - ;; Just copy the interior words. - (dotimes (i interior) - (funcall dst-set-fn dst dst-word-offset - (funcall src-ref-fn src src-word-offset)) - (incf src-word-offset) - (incf dst-word-offset)) - (unless (zerop final-bits) - ;; We are only writing part of the last word. - (let ((mask (start-mask final-bits)) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value (funcall src-ref-fn src src-word-offset))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask)))))) - (t - ;; We need to loop from right to left. - (incf dst-word-offset words) - (incf src-word-offset words) - (unless (zerop final-bits) - (let ((mask (start-mask final-bits)) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value (funcall src-ref-fn src src-word-offset))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask))))) - (dotimes (i interior) - (decf src-word-offset) - (decf dst-word-offset) - (funcall dst-set-fn dst dst-word-offset - (funcall src-ref-fn src src-word-offset))) - (unless (zerop dst-bit-offset) - (decf src-word-offset) - (decf dst-word-offset) - (let ((mask (end-mask (- dst-bit-offset))) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value (funcall src-ref-fn src src-word-offset))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask)))))))))) - (t - ;; They aren't aligned. - (multiple-value-bind (words final-bits) - (floor (+ dst-bit-offset length) unit-bits) - (declare (type word-offset words) (type bit-offset final-bits)) - (let ((src-shift (mod (- src-bit-offset dst-bit-offset) unit-bits)) - (interior (floor (- length final-bits) unit-bits))) - (declare (type bit-offset src-shift) - (type word-offset interior)) - (cond - ((<= dst-offset src-offset) - ;; We need to loop from left to right - (let ((prev 0) - (next (funcall src-ref-fn src src-word-offset))) - (declare (type unit prev next)) - (flet ((get-next-src () - (setf prev next) - (setf next (funcall src-ref-fn src - (incf src-word-offset))))) - (declare (inline get-next-src)) - (unless (zerop dst-bit-offset) - (when (> src-bit-offset dst-bit-offset) - (get-next-src)) - (let ((mask (end-mask (- dst-bit-offset))) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value (word-logical-or - (shift-towards-start prev src-shift) - (shift-towards-end next (- src-shift))))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask))) - (incf dst-word-offset))) - (dotimes (i interior) - (get-next-src) - (let ((value (word-logical-or - (shift-towards-end next (- src-shift)) - (shift-towards-start prev src-shift)))) - (declare (type unit value)) - (funcall dst-set-fn dst dst-word-offset value) - (incf dst-word-offset))) - (unless (zerop final-bits) - (let ((value - (if (> (+ final-bits src-shift) unit-bits) - (progn - (get-next-src) - (word-logical-or - (shift-towards-end next (- src-shift)) - (shift-towards-start prev src-shift))) - (shift-towards-start next src-shift))) - (mask (start-mask final-bits)) - (orig (funcall dst-ref-fn dst dst-word-offset))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask)))))))) - (t - ;; We need to loop from right to left. - (incf dst-word-offset words) - (incf src-word-offset - (1- (ceiling (+ src-bit-offset length) unit-bits))) - (let ((next 0) - (prev (funcall src-ref-fn src src-word-offset))) - (declare (type unit prev next)) - (flet ((get-next-src () - (setf next prev) - (setf prev (funcall src-ref-fn src - (decf src-word-offset))))) - (declare (inline get-next-src)) - (unless (zerop final-bits) - (when (> final-bits (- unit-bits src-shift)) - (get-next-src)) - (let ((value (word-logical-or - (shift-towards-end next (- src-shift)) - (shift-towards-start prev src-shift))) - (mask (start-mask final-bits)) - (orig (funcall dst-ref-fn dst dst-word-offset))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask))))) - (decf dst-word-offset) - (dotimes (i interior) - (get-next-src) - (let ((value (word-logical-or - (shift-towards-end next (- src-shift)) - (shift-towards-start prev src-shift)))) - (declare (type unit value)) - (funcall dst-set-fn dst dst-word-offset value) - (decf dst-word-offset))) - (unless (zerop dst-bit-offset) - (if (> src-bit-offset dst-bit-offset) - (get-next-src) - (setf next prev prev 0)) - (let ((mask (end-mask (- dst-bit-offset))) - (orig (funcall dst-ref-fn dst dst-word-offset)) - (value (word-logical-or - (shift-towards-start prev src-shift) - (shift-towards-end next (- src-shift))))) - (declare (type unit mask orig value)) - (funcall dst-set-fn dst dst-word-offset - (word-logical-or - (word-logical-and value mask) - (word-logical-andc2 orig mask))))))))))))))) - (values)) - -;;;; the actual bashers +;;; This is a little ugly. Fixing bug 188 would bring the ability to +;;; wrap a MACROLET or something similar around this whole thing would +;;; make things significantly less ugly. --njf, 2005-02-23 +(eval-when (:compile-toplevel :load-toplevel :execute) -(defun bit-bash-fill (value dst dst-offset length) - (declare (type unit value) (type offset dst-offset length)) - (locally - (declare (optimize (speed 3) (safety 0))) - (constant-bit-bash dst dst-offset length value - #'%raw-bits #'%set-raw-bits))) +;;; Align the SAP to a word boundary, and update the offset accordingly. +(defmacro !define-sap-fixer (bitsize) + (let ((name (intern (format nil "FIX-SAP-AND-OFFSET-UB~A" bitsize)))) + `(progn + (declaim (inline ,name)) + (defun ,name (sap offset) + (declare (type system-area-pointer sap) + (type index offset) + (values system-area-pointer index)) + (let ((address (sap-int sap))) + (values (int-sap #!-alpha (word-logical-andc2 address + sb!vm:fixnum-tag-mask) + #!+alpha (ash (ash address -2) 2)) + (+ ,(ecase bitsize + (1 '(* (logand address sb!vm:fixnum-tag-mask) n-byte-bits)) + (2 '(* (logand address sb!vm:fixnum-tag-mask) (/ n-byte-bits 2))) + (4 '(* (logand address sb!vm:fixnum-tag-mask) (/ n-byte-bits 4))) + ((8 16 32 64) '(logand address sb!vm:fixnum-tag-mask))) + offset))))))) + +(defmacro !define-byte-bashers (bitsize) + (let* ((bytes-per-word (/ n-word-bits bitsize)) + (byte-offset `(integer 0 (,bytes-per-word))) + (byte-count `(integer 1 (,bytes-per-word))) + (max-bytes (ash most-positive-fixnum + ;; FIXME: this reflects code contained in the + ;; original bit-bash.lisp, but seems very + ;; nonsensical. Why shouldn't we be able to + ;; handle M-P-FIXNUM bits? And if we can't, + ;; are these other shift amounts bogus, too? + (ecase bitsize + (1 -2) + (2 -1) + (4 0) + (8 0) + (16 0) + (32 0)))) + (offset `(integer 0 ,max-bytes)) + (max-word-offset (ceiling max-bytes bytes-per-word)) + (word-offset `(integer 0 ,max-word-offset)) + (fix-sap-and-offset-name (intern (format nil "FIX-SAP-AND-OFFSET-UB~A" bitsize))) + (constant-bash-name (intern (format nil "CONSTANT-UB~A-BASH" bitsize) (find-package "SB!KERNEL"))) + (array-fill-name (intern (format nil "UB~A-BASH-FILL" bitsize) (find-package "SB!KERNEL"))) + (system-area-fill-name (intern (format nil "SYSTEM-AREA-UB~A-FILL" bitsize) (find-package "SB!KERNEL"))) + (unary-bash-name (intern (format nil "UNARY-UB~A-BASH" bitsize) (find-package "SB!KERNEL"))) + (array-copy-name (intern (format nil "UB~A-BASH-COPY" bitsize) (find-package "SB!KERNEL"))) + (system-area-copy-name (intern (format nil "SYSTEM-AREA-UB~A-COPY" bitsize) (find-package "SB!KERNEL"))) + (array-copy-to-system-area-name + (intern (format nil "COPY-UB~A-TO-SYSTEM-AREA" bitsize) (find-package "SB!KERNEL"))) + (system-area-copy-to-array-name + (intern (format nil "COPY-UB~A-FROM-SYSTEM-AREA" bitsize) + (find-package "SB!KERNEL")))) + `(progn + (declaim (inline ,constant-bash-name ,unary-bash-name)) + ;; Fill DST with VALUE starting at DST-OFFSET and continuing + ;; for LENGTH bytes (however bytes are defined). + (defun ,constant-bash-name (dst dst-offset length value + dst-ref-fn dst-set-fn) + (declare (type word value) (type index dst-offset length)) + (declare (ignorable dst-ref-fn)) + (multiple-value-bind (dst-word-offset dst-byte-offset) + (floor dst-offset ,bytes-per-word) + (declare (type ,word-offset dst-word-offset) + (type ,byte-offset dst-byte-offset)) + (multiple-value-bind (n-words final-bytes) + (floor (+ dst-byte-offset length) ,bytes-per-word) + (declare (type ,word-offset n-words) + (type ,byte-offset final-bytes)) + (if (zerop n-words) + ,(unless (= bytes-per-word 1) + `(unless (zerop length) + (locally (declare (type ,byte-count length)) + (funcall dst-set-fn dst dst-word-offset + (if (= length ,bytes-per-word) + value + (let ((mask (shift-towards-end + (start-mask (* length ,bitsize)) + (* dst-byte-offset ,bitsize)))) + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset) + mask)))))))) + (let ((interior (floor (- length final-bytes) ,bytes-per-word))) + ,@(unless (= bytes-per-word 1) + `((unless (zerop dst-byte-offset) + (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset) + mask)))) + (incf dst-word-offset)))) + (dotimes (i interior) + (funcall dst-set-fn dst dst-word-offset value) + (incf dst-word-offset)) + ,@(unless (= bytes-per-word 1) + `((unless (zerop final-bytes) + (let ((mask (start-mask (* final-bytes ,bitsize)))) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 (funcall dst-ref-fn dst dst-word-offset) + mask))))))))))) + (values)) -(defun system-area-fill (value dst dst-offset length) - (declare (type unit value) (type offset dst-offset length)) - (locally - (declare (optimize (speed 3) (safety 0))) - (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset) - (constant-bit-bash dst dst-offset length value - #'word-sap-ref #'%set-word-sap-ref)))) + ;; common uses for constant-byte-bashing + (defun ,array-fill-name (value dst dst-offset length) + (declare (type word value) (type ,offset dst-offset length)) + (declare (optimize (speed 3) (safety 1))) + (,constant-bash-name dst dst-offset length value + #'%vector-raw-bits #'%set-vector-raw-bits)) + (defun ,system-area-fill-name (value dst dst-offset length) + (declare (type word value) (type ,offset dst-offset length)) + (declare (optimize (speed 3) (safety 1))) + (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset) + (,constant-bash-name dst dst-offset length value + #'word-sap-ref #'%set-word-sap-ref))) -(defun bit-bash-copy (src src-offset dst dst-offset length) - (declare (type offset src-offset dst-offset length)) - (locally - (declare (optimize (speed 3) (safety 0)) - (inline unary-bit-bash)) - (unary-bit-bash src src-offset dst dst-offset length - #'%raw-bits #'%set-raw-bits #'%raw-bits))) + ;; unary byte bashing (copying) + (defun ,unary-bash-name (src src-offset dst dst-offset length + dst-ref-fn dst-set-fn src-ref-fn) + (declare (type index src-offset dst-offset length) + (type function dst-ref-fn dst-set-fn src-ref-fn) + (ignorable dst-ref-fn)) + (multiple-value-bind (dst-word-offset dst-byte-offset) + (floor dst-offset ,bytes-per-word) + (declare (type ,word-offset dst-word-offset) + (type ,byte-offset dst-byte-offset)) + (multiple-value-bind (src-word-offset src-byte-offset) + (floor src-offset ,bytes-per-word) + (declare (type ,word-offset src-word-offset) + (type ,byte-offset src-byte-offset)) + (cond + ((<= (+ dst-byte-offset length) ,bytes-per-word) + ;; We are only writing one word, so it doesn't matter what + ;; order we do it in. But we might be reading from + ;; multiple words, so take care. + (cond + ((zerop length) + ;; We're not writing anything. This is really easy. + ) + ((= length ,bytes-per-word) + ;; DST-BYTE-OFFSET must be equal to zero, or we would be + ;; writing multiple words. If SRC-BYTE-OFFSET is also zero, + ;; the we just transfer the single word. Otherwise we have + ;; to extract bytes from two source words. + (funcall dst-set-fn dst dst-word-offset + (cond + ((zerop src-byte-offset) + (funcall src-ref-fn src src-word-offset)) + ,@(unless (= bytes-per-word 1) + `((t (word-logical-or (shift-towards-start + (funcall src-ref-fn src src-word-offset) + (* src-byte-offset ,bitsize)) + (shift-towards-end + (funcall src-ref-fn src (1+ src-word-offset)) + (* (- src-byte-offset) ,bitsize))))))))) + ,@(unless (= bytes-per-word 1) + `((t + ;; We are only writing some portion of the destination word. + ;; We still don't know whether we need one or two source words. + (locally (declare (type ,byte-count length)) + (let ((mask (shift-towards-end (start-mask (* length ,bitsize)) + (* dst-byte-offset ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (if (> src-byte-offset dst-byte-offset) + ;; The source starts further + ;; into the word than does the + ;; destination, so the source + ;; could extend into the next + ;; word. If it does, we have + ;; to merge the two words, and + ;; it not, we can just shift + ;; the first word. + (let ((src-byte-shift (- src-byte-offset + dst-byte-offset))) + (if (> (+ src-byte-offset length) ,bytes-per-word) + (word-logical-or + (shift-towards-start + (funcall src-ref-fn src src-word-offset) + (* src-byte-shift ,bitsize)) + (shift-towards-end + (funcall src-ref-fn src (1+ src-word-offset)) + (* (- src-byte-shift) ,bitsize))) + (shift-towards-start (funcall src-ref-fn src src-word-offset) + (* src-byte-shift ,bitsize)))) + ;; The destination starts further + ;; into the word than does the + ;; source, so we know the source + ;; cannot extend into a second + ;; word (or else the destination + ;; would too, and we wouldn't be + ;; in this branch). + (shift-towards-end + (funcall src-ref-fn src src-word-offset) + (* (- dst-byte-offset src-byte-offset) ,bitsize))))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask)))))))))) + ((= src-byte-offset dst-byte-offset) + ;; The source and destination are aligned, so shifting + ;; is unnecessary. But we have to pick the direction + ;; of the copy in case the source and destination are + ;; really the same object. + (multiple-value-bind (words final-bytes) + (floor (+ dst-byte-offset length) ,bytes-per-word) + (declare (type ,word-offset words) + (type ,byte-offset final-bytes)) + (let ((interior (floor (- length final-bytes) ,bytes-per-word))) + (declare (type ,word-offset interior)) + (cond + ((<= dst-offset src-offset) + ;; We need to loop from left to right. + ,@(unless (= bytes-per-word 1) + `((unless (zerop dst-byte-offset) + ;; We are only writing part of the first word, so mask + ;; off the bytes we want to preserve. + (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (funcall src-ref-fn src src-word-offset))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask)))) + (incf src-word-offset) + (incf dst-word-offset)))) + ;; Copy the interior words. + (dotimes (i interior) + (funcall dst-set-fn dst dst-word-offset (funcall src-ref-fn src src-word-offset)) + (incf src-word-offset) + (incf dst-word-offset)) + ,@(unless (= bytes-per-word 1) + `((unless (zerop final-bytes) + ;; We are only writing part of the last word. + (let ((mask (start-mask (* final-bytes ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (funcall src-ref-fn src src-word-offset))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask)))))))) + (t + ;; We need to loop from right to left. + (incf dst-word-offset words) + (incf src-word-offset words) + ,@(unless (= bytes-per-word 1) + `((unless (zerop final-bytes) + (let ((mask (start-mask (* final-bytes ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (funcall src-ref-fn src src-word-offset))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask))))))) + (dotimes (i interior) + (decf src-word-offset) + (decf dst-word-offset) + (funcall dst-set-fn dst dst-word-offset (funcall src-ref-fn src src-word-offset))) + ,@(unless (= bytes-per-word 1) + `((unless (zerop dst-byte-offset) + ;; We are only writing part of the last word. + (decf src-word-offset) + (decf dst-word-offset) + (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (funcall src-ref-fn src src-word-offset))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask)))))))))))) + (t + ;; Source and destination are not aligned. + (multiple-value-bind (words final-bytes) + (floor (+ dst-byte-offset length) ,bytes-per-word) + (declare (type ,word-offset words) + (type ,byte-offset final-bytes)) + (let ((src-shift (mod (- src-byte-offset dst-byte-offset) + ,bytes-per-word)) + (interior (floor (- length final-bytes) ,bytes-per-word))) + (declare (type ,word-offset interior) + (type ,byte-offset src-shift)) + (cond + ((<= dst-offset src-offset) + ;; We need to loop from left to right. + (let ((prev 0) + (next (funcall src-ref-fn src src-word-offset))) + (declare (type word prev next)) + (flet ((get-next-src () + (setf prev next) + (setf next (funcall src-ref-fn src (incf src-word-offset))))) + (declare (inline get-next-src)) + ,@(unless (= bytes-per-word 1) + `((unless (zerop dst-byte-offset) + (when (> src-byte-offset dst-byte-offset) + (get-next-src)) + (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (word-logical-or (shift-towards-start prev (* src-shift ,bitsize)) + (shift-towards-end next (* (- src-shift) ,bitsize))))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask)))) + (incf dst-word-offset)))) + (dotimes (i interior) + (get-next-src) + (let ((value (word-logical-or + (shift-towards-end next (* (- src-shift) ,bitsize)) + (shift-towards-start prev (* src-shift ,bitsize))))) + (declare (type word value)) + (funcall dst-set-fn dst dst-word-offset value) + (incf dst-word-offset))) + ,@(unless (= bytes-per-word 1) + `((unless (zerop final-bytes) + (let ((value + (if (> (+ final-bytes src-shift) ,bytes-per-word) + (progn + (get-next-src) + (word-logical-or + (shift-towards-end next (* (- src-shift) ,bitsize)) + (shift-towards-start prev (* src-shift ,bitsize)))) + (shift-towards-start next (* src-shift ,bitsize)))) + (mask (start-mask (* final-bytes ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask)))))))))) + (t + ;; We need to loop from right to left. + (incf dst-word-offset words) + (incf src-word-offset + (1- (ceiling (+ src-byte-offset length) ,bytes-per-word))) + (let ((next 0) + (prev (funcall src-ref-fn src src-word-offset))) + (declare (type word prev next)) + (flet ((get-next-src () + (setf next prev) + (setf prev (funcall src-ref-fn src (decf src-word-offset))))) + (declare (inline get-next-src)) + ,@(unless (= bytes-per-word 1) + `((unless (zerop final-bytes) + (when (> final-bytes (- ,bytes-per-word src-shift)) + (get-next-src)) + (let ((value (word-logical-or + (shift-towards-end next (* (- src-shift) ,bitsize)) + (shift-towards-start prev (* src-shift ,bitsize)))) + (mask (start-mask (* final-bytes ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask))))))) + (decf dst-word-offset) + (dotimes (i interior) + (get-next-src) + (let ((value (word-logical-or + (shift-towards-end next (* (- src-shift) ,bitsize)) + (shift-towards-start prev (* src-shift ,bitsize))))) + (declare (type word value)) + (funcall dst-set-fn dst dst-word-offset value) + (decf dst-word-offset))) + ,@(unless (= bytes-per-word 1) + `((unless (zerop dst-byte-offset) + (if (> src-byte-offset dst-byte-offset) + (get-next-src) + (setf next prev prev 0)) + (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))) + (orig (funcall dst-ref-fn dst dst-word-offset)) + (value (word-logical-or + (shift-towards-start prev (* src-shift ,bitsize)) + (shift-towards-end next (* (- src-shift) ,bitsize))))) + (declare (type word mask orig value)) + (funcall dst-set-fn dst dst-word-offset + (word-logical-or (word-logical-and value mask) + (word-logical-andc2 orig mask))))))))))))))))) + (values)) -(defun system-area-copy (src src-offset dst dst-offset length) - (declare (type offset src-offset dst-offset length)) - (locally - (declare (optimize (speed 3) (safety 0))) - (multiple-value-bind (src src-offset) (fix-sap-and-offset src src-offset) - (declare (type system-area-pointer src)) - (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset) - (declare (type system-area-pointer dst)) - (unary-bit-bash src src-offset dst dst-offset length - #'word-sap-ref #'%set-word-sap-ref - #'word-sap-ref))))) + ;; common uses for unary-byte-bashing + (defun ,array-copy-name (src src-offset dst dst-offset length) + (declare (type ,offset src-offset dst-offset length)) + (locally (declare (optimize (speed 3) (safety 1))) + (,unary-bash-name src src-offset dst dst-offset length + #'%vector-raw-bits + #'%set-vector-raw-bits + #'%vector-raw-bits))) -(defun copy-to-system-area (src src-offset dst dst-offset length) - (declare (type offset src-offset dst-offset length)) - (locally - (declare (optimize (speed 3) (safety 0))) - (multiple-value-bind (dst dst-offset) (fix-sap-and-offset dst dst-offset) - (unary-bit-bash src src-offset dst dst-offset length - #'word-sap-ref #'%set-word-sap-ref #'%raw-bits)))) + (defun ,system-area-copy-name (src src-offset dst dst-offset length) + (declare (type ,offset src-offset dst-offset length)) + (locally (declare (optimize (speed 3) (safety 1))) + (multiple-value-bind (src src-offset) (,fix-sap-and-offset-name src src-offset) + (declare (type sb!sys:system-area-pointer src)) + (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset) + (declare (type sb!sys:system-area-pointer dst)) + (,unary-bash-name src src-offset dst dst-offset length + #'word-sap-ref #'%set-word-sap-ref + #'word-sap-ref))))) -(defun copy-from-system-area (src src-offset dst dst-offset length) - (declare (type offset src-offset dst-offset length)) - (locally - (declare (optimize (speed 3) (safety 0))) - (multiple-value-bind (src src-offset) (fix-sap-and-offset src src-offset) - (unary-bit-bash src src-offset dst dst-offset length - #'%raw-bits #'%set-raw-bits #'word-sap-ref)))) + (defun ,array-copy-to-system-area-name (src src-offset dst dst-offset length) + (declare (type ,offset src-offset dst-offset length)) + (locally (declare (optimize (speed 3) (safety 1))) + (multiple-value-bind (dst dst-offset) (,fix-sap-and-offset-name dst dst-offset) + (,unary-bash-name src src-offset dst dst-offset length + #'word-sap-ref #'%set-word-sap-ref + #'%vector-raw-bits)))) + (defun ,system-area-copy-to-array-name (src src-offset dst dst-offset length) + (declare (type ,offset src-offset dst-offset length)) + (locally (declare (optimize (speed 3) (safety 1))) + (multiple-value-bind (src src-offset) (,fix-sap-and-offset-name src src-offset) + (,unary-bash-name src src-offset dst dst-offset length + #'%vector-raw-bits + #'%set-vector-raw-bits + #'word-sap-ref))))))) +) ; EVAL-WHEN + +;;; We would normally do this with a MACROLET, but then we run into +;;; problems with the lexical environment being too hairy for the +;;; cross-compiler and it cannot inline the basic basher functions. +#.(loop for i = 1 then (* i 2) + collect `(!define-sap-fixer ,i) into fixers + collect `(!define-byte-bashers ,i) into bashers + until (= i sb!vm:n-word-bits) + ;; FIXERS must come first so their inline expansions are available + ;; for the bashers. + finally (return `(progn ,@fixers ,@bashers))) + ;;; a common idiom for calling COPY-TO-SYSTEM-AREA ;;; ;;; Copy the entire contents of the vector V to memory starting at SAP+OFFSET. @@ -509,14 +544,4 @@ (declare (type (simple-array (unsigned-byte 8) 1) bv)) (declare (type system-area-pointer sap)) (declare (type fixnum offset)) - ;; FIXME: Actually it looks as though this, and most other calls to - ;; COPY-TO-SYSTEM-AREA, could be written more concisely with - ;; %BYTE-BLT. Except that the DST-END-DST-START convention for the - ;; length is confusing. Perhaps I could rename %BYTE-BLT to - ;; %BYTE-BLIT (and correspondingly rename the corresponding VOP) and - ;; replace the DST-END argument with an N-BYTES argument? - (copy-to-system-area bv - (* vector-data-offset n-word-bits) - sap - offset - (* (length bv) n-byte-bits))) + (copy-ub8-to-system-area bv 0 sap offset (length bv))) diff --git a/src/code/debug-int.lisp b/src/code/debug-int.lisp index cf5a506..a961e70 100644 --- a/src/code/debug-int.lisp +++ b/src/code/debug-int.lisp @@ -3286,7 +3286,7 @@ register." (setf (code-header-ref code-object (1+ real-lra-slot)) offset)) (setf (code-header-ref code-object known-return-p-slot) known-return-p) - (system-area-copy src-start 0 dst-start 0 (* length sb!vm:n-byte-bits)) + (system-area-ub8-copy src-start 0 dst-start 0 length) (sb!vm:sanctify-for-execution code-object) #!+(or x86 x86-64) (values dst-start code-object (sap- trap-loc src-start)) diff --git a/src/code/defsetfs.lisp b/src/code/defsetfs.lisp index 002e818..000e8e6 100644 --- a/src/code/defsetfs.lisp +++ b/src/code/defsetfs.lisp @@ -104,6 +104,7 @@ #-sb-xc-host (defsetf sbit %sbitset) (defsetf %array-dimension %set-array-dimension) (defsetf sb!kernel:%raw-bits sb!kernel:%set-raw-bits) +(defsetf sb!kernel:%vector-raw-bits sb!kernel:%set-vector-raw-bits) #-sb-xc-host (defsetf symbol-value set) #-sb-xc-host (defsetf symbol-plist %set-symbol-plist) #-sb-xc-host (defsetf nth %setnth) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 089b759..152acf2 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -408,38 +408,32 @@ ((zerop bytes)) ; easy case ((<= bytes space) (if (system-area-pointer-p thing) - (system-area-copy thing - (* start sb!vm:n-byte-bits) - (fd-stream-obuf-sap fd-stream) - (* tail sb!vm:n-byte-bits) - (* bytes sb!vm:n-byte-bits)) + (system-area-ub8-copy thing start + (fd-stream-obuf-sap fd-stream) + tail + bytes) ;; FIXME: There should be some type checking somewhere to ;; verify that THING here is a vector, not just . - (copy-to-system-area thing - (+ (* start sb!vm:n-byte-bits) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - (fd-stream-obuf-sap fd-stream) - (* tail sb!vm:n-byte-bits) - (* bytes sb!vm:n-byte-bits))) + (copy-ub8-to-system-area thing start + (fd-stream-obuf-sap fd-stream) + tail + bytes)) (setf (fd-stream-obuf-tail fd-stream) newtail)) ((<= bytes len) (flush-output-buffer fd-stream) (if (system-area-pointer-p thing) - (system-area-copy thing - (* start sb!vm:n-byte-bits) - (fd-stream-obuf-sap fd-stream) - 0 - (* bytes sb!vm:n-byte-bits)) + (system-area-ub8-copy thing + start + (fd-stream-obuf-sap fd-stream) + 0 + bytes) ;; FIXME: There should be some type checking somewhere to ;; verify that THING here is a vector, not just . - (copy-to-system-area thing - (+ (* start sb!vm:n-byte-bits) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - (fd-stream-obuf-sap fd-stream) - 0 - (* bytes sb!vm:n-byte-bits))) + (copy-ub8-to-system-area thing + start + (fd-stream-obuf-sap fd-stream) + 0 + bytes)) (setf (fd-stream-obuf-tail fd-stream) bytes)) (t (flush-output-buffer fd-stream) @@ -606,8 +600,8 @@ (setf (fd-stream-ibuf-tail stream) 0)) (t (decf tail head) - (system-area-copy ibuf-sap (* head sb!vm:n-byte-bits) - ibuf-sap 0 (* tail sb!vm:n-byte-bits)) + (system-area-ub8-copy ibuf-sap head + ibuf-sap 0 tail) (setf head 0) (setf (fd-stream-ibuf-head stream) 0) (setf (fd-stream-ibuf-tail stream) tail)))) @@ -880,10 +874,9 @@ (declare (type index start end)) (let* ((length (- end start)) (string (make-string length))) - (copy-from-system-area sap (* start sb!vm:n-byte-bits) - string (* sb!vm:vector-data-offset - sb!vm:n-word-bits) - (* length sb!vm:n-byte-bits)) + (copy-ub8-from-system-area sap start + string 0 + length) string)) ;;; the N-BIN method for FD-STREAMs diff --git a/src/code/host-alieneval.lisp b/src/code/host-alieneval.lisp index e1c2488..9c343d5 100644 --- a/src/code/host-alieneval.lisp +++ b/src/code/host-alieneval.lisp @@ -837,10 +837,10 @@ `(sap+ ,sap (/ ,offset sb!vm:n-byte-bits))) (define-alien-type-method (mem-block :deposit-gen) (type sap offset value) - (let ((bits (alien-mem-block-type-bits type))) - (unless bits + (let ((bytes (truncate (alien-mem-block-type-bits type) sb!vm:n-byte-bits))) + (unless bytes (error "can't deposit aliens of type ~S (unknown size)" type)) - `(sb!kernel:system-area-copy ,value 0 ,sap ,offset ',bits))) + `(sb!kernel:system-area-ub8-copy ,value 0 ,sap ,offset ',bytes))) ;;;; the ARRAY type diff --git a/src/code/hppa-vm.lisp b/src/code/hppa-vm.lisp index 23a161f..62a2b5c 100644 --- a/src/code/hppa-vm.lisp +++ b/src/code/hppa-vm.lisp @@ -88,10 +88,7 @@ (vector (make-array length :element-type '(unsigned-byte 8)))) (declare (type (unsigned-byte 8) length) (type (simple-array (unsigned-byte 8) (*)) vector)) - (copy-from-system-area pc (* n-byte-bits 5) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (copy-ub8-from-system-area pc 5 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (collect ((sc-offsets)) diff --git a/src/code/kernel.lisp b/src/code/kernel.lisp index cfcf76b..1311ee3 100644 --- a/src/code/kernel.lisp +++ b/src/code/kernel.lisp @@ -120,9 +120,18 @@ (defun %set-raw-bits (object offset value) (declare (type index offset)) - (declare (type (unsigned-byte #.sb!vm:n-word-bits) value)) + (declare (type sb!vm:word value)) (setf (sb!kernel:%raw-bits object offset) value)) +(defun %vector-raw-bits (object offset) + (declare (type index offset)) + (sb!kernel:%vector-raw-bits object offset)) + +(defun %set-vector-raw-bits (object offset value) + (declare (type index offset)) + (declare (type sb!vm:word value)) + (setf (sb!kernel:%vector-raw-bits object offset) value)) + (defun make-single-float (x) (make-single-float x)) (defun make-double-float (hi lo) (make-double-float hi lo)) diff --git a/src/code/mips-vm.lisp b/src/code/mips-vm.lisp index 264e871..d9f94e3 100644 --- a/src/code/mips-vm.lisp +++ b/src/code/mips-vm.lisp @@ -117,10 +117,7 @@ (/show0 "LENGTH,VECTOR,ERROR-NUMBER=..") (/hexstr length) (/hexstr vector) - (copy-from-system-area pc (* n-byte-bits 5) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (copy-ub8-from-system-area pc 5 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (/hexstr error-number) diff --git a/src/code/ppc-vm.lisp b/src/code/ppc-vm.lisp index 8788587..79ddf31 100644 --- a/src/code/ppc-vm.lisp +++ b/src/code/ppc-vm.lisp @@ -169,10 +169,7 @@ (declare (type system-area-pointer pc) (type (unsigned-byte 8) length) (type (simple-array (unsigned-byte 8) (*)) vector)) - (copy-from-system-area pc (* sb!vm:n-byte-bits 5) - vector (* sb!vm:n-word-bits - sb!vm:vector-data-offset) - (* length sb!vm:n-byte-bits)) + (copy-ub8-from-system-area pc 5 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (collect ((sc-offsets)) diff --git a/src/code/run-program.lisp b/src/code/run-program.lisp index 0808fdd..25c31c2 100644 --- a/src/code/run-program.lisp +++ b/src/code/run-program.lisp @@ -348,13 +348,12 @@ (declare (simple-string s)) (let ((n (length s))) ;; Blast the string into place. - (sb-kernel:copy-to-system-area (the simple-base-string - ;; FIXME - (coerce s 'simple-base-string)) - (* sb-vm:vector-data-offset - sb-vm:n-word-bits) - string-sap 0 - (* (1+ n) sb-vm:n-byte-bits)) + (sb-kernel:copy-ub8-to-system-area (the simple-base-string + ;; FIXME + (coerce s 'simple-base-string)) + 0 + string-sap 0 + (1+ n)) ;; Blast the pointer to the string into place. (setf (sap-ref-sap vec-sap i) string-sap) (setf string-sap (sap+ string-sap (round-bytes-to-words (1+ n)))) @@ -666,11 +665,10 @@ ~2I~_~A~:>" (strerror errno))) (t - (sb-kernel:copy-from-system-area + (sb-kernel:copy-ub8-from-system-area (alien-sap buf) 0 - string (* sb-vm:vector-data-offset - sb-vm:n-word-bits) - (* count sb-vm:n-byte-bits)) + string 0 + count) (write-string string stream :end count))))))))))) diff --git a/src/code/sparc-vm.lisp b/src/code/sparc-vm.lisp index 1ef6009..16c1e81 100644 --- a/src/code/sparc-vm.lisp +++ b/src/code/sparc-vm.lisp @@ -138,10 +138,7 @@ (declare (type system-area-pointer pc) (type (unsigned-byte 8) length) (type (simple-array (unsigned-byte 8) (*)) vector)) - (copy-from-system-area pc (* n-byte-bits 5) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (copy-ub8-from-system-area pc 5 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (collect ((sc-offsets)) diff --git a/src/code/stream.lisp b/src/code/stream.lisp index 869a0cc..4611a72 100644 --- a/src/code/stream.lisp +++ b/src/code/stream.lisp @@ -394,13 +394,23 @@ numbytes eof-error-p)) ((<= numbytes num-buffered) + #+nil + (let ((copy-function (typecase buffer + ((simple-array * (*)) #'ub8-bash-copy) + (system-area-pointer #'copy-ub8-to-system-area)))) + (funcall copy-function in-buffer index buffer start numbytes)) (%byte-blt in-buffer index buffer start (+ start numbytes)) (setf (ansi-stream-in-index stream) (+ index numbytes)) numbytes) (t (let ((end (+ start num-buffered))) - (%byte-blt in-buffer index buffer start end) + #+nil + (let ((copy-function (typecase buffer + ((simple-array * (*)) #'ub8-bash-copy) + (system-area-pointer #'copy-ub8-to-system-area)))) + (funcall copy-function in-buffer index buffer start num-buffered)) + (%byte-blt in-buffer index buffer start end) (setf (ansi-stream-in-index stream) +ansi-stream-in-buffer-length+) (+ (funcall (ansi-stream-n-bin stream) stream @@ -429,13 +439,7 @@ (- +ansi-stream-in-buffer-length+ +ansi-stream-in-buffer-extra+) nil)) - (start (- +ansi-stream-in-buffer-length+ count)) - (n-character-array-bytes - #.(/ (sb!vm:saetp-n-bits - (find 'character - sb!vm:*specialized-array-element-type-properties* - :key #'sb!vm:saetp-specifier)) - sb!vm:n-byte-bits))) + (start (- +ansi-stream-in-buffer-length+ count))) (declare (type index start count)) (cond ((zerop count) (setf (ansi-stream-in-index stream) @@ -443,19 +447,17 @@ (funcall (ansi-stream-in stream) stream eof-error-p eof-value)) (t (when (/= start +ansi-stream-in-buffer-extra+) - (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+ - sb!vm:n-byte-bits - n-character-array-bytes) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - ibuf (+ (the index (* start - sb!vm:n-byte-bits - n-character-array-bytes)) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - (* count - sb!vm:n-byte-bits - n-character-array-bytes))) + (#.(let* ((n-character-array-bits + (sb!vm:saetp-n-bits + (find 'character + sb!vm:*specialized-array-element-type-properties* + :key #'sb!vm:saetp-specifier))) + (bash-function (intern (format nil "UB~A-BASH-COPY" n-character-array-bits) + (find-package "SB!KERNEL")))) + bash-function) + ibuf +ansi-stream-in-buffer-extra+ + ibuf start + count)) (setf (ansi-stream-in-index stream) (1+ start)) (aref ibuf start))))) @@ -473,11 +475,9 @@ (funcall (ansi-stream-bin stream) stream eof-error-p eof-value)) (t (unless (zerop start) - (bit-bash-copy ibuf (* sb!vm:vector-data-offset sb!vm:n-word-bits) - ibuf (+ (the index (* start sb!vm:n-byte-bits)) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - (* count sb!vm:n-byte-bits))) + (ub8-bash-copy ibuf 0 + ibuf start + count)) (setf (ansi-stream-in-index stream) (1+ start)) (aref ibuf start))))) @@ -1069,14 +1069,16 @@ (when (plusp copy) (setf (string-input-stream-current stream) (truly-the index (+ index copy))) + ;; FIXME: why are we VECTOR-SAP'ing things here? what's the point? + ;; and are there SB-UNICODE issues here as well? --njf, 2005-03-24 (sb!sys:without-gcing - (system-area-copy (vector-sap string) - (* index sb!vm:n-byte-bits) - (if (typep buffer 'system-area-pointer) - buffer - (vector-sap buffer)) - (* start sb!vm:n-byte-bits) - (* copy sb!vm:n-byte-bits)))) + (system-area-ub8-copy (vector-sap string) + index + (if (typep buffer 'system-area-pointer) + buffer + (vector-sap buffer)) + start + copy))) (if (and (> requested copy) eof-error-p) (error 'end-of-file :stream stream) copy))) diff --git a/src/code/target-c-call.lisp b/src/code/target-c-call.lisp index ccbdc53..4730dbd 100644 --- a/src/code/target-c-call.lisp +++ b/src/code/target-c-call.lisp @@ -47,10 +47,7 @@ until (zerop (sap-ref-8 sap offset)) finally (return offset)))) (let ((result (make-string length :element-type 'base-char))) - (sb!kernel:copy-from-system-area sap 0 - result (* sb!vm:vector-data-offset - sb!vm:n-word-bits) - (* length sb!vm:n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap 0 result 0 length) result)))) (defun %naturalize-utf8-string (sap) diff --git a/src/code/x86-64-vm.lisp b/src/code/x86-64-vm.lisp index 50cd4f8..f6fa3d3 100644 --- a/src/code/x86-64-vm.lisp +++ b/src/code/x86-64-vm.lisp @@ -286,9 +286,7 @@ (/show0 "LENGTH,VECTOR,ERROR-NUMBER=..") (/hexstr length) (/hexstr vector) - (copy-from-system-area pc (* n-byte-bits 2) - vector (* n-word-bits vector-data-offset) - (* length n-byte-bits)) + (copy-ub8-from-system-area pc 2 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (/hexstr error-number) diff --git a/src/code/x86-vm.lisp b/src/code/x86-vm.lisp index d49f487..21fd69c 100644 --- a/src/code/x86-vm.lisp +++ b/src/code/x86-vm.lisp @@ -301,9 +301,7 @@ (/show0 "LENGTH,VECTOR,ERROR-NUMBER=..") (/hexstr length) (/hexstr vector) - (copy-from-system-area pc (* n-byte-bits 2) - vector (* n-word-bits vector-data-offset) - (* length n-byte-bits)) + (copy-ub8-from-system-area pc 2 vector 0 length) (let* ((index 0) (error-number (sb!c:read-var-integer vector index))) (/hexstr error-number) diff --git a/src/compiler/alpha/array.lisp b/src/compiler/alpha/array.lisp index fa0674c..1c19682 100644 --- a/src/compiler/alpha/array.lisp +++ b/src/compiler/alpha/array.lisp @@ -567,7 +567,11 @@ (define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) - unsigned-num %set-raw-bits #+gengc nil) + unsigned-num %set-raw-bits) +(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %vector-raw-bits) +(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %set-vector-raw-bits) ;;;; misc. array VOPs diff --git a/src/compiler/generic/target-core.lisp b/src/compiler/generic/target-core.lisp index 4168eba..5023124 100644 --- a/src/compiler/generic/target-core.lisp +++ b/src/compiler/generic/target-core.lisp @@ -75,11 +75,17 @@ (setf (code-header-ref code-obj sb!vm:code-trace-table-offset-slot) length) - (copy-to-system-area trace-table - (* sb!vm:vector-data-offset sb!vm:n-word-bits) - fill-ptr - 0 - trace-table-bits) + ;; KLUDGE: the "old" COPY-TO-SYSTEM-AREA automagically worked if + ;; somebody changed the number of bytes in a trace table entry. + ;; This version is a bit more fragile; if only there were some way + ;; to insulate ourselves against changes like that... + ;; + ;; Then again, PACK-TRACE-TABLE in src/compiler/trace-table.lisp + ;; doesn't appear to do anything interesting, returning a 0-length + ;; array. So it seemingly doesn't matter what we do here. Is this + ;; stale code? + ;; --njf, 2005-03-23 + (copy-ub16-to-system-area trace-table 0 fill-ptr 0 trace-table-len) (do ((index sb!vm:code-constants-offset (1+ index))) ((>= index (length constants))) diff --git a/src/compiler/generic/vm-fndb.lisp b/src/compiler/generic/vm-fndb.lisp index a2c9d35..98bebdc 100644 --- a/src/compiler/generic/vm-fndb.lisp +++ b/src/compiler/generic/vm-fndb.lisp @@ -166,6 +166,11 @@ (foldable flushable)) (defknown (%set-raw-bits) (t fixnum sb!vm:word) sb!vm:word (unsafe)) +;; These two are mostly used for bit-bashing operations. +(defknown %vector-raw-bits (t fixnum) sb!vm:word + (foldable flushable)) +(defknown (%set-vector-raw-bits) (t fixnum sb!vm:word) sb!vm:word + (unsafe)) (defknown allocate-vector ((unsigned-byte 8) index index) (simple-array * (*)) @@ -283,26 +288,34 @@ ;;;; bit-bashing routines -(defknown copy-to-system-area - ((simple-unboxed-array (*)) index system-area-pointer index index) - (values) - ()) - -(defknown copy-from-system-area - (system-area-pointer index (simple-unboxed-array (*)) index index) - (values) - ()) - -(defknown system-area-copy - (system-area-pointer index system-area-pointer index index) - (values) - ()) - -(defknown bit-bash-copy - ((simple-unboxed-array (*)) index - (simple-unboxed-array (*)) index index) - (values) - ()) +;;; FIXME: there's some ugly duplication between the (INTERN (FORMAT ...)) +;;; magic here and the same magic in src/code/bit-bash.lisp. I don't know +;;; of any good way to clean it up, but it's definitely violating OAOO. +(macrolet ((define-known-copiers () + `(progn + ,@(loop for i = 1 then (* i 2) + collect `(defknown ,(intern (format nil "UB~A-BASH-COPY" i) + (find-package "SB!KERNEL")) + ((simple-unboxed-array (*)) index (simple-unboxed-array (*)) index index) + (values) + ()) + collect `(defknown ,(intern (format nil "SYSTEM-AREA-UB~A-COPY" i) + (find-package "SB!KERNEL")) + (system-area-pointer index system-area-pointer index index) + (values) + ()) + collect `(defknown ,(intern (format nil "COPY-UB~A-TO-SYSTEM-AREA" i) + (find-package "SB!KERNEL")) + ((simple-unboxed-array (*)) index system-area-pointer index index) + (values) + ()) + collect `(defknown ,(intern (format nil "COPY-UB~A-FROM-SYSTEM-AREA" i) + (find-package "SB!KERNEL")) + (system-area-pointer index (simple-unboxed-array (*)) index index) + (values) + ()) + until (= i sb!vm:n-word-bits))))) + (define-known-copiers)) ;;; (not really a bit-bashing routine, but starting to take over from ;;; bit-bashing routines in byte-sized copies as of sbcl-0.6.12.29:) diff --git a/src/compiler/hppa/array.lisp b/src/compiler/hppa/array.lisp index 0aa28cb..68dad24 100644 --- a/src/compiler/hppa/array.lisp +++ b/src/compiler/hppa/array.lisp @@ -449,6 +449,10 @@ %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %set-raw-bits) +(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %vector-raw-bits) +(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %set-vector-raw-bits) ;;;; Misc. Array VOPs. (define-vop (get-vector-subtype get-header-data)) diff --git a/src/compiler/hppa/insts.lisp b/src/compiler/hppa/insts.lisp index e07d742..5cf0d93 100644 --- a/src/compiler/hppa/insts.lisp +++ b/src/compiler/hppa/insts.lisp @@ -398,10 +398,8 @@ (cond (length-only (values 0 (1+ length) nil nil)) (t - (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) (collect ((sc-offsets) (lengths)) (lengths 1) ; the length byte diff --git a/src/compiler/mips/array.lisp b/src/compiler/mips/array.lisp index 73ce1c1..ae8c440 100644 --- a/src/compiler/mips/array.lisp +++ b/src/compiler/mips/array.lisp @@ -547,6 +547,10 @@ %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %set-raw-bits) +(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %vector-raw-bits) +(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %set-vector-raw-bits) ;;;; Misc. Array VOPs. (define-vop (get-vector-subtype get-header-data)) diff --git a/src/compiler/mips/insts.lisp b/src/compiler/mips/insts.lisp index 5e733fe..b19fc9b 100644 --- a/src/compiler/mips/insts.lisp +++ b/src/compiler/mips/insts.lisp @@ -986,10 +986,8 @@ (cond (length-only (values 0 (1+ length) nil nil)) (t - (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) (collect ((sc-offsets) (lengths)) (lengths 1) ; the length byte diff --git a/src/compiler/ppc/array.lisp b/src/compiler/ppc/array.lisp index 6eaef08..a7dece0 100644 --- a/src/compiler/ppc/array.lisp +++ b/src/compiler/ppc/array.lisp @@ -534,7 +534,23 @@ (:result-types unsigned-num) (:variant 0 other-pointer-lowtag)) +(define-vop (vector-raw-bits word-index-ref) + (:note "vector-raw-bits VOP") + (:translate %vector-raw-bits) + (:results (value :scs (unsigned-reg))) + (:result-types unsigned-num) + (:variant vector-data-offset other-pointer-lowtag)) +(define-vop (set-vector-raw-bits word-index-set) + (:note "setf vector-raw-bits VOP") + (:translate %set-vector-raw-bits) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg zero immediate)) + (value :scs (unsigned-reg))) + (:arg-types * positive-fixnum unsigned-num) + (:results (result :scs (unsigned-reg))) + (:result-types unsigned-num) + (:variant vector-data-offset other-pointer-lowtag)) ;;;; Misc. Array VOPs. diff --git a/src/compiler/ppc/insts.lisp b/src/compiler/ppc/insts.lisp index bac1efa..b31d85c 100644 --- a/src/compiler/ppc/insts.lisp +++ b/src/compiler/ppc/insts.lisp @@ -232,10 +232,8 @@ (cond (length-only (values 0 (1+ length) nil nil)) (t - (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) (collect ((sc-offsets) (lengths)) (lengths 1) ; the length byte diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index b20f4db..5694a74 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -671,10 +671,6 @@ ;;;; type dispatch in AREF. The exception is CONCATENATE, since ;;;; a full call to CONCATENATE would have to look up the sequence ;;;; type, which can be really slow. -;;;; -;;;; FIXME: It would be nicer for these transforms to work for any -;;;; calls when all arguments are vectors with the same element type, -;;;; rather than restricting them to STRINGs only. ;;; Moved here from generic/vm-tran.lisp to satisfy clisp ;;; @@ -683,59 +679,99 @@ (def!constant vector-data-bit-offset (* sb!vm:vector-data-offset sb!vm:n-word-bits)) -(deftransform replace ((string1 string2 &key (start1 0) (start2 0) - end1 end2) - (simple-base-string simple-base-string &rest t) - * - ;; FIXME: consider replacing this policy test - ;; with some tests for the STARTx and ENDx - ;; indices being valid, conditional on high - ;; SAFETY code. - ;; - ;; FIXME: It turns out that this transform is - ;; critical for the performance of string - ;; streams. Make this more explicit. - :policy (< (max safety space) 3)) - `(locally - (declare (optimize (safety 0))) - (bit-bash-copy string2 - (the index - (+ (the index (* start2 sb!vm:n-byte-bits)) - ,vector-data-bit-offset)) - string1 - (the index - (+ (the index (* start1 sb!vm:n-byte-bits)) - ,vector-data-bit-offset)) - (the index - (* (min (the index (- (or end1 (length string1)) - start1)) - (the index (- (or end2 (length string2)) - start2))) - sb!vm:n-byte-bits))) - string1)) - -;;; KLUDGE: This isn't the nicest way of achieving efficient string -;;; streams, but it does work; a more general framework for this kind -;;; of optimization, as well as better handling of the possible -;;; keyword arguments, would be nice. -#!+sb-unicode -(deftransform replace ((string1 string2 &key (start1 0) (start2 0) - end1 end2) - ((simple-array character (*)) - (simple-array character (*)) - &rest t) - * - ;; FIXME: consider replacing this policy test - ;; with some tests for the STARTx and ENDx - ;; indices being valid, conditional on high - ;; SAFETY code. - ;; - ;; FIXME: It turns out that this transform is - ;; critical for the performance of string - ;; streams. Make this more explicit. - :policy (< (max safety space) 3)) - `(sb!impl::simple-character-string-replace-from-simple-character-string* - string1 string2 start1 end1 start2 end2)) +(eval-when (:compile-toplevel) +(defun valid-bit-bash-saetp-p (saetp) + ;; BIT-BASHing isn't allowed on simple vectors that contain pointers + (and (not (eq t (sb!vm:saetp-specifier saetp))) + ;; Due to limitations with the current BIT-BASHing code, we can't + ;; BIT-BASH reliably on arrays whose element types are larger + ;; than the word size. + (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits))) +) ; EVAL-WHEN + +;; FIXME: It turns out that this transform (for SIMPLE-BASE-STRINGS) +;; is critical for the performance of string streams. Make this +;; more explicit. +(macrolet + ((define-replace-transforms () + (loop for saetp across sb!vm:*specialized-array-element-type-properties* + when (valid-bit-bash-saetp-p saetp) + collect + (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*))) + (n-element-bits (sb!vm:saetp-n-bits saetp)) + (bash-function (intern (format nil "UB~A-BASH-COPY" n-element-bits) + (find-package "SB!KERNEL")))) + `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2) + (,sequence-type ,sequence-type &rest t) + ,sequence-type + :node node) + `(let* ((len1 (length seq1)) + (len2 (length seq2)) + (end1 (or end1 len1)) + (end2 (or end2 len2)) + (replace-len1 (- end1 start1)) + (replace-len2 (- end2 start2))) + ,(unless (policy node (= safety 0)) + `(progn + (unless (<= 0 start1 end1 len1) + (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1)) + (unless (<= 0 start2 end2 len2) + (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2)))) + (funcall (function ,',bash-function) + seq2 start2 + seq1 start1 + (min replace-len1 replace-len2)) + seq1))) + into forms + finally (return `(progn ,@forms))))) + (define-replace-transforms)) + +(macrolet + ((define-subseq-transforms () + (loop for saetp across sb!vm:*specialized-array-element-type-properties* + when (valid-bit-bash-saetp-p saetp) + collect + (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*))) + (n-element-bits (sb!vm:saetp-n-bits saetp)) + (bash-function (intern (format nil "UB~A-BASH-COPY" n-element-bits) + (find-package "SB!KERNEL")))) + `(deftransform subseq ((seq start &optional end) + (,sequence-type t &optional t) + ,sequence-type :node node) + `(let* ((length (length seq)) + (end (if end (min end length) length))) + ,(unless (policy node (= safety 0)) + `(progn + (unless (<= 0 start end length) + (sb!impl::signal-bounding-indices-bad-error seq start end)))) + (let* ((size (- end start)) + (result (make-array size :element-type ',',(sb!vm:saetp-specifier saetp)))) + (funcall (function ,',bash-function) + seq start result 0 size) + result)))) + into forms + finally (return `(progn ,@forms))))) + (define-subseq-transforms)) + +(macrolet + ((define-copy-seq-transforms () + (loop for saetp across sb!vm:*specialized-array-element-type-properties* + when (valid-bit-bash-saetp-p saetp) + collect + (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*))) + (n-element-bits (sb!vm:saetp-n-bits saetp)) + (bash-function (intern (format nil "UB~A-BASH-COPY" n-element-bits) + (find-package "SB!KERNEL")))) + `(deftransform copy-seq ((seq) (,sequence-type) + ,sequence-type) + `(let* ((length (length seq)) + (result (make-array length :element-type ',',(sb!vm:saetp-specifier saetp)))) + (funcall (function ,',bash-function) + seq 0 result 0 length) + result))) + into forms + finally (return `(progn ,@forms))))) + (define-copy-seq-transforms)) ;;; FIXME: this would be a valid transform for certain excluded cases: ;;; * :TEST 'CHAR= or :TEST #'CHAR= diff --git a/src/compiler/sparc/array.lisp b/src/compiler/sparc/array.lisp index 0d56f1a..67c59a7 100644 --- a/src/compiler/sparc/array.lisp +++ b/src/compiler/sparc/array.lisp @@ -673,3 +673,21 @@ (:results (result :scs (unsigned-reg))) (:result-types unsigned-num) (:variant 0 other-pointer-lowtag)) + +(define-vop (vector-raw-bits word-index-ref) + (:note "vector-raw-bits VOP") + (:translate %vector-raw-bits) + (:results (value :scs (unsigned-reg))) + (:result-types unsigned-num) + (:variant vector-data-offset other-pointer-lowtag)) + +(define-vop (set-vector-raw-bits word-index-set) + (:note "setf vector-raw-bits VOP") + (:translate %set-vector-raw-bits) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg zero immediate)) + (value :scs (unsigned-reg))) + (:arg-types * tagged-num unsigned-num) + (:results (result :scs (unsigned-reg))) + (:result-types unsigned-num) + (:variant vector-data-offset other-pointer-lowtag)) \ No newline at end of file diff --git a/src/compiler/sparc/insts.lisp b/src/compiler/sparc/insts.lisp index 801be79..d0fedf8 100644 --- a/src/compiler/sparc/insts.lisp +++ b/src/compiler/sparc/insts.lisp @@ -1183,10 +1183,8 @@ about function addresses and register values.") (cond (length-only (values 0 (1+ length) nil nil)) (t - (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) (collect ((sc-offsets) (lengths)) (lengths 1) ; the length byte diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp index 6a8aebd..e2d4ef6 100644 --- a/src/compiler/x86-64/array.lisp +++ b/src/compiler/x86-64/array.lisp @@ -1386,6 +1386,10 @@ unsigned-num %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %set-raw-bits) +(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %vector-raw-bits) +(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %set-vector-raw-bits) ;;;; miscellaneous array VOPs diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp index 3e0c6e2..17ad57d 100644 --- a/src/compiler/x86-64/insts.lisp +++ b/src/compiler/x86-64/insts.lisp @@ -2391,10 +2391,8 @@ (cond (length-only (values 0 (1+ length) nil nil)) (t - (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) (collect ((sc-offsets) (lengths)) (lengths 1) ; the length byte diff --git a/src/compiler/x86/array.lisp b/src/compiler/x86/array.lisp index 493cd7d..f061798 100644 --- a/src/compiler/x86/array.lisp +++ b/src/compiler/x86/array.lisp @@ -1318,6 +1318,10 @@ unsigned-num %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %set-raw-bits) +(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %vector-raw-bits) +(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag + (unsigned-reg) unsigned-num %set-vector-raw-bits) ;;;; miscellaneous array VOPs diff --git a/src/compiler/x86/insts.lisp b/src/compiler/x86/insts.lisp index fac5d41..fd398ba 100644 --- a/src/compiler/x86/insts.lisp +++ b/src/compiler/x86/insts.lisp @@ -1896,10 +1896,8 @@ (cond (length-only (values 0 (1+ length) nil nil)) (t - (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) - vector (* n-word-bits - vector-data-offset) - (* length n-byte-bits)) + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) (collect ((sc-offsets) (lengths)) (lengths 1) ; the length byte diff --git a/tests/seq.impure.lisp b/tests/seq.impure.lisp index 33f128a..2570a73 100644 --- a/tests/seq.impure.lisp +++ b/tests/seq.impure.lisp @@ -897,5 +897,101 @@ (with-input-from-string (s string :start 6 :end 9) (read-char s))))) +;;; testing bit-bashing according to _The Practice of Programming_ +(defun fill-bytes-for-testing (bitsize) + "Return a list of 'bytes' of type (MOD BITSIZE)." + (remove-duplicates (list 0 + (1- (ash 1 (1- bitsize))) + (ash 1 (1- bitsize)) + (1- (ash 1 bitsize))))) + +(defun fill-with-known-value (value size &rest vectors) + (dolist (vec vectors) + (dotimes (i size) + (setf (aref vec i) value)))) + +(defun collect-fill-amounts (n-power) + (remove-duplicates + (loop for i from 0 upto n-power + collect (1- (expt 2 i)) + collect (expt 2 i) + collect (1+ (expt 2 i))))) + +(defun test-fill-bashing (bitsize padding-amount n-power) + (let* ((size (+ (* padding-amount 2) (expt 2 n-power) (* padding-amount 2))) + (standard (make-array size :element-type `(unsigned-byte ,bitsize))) + (bashed (make-array size :element-type `(unsigned-byte ,bitsize))) + (fill-amounts (collect-fill-amounts n-power)) + (bash-function (intern (format nil "UB~A-BASH-FILL" bitsize) + (find-package "SB-KERNEL")))) + (loop for offset from padding-amount below (* 2 padding-amount) do + (dolist (c (fill-bytes-for-testing bitsize)) + (dolist (n fill-amounts) + (fill-with-known-value (mod (lognot c) (ash 1 bitsize)) n + standard bashed) + ;; fill vectors + ;; a) the standard slow way + (fill standard c :start offset :end (+ offset n)) + ;; b) the blazingly fast way + (let ((value (loop for i from 0 by bitsize + until (= i sb-vm:n-word-bits) + sum (ash c i)))) + (funcall bash-function value bashed offset n)) + ;; check for errors + (when (mismatch standard bashed) + (format t "Test with offset ~A, fill ~A and length ~A failed.~%" + offset c n) + (format t "Mismatch: ~A ~A~%" + (subseq standard 0 (+ offset n 1)) + (subseq bashed 0 (+ offset n 1))) + (return-from test-fill-bashing nil)))) + finally (return t)))) + +(defun test-copy-bashing (bitsize padding-amount n-power) + (let* ((size (+ (* padding-amount 2) (expt 2 n-power) (* padding-amount 2))) + (standard-dst (make-array size :element-type `(unsigned-byte ,bitsize))) + (bashed-dst (make-array size :element-type `(unsigned-byte ,bitsize))) + (source (make-array size :element-type `(unsigned-byte ,bitsize))) + (fill-amounts (collect-fill-amounts n-power)) + (bash-function (intern (format nil "UB~A-BASH-COPY" bitsize) + (find-package "SB-KERNEL")))) + (do ((source-offset padding-amount (1+ source-offset))) + ((>= source-offset (* padding-amount 2)) + ;; success! + t) + (do ((target-offset padding-amount (1+ target-offset))) + ((>= target-offset (* padding-amount 2))) + (dolist (c (fill-bytes-for-testing bitsize)) + (dolist (n fill-amounts) + (fill-with-known-value (mod (lognot c) (ash 1 bitsize)) size + source standard-dst bashed-dst) + ;; fill with test data + (fill source c :start source-offset :end (+ source-offset n)) + ;; copy filled test data to test vectors + ;; a) the slow way + (replace standard-dst source + :start1 target-offset :end1 (+ target-offset n) + :start2 source-offset :end2 (+ source-offset n)) + ;; b) the blazingly fast way + (funcall bash-function source source-offset + bashed-dst target-offset n) + ;; check for errors + (when (mismatch standard-dst bashed-dst) + (format t "Test with target-offset ~A, source-offset ~A, fill ~A, and length ~A failed.~%" + target-offset source-offset c n) + (format t "Mismatch:~% correct ~A~% actual ~A~%" + standard-dst + bashed-dst) + (return-from test-copy-bashing nil)))))))) + +(loop for i = 1 then (* i 2) do + ;; the bare '32' here is fairly arbitrary; '8' provides a good + ;; range of lengths over which to fill and copy, which should tease + ;; out most errors in the code (if any exist). (It also makes this + ;; part of the test suite finish reasonably quickly.) + (assert (test-fill-bashing i 32 8)) + (assert (test-copy-bashing i 32 8)) + until (= i sb-vm:n-word-bits)) + ;;; success (sb-ext:quit :unix-status 104) diff --git a/version.lisp-expr b/version.lisp-expr index d58a62c..db33470 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.21.4" +"0.8.21.5" -- 1.7.10.4