From 8cbd7fc0f27222a778ce61bae7d943a5081362cc Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Tue, 10 Apr 2007 13:50:43 +0000 Subject: [PATCH] 1.0.4.55: Optimized REPLACE and UB*-BASH-COPY routines * Expand simple cases of UB*-BASH-COPY inline to avoid full call overhead and generate better code generally; * Handle more cases of REPLACE; we now optimize REPLACE on all simple specialized array types (only element types <= n-word-bits are handled generally, though); * Use a single COPY-SEQ and SUBSEQ transform rather than one per specialized array type; generate inline copies for these too when possible; * Tests; * Backend cleanup: introduce a FIND-SAETP to eliminate duplicate code; * Backend cleanup: change %{SET-,}VECTOR-RAW-BITS on x86 to use the *-WITH-OFFSET machinery. --- NEWS | 2 + package-data-list.lisp-expr | 5 +- src/compiler/generic/vm-array.lisp | 6 + src/compiler/generic/vm-fndb.lisp | 10 +- src/compiler/generic/vm-tran.lisp | 49 ++++-- src/compiler/seqtran.lisp | 325 ++++++++++++++++++++++++++---------- src/compiler/x86/array.lisp | 13 +- tests/seq.impure.lisp | 68 +++++++- version.lisp-expr | 2 +- 9 files changed, 355 insertions(+), 125 deletions(-) diff --git a/NEWS b/NEWS index 4881cec..f3034d9 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ changes in sbcl-1.0.5 relative to sbcl-1.0.4: in multithreaded application code. * optimization: GET-INTERNAL-REAL-TIME has been optimized on POSIX platforms. (thanks to James Anderson for the optimization hint) + * optimization: REPLACE, SUBSEQ, and COPY-SEQ are now optimized in + more cases. * enhancement: XREF information is now collected to references made to global variables using SYMBOL-VALUE with a constant argument. * bug fix: dead unbound variable references now signal an error. diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 8645a54..19e7f4e 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1145,7 +1145,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%MAP-TO-LIST-ARITY-1" "%MAP-TO-NIL-ON-SEQUENCE" "%MAP-TO-NIL-ON-SIMPLE-VECTOR" "%MAP-TO-NIL-ON-VECTOR" "%MASK-FIELD" "%NEGATE" "%POW" "%PUTHASH" - "%RAW-BITS" "%VECTOR-RAW-BITS" + "%RAW-BITS" "%RAW-BITS-WITH-OFFSET" "%VECTOR-RAW-BITS" "%RAW-REF-COMPLEX-DOUBLE" "%RAW-REF-COMPLEX-LONG" "%RAW-REF-COMPLEX-SINGLE" "%RAW-REF-DOUBLE" "%RAW-REF-LONG" "%RAW-REF-SINGLE" @@ -1160,7 +1160,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%RAW-INSTANCE-REF/COMPLEX-DOUBLE" "%RAW-INSTANCE-SET/COMPLEX-DOUBLE" "%SET-ARRAY-DIMENSION" "%SET-FUNCALLABLE-INSTANCE-INFO" - "%SET-RAW-BITS" "%SET-VECTOR-RAW-BITS" + "%SET-RAW-BITS" "%SET-RAW-BITS-WITH-OFFSET" + "%SET-VECTOR-RAW-BITS" "%SET-SAP-REF-16" "%SET-SAP-REF-32" "%SET-SAP-REF-64" "%SET-SAP-REF-WORD" "%SET-SAP-REF-8" "%SET-SAP-REF-DOUBLE" "%SET-SAP-REF-LONG" "%SET-SAP-REF-SAP" diff --git a/src/compiler/generic/vm-array.lisp b/src/compiler/generic/vm-array.lisp index ba48b3d..e0925e5 100644 --- a/src/compiler/generic/vm-array.lisp +++ b/src/compiler/generic/vm-array.lisp @@ -179,3 +179,9 @@ #!+sb-doc "An alist for mapping simple array element types to their corresponding primitive types.") + +(in-package "SB!C") + +(defun find-saetp (element-type) + (find element-type sb!vm:*specialized-array-element-type-properties* + :key #'sb!vm:saetp-specifier :test #'equal)) diff --git a/src/compiler/generic/vm-fndb.lisp b/src/compiler/generic/vm-fndb.lisp index 355cf7b..81ff030 100644 --- a/src/compiler/generic/vm-fndb.lisp +++ b/src/compiler/generic/vm-fndb.lisp @@ -200,11 +200,17 @@ (defknown %raw-bits (t fixnum) sb!vm:word (foldable flushable)) +#!+x86 +(defknown %raw-bits-with-offset (t fixnum fixnum) sb!vm:word + (flushable always-translatable)) (defknown (%set-raw-bits) (t fixnum sb!vm:word) sb!vm:word (unsafe)) -;; These two are mostly used for bit-bashing operations. +#!+x86 +(defknown (%set-raw-bits-with-offset) (t fixnum fixnum sb!vm:word) sb!vm:word + (unsafe always-translatable)) +;;; These two are mostly used for bit-bashing operations. (defknown %vector-raw-bits (t fixnum) sb!vm:word - (foldable flushable)) + (flushable)) (defknown (%set-vector-raw-bits) (t fixnum sb!vm:word) sb!vm:word (unsafe)) diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp index 86efb42..f3791f0 100644 --- a/src/compiler/generic/vm-tran.lisp +++ b/src/compiler/generic/vm-tran.lisp @@ -73,6 +73,31 @@ sb!vm:bignum-digits-offset index offset)) +#!+x86 +(progn +(define-source-transform sb!kernel:%vector-raw-bits (thing index) + `(sb!kernel:%raw-bits-with-offset ,thing ,index 2)) + +(define-source-transform sb!kernel:%raw-bits (thing index) + `(sb!kernel:%raw-bits-with-offset ,thing ,index 0)) + +(define-source-transform sb!kernel:%set-vector-raw-bits (thing index value) + `(sb!kernel:%set-raw-bits-with-offset ,thing ,index 2 ,value)) + +(define-source-transform sb!kernel:%set-raw-bits (thing index value) + `(sb!kernel:%set-raw-bits-with-offset ,thing ,index 0 ,value)) + +(deftransform sb!kernel:%raw-bits-with-offset ((thing index offset) * * :node node) + (fold-index-addressing 'sb!kernel:%raw-bits-with-offset + sb!vm:n-word-bits sb!vm:other-pointer-lowtag + 0 index offset)) + +(deftransform sb!kernel:%set-raw-bits-with-offset ((thing index offset value) * *) + (fold-index-addressing 'sb!kernel:%set-raw-bits-with-offset + sb!vm:n-word-bits sb!vm:other-pointer-lowtag + 0 index offset t)) +) ; PROGN + ;;; The layout is stored in slot 0. (define-source-transform %instance-layout (x) `(truly-the layout (%instance-ref ,x 0))) @@ -149,32 +174,28 @@ ;;; Transform data vector access to a form that opens up optimization ;;; opportunities. #!+x86 -(deftransform data-vector-ref ((array index) ((or simple-unboxed-array +(deftransform data-vector-ref ((array index) ((or (simple-unboxed-array (*)) simple-vector) t)) (let ((array-type (lvar-type array))) (unless (array-type-p array-type) (give-up-ir1-transform)) (let* ((element-type (type-specifier (array-type-specialized-element-type array-type))) - (saetp (find element-type - sb!vm:*specialized-array-element-type-properties* - :key #'sb!vm:saetp-specifier :test #'equal))) + (saetp (find-saetp element-type))) (unless (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits) (give-up-ir1-transform)) `(data-vector-ref-with-offset array index 0)))) #!+x86 (deftransform data-vector-ref-with-offset ((array index offset) - ((or simple-unboxed-array + ((or (simple-unboxed-array (*)) simple-vector) t t)) (let ((array-type (lvar-type array))) (unless (array-type-p array-type) (give-up-ir1-transform)) (let* ((element-type (type-specifier (array-type-specialized-element-type array-type))) - (saetp (find element-type - sb!vm:*specialized-array-element-type-properties* - :key #'sb!vm:saetp-specifier :test #'equal))) + (saetp (find-saetp element-type))) (aver (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)) (fold-index-addressing 'data-vector-ref-with-offset (sb!vm:saetp-n-bits saetp) @@ -243,31 +264,27 @@ ;;; opportunities. #!+x86 (deftransform data-vector-set ((array index new-value) - ((or simple-unboxed-array simple-vector) + ((or (simple-unboxed-array (*)) simple-vector) t t)) (let ((array-type (lvar-type array))) (unless (array-type-p array-type) (give-up-ir1-transform)) (let* ((element-type (type-specifier (array-type-specialized-element-type array-type))) - (saetp (find element-type - sb!vm:*specialized-array-element-type-properties* - :key #'sb!vm:saetp-specifier :test #'equal))) + (saetp (find-saetp element-type))) (unless (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits) (give-up-ir1-transform)) `(data-vector-set-with-offset array index 0 new-value)))) #!+x86 (deftransform data-vector-set-with-offset ((array index offset new-value) - ((or simple-unboxed-array + ((or (simple-unboxed-array (*)) simple-vector) t t t)) (let ((array-type (lvar-type array))) (unless (array-type-p array-type) (give-up-ir1-transform)) (let* ((element-type (type-specifier (array-type-specialized-element-type array-type))) - (saetp (find element-type - sb!vm:*specialized-array-element-type-properties* - :key #'sb!vm:saetp-specifier :test #'equal))) + (saetp (find-saetp element-type))) (aver (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)) (fold-index-addressing 'data-vector-set-with-offset (sb!vm:saetp-n-bits saetp) diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index 24fe5ce..a3126da 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -663,21 +663,10 @@ (def string/=* identity)) -;;;; string-only transforms for sequence functions -;;;; -;;;; Note: CMU CL had more of these, including transforms for -;;;; functions which cons. In SBCL, we've gotten rid of most of the -;;;; transforms for functions which cons, since our GC overhead is -;;;; sufficiently large that it doesn't seem worth it to try to -;;;; economize on function call overhead or on the overhead of runtime -;;;; type dispatch in AREF. The exception is CONCATENATE, since -;;;; a full call to CONCATENATE would have to look up the sequence -;;;; type, which can be really slow. - -;;; Moved here from generic/vm-tran.lisp to satisfy clisp -;;; -;;; FIXME: Add a comment telling whether this holds for all vectors -;;; or only for vectors based on simple arrays (non-adjustable, etc.). +;;;; transforms for sequence functions + +;;; Moved here from generic/vm-tran.lisp to satisfy clisp. Only applies +;;; to vectors based on simple arrays. (def!constant vector-data-bit-offset (* sb!vm:vector-data-offset sb!vm:n-word-bits)) @@ -695,89 +684,245 @@ (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits))) ) ; EVAL-WHEN -;; FIXME: It turns out that this transform (for SIMPLE-BASE-STRINGS) -;; is critical for the performance of string streams. Make this -;; more explicit. +;;; FIXME: In the copy loops below, we code the loops in a strange +;;; fashion: +;;; +;;; (do ((i (+ src-offset length) (1- i))) +;;; ((<= i 0) ...) +;;; (... (aref foo (1- i)) ...)) +;;; +;;; rather than the more natural (and seemingly more efficient): +;;; +;;; (do ((i (1- (+ src-offset length)) (1- i))) +;;; ((< i 0) ...) +;;; (... (aref foo i) ...)) +;;; +;;; (more efficient because we don't have to do the index adjusting on +;;; every iteration of the loop) +;;; +;;; We do this to avoid a suboptimality in SBCL's backend. In the +;;; latter case, the backend thinks I is a FIXNUM (which it is), but +;;; when used as an array index, the backend thinks I is a +;;; POSITIVE-FIXNUM (which it is). However, since the backend thinks of +;;; these as distinct storage classes, it cannot coerce a move from a +;;; FIXNUM TN to a POSITIVE-FIXNUM TN. The practical effect of this +;;; deficiency is that we have two extra moves and increased register +;;; pressure, which can lead to some spectacularly bad register +;;; allocation. (sub-FIXME: the register allocation even with the +;;; strangely written loops is not always excellent, either...). Doing +;;; it the first way, above, means that I is always thought of as a +;;; POSITIVE-FIXNUM and there are no issues. +;;; +;;; Besides, the *-WITH-OFFSET machinery will fold those index +;;; adjustments in the first version into the array addressing at no +;;; performance penalty! + +;;; This transform is critical to the performance of string streams. If +;;; you tweak it, make sure that you compare the disassembly, if not the +;;; performance of, the functions implementing string streams +;;; (e.g. SB!IMPL::STRING-OUCH). (macrolet ((define-replace-transforms () (loop for saetp across sb!vm:*specialized-array-element-type-properties* - when (valid-bit-bash-saetp-p saetp) + for sequence-type = `(simple-array ,(sb!vm:saetp-specifier saetp) (*)) + unless (= (sb!vm:saetp-typecode saetp) sb!vm::simple-array-nil-widetag) collect - (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*))) - (n-element-bits (sb!vm:saetp-n-bits saetp)) - (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits) - (find-package "SB!KERNEL")))) - `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2) - (,sequence-type ,sequence-type &rest t) - ,sequence-type - :node node) - `(let* ((len1 (length seq1)) - (len2 (length seq2)) - (end1 (or end1 len1)) - (end2 (or end2 len2)) - (replace-len1 (- end1 start1)) - (replace-len2 (- end2 start2))) - ,(unless (policy node (= safety 0)) - `(progn - (unless (<= 0 start1 end1 len1) - (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1)) - (unless (<= 0 start2 end2 len2) - (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2)))) - (funcall (function ,',bash-function) - seq2 start2 - seq1 start1 - (min replace-len1 replace-len2)) - seq1))) + `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2) + (,sequence-type ,sequence-type &rest t) + ,sequence-type + :node node) + ,(cond + ((valid-bit-bash-saetp-p saetp) nil) + ;; If we're not bit-bashing, only allow cases where we + ;; can determine the order of copying up front. (There + ;; are actually more cases we can handle if we know the + ;; amount that we're copying, but this handles the + ;; common cases.) + (t '(unless (= (constant-value-or-lose start1 0) + (constant-value-or-lose start2 0)) + (give-up-ir1-transform)))) + `(let* ((len1 (length seq1)) + (len2 (length seq2)) + (end1 (or end1 len1)) + (end2 (or end2 len2)) + (replace-len1 (- end1 start1)) + (replace-len2 (- end2 start2))) + ,(unless (policy node (= safety 0)) + `(progn + (unless (<= 0 start1 end1 len1) + (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1)) + (unless (<= 0 start2 end2 len2) + (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2)))) + ,',(cond + ((valid-bit-bash-saetp-p saetp) + (let* ((n-element-bits (sb!vm:saetp-n-bits saetp)) + (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits) + (find-package "SB!KERNEL")))) + `(funcall (function ,bash-function) seq2 start2 + seq1 start1 (min replace-len1 replace-len2)))) + (t + ;; We can expand the loop inline here because we + ;; would have given up the transform (see above) + ;; if we didn't have constant matching start + ;; indices. + '(do ((i start1 (1+ i)) + (end (+ start1 + (min replace-len1 replace-len2)))) + ((>= i end)) + (declare (optimize (insert-array-bounds-checks 0))) + (setf (aref seq1 i) (aref seq2 i))))) + seq1)) into forms finally (return `(progn ,@forms))))) (define-replace-transforms)) -(macrolet - ((define-subseq-transforms () - (loop for saetp across sb!vm:*specialized-array-element-type-properties* - when (valid-bit-bash-saetp-p saetp) - collect - (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*))) - (n-element-bits (sb!vm:saetp-n-bits saetp)) - (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits) - (find-package "SB!KERNEL")))) - `(deftransform subseq ((seq start &optional end) - (,sequence-type t &optional t) - ,sequence-type :node node) - `(let* ((length (length seq)) - (end (if end (min end length) length))) - ,(unless (policy node (= safety 0)) - `(progn - (unless (<= 0 start end length) - (sb!impl::signal-bounding-indices-bad-error seq start end)))) - (let* ((size (- end start)) - (result (make-array size :element-type ',',(sb!vm:saetp-specifier saetp)))) - (funcall (function ,',bash-function) - seq start result 0 size) - result)))) - into forms - finally (return `(progn ,@forms))))) - (define-subseq-transforms)) - -(macrolet - ((define-copy-seq-transforms () - (loop for saetp across sb!vm:*specialized-array-element-type-properties* - when (valid-bit-bash-saetp-p saetp) - collect - (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*))) - (n-element-bits (sb!vm:saetp-n-bits saetp)) - (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits) - (find-package "SB!KERNEL")))) - `(deftransform copy-seq ((seq) (,sequence-type) - ,sequence-type) - `(let* ((length (length seq)) - (result (make-array length :element-type ',',(sb!vm:saetp-specifier saetp)))) - (funcall (function ,',bash-function) - seq 0 result 0 length) - result))) - into forms - finally (return `(progn ,@forms))))) - (define-copy-seq-transforms)) +;;; Expand simple cases of UB-BASH-COPY inline. "simple" is +;;; defined as those cases where we are doing word-aligned copies from +;;; both the source and the destination and we are copying from the same +;;; offset from both the source and the destination. (The last +;;; condition is there so we can determine the direction to copy at +;;; compile time rather than runtime. Remember that UB-BASH-COPY +;;; acts like memmove, not memcpy.) These conditions may seem rather +;;; restrictive, but they do catch common cases, like allocating a (* 2 +;;; N)-size buffer and blitting in the old N-size buffer in. + +(defun frob-bash-transform (src src-offset + dst dst-offset + length n-elems-per-word) + (declare (ignore src dst length)) + (let ((n-bits-per-elem (truncate sb!vm:n-word-bits n-elems-per-word))) + (multiple-value-bind (src-word src-elt) + (truncate (lvar-value src-offset) n-elems-per-word) + (multiple-value-bind (dst-word dst-elt) + (truncate (lvar-value dst-offset) n-elems-per-word) + ;; Avoid non-word aligned copies. + (unless (and (zerop src-elt) (zerop dst-elt)) + (give-up-ir1-transform)) + ;; Avoid copies where we would have to insert code for + ;; determining the direction of copying. + (unless (= src-word dst-word) + (give-up-ir1-transform)) + ;; FIXME: The cross-compiler doesn't optimize TRUNCATE properly, + ;; so we have to do its work here. + `(let ((end (+ ,src-word ,(if (= n-elems-per-word 1) + 'length + `(truncate (the index length) ,n-elems-per-word))))) + (declare (type index end)) + ;; Handle any bits at the end. + (when (logtest length (1- ,n-elems-per-word)) + (let* ((extra (mod length ,n-elems-per-word)) + ;; FIXME: The shift amount on this ASH is + ;; *always* negative, but the backend doesn't + ;; have a NEGATIVE-FIXNUM primitive type, so we + ;; wind up with a pile of code that tests the + ;; sign of the shift count prior to shifting when + ;; all we need is a simple negate and shift + ;; right. Yuck. + (mask (ash #.(1- (ash 1 sb!vm:n-word-bits)) + (* (- extra ,n-elems-per-word) + ,n-bits-per-elem)))) + (setf (sb!kernel:%vector-raw-bits dst end) + (logior + (logandc2 (sb!kernel:%vector-raw-bits dst end) + (ash mask + ,(ecase sb!c:*backend-byte-order* + (:little-endian 0) + (:big-endian `(* (- ,n-elems-per-word extra) + ,n-bits-per-elem))))) + (logand (sb!kernel:%vector-raw-bits src end) + (ash mask + ,(ecase sb!c:*backend-byte-order* + (:little-endian 0) + (:big-endian `(* (- ,n-elems-per-word extra) + ,n-bits-per-elem))))))))) + ;; Copy from the end to save a register. + (do ((i end (1- i))) + ((<= i ,src-word)) + (setf (sb!kernel:%vector-raw-bits dst (1- i)) + (sb!kernel:%vector-raw-bits src (1- i))))))))) + +#.(loop for i = 1 then (* i 2) + collect `(deftransform ,(intern (format nil "UB~D-BASH-COPY" i) + "SB!KERNEL") + ((src src-offset + dst dst-offset + length) + ((simple-unboxed-array (*)) + (constant-arg index) + (simple-unboxed-array (*)) + (constant-arg index) + index) + *) + (frob-bash-transform src src-offset + dst dst-offset length + ,(truncate sb!vm:n-word-bits i))) into forms + until (= i sb!vm:n-word-bits) + finally (return `(progn ,@forms))) + +;;; We expand copy loops inline in SUBSEQ and COPY-SEQ if we're copying +;;; arrays with elements of size >= the word size. We do this because +;;; we know the arrays cannot alias (one was just consed), therefore we +;;; can determine at compile time the direction to copy, and for +;;; word-sized elements, UB-BASH-COPY will do a bit of +;;; needless checking to figure out what's going on. The same +;;; considerations apply if we are copying elements larger than the word +;;; size, with the additional twist that doing it inline is likely to +;;; cons far less than calling REPLACE and letting generic code do the +;;; work. +;;; +;;; However, we do not do this for elements whose size is < than the +;;; word size because we don't want to deal with any alignment issues +;;; inline. The UB*-BASH-COPY transforms might fix things up later +;;; anyway. + +(defun maybe-expand-copy-loop-inline (src src-offset dst dst-offset length + element-type) + (let ((saetp (find-saetp element-type))) + (aver saetp) + (if (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits) + (expand-aref-copy-loop src src-offset dst dst-offset length) + `(locally (declare (optimize (safety 0))) + (replace ,dst ,src :start1 ,dst-offset :start2 ,src-offset :end1 ,length))))) + +(defun expand-aref-copy-loop (src src-offset dst dst-offset length) + (if (eql src-offset dst-offset) + `(do ((i (+ ,src-offset ,length) (1- i))) + ((<= i ,src-offset)) + (declare (optimize (insert-array-bounds-checks 0))) + (setf (aref ,dst (1- i)) (aref ,src (1- i)))) + `(do ((i (+ ,src-offset ,length) (1- i)) + (j (+ ,dst-offset ,length) (1- j))) + ((<= i ,src-offset)) + (declare (optimize (insert-array-bounds-checks 0))) + (setf (aref ,dst (1- j)) (aref ,src (1- i)))))) + +(deftransform subseq ((seq start &optional end) + ((or (simple-unboxed-array (*)) simple-vector) t &optional t) + * :node node) + (let ((array-type (lvar-type seq))) + (unless (array-type-p array-type) + (give-up-ir1-transform)) + (let ((element-type (type-specifier (array-type-specialized-element-type array-type)))) + `(let* ((length (length seq)) + (end (or end length))) + ,(unless (policy node (= safety 0)) + '(progn + (unless (<= 0 start end length) + (sb!impl::signal-bounding-indices-bad-error seq start end)))) + (let* ((size (- end start)) + (result (make-array size :element-type ',element-type))) + ,(maybe-expand-copy-loop-inline 'seq 'start 'result 0 'size element-type) + result))))) + +(deftransform copy-seq ((seq) ((or (simple-unboxed-array (*)) simple-vector)) *) + (let ((array-type (lvar-type seq))) + (unless (array-type-p array-type) + (give-up-ir1-transform)) + (let ((element-type (type-specifier (array-type-specialized-element-type array-type)))) + `(let* ((length (length seq)) + (result (make-array length :element-type ',element-type))) + ,(maybe-expand-copy-loop-inline 'seq 0 'result 0 'length element-type) + result)))) ;;; FIXME: it really should be possible to take advantage of the ;;; macros used in code/seq.lisp here to avoid duplication of code, diff --git a/src/compiler/x86/array.lisp b/src/compiler/x86/array.lisp index 9f60029..3926463 100644 --- a/src/compiler/x86/array.lisp +++ b/src/compiler/x86/array.lisp @@ -708,14 +708,11 @@ ;;; These vops are useful for accessing the bits of a vector ;;; irrespective of what type of vector it is. -(define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) - unsigned-num %raw-bits) -(define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) - unsigned-num %set-raw-bits) -(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag - (unsigned-reg) unsigned-num %vector-raw-bits) -(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag - (unsigned-reg) unsigned-num %set-vector-raw-bits) +(define-full-reffer+offset raw-bits-with-offset * 0 other-pointer-lowtag (unsigned-reg) + unsigned-num %raw-bits-with-offset) +(define-full-setter+offset set-raw-bits-with-offset * 0 other-pointer-lowtag (unsigned-reg) + unsigned-num %set-raw-bits-with-offset) + ;;;; miscellaneous array VOPs diff --git a/tests/seq.impure.lisp b/tests/seq.impure.lisp index ae10560..b8ee470 100644 --- a/tests/seq.impure.lisp +++ b/tests/seq.impure.lisp @@ -1011,12 +1011,68 @@ ;; Too slow for the interpreter #+#.(cl:if (cl:eq sb-ext:*evaluator-mode* :compile) '(and) '(or)) (loop for i = 1 then (* i 2) do - ;; the bare '32' here is fairly arbitrary; '8' provides a good - ;; range of lengths over which to fill and copy, which should tease - ;; out most errors in the code (if any exist). (It also makes this - ;; part of the test suite finish reasonably quickly.) - (assert (test-fill-bashing i 32 8)) - (assert (test-copy-bashing i 32 8)) + ;; the bare '13' here is fairly arbitrary, except that it's been + ;; reduced from '32', which made the tests take aeons; '8' provides + ;; a good range of lengths over which to fill and copy, which + ;; should tease out most errors in the code (if any exist). (It + ;; also makes this part of the test suite finish reasonably + ;; quickly.) + (assert (time (test-fill-bashing i 13 8))) + (assert (time (test-copy-bashing i 13 8))) until (= i sb-vm:n-word-bits)) + +(defun test-inlined-bashing (bitsize) + ;; We have to compile things separately for each bitsize so the + ;; compiler will work out the array type and trigger the REPLACE + ;; transform. + (let ((lambda-form + `(lambda () + (let* ((n-elements-per-word ,(truncate sb-vm:n-word-bits bitsize)) + (size (* 3 n-elements-per-word)) + (standard-dst (make-array size :element-type '(unsigned-byte ,bitsize))) + (bashed-dst (make-array size :element-type '(unsigned-byte ,bitsize))) + (source (make-array size :element-type '(unsigned-byte ,bitsize)))) + (declare (type (simple-array (unsigned-byte ,bitsize) (*)) + source standard-dst bashed-dst)) + (do ((i 0 (1+ i)) + (offset n-elements-per-word (1+ offset))) + ((>= offset (* 2 n-elements-per-word)) t) + (dolist (c (fill-bytes-for-testing ,bitsize)) + (fill-with-known-value (mod (lognot c) (ash 1 ,bitsize)) size + source standard-dst bashed-dst) + ;; fill with test-data + (fill source c :start offset :end (+ offset n-elements-per-word)) + ;; copy filled data to test vectors + ;; + ;; a) the slow way (which is actually fast, since this + ;; should be transformed into UB*-BASH-COPY) + (replace standard-dst source + :start1 (- offset n-elements-per-word i) + :start2 (- offset n-elements-per-word i) + :end1 offset :end2 offset) + ;; b) the fast way--we fold the + ;; :START{1,2} arguments above ourselves + ;; to trigger the REPLACE transform + (replace bashed-dst source + :start1 0 :start2 0 :end1 offset :end2 offset) + ;; check for errors + (when (or (mismatch standard-dst bashed-dst) + ;; trigger COPY-SEQ transform + (mismatch (copy-seq standard-dst) bashed-dst) + ;; trigger SUBSEQ transform + (mismatch (subseq standard-dst (- offset n-elements-per-word i)) + bashed-dst)) + (format t "Test with target-offset ~A, source-offset ~A, fill ~A, and length ~A failed.~%" + 0 0 c offset) + (format t "Mismatch:~% correct ~A~% actual ~A~%" + standard-dst + bashed-dst) + (return-from nil nil)))))))) + (funcall (compile nil lambda-form)))) + +#+#.(cl:if (cl:eq sb-ext:*evaluator-mode* :compile) '(and) '(or)) +(loop for i = 1 then (* i 2) do + (assert (test-inlined-bashing i)) + until (= i sb-vm:n-word-bits)) ;;; success diff --git a/version.lisp-expr b/version.lisp-expr index 1454db7..8c07c97 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.4.54" +"1.0.4.55" -- 1.7.10.4