1.0.4.55: Optimized REPLACE and UB*-BASH-COPY routines
authorNathan Froyd <froydnj@cs.rice.edu>
Tue, 10 Apr 2007 13:50:43 +0000 (13:50 +0000)
committerNathan Froyd <froydnj@cs.rice.edu>
Tue, 10 Apr 2007 13:50:43 +0000 (13:50 +0000)
* Expand simple cases of UB*-BASH-COPY inline to avoid full call
  overhead and generate better code generally;
* Handle more cases of REPLACE; we now optimize REPLACE on all
  simple specialized array types (only element types <= n-word-bits
  are handled generally, though);
* Use a single COPY-SEQ and SUBSEQ transform rather than one per
  specialized array type; generate inline copies for these too
  when possible;
* Tests;
* Backend cleanup: introduce a FIND-SAETP to eliminate duplicate code;
* Backend cleanup: change %{SET-,}VECTOR-RAW-BITS on x86 to use
  the *-WITH-OFFSET machinery.

NEWS
package-data-list.lisp-expr
src/compiler/generic/vm-array.lisp
src/compiler/generic/vm-fndb.lisp
src/compiler/generic/vm-tran.lisp
src/compiler/seqtran.lisp
src/compiler/x86/array.lisp
tests/seq.impure.lisp
version.lisp-expr

diff --git a/NEWS b/NEWS
index 4881cec..f3034d9 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,8 @@ changes in sbcl-1.0.5 relative to sbcl-1.0.4:
     in multithreaded application code.
   * optimization: GET-INTERNAL-REAL-TIME has been optimized on POSIX
     platforms. (thanks to James Anderson for the optimization hint)
+  * optimization: REPLACE, SUBSEQ, and COPY-SEQ are now optimized in
+    more cases.
   * enhancement: XREF information is now collected to references made
     to global variables using SYMBOL-VALUE with a constant argument.
   * bug fix: dead unbound variable references now signal an error.
index 8645a54..19e7f4e 100644 (file)
@@ -1145,7 +1145,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%MAP-TO-LIST-ARITY-1" "%MAP-TO-NIL-ON-SEQUENCE"
                "%MAP-TO-NIL-ON-SIMPLE-VECTOR" "%MAP-TO-NIL-ON-VECTOR"
                "%MASK-FIELD" "%NEGATE" "%POW" "%PUTHASH"
-               "%RAW-BITS" "%VECTOR-RAW-BITS"
+               "%RAW-BITS" "%RAW-BITS-WITH-OFFSET" "%VECTOR-RAW-BITS"
                "%RAW-REF-COMPLEX-DOUBLE" "%RAW-REF-COMPLEX-LONG"
                "%RAW-REF-COMPLEX-SINGLE" "%RAW-REF-DOUBLE"
                "%RAW-REF-LONG" "%RAW-REF-SINGLE"
@@ -1160,7 +1160,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%RAW-INSTANCE-REF/COMPLEX-DOUBLE"
                "%RAW-INSTANCE-SET/COMPLEX-DOUBLE"
                "%SET-ARRAY-DIMENSION" "%SET-FUNCALLABLE-INSTANCE-INFO"
-               "%SET-RAW-BITS" "%SET-VECTOR-RAW-BITS"
+               "%SET-RAW-BITS" "%SET-RAW-BITS-WITH-OFFSET"
+               "%SET-VECTOR-RAW-BITS"
                "%SET-SAP-REF-16" "%SET-SAP-REF-32" "%SET-SAP-REF-64"
                "%SET-SAP-REF-WORD" "%SET-SAP-REF-8" "%SET-SAP-REF-DOUBLE"
                "%SET-SAP-REF-LONG" "%SET-SAP-REF-SAP"
index ba48b3d..e0925e5 100644 (file)
   #!+sb-doc
   "An alist for mapping simple array element types to their
 corresponding primitive types.")
+
+(in-package "SB!C")
+
+(defun find-saetp (element-type)
+  (find element-type sb!vm:*specialized-array-element-type-properties*
+        :key #'sb!vm:saetp-specifier :test #'equal))
index 355cf7b..81ff030 100644 (file)
 
 (defknown %raw-bits (t fixnum) sb!vm:word
   (foldable flushable))
+#!+x86
+(defknown %raw-bits-with-offset (t fixnum fixnum) sb!vm:word
+  (flushable always-translatable))
 (defknown (%set-raw-bits) (t fixnum sb!vm:word) sb!vm:word
   (unsafe))
-;; These two are mostly used for bit-bashing operations.
+#!+x86
+(defknown (%set-raw-bits-with-offset) (t fixnum fixnum sb!vm:word) sb!vm:word
+  (unsafe always-translatable))
+;;; These two are mostly used for bit-bashing operations.
 (defknown %vector-raw-bits (t fixnum) sb!vm:word
-  (foldable flushable))
+  (flushable))
 (defknown (%set-vector-raw-bits) (t fixnum sb!vm:word) sb!vm:word
   (unsafe))
 
index 86efb42..f3791f0 100644 (file)
                          sb!vm:bignum-digits-offset
                          index offset))
 
+#!+x86
+(progn
+(define-source-transform sb!kernel:%vector-raw-bits (thing index)
+  `(sb!kernel:%raw-bits-with-offset ,thing ,index 2))
+
+(define-source-transform sb!kernel:%raw-bits (thing index)
+  `(sb!kernel:%raw-bits-with-offset ,thing ,index 0))
+
+(define-source-transform sb!kernel:%set-vector-raw-bits (thing index value)
+  `(sb!kernel:%set-raw-bits-with-offset ,thing ,index 2 ,value))
+
+(define-source-transform sb!kernel:%set-raw-bits (thing index value)
+  `(sb!kernel:%set-raw-bits-with-offset ,thing ,index 0 ,value))
+
+(deftransform sb!kernel:%raw-bits-with-offset ((thing index offset) * * :node node)
+  (fold-index-addressing 'sb!kernel:%raw-bits-with-offset
+                         sb!vm:n-word-bits sb!vm:other-pointer-lowtag
+                         0 index offset))
+
+(deftransform sb!kernel:%set-raw-bits-with-offset ((thing index offset value) * *)
+  (fold-index-addressing 'sb!kernel:%set-raw-bits-with-offset
+                         sb!vm:n-word-bits sb!vm:other-pointer-lowtag
+                         0 index offset t))
+) ; PROGN
+
 ;;; The layout is stored in slot 0.
 (define-source-transform %instance-layout (x)
   `(truly-the layout (%instance-ref ,x 0)))
 ;;; Transform data vector access to a form that opens up optimization
 ;;; opportunities.
 #!+x86
-(deftransform data-vector-ref ((array index) ((or simple-unboxed-array
+(deftransform data-vector-ref ((array index) ((or (simple-unboxed-array (*))
                                                   simple-vector)
                                               t))
   (let ((array-type (lvar-type array)))
     (unless (array-type-p array-type)
       (give-up-ir1-transform))
     (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
-           (saetp (find element-type
-                        sb!vm:*specialized-array-element-type-properties*
-                        :key #'sb!vm:saetp-specifier :test #'equal)))
+           (saetp (find-saetp element-type)))
       (unless (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)
         (give-up-ir1-transform))
       `(data-vector-ref-with-offset array index 0))))
 
 #!+x86
 (deftransform data-vector-ref-with-offset ((array index offset)
-                                           ((or simple-unboxed-array
+                                           ((or (simple-unboxed-array (*))
                                                 simple-vector)
                                             t t))
   (let ((array-type (lvar-type array)))
     (unless (array-type-p array-type)
       (give-up-ir1-transform))
     (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
-           (saetp (find element-type
-                        sb!vm:*specialized-array-element-type-properties*
-                        :key #'sb!vm:saetp-specifier :test #'equal)))
+           (saetp (find-saetp element-type)))
       (aver (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits))
       (fold-index-addressing 'data-vector-ref-with-offset
                              (sb!vm:saetp-n-bits saetp)
 ;;; opportunities.
 #!+x86
 (deftransform data-vector-set ((array index new-value)
-                               ((or simple-unboxed-array simple-vector)
+                               ((or (simple-unboxed-array (*)) simple-vector)
                                 t t))
   (let ((array-type (lvar-type array)))
     (unless (array-type-p array-type)
       (give-up-ir1-transform))
     (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
-           (saetp (find element-type
-                        sb!vm:*specialized-array-element-type-properties*
-                        :key #'sb!vm:saetp-specifier :test #'equal)))
+           (saetp (find-saetp element-type)))
       (unless (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)
         (give-up-ir1-transform))
       `(data-vector-set-with-offset array index 0 new-value))))
 
 #!+x86
 (deftransform data-vector-set-with-offset ((array index offset new-value)
-                                           ((or simple-unboxed-array
+                                           ((or (simple-unboxed-array (*))
                                                 simple-vector)
                                             t t t))
   (let ((array-type (lvar-type array)))
     (unless (array-type-p array-type)
       (give-up-ir1-transform))
     (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
-           (saetp (find element-type
-                        sb!vm:*specialized-array-element-type-properties*
-                        :key #'sb!vm:saetp-specifier :test #'equal)))
+           (saetp (find-saetp element-type)))
       (aver (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits))
       (fold-index-addressing 'data-vector-set-with-offset
                              (sb!vm:saetp-n-bits saetp)
index 24fe5ce..a3126da 100644 (file)
   (def string/=* identity))
 
 \f
-;;;; string-only transforms for sequence functions
-;;;;
-;;;; Note: CMU CL had more of these, including transforms for
-;;;; functions which cons. In SBCL, we've gotten rid of most of the
-;;;; transforms for functions which cons, since our GC overhead is
-;;;; sufficiently large that it doesn't seem worth it to try to
-;;;; economize on function call overhead or on the overhead of runtime
-;;;; type dispatch in AREF. The exception is CONCATENATE, since
-;;;; a full call to CONCATENATE would have to look up the sequence
-;;;; type, which can be really slow.
-
-;;; Moved here from generic/vm-tran.lisp to satisfy clisp
-;;;
-;;; FIXME: Add a comment telling whether this holds for all vectors
-;;; or only for vectors based on simple arrays (non-adjustable, etc.).
+;;;; transforms for sequence functions
+
+;;; Moved here from generic/vm-tran.lisp to satisfy clisp.  Only applies
+;;; to vectors based on simple arrays.
 (def!constant vector-data-bit-offset
   (* sb!vm:vector-data-offset sb!vm:n-word-bits))
 
        (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits)))
 ) ; EVAL-WHEN
 
-;; FIXME: It turns out that this transform (for SIMPLE-BASE-STRINGS)
-;; is critical for the performance of string streams.  Make this
-;; more explicit.
+;;; FIXME: In the copy loops below, we code the loops in a strange
+;;; fashion:
+;;;
+;;; (do ((i (+ src-offset length) (1- i)))
+;;;     ((<= i 0) ...)
+;;;   (... (aref foo (1- i)) ...))
+;;;
+;;; rather than the more natural (and seemingly more efficient):
+;;;
+;;; (do ((i (1- (+ src-offset length)) (1- i)))
+;;;     ((< i 0) ...)
+;;;   (... (aref foo i) ...))
+;;;
+;;; (more efficient because we don't have to do the index adjusting on
+;;; every iteration of the loop)
+;;;
+;;; We do this to avoid a suboptimality in SBCL's backend.  In the
+;;; latter case, the backend thinks I is a FIXNUM (which it is), but
+;;; when used as an array index, the backend thinks I is a
+;;; POSITIVE-FIXNUM (which it is).  However, since the backend thinks of
+;;; these as distinct storage classes, it cannot coerce a move from a
+;;; FIXNUM TN to a POSITIVE-FIXNUM TN.  The practical effect of this
+;;; deficiency is that we have two extra moves and increased register
+;;; pressure, which can lead to some spectacularly bad register
+;;; allocation.  (sub-FIXME: the register allocation even with the
+;;; strangely written loops is not always excellent, either...).  Doing
+;;; it the first way, above, means that I is always thought of as a
+;;; POSITIVE-FIXNUM and there are no issues.
+;;;
+;;; Besides, the *-WITH-OFFSET machinery will fold those index
+;;; adjustments in the first version into the array addressing at no
+;;; performance penalty!
+
+;;; This transform is critical to the performance of string streams.  If
+;;; you tweak it, make sure that you compare the disassembly, if not the
+;;; performance of, the functions implementing string streams
+;;; (e.g. SB!IMPL::STRING-OUCH).
 (macrolet
     ((define-replace-transforms ()
        (loop for saetp across sb!vm:*specialized-array-element-type-properties*
-             when (valid-bit-bash-saetp-p saetp)
+             for sequence-type = `(simple-array ,(sb!vm:saetp-specifier saetp) (*))
+             unless (= (sb!vm:saetp-typecode saetp) sb!vm::simple-array-nil-widetag)
              collect
-             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
-                    (n-element-bits (sb!vm:saetp-n-bits saetp))
-                    (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                            (find-package "SB!KERNEL"))))
-               `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
-                                       (,sequence-type ,sequence-type &rest t)
-                                       ,sequence-type
-                                       :node node)
-                 `(let* ((len1 (length seq1))
-                         (len2 (length seq2))
-                         (end1 (or end1 len1))
-                         (end2 (or end2 len2))
-                         (replace-len1 (- end1 start1))
-                         (replace-len2 (- end2 start2)))
-                   ,(unless (policy node (= safety 0))
-                     `(progn
-                       (unless (<= 0 start1 end1 len1)
-                         (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
-                       (unless (<= 0 start2 end2 len2)
-                         (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
-                   (funcall (function ,',bash-function)
-                    seq2 start2
-                    seq1 start1
-                    (min replace-len1 replace-len2))
-                   seq1)))
+            `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
+                                    (,sequence-type ,sequence-type &rest t)
+                                    ,sequence-type
+                                    :node node)
+               ,(cond
+                 ((valid-bit-bash-saetp-p saetp) nil)
+                 ;; If we're not bit-bashing, only allow cases where we
+                 ;; can determine the order of copying up front.  (There
+                 ;; are actually more cases we can handle if we know the
+                 ;; amount that we're copying, but this handles the
+                 ;; common cases.)
+                 (t '(unless (= (constant-value-or-lose start1 0)
+                              (constant-value-or-lose start2 0))
+                      (give-up-ir1-transform))))
+               `(let* ((len1 (length seq1))
+                       (len2 (length seq2))
+                       (end1 (or end1 len1))
+                       (end2 (or end2 len2))
+                       (replace-len1 (- end1 start1))
+                       (replace-len2 (- end2 start2)))
+                  ,(unless (policy node (= safety 0))
+                           `(progn
+                              (unless (<= 0 start1 end1 len1)
+                                (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
+                              (unless (<= 0 start2 end2 len2)
+                                (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
+                  ,',(cond
+                      ((valid-bit-bash-saetp-p saetp)
+                       (let* ((n-element-bits (sb!vm:saetp-n-bits saetp))
+                              (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
+                                                     (find-package "SB!KERNEL"))))
+                         `(funcall (function ,bash-function) seq2 start2
+                                   seq1 start1 (min replace-len1 replace-len2))))
+                      (t
+                       ;; We can expand the loop inline here because we
+                       ;; would have given up the transform (see above)
+                       ;; if we didn't have constant matching start
+                       ;; indices.
+                       '(do ((i start1 (1+ i))
+                             (end (+ start1
+                                     (min replace-len1 replace-len2))))
+                         ((>= i end))
+                         (declare (optimize (insert-array-bounds-checks 0)))
+                         (setf (aref seq1 i) (aref seq2 i)))))
+                  seq1))
              into forms
              finally (return `(progn ,@forms)))))
   (define-replace-transforms))
 
-(macrolet
-    ((define-subseq-transforms ()
-       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
-             when (valid-bit-bash-saetp-p saetp)
-             collect
-             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
-                    (n-element-bits (sb!vm:saetp-n-bits saetp))
-                    (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                           (find-package "SB!KERNEL"))))
-               `(deftransform subseq ((seq start &optional end)
-                                      (,sequence-type t &optional t)
-                                      ,sequence-type :node node)
-                 `(let* ((length (length seq))
-                         (end (if end (min end length) length)))
-                   ,(unless (policy node (= safety 0))
-                     `(progn
-                       (unless (<= 0 start end length)
-                         (sb!impl::signal-bounding-indices-bad-error seq start end))))
-                   (let* ((size (- end start))
-                          (result (make-array size :element-type ',',(sb!vm:saetp-specifier saetp))))
-                     (funcall (function ,',bash-function)
-                              seq start result 0 size)
-                     result))))
-             into forms
-             finally (return `(progn ,@forms)))))
-  (define-subseq-transforms))
-
-(macrolet
-    ((define-copy-seq-transforms ()
-       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
-             when (valid-bit-bash-saetp-p saetp)
-             collect
-             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
-                    (n-element-bits (sb!vm:saetp-n-bits saetp))
-                    (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                           (find-package "SB!KERNEL"))))
-               `(deftransform copy-seq ((seq) (,sequence-type)
-                                        ,sequence-type)
-                 `(let* ((length (length seq))
-                         (result (make-array length :element-type ',',(sb!vm:saetp-specifier saetp))))
-                   (funcall (function ,',bash-function)
-                    seq 0 result 0 length)
-                   result)))
-             into forms
-             finally (return `(progn ,@forms)))))
-  (define-copy-seq-transforms))
+;;; Expand simple cases of UB<SIZE>-BASH-COPY inline.  "simple" is
+;;; defined as those cases where we are doing word-aligned copies from
+;;; both the source and the destination and we are copying from the same
+;;; offset from both the source and the destination.  (The last
+;;; condition is there so we can determine the direction to copy at
+;;; compile time rather than runtime.  Remember that UB<SIZE>-BASH-COPY
+;;; acts like memmove, not memcpy.)  These conditions may seem rather
+;;; restrictive, but they do catch common cases, like allocating a (* 2
+;;; N)-size buffer and blitting in the old N-size buffer in.
+
+(defun frob-bash-transform (src src-offset
+                            dst dst-offset
+                            length n-elems-per-word)
+  (declare (ignore src dst length))
+  (let ((n-bits-per-elem (truncate sb!vm:n-word-bits n-elems-per-word)))
+    (multiple-value-bind (src-word src-elt)
+        (truncate (lvar-value src-offset) n-elems-per-word)
+      (multiple-value-bind (dst-word dst-elt)
+          (truncate (lvar-value dst-offset) n-elems-per-word)
+        ;; Avoid non-word aligned copies.
+        (unless (and (zerop src-elt) (zerop dst-elt))
+          (give-up-ir1-transform))
+        ;; Avoid copies where we would have to insert code for
+        ;; determining the direction of copying.
+        (unless (= src-word dst-word)
+          (give-up-ir1-transform))
+        ;; FIXME: The cross-compiler doesn't optimize TRUNCATE properly,
+        ;; so we have to do its work here.
+        `(let ((end (+ ,src-word ,(if (= n-elems-per-word 1)
+                                      'length
+                                      `(truncate (the index length) ,n-elems-per-word)))))
+           (declare (type index end))
+           ;; Handle any bits at the end.
+           (when (logtest length (1- ,n-elems-per-word))
+             (let* ((extra (mod length ,n-elems-per-word))
+                    ;; FIXME: The shift amount on this ASH is
+                    ;; *always* negative, but the backend doesn't
+                    ;; have a NEGATIVE-FIXNUM primitive type, so we
+                    ;; wind up with a pile of code that tests the
+                    ;; sign of the shift count prior to shifting when
+                    ;; all we need is a simple negate and shift
+                    ;; right.  Yuck.
+                    (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
+                               (* (- extra ,n-elems-per-word)
+                                  ,n-bits-per-elem))))
+               (setf (sb!kernel:%vector-raw-bits dst end)
+                     (logior
+                      (logandc2 (sb!kernel:%vector-raw-bits dst end)
+                                (ash mask
+                                     ,(ecase sb!c:*backend-byte-order*
+                                             (:little-endian 0)
+                                             (:big-endian `(* (- ,n-elems-per-word extra)
+                                                              ,n-bits-per-elem)))))
+                      (logand (sb!kernel:%vector-raw-bits src end)
+                              (ash mask
+                                   ,(ecase sb!c:*backend-byte-order*
+                                           (:little-endian 0)
+                                           (:big-endian `(* (- ,n-elems-per-word extra)
+                                                            ,n-bits-per-elem)))))))))
+           ;; Copy from the end to save a register.
+           (do ((i end (1- i)))
+               ((<= i ,src-word))
+             (setf (sb!kernel:%vector-raw-bits dst (1- i))
+                   (sb!kernel:%vector-raw-bits src (1- i)))))))))
+
+#.(loop for i = 1 then (* i 2)
+        collect `(deftransform ,(intern (format nil "UB~D-BASH-COPY" i)
+                                        "SB!KERNEL")
+                                                        ((src src-offset
+                                                          dst dst-offset
+                                                          length)
+                                                        ((simple-unboxed-array (*))
+                                                         (constant-arg index)
+                                                         (simple-unboxed-array (*))
+                                                         (constant-arg index)
+                                                         index)
+                                                        *)
+                  (frob-bash-transform src src-offset
+                                       dst dst-offset length
+                                       ,(truncate sb!vm:n-word-bits i))) into forms
+        until (= i sb!vm:n-word-bits)
+        finally (return `(progn ,@forms)))
+
+;;; We expand copy loops inline in SUBSEQ and COPY-SEQ if we're copying
+;;; arrays with elements of size >= the word size.  We do this because
+;;; we know the arrays cannot alias (one was just consed), therefore we
+;;; can determine at compile time the direction to copy, and for
+;;; word-sized elements, UB<WORD-SIZE>-BASH-COPY will do a bit of
+;;; needless checking to figure out what's going on.  The same
+;;; considerations apply if we are copying elements larger than the word
+;;; size, with the additional twist that doing it inline is likely to
+;;; cons far less than calling REPLACE and letting generic code do the
+;;; work.
+;;;
+;;; However, we do not do this for elements whose size is < than the
+;;; word size because we don't want to deal with any alignment issues
+;;; inline.  The UB*-BASH-COPY transforms might fix things up later
+;;; anyway.
+
+(defun maybe-expand-copy-loop-inline (src src-offset dst dst-offset length
+                                      element-type)
+  (let ((saetp (find-saetp element-type)))
+    (aver saetp)
+    (if (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits)
+        (expand-aref-copy-loop src src-offset dst dst-offset length)
+        `(locally (declare (optimize (safety 0)))
+           (replace ,dst ,src :start1 ,dst-offset :start2 ,src-offset :end1 ,length)))))
+
+(defun expand-aref-copy-loop (src src-offset dst dst-offset length)
+  (if (eql src-offset dst-offset)
+      `(do ((i (+ ,src-offset ,length) (1- i)))
+           ((<= i ,src-offset))
+         (declare (optimize (insert-array-bounds-checks 0)))
+         (setf (aref ,dst (1- i)) (aref ,src (1- i))))
+      `(do ((i (+ ,src-offset ,length) (1- i))
+            (j (+ ,dst-offset ,length) (1- j)))
+           ((<= i ,src-offset))
+         (declare (optimize (insert-array-bounds-checks 0)))
+         (setf (aref ,dst (1- j)) (aref ,src (1- i))))))
+
+(deftransform subseq ((seq start &optional end)
+                      ((or (simple-unboxed-array (*)) simple-vector) t &optional t)
+                      * :node node)
+  (let ((array-type (lvar-type seq)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let ((element-type (type-specifier (array-type-specialized-element-type array-type))))
+      `(let* ((length (length seq))
+              (end (or end length)))
+         ,(unless (policy node (= safety 0))
+                  '(progn
+                    (unless (<= 0 start end length)
+                      (sb!impl::signal-bounding-indices-bad-error seq start end))))
+         (let* ((size (- end start))
+                (result (make-array size :element-type ',element-type)))
+           ,(maybe-expand-copy-loop-inline 'seq 'start 'result 0 'size element-type)
+           result)))))
+
+(deftransform copy-seq ((seq) ((or (simple-unboxed-array (*)) simple-vector)) *)
+  (let ((array-type (lvar-type seq)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let ((element-type (type-specifier (array-type-specialized-element-type array-type))))
+      `(let* ((length (length seq))
+              (result (make-array length :element-type ',element-type)))
+         ,(maybe-expand-copy-loop-inline 'seq 0 'result 0 'length element-type)
+         result))))
 
 ;;; FIXME: it really should be possible to take advantage of the
 ;;; macros used in code/seq.lisp here to avoid duplication of code,
index 9f60029..3926463 100644 (file)
 \f
 ;;; These vops are useful for accessing the bits of a vector
 ;;; irrespective of what type of vector it is.
-(define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg)
-  unsigned-num %raw-bits)
-(define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
-  unsigned-num %set-raw-bits)
-(define-full-reffer vector-raw-bits * vector-data-offset other-pointer-lowtag
-  (unsigned-reg) unsigned-num %vector-raw-bits)
-(define-full-setter set-vector-raw-bits * vector-data-offset other-pointer-lowtag
-  (unsigned-reg) unsigned-num %set-vector-raw-bits)
+(define-full-reffer+offset raw-bits-with-offset * 0 other-pointer-lowtag (unsigned-reg)
+  unsigned-num %raw-bits-with-offset)
+(define-full-setter+offset set-raw-bits-with-offset * 0 other-pointer-lowtag (unsigned-reg)
+  unsigned-num %set-raw-bits-with-offset)
+
 \f
 ;;;; miscellaneous array VOPs
 
index ae10560..b8ee470 100644 (file)
 ;; Too slow for the interpreter
 #+#.(cl:if (cl:eq sb-ext:*evaluator-mode* :compile) '(and) '(or))
 (loop for i = 1 then (* i 2) do
-     ;; the bare '32' here is fairly arbitrary; '8' provides a good
-     ;; range of lengths over which to fill and copy, which should tease
-     ;; out most errors in the code (if any exist).  (It also makes this
-     ;; part of the test suite finish reasonably quickly.)
-     (assert (test-fill-bashing i 32 8))
-     (assert (test-copy-bashing i 32 8))
+     ;; the bare '13' here is fairly arbitrary, except that it's been
+     ;; reduced from '32', which made the tests take aeons; '8' provides
+     ;; a good range of lengths over which to fill and copy, which
+     ;; should tease out most errors in the code (if any exist).  (It
+     ;; also makes this part of the test suite finish reasonably
+     ;; quickly.)
+     (assert (time (test-fill-bashing i 13 8)))
+     (assert (time (test-copy-bashing i 13 8)))
      until (= i sb-vm:n-word-bits))
+
+(defun test-inlined-bashing (bitsize)
+  ;; We have to compile things separately for each bitsize so the
+  ;; compiler will work out the array type and trigger the REPLACE
+  ;; transform.
+  (let ((lambda-form
+         `(lambda ()
+            (let* ((n-elements-per-word ,(truncate sb-vm:n-word-bits bitsize))
+                   (size (* 3 n-elements-per-word))
+                   (standard-dst (make-array size :element-type '(unsigned-byte ,bitsize)))
+                   (bashed-dst (make-array size :element-type '(unsigned-byte ,bitsize)))
+                   (source (make-array size :element-type '(unsigned-byte ,bitsize))))
+              (declare (type (simple-array (unsigned-byte ,bitsize) (*))
+                             source standard-dst bashed-dst))
+              (do ((i 0 (1+ i))
+                   (offset n-elements-per-word (1+ offset)))
+                  ((>= offset (* 2 n-elements-per-word)) t)
+                (dolist (c (fill-bytes-for-testing ,bitsize))
+                  (fill-with-known-value (mod (lognot c) (ash 1 ,bitsize)) size
+                                         source standard-dst bashed-dst)
+                  ;; fill with test-data
+                  (fill source c :start offset :end (+ offset n-elements-per-word))
+                  ;; copy filled data to test vectors
+                  ;;
+                  ;; a) the slow way (which is actually fast, since this
+                  ;; should be transformed into UB*-BASH-COPY)
+                  (replace standard-dst source
+                           :start1 (- offset n-elements-per-word i)
+                           :start2 (- offset n-elements-per-word i)
+                           :end1 offset :end2 offset)
+                  ;; b) the fast way--we fold the
+                  ;; :START{1,2} arguments above ourselves
+                  ;; to trigger the REPLACE transform
+                  (replace bashed-dst source
+                           :start1 0 :start2 0 :end1 offset :end2 offset)
+                  ;; check for errors
+                  (when (or (mismatch standard-dst bashed-dst)
+                            ;; trigger COPY-SEQ transform
+                            (mismatch (copy-seq standard-dst) bashed-dst)
+                            ;; trigger SUBSEQ transform
+                            (mismatch (subseq standard-dst (- offset n-elements-per-word i))
+                                      bashed-dst))
+                    (format t "Test with target-offset ~A, source-offset ~A, fill ~A, and length ~A failed.~%"
+                            0 0 c offset)
+                    (format t "Mismatch:~% correct ~A~% actual  ~A~%"
+                            standard-dst
+                            bashed-dst)
+                    (return-from nil nil))))))))
+    (funcall (compile nil lambda-form))))
+
+#+#.(cl:if (cl:eq sb-ext:*evaluator-mode* :compile) '(and) '(or))
+(loop for i = 1 then (* i 2) do
+      (assert (test-inlined-bashing i))
+      until (= i sb-vm:n-word-bits))
 \f
 ;;; success
index 1454db7..8c07c97 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.4.54"
+"1.0.4.55"