1.0.5.3: compiling a SUBSEQ on a SIMPLE-VECTOR should not give notes
[sbcl.git] / src / compiler / seqtran.lisp
index 323dcad..2699964 100644 (file)
                                     'list)
                                    (t
                                     (give-up-ir1-transform
-                                     "can't determine result type")))))
+                                     "result type unsuitable")))))
       (cond ((and result-type-value (null seqs))
              ;; The consing arity-1 cases can be implemented
              ;; reasonably efficiently as function calls, and the cost
   (def string/=* identity))
 
 \f
-;;;; string-only transforms for sequence functions
-;;;;
-;;;; Note: CMU CL had more of these, including transforms for
-;;;; functions which cons. In SBCL, we've gotten rid of most of the
-;;;; transforms for functions which cons, since our GC overhead is
-;;;; sufficiently large that it doesn't seem worth it to try to
-;;;; economize on function call overhead or on the overhead of runtime
-;;;; type dispatch in AREF. The exception is CONCATENATE, since
-;;;; a full call to CONCATENATE would have to look up the sequence
-;;;; type, which can be really slow.
-
-;;; Moved here from generic/vm-tran.lisp to satisfy clisp
-;;;
-;;; FIXME: Add a comment telling whether this holds for all vectors
-;;; or only for vectors based on simple arrays (non-adjustable, etc.).
+;;;; transforms for sequence functions
+
+;;; Moved here from generic/vm-tran.lisp to satisfy clisp.  Only applies
+;;; to vectors based on simple arrays.
 (def!constant vector-data-bit-offset
   (* sb!vm:vector-data-offset sb!vm:n-word-bits))
 
        (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits)))
 ) ; EVAL-WHEN
 
-;; FIXME: It turns out that this transform (for SIMPLE-BASE-STRINGS)
-;; is critical for the performance of string streams.  Make this
-;; more explicit.
+;;; FIXME: In the copy loops below, we code the loops in a strange
+;;; fashion:
+;;;
+;;; (do ((i (+ src-offset length) (1- i)))
+;;;     ((<= i 0) ...)
+;;;   (... (aref foo (1- i)) ...))
+;;;
+;;; rather than the more natural (and seemingly more efficient):
+;;;
+;;; (do ((i (1- (+ src-offset length)) (1- i)))
+;;;     ((< i 0) ...)
+;;;   (... (aref foo i) ...))
+;;;
+;;; (more efficient because we don't have to do the index adjusting on
+;;; every iteration of the loop)
+;;;
+;;; We do this to avoid a suboptimality in SBCL's backend.  In the
+;;; latter case, the backend thinks I is a FIXNUM (which it is), but
+;;; when used as an array index, the backend thinks I is a
+;;; POSITIVE-FIXNUM (which it is).  However, since the backend thinks of
+;;; these as distinct storage classes, it cannot coerce a move from a
+;;; FIXNUM TN to a POSITIVE-FIXNUM TN.  The practical effect of this
+;;; deficiency is that we have two extra moves and increased register
+;;; pressure, which can lead to some spectacularly bad register
+;;; allocation.  (sub-FIXME: the register allocation even with the
+;;; strangely written loops is not always excellent, either...).  Doing
+;;; it the first way, above, means that I is always thought of as a
+;;; POSITIVE-FIXNUM and there are no issues.
+;;;
+;;; Besides, the *-WITH-OFFSET machinery will fold those index
+;;; adjustments in the first version into the array addressing at no
+;;; performance penalty!
+
+;;; This transform is critical to the performance of string streams.  If
+;;; you tweak it, make sure that you compare the disassembly, if not the
+;;; performance of, the functions implementing string streams
+;;; (e.g. SB!IMPL::STRING-OUCH).
 (macrolet
     ((define-replace-transforms ()
        (loop for saetp across sb!vm:*specialized-array-element-type-properties*
-             when (valid-bit-bash-saetp-p saetp)
+             for sequence-type = `(simple-array ,(sb!vm:saetp-specifier saetp) (*))
+             unless (= (sb!vm:saetp-typecode saetp) sb!vm::simple-array-nil-widetag)
              collect
-             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
-                    (n-element-bits (sb!vm:saetp-n-bits saetp))
-                    (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                            (find-package "SB!KERNEL"))))
-               `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
-                                       (,sequence-type ,sequence-type &rest t)
-                                       ,sequence-type
-                                       :node node)
-                 `(let* ((len1 (length seq1))
-                         (len2 (length seq2))
-                         (end1 (or end1 len1))
-                         (end2 (or end2 len2))
-                         (replace-len1 (- end1 start1))
-                         (replace-len2 (- end2 start2)))
-                   ,(unless (policy node (= safety 0))
-                     `(progn
-                       (unless (<= 0 start1 end1 len1)
-                         (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
-                       (unless (<= 0 start2 end2 len2)
-                         (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
-                   (funcall (function ,',bash-function)
-                    seq2 start2
-                    seq1 start1
-                    (min replace-len1 replace-len2))
-                   seq1)))
+            `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
+                                    (,sequence-type ,sequence-type &rest t)
+                                    ,sequence-type
+                                    :node node)
+               ,(cond
+                 ((valid-bit-bash-saetp-p saetp) nil)
+                 ;; If we're not bit-bashing, only allow cases where we
+                 ;; can determine the order of copying up front.  (There
+                 ;; are actually more cases we can handle if we know the
+                 ;; amount that we're copying, but this handles the
+                 ;; common cases.)
+                 (t '(unless (= (constant-value-or-lose start1 0)
+                              (constant-value-or-lose start2 0))
+                      (give-up-ir1-transform))))
+               `(let* ((len1 (length seq1))
+                       (len2 (length seq2))
+                       (end1 (or end1 len1))
+                       (end2 (or end2 len2))
+                       (replace-len1 (- end1 start1))
+                       (replace-len2 (- end2 start2)))
+                  ,(unless (policy node (= safety 0))
+                           `(progn
+                              (unless (<= 0 start1 end1 len1)
+                                (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
+                              (unless (<= 0 start2 end2 len2)
+                                (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
+                  ,',(cond
+                      ((valid-bit-bash-saetp-p saetp)
+                       (let* ((n-element-bits (sb!vm:saetp-n-bits saetp))
+                              (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
+                                                     (find-package "SB!KERNEL"))))
+                         `(funcall (function ,bash-function) seq2 start2
+                                   seq1 start1 (min replace-len1 replace-len2))))
+                      (t
+                       ;; We can expand the loop inline here because we
+                       ;; would have given up the transform (see above)
+                       ;; if we didn't have constant matching start
+                       ;; indices.
+                       '(do ((i start1 (1+ i))
+                             (end (+ start1
+                                     (min replace-len1 replace-len2))))
+                         ((>= i end))
+                         (declare (optimize (insert-array-bounds-checks 0)))
+                         (setf (aref seq1 i) (aref seq2 i)))))
+                  seq1))
              into forms
              finally (return `(progn ,@forms)))))
   (define-replace-transforms))
 
-(macrolet
-    ((define-subseq-transforms ()
-       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
-             when (valid-bit-bash-saetp-p saetp)
-             collect
-             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
-                    (n-element-bits (sb!vm:saetp-n-bits saetp))
-                    (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                           (find-package "SB!KERNEL"))))
-               `(deftransform subseq ((seq start &optional end)
-                                      (,sequence-type t &optional t)
-                                      ,sequence-type :node node)
-                 `(let* ((length (length seq))
-                         (end (if end (min end length) length)))
-                   ,(unless (policy node (= safety 0))
-                     `(progn
-                       (unless (<= 0 start end length)
-                         (sb!impl::signal-bounding-indices-bad-error seq start end))))
-                   (let* ((size (- end start))
-                          (result (make-array size :element-type ',',(sb!vm:saetp-specifier saetp))))
-                     (funcall (function ,',bash-function)
-                              seq start result 0 size)
-                     result))))
-             into forms
-             finally (return `(progn ,@forms)))))
-  (define-subseq-transforms))
-
-(macrolet
-    ((define-copy-seq-transforms ()
-       (loop for saetp across sb!vm:*specialized-array-element-type-properties*
-             when (valid-bit-bash-saetp-p saetp)
-             collect
-             (let* ((sequence-type `(simple-array ,(sb!vm:saetp-specifier saetp) (*)))
-                    (n-element-bits (sb!vm:saetp-n-bits saetp))
-                    (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                           (find-package "SB!KERNEL"))))
-               `(deftransform copy-seq ((seq) (,sequence-type)
-                                        ,sequence-type)
-                 `(let* ((length (length seq))
-                         (result (make-array length :element-type ',',(sb!vm:saetp-specifier saetp))))
-                   (funcall (function ,',bash-function)
-                    seq 0 result 0 length)
-                   result)))
-             into forms
-             finally (return `(progn ,@forms)))))
-  (define-copy-seq-transforms))
-
-;;; FIXME: this would be a valid transform for certain excluded cases:
-;;;   * :TEST 'CHAR= or :TEST #'CHAR=
-;;;   * :TEST 'EQL   or :TEST #'EQL
-;;;   * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity)
-(deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2)
-                      (simple-string simple-string &rest t)
+;;; Expand simple cases of UB<SIZE>-BASH-COPY inline.  "simple" is
+;;; defined as those cases where we are doing word-aligned copies from
+;;; both the source and the destination and we are copying from the same
+;;; offset from both the source and the destination.  (The last
+;;; condition is there so we can determine the direction to copy at
+;;; compile time rather than runtime.  Remember that UB<SIZE>-BASH-COPY
+;;; acts like memmove, not memcpy.)  These conditions may seem rather
+;;; restrictive, but they do catch common cases, like allocating a (* 2
+;;; N)-size buffer and blitting in the old N-size buffer in.
+
+(defun frob-bash-transform (src src-offset
+                            dst dst-offset
+                            length n-elems-per-word)
+  (declare (ignore src dst length))
+  (let ((n-bits-per-elem (truncate sb!vm:n-word-bits n-elems-per-word)))
+    (multiple-value-bind (src-word src-elt)
+        (truncate (lvar-value src-offset) n-elems-per-word)
+      (multiple-value-bind (dst-word dst-elt)
+          (truncate (lvar-value dst-offset) n-elems-per-word)
+        ;; Avoid non-word aligned copies.
+        (unless (and (zerop src-elt) (zerop dst-elt))
+          (give-up-ir1-transform))
+        ;; Avoid copies where we would have to insert code for
+        ;; determining the direction of copying.
+        (unless (= src-word dst-word)
+          (give-up-ir1-transform))
+        ;; FIXME: The cross-compiler doesn't optimize TRUNCATE properly,
+        ;; so we have to do its work here.
+        `(let ((end (+ ,src-word ,(if (= n-elems-per-word 1)
+                                      'length
+                                      `(truncate (the index length) ,n-elems-per-word)))))
+           (declare (type index end))
+           ;; Handle any bits at the end.
+           (when (logtest length (1- ,n-elems-per-word))
+             (let* ((extra (mod length ,n-elems-per-word))
+                    ;; FIXME: The shift amount on this ASH is
+                    ;; *always* negative, but the backend doesn't
+                    ;; have a NEGATIVE-FIXNUM primitive type, so we
+                    ;; wind up with a pile of code that tests the
+                    ;; sign of the shift count prior to shifting when
+                    ;; all we need is a simple negate and shift
+                    ;; right.  Yuck.
+                    (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
+                               (* (- extra ,n-elems-per-word)
+                                  ,n-bits-per-elem))))
+               (setf (sb!kernel:%vector-raw-bits dst end)
+                     (logior
+                      (logandc2 (sb!kernel:%vector-raw-bits dst end)
+                                (ash mask
+                                     ,(ecase sb!c:*backend-byte-order*
+                                             (:little-endian 0)
+                                             (:big-endian `(* (- ,n-elems-per-word extra)
+                                                              ,n-bits-per-elem)))))
+                      (logand (sb!kernel:%vector-raw-bits src end)
+                              (ash mask
+                                   ,(ecase sb!c:*backend-byte-order*
+                                           (:little-endian 0)
+                                           (:big-endian `(* (- ,n-elems-per-word extra)
+                                                            ,n-bits-per-elem)))))))))
+           ;; Copy from the end to save a register.
+           (do ((i end (1- i)))
+               ((<= i ,src-word))
+             (setf (sb!kernel:%vector-raw-bits dst (1- i))
+                   (sb!kernel:%vector-raw-bits src (1- i)))))))))
+
+#.(loop for i = 1 then (* i 2)
+        collect `(deftransform ,(intern (format nil "UB~D-BASH-COPY" i)
+                                        "SB!KERNEL")
+                                                        ((src src-offset
+                                                          dst dst-offset
+                                                          length)
+                                                        ((simple-unboxed-array (*))
+                                                         (constant-arg index)
+                                                         (simple-unboxed-array (*))
+                                                         (constant-arg index)
+                                                         index)
+                                                        *)
+                  (frob-bash-transform src src-offset
+                                       dst dst-offset length
+                                       ,(truncate sb!vm:n-word-bits i))) into forms
+        until (= i sb!vm:n-word-bits)
+        finally (return `(progn ,@forms)))
+
+;;; We expand copy loops inline in SUBSEQ and COPY-SEQ if we're copying
+;;; arrays with elements of size >= the word size.  We do this because
+;;; we know the arrays cannot alias (one was just consed), therefore we
+;;; can determine at compile time the direction to copy, and for
+;;; word-sized elements, UB<WORD-SIZE>-BASH-COPY will do a bit of
+;;; needless checking to figure out what's going on.  The same
+;;; considerations apply if we are copying elements larger than the word
+;;; size, with the additional twist that doing it inline is likely to
+;;; cons far less than calling REPLACE and letting generic code do the
+;;; work.
+;;;
+;;; However, we do not do this for elements whose size is < than the
+;;; word size because we don't want to deal with any alignment issues
+;;; inline.  The UB*-BASH-COPY transforms might fix things up later
+;;; anyway.
+
+(defun maybe-expand-copy-loop-inline (src src-offset dst dst-offset length
+                                      element-type)
+  (let ((saetp (find-saetp element-type)))
+    (aver saetp)
+    (if (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits)
+        (expand-aref-copy-loop src src-offset dst dst-offset length)
+        `(locally (declare (optimize (safety 0)))
+           (replace ,dst ,src :start1 ,dst-offset :start2 ,src-offset :end1 ,length)))))
+
+(defun expand-aref-copy-loop (src src-offset dst dst-offset length)
+  (if (eql src-offset dst-offset)
+      `(do ((i (+ ,src-offset ,length) (1- i)))
+           ((<= i ,src-offset))
+         (declare (optimize (insert-array-bounds-checks 0)))
+         (setf (aref ,dst (1- i)) (aref ,src (1- i))))
+      ;; KLUDGE: The compiler is not able to derive that (+ offset
+      ;; length) must be a fixnum, but arrives at (unsigned-byte 29).
+      ;; We, however, know it must be so, as by this point the bounds
+      ;; have already been checked.
+      `(do ((i (truly-the fixnum (+ ,src-offset ,length)) (1- i))
+            (j (+ ,dst-offset ,length) (1- j)))
+           ((<= i ,src-offset))
+         (declare (optimize (insert-array-bounds-checks 0))
+                  (type (integer 0 #.sb!xc:array-dimension-limit) j i))
+         (setf (aref ,dst (1- j)) (aref ,src (1- i))))))
+
+(deftransform subseq ((seq start &optional end)
+                      ((or (simple-unboxed-array (*)) simple-vector) t &optional t)
+                      * :node node)
+  (let ((array-type (lvar-type seq)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let ((element-type (type-specifier (array-type-specialized-element-type array-type))))
+      `(let* ((length (length seq))
+              (end (or end length)))
+         ,(unless (policy node (= safety 0))
+                  '(progn
+                    (unless (<= 0 start end length)
+                      (sb!impl::signal-bounding-indices-bad-error seq start end))))
+         (let* ((size (- end start))
+                (result (make-array size :element-type ',element-type)))
+           ,(maybe-expand-copy-loop-inline 'seq 'start 'result 0 'size element-type)
+           result)))))
+
+(deftransform copy-seq ((seq) ((or (simple-unboxed-array (*)) simple-vector)) *)
+  (let ((array-type (lvar-type seq)))
+    (unless (array-type-p array-type)
+      (give-up-ir1-transform))
+    (let ((element-type (type-specifier (array-type-specialized-element-type array-type))))
+      `(let* ((length (length seq))
+              (result (make-array length :element-type ',element-type)))
+         ,(maybe-expand-copy-loop-inline 'seq 0 'result 0 'length element-type)
+         result))))
+
+;;; FIXME: it really should be possible to take advantage of the
+;;; macros used in code/seq.lisp here to avoid duplication of code,
+;;; and enable even funkier transformations.
+(deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2
+                               (test #'eql)
+                               (key #'identity)
+                               from-end)
+                      (vector vector &rest t)
                       *
                       :policy (> speed (max space safety)))
-  `(block search
-    (let ((end1 (or end1 (length pattern)))
-          (end2 (or end2 (length text))))
-      (do ((index2 start2 (1+ index2)))
-          ((>= index2 end2) nil)
-        (when (do ((index1 start1 (1+ index1))
-                   (index2 index2 (1+ index2)))
-                  ((>= index1 end1) t)
-                (when (= index2 end2)
-                  (return-from search nil))
-                (when (char/= (char pattern index1) (char text index2))
-                  (return nil)))
-          (return index2))))))
+  "open code"
+  (let ((from-end (when (lvar-p from-end)
+                    (unless (constant-lvar-p from-end)
+                      (give-up-ir1-transform ":FROM-END is not constant."))
+                    (lvar-value from-end)))
+        (keyp (lvar-p key))
+        (testp (lvar-p test)))
+    `(block search
+       (let ((end1 (or end1 (length pattern)))
+             (end2 (or end2 (length text)))
+             ,@(when keyp
+                     '((key (coerce key 'function))))
+             ,@(when testp
+                     '((test (coerce test 'function)))))
+         (declare (type index start1 start2 end1 end2))
+         (do (,(if from-end
+                   '(index2 (- end2 (- end1 start1)) (1- index2))
+                   '(index2 start2 (1+ index2))))
+             (,(if from-end
+                   '(< index2 start2)
+                   '(>= index2 end2))
+              nil)
+           ;; INDEX2 is FIXNUM, not an INDEX, as right before the loop
+           ;; terminates is hits -1 when :FROM-END is true and :START2
+           ;; is 0.
+           (declare (type fixnum index2))
+           (when (do ((index1 start1 (1+ index1))
+                      (index2 index2 (1+ index2)))
+                     ((>= index1 end1) t)
+                   (declare (type index index1 index2))
+                   ,@(unless from-end
+                             '((when (= index2 end2)
+                                 (return-from search nil))))
+                   (unless (,@(if testp
+                                  '(funcall test)
+                                  '(eql))
+                              ,(if keyp
+                                   '(funcall key (aref pattern index1))
+                                   '(aref pattern index1))
+                              ,(if keyp
+                                   '(funcall key (aref text index2))
+                                   '(aref text index2)))
+                     (return nil)))
+             (return index2)))))))
 
 ;;; FIXME: It seems as though it should be possible to make a DEFUN
 ;;; %CONCATENATE (with a DEFTRANSFORM to translate constant RTYPE to
 (macrolet ((define-find-position (fun-name values-index)
              `(deftransform ,fun-name ((item sequence &key
                                              from-end (start 0) end
-                                             key test test-not))
+                                             key test test-not)
+                                       (t (or list vector) &rest t))
                 '(nth-value ,values-index
                             (%find-position item sequence
                                             from-end start
 (macrolet ((define-find-position-if (fun-name values-index)
              `(deftransform ,fun-name ((predicate sequence &key
                                                   from-end (start 0)
-                                                  end key))
+                                                  end key)
+                                       (t (or list vector) &rest t))
                 '(nth-value
                   ,values-index
                   (%find-position-if (%coerce-callable-to-fun predicate)
 (macrolet ((define-find-position-if-not (fun-name values-index)
                `(deftransform ,fun-name ((predicate sequence &key
                                           from-end (start 0)
-                                          end key))
+                                          end key)
+                                         (t (or list vector) &rest t))
                  '(nth-value
                    ,values-index
                    (%find-position-if-not (%coerce-callable-to-fun predicate)