+;;; Besides, the *-WITH-OFFSET machinery will fold those index
+;;; adjustments in the first version into the array addressing at no
+;;; performance penalty!
+
+;;; This transform is critical to the performance of string streams. If
+;;; you tweak it, make sure that you compare the disassembly, if not the
+;;; performance of, the functions implementing string streams
+;;; (e.g. SB!IMPL::STRING-OUCH).
+(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute)
+ (defun make-replace-transform (saetp sequence-type1 sequence-type2)
+ `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
+ (,sequence-type1 ,sequence-type2 &rest t)
+ ,sequence-type1
+ :node node)
+ `(let* ((len1 (length seq1))
+ (len2 (length seq2))
+ (end1 (or end1 len1))
+ (end2 (or end2 len2))
+ (replace-len (min (- end1 start1) (- end2 start2))))
+ ,(unless (policy node (= insert-array-bounds-checks 0))
+ `(progn
+ (unless (<= 0 start1 end1 len1)
+ (sequence-bounding-indices-bad-error seq1 start1 end1))
+ (unless (<= 0 start2 end2 len2)
+ (sequence-bounding-indices-bad-error seq2 start2 end2))))
+ ,',(cond
+ ((and saetp (sb!vm:valid-bit-bash-saetp-p saetp))
+ (let* ((n-element-bits (sb!vm:saetp-n-bits saetp))
+ (bash-function (intern (format nil "UB~D-BASH-COPY"
+ n-element-bits)
+ (find-package "SB!KERNEL"))))
+ `(funcall (function ,bash-function) seq2 start2
+ seq1 start1 replace-len)))
+ (t
+ `(if (and
+ ;; If the sequence types are different, SEQ1 and
+ ;; SEQ2 must be distinct arrays.
+ ,(eql sequence-type1 sequence-type2)
+ (eq seq1 seq2) (> start1 start2))
+ (do ((i (truly-the index (+ start1 replace-len -1))
+ (1- i))
+ (j (truly-the index (+ start2 replace-len -1))
+ (1- j)))
+ ((< i start1))
+ (declare (optimize (insert-array-bounds-checks 0)))
+ (setf (aref seq1 i) (aref seq2 j)))
+ (do ((i start1 (1+ i))
+ (j start2 (1+ j))
+ (end (+ start1 replace-len)))
+ ((>= i end))
+ (declare (optimize (insert-array-bounds-checks 0)))
+ (setf (aref seq1 i) (aref seq2 j))))))
+ seq1))))
+
+(macrolet
+ ((define-replace-transforms ()
+ (loop for saetp across sb!vm:*specialized-array-element-type-properties*
+ for sequence-type = `(simple-array ,(sb!vm:saetp-specifier saetp) (*))
+ unless (= (sb!vm:saetp-typecode saetp) sb!vm::simple-array-nil-widetag)
+ collect (make-replace-transform saetp sequence-type sequence-type)
+ into forms
+ finally (return `(progn ,@forms))))
+ (define-one-transform (sequence-type1 sequence-type2)
+ (make-replace-transform nil sequence-type1 sequence-type2)))
+ (define-replace-transforms)
+ #!+sb-unicode
+ (progn
+ (define-one-transform (simple-array base-char (*)) (simple-array character (*)))
+ (define-one-transform (simple-array character (*)) (simple-array base-char (*)))))
+
+;;; Expand simple cases of UB<SIZE>-BASH-COPY inline. "simple" is
+;;; defined as those cases where we are doing word-aligned copies from
+;;; both the source and the destination and we are copying from the same
+;;; offset from both the source and the destination. (The last
+;;; condition is there so we can determine the direction to copy at
+;;; compile time rather than runtime. Remember that UB<SIZE>-BASH-COPY
+;;; acts like memmove, not memcpy.) These conditions may seem rather
+;;; restrictive, but they do catch common cases, like allocating a (* 2
+;;; N)-size buffer and blitting in the old N-size buffer in.
+
+(defun frob-bash-transform (src src-offset
+ dst dst-offset
+ length n-elems-per-word)
+ (declare (ignore src dst length))
+ (let ((n-bits-per-elem (truncate sb!vm:n-word-bits n-elems-per-word)))
+ (multiple-value-bind (src-word src-elt)
+ (truncate (lvar-value src-offset) n-elems-per-word)
+ (multiple-value-bind (dst-word dst-elt)
+ (truncate (lvar-value dst-offset) n-elems-per-word)
+ ;; Avoid non-word aligned copies.
+ (unless (and (zerop src-elt) (zerop dst-elt))
+ (give-up-ir1-transform))
+ ;; Avoid copies where we would have to insert code for
+ ;; determining the direction of copying.
+ (unless (= src-word dst-word)
+ (give-up-ir1-transform))
+ ;; FIXME: The cross-compiler doesn't optimize TRUNCATE properly,
+ ;; so we have to do its work here.
+ `(let ((end (+ ,src-word ,(if (= n-elems-per-word 1)
+ 'length
+ `(truncate (the index length) ,n-elems-per-word)))))
+ (declare (type index end))
+ ;; Handle any bits at the end.
+ (when (logtest length (1- ,n-elems-per-word))
+ (let* ((extra (mod length ,n-elems-per-word))
+ ;; FIXME: The shift amount on this ASH is
+ ;; *always* negative, but the backend doesn't
+ ;; have a NEGATIVE-FIXNUM primitive type, so we
+ ;; wind up with a pile of code that tests the
+ ;; sign of the shift count prior to shifting when
+ ;; all we need is a simple negate and shift
+ ;; right. Yuck.
+ (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
+ (* (- extra ,n-elems-per-word)
+ ,n-bits-per-elem))))
+ (setf (sb!kernel:%vector-raw-bits dst end)
+ (logior
+ (logandc2 (sb!kernel:%vector-raw-bits dst end)
+ (ash mask
+ ,(ecase sb!c:*backend-byte-order*
+ (:little-endian 0)
+ (:big-endian `(* (- ,n-elems-per-word extra)
+ ,n-bits-per-elem)))))
+ (logand (sb!kernel:%vector-raw-bits src end)
+ (ash mask
+ ,(ecase sb!c:*backend-byte-order*
+ (:little-endian 0)
+ (:big-endian `(* (- ,n-elems-per-word extra)
+ ,n-bits-per-elem)))))))))
+ ;; Copy from the end to save a register.
+ (do ((i end (1- i)))
+ ((<= i ,src-word))
+ (setf (sb!kernel:%vector-raw-bits dst (1- i))
+ (sb!kernel:%vector-raw-bits src (1- i))))
+ (values))))))
+
+#.(loop for i = 1 then (* i 2)
+ collect `(deftransform ,(intern (format nil "UB~D-BASH-COPY" i)
+ "SB!KERNEL")
+ ((src src-offset
+ dst dst-offset
+ length)
+ ((simple-unboxed-array (*))
+ (constant-arg index)
+ (simple-unboxed-array (*))
+ (constant-arg index)
+ index)
+ *)
+ (frob-bash-transform src src-offset
+ dst dst-offset length
+ ,(truncate sb!vm:n-word-bits i))) into forms
+ until (= i sb!vm:n-word-bits)
+ finally (return `(progn ,@forms)))
+
+;;; We expand copy loops inline in SUBSEQ and COPY-SEQ if we're copying
+;;; arrays with elements of size >= the word size. We do this because
+;;; we know the arrays cannot alias (one was just consed), therefore we
+;;; can determine at compile time the direction to copy, and for
+;;; word-sized elements, UB<WORD-SIZE>-BASH-COPY will do a bit of
+;;; needless checking to figure out what's going on. The same
+;;; considerations apply if we are copying elements larger than the word
+;;; size, with the additional twist that doing it inline is likely to
+;;; cons far less than calling REPLACE and letting generic code do the
+;;; work.
+;;;
+;;; However, we do not do this for elements whose size is < than the
+;;; word size because we don't want to deal with any alignment issues
+;;; inline. The UB*-BASH-COPY transforms might fix things up later
+;;; anyway.
+
+(defun maybe-expand-copy-loop-inline (src src-offset dst dst-offset length
+ element-type)
+ (let ((saetp (find-saetp element-type)))
+ (aver saetp)
+ (if (>= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits)
+ (expand-aref-copy-loop src src-offset dst dst-offset length)
+ `(locally (declare (optimize (safety 0)))
+ (replace ,dst ,src :start1 ,dst-offset :start2 ,src-offset :end1 ,length)))))
+
+(defun expand-aref-copy-loop (src src-offset dst dst-offset length)
+ (if (eql src-offset dst-offset)
+ `(do ((i (+ ,src-offset ,length) (1- i)))
+ ((<= i ,src-offset))
+ (declare (optimize (insert-array-bounds-checks 0)))
+ (setf (aref ,dst (1- i)) (aref ,src (1- i))))
+ ;; KLUDGE: The compiler is not able to derive that (+ offset
+ ;; length) must be a fixnum, but arrives at (unsigned-byte 29).
+ ;; We, however, know it must be so, as by this point the bounds
+ ;; have already been checked.
+ `(do ((i (truly-the fixnum (+ ,src-offset ,length)) (1- i))
+ (j (+ ,dst-offset ,length) (1- j)))
+ ((<= i ,src-offset))
+ (declare (optimize (insert-array-bounds-checks 0))
+ (type (integer 0 #.sb!xc:array-dimension-limit) j i))
+ (setf (aref ,dst (1- j)) (aref ,src (1- i))))))
+
+;;; SUBSEQ, COPY-SEQ
+
+(deftransform subseq ((seq start &optional end)
+ (vector t &optional t)
+ *
+ :node node)
+ (let ((type (lvar-type seq)))
+ (cond
+ ((and (array-type-p type)
+ (csubtypep type (specifier-type '(or (simple-unboxed-array (*)) simple-vector)))
+ (policy node (> speed space)))
+ (let ((element-type (type-specifier (array-type-specialized-element-type type))))
+ `(let* ((length (length seq))
+ (end (or end length)))
+ ,(unless (policy node (zerop insert-array-bounds-checks))
+ '(progn
+ (unless (<= 0 start end length)
+ (sequence-bounding-indices-bad-error seq start end))))
+ (let* ((size (- end start))
+ (result (make-array size :element-type ',element-type)))
+ ,(maybe-expand-copy-loop-inline 'seq (if (constant-lvar-p start)
+ (lvar-value start)
+ 'start)
+ 'result 0 'size element-type)
+ result))))
+ (t
+ '(vector-subseq* seq start end)))))
+
+(deftransform subseq ((seq start &optional end)
+ (list t &optional t))
+ `(list-subseq* seq start end))
+
+(deftransform subseq ((seq start &optional end)
+ ((and sequence (not vector) (not list)) t &optional t))
+ '(sb!sequence:subseq seq start end))
+
+(deftransform copy-seq ((seq) (vector))
+ (let ((type (lvar-type seq)))
+ (cond ((and (array-type-p type)
+ (csubtypep type (specifier-type '(or (simple-unboxed-array (*)) simple-vector))))
+ (let ((element-type (type-specifier (array-type-specialized-element-type type))))
+ `(let* ((length (length seq))
+ (result (make-array length :element-type ',element-type)))
+ ,(maybe-expand-copy-loop-inline 'seq 0 'result 0 'length element-type)
+ result)))
+ (t
+ '(vector-subseq* seq 0 nil)))))
+
+(deftransform copy-seq ((seq) (list))
+ '(list-copy-seq* seq))
+
+(deftransform copy-seq ((seq) ((and sequence (not vector) (not list))))
+ '(sb!sequence:copy-seq seq))
+
+;;; FIXME: it really should be possible to take advantage of the
+;;; macros used in code/seq.lisp here to avoid duplication of code,
+;;; and enable even funkier transformations.
+(deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2
+ (test #'eql)
+ (key #'identity)
+ from-end)
+ (vector vector &rest t)
+ *
+ :node node
+ :policy (> speed (max space safety)))
+ "open code"
+ (flet ((maybe (x)
+ (when (lvar-p x)
+ (if (constant-lvar-p x)
+ (when (lvar-value x)
+ :yes)
+ :maybe))))
+ (let ((from-end (when (lvar-p from-end)
+ (unless (constant-lvar-p from-end)
+ (give-up-ir1-transform ":FROM-END is not constant."))
+ (lvar-value from-end)))
+ (key? (maybe key))
+ (test? (maybe test))
+ (check-bounds-p (policy node (plusp insert-array-bounds-checks))))
+ `(block search
+ (flet ((oops (vector start end)
+ (sequence-bounding-indices-bad-error vector start end)))
+ (let* ((len1 (length pattern))
+ (len2 (length text))
+ (end1 (or end1 len1))
+ (end2 (or end2 len2))
+ ,@(case key?
+ (:yes `((key (%coerce-callable-to-fun key))))
+ (:maybe `((key (when key
+ (%coerce-callable-to-fun key))))))
+ ,@(when test?
+ `((test (%coerce-callable-to-fun test)))))
+ (declare (type index start1 start2 end1 end2))
+ ,@(when check-bounds-p
+ `((unless (<= start1 end1 len1)
+ (oops pattern start1 end1))
+ (unless (<= start2 end2 len2)
+ (oops pattern start2 end2))))
+ (when (= end1 start1)
+ (return-from search (if from-end
+ end2
+ start2)))
+ (do (,(if from-end
+ '(index2 (- end2 (- end1 start1)) (1- index2))
+ '(index2 start2 (1+ index2))))
+ (,(if from-end
+ '(< index2 start2)
+ '(>= index2 end2))
+ nil)
+ ;; INDEX2 is FIXNUM, not an INDEX, as right before the loop
+ ;; terminates is hits -1 when :FROM-END is true and :START2
+ ;; is 0.
+ (declare (type fixnum index2))
+ (when (do ((index1 start1 (1+ index1))
+ (index2 index2 (1+ index2)))
+ ((>= index1 end1) t)
+ (declare (type index index1 index2)
+ (optimize (insert-array-bounds-checks 0)))
+ ,@(unless from-end
+ '((when (= index2 end2)
+ (return-from search nil))))
+ (unless (,@(if test?
+ `(funcall test)
+ `(eql))
+ ,(case key?
+ (:yes `(funcall key (aref pattern index1)))
+ (:maybe `(let ((elt (aref pattern index1)))
+ (if key
+ (funcall key elt)
+ elt)))
+ (otherwise `(aref pattern index1)))
+ ,(case key?
+ (:yes `(funcall key (aref text index2)))
+ (:maybe `(let ((elt (aref text index2)))
+ (if key
+ (funcall key elt)
+ elt)))
+ (otherwise `(aref text index2))))
+ (return nil)))
+ (return index2)))))))))
+
+
+;;; Open-code CONCATENATE for strings. It would be possible to extend
+;;; this transform to non-strings, but I chose to just do the case that
+;;; should cover 95% of CONCATENATE performance complaints for now.
+;;; -- JES, 2007-11-17
+;;;
+;;; Only handle the simple result type cases. If somebody does (CONCATENATE
+;;; '(STRING 6) ...) their code won't be optimized, but nobody does that in
+;;; practice.
+;;;
+;;; Limit full open coding based on length of constant sequences. Default
+;;; value is chosen so that other parts of the compiler (constraint propagation
+;;; mainly) won't go nonlinear too badly. It's not an exact number -- but
+;;; in the right ballpark.
+(defvar *concatenate-open-code-limit* 129)
+
+(deftransform concatenate ((result-type &rest lvars)
+ ((constant-arg
+ (member string simple-string base-string simple-base-string))
+ &rest sequence)
+ * :node node)
+ (let ((vars (loop for x in lvars collect (gensym)))
+ (type (lvar-value result-type)))
+ (if (policy node (<= speed space))
+ ;; Out-of-line
+ `(lambda (.dummy. ,@vars)
+ (declare (ignore .dummy.))
+ ,(ecase type
+ ((string simple-string)
+ `(%concatenate-to-string ,@vars))
+ ((base-string simple-base-string)
+ `(%concatenate-to-base-string ,@vars))))
+ ;; Inline
+ (let* ((element-type (ecase type
+ ((string simple-string) 'character)
+ ((base-string simple-base-string) 'base-char)))
+ (lvar-values (loop for lvar in lvars
+ collect (when (constant-lvar-p lvar)
+ (lvar-value lvar))))
+ (lengths
+ (loop for value in lvar-values
+ for var in vars
+ collect (if value
+ (length value)
+ `(sb!impl::string-dispatch ((simple-array * (*))
+ sequence)
+ ,var
+ (declare (muffle-conditions compiler-note))
+ (length ,var)))))
+ (non-constant-start
+ (loop for value in lvar-values
+ while (and (stringp value)
+ (< (length value) *concatenate-open-code-limit*))
+ sum (length value))))
+ `(apply
+ (lambda ,vars
+ (declare (ignorable ,@vars))
+ (declare (optimize (insert-array-bounds-checks 0)))
+ (let* ((.length. (+ ,@lengths))
+ (.pos. ,non-constant-start)
+ (.string. (make-string .length. :element-type ',element-type)))
+ (declare (type index .length. .pos.)
+ (muffle-conditions compiler-note))
+ ,@(loop with first-constants = t
+ for first = t then nil
+ for value in lvar-values
+ for var in vars
+ collect
+ (cond ((and (stringp value)
+ (< (length value) *concatenate-open-code-limit*))
+ ;; Fold the array reads for constant arguments
+ `(progn
+ ,@(loop for c across value
+ for i from 0
+ collect
+ ;; Without truly-the we get massive numbers
+ ;; of pointless error traps.
+ `(setf (aref .string.
+ (truly-the index ,(if first-constants
+ i
+ `(+ .pos. ,i))))
+ ,c))
+ ,(unless first-constants
+ `(incf (truly-the index .pos.) ,(length value)))))
+ (t
+ (prog1
+ `(sb!impl::string-dispatch
+ (#!+sb-unicode
+ (simple-array character (*))
+ (simple-array base-char (*))
+ t)
+ ,var
+ (replace .string. ,var
+ ,@(cond ((not first-constants)
+ '(:start1 .pos.))
+ ((plusp non-constant-start)
+ `(:start1 ,non-constant-start))))
+ (incf (truly-the index .pos.) (length ,var)))
+ (setf first-constants nil)))))
+ .string.))
+ lvars)))))