1.0.11.29: Faster CONCATENATE on strings
authorJuho Snellman <jsnell@iki.fi>
Sun, 18 Nov 2007 04:13:27 +0000 (04:13 +0000)
committerJuho Snellman <jsnell@iki.fi>
Sun, 18 Nov 2007 04:13:27 +0000 (04:13 +0000)
        * Add a result-type dependent DEFTRANSFORM which open-codes directly
          to calls to REPLACE.
        * Constant-fold the array reads for constant string arguments
          in the transform (intended as a slezy benchmark trick, but actually
          it looks as if having some literal strings mixed in with
          variables is pretty common in real world CONCATENATE uses).
        * Add transforms for REPLACE on mixed SIMPLE-BASE-STRINGS and
          (SIMPLE-ARRAY CHARACTER (*)) to support.
        * Speeds up a simple benchmark of concatenating two three-character
          strings by a factor of 15, and by a factor of 30 when the strings
          are constant. For a more real-world example, doing DIRECTORY on
          a large set of files speeds up by 25%.

        Also:

        * Fix a broken test (extra close paren) that was uncovered by
          the write-no-partial-fasls change.

NEWS
src/code/primordial-extensions.lisp
src/code/seq.lisp
src/compiler/seqtran.lisp
tests/fopcompiler.impure-cload.lisp
version.lisp-expr

diff --git a/NEWS b/NEWS
index 371216d..22b35fa 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@ changes in sbcl-1.0.12 relative to sbcl-1.0.11:
     concurrent accesses (but not iteration.) See also:
     SB-EXT:WITH-LOCKED-HASH-TABLE, and
     SB-EXT:HASH-TABLE-SYNCHRONIZED-P.
+  * optimization: CONCATENATE on strings is an order of magnitue faster
+    in code compiled with (> SPEED SPACE)
   * bug fix: if file compilation is aborted, the partial fasl is now
     deleted, and COMPILE-FILE returns NIL as the primary value.
   * bug fix: number of thread safety issues relating to SBCL's internal
index 087968c..d9d0d0e 100644 (file)
         (gensym (format nil "~A[~S]" name block-name))
         (gensym name))))
 
+
+;;; Compile a version of BODY for all TYPES, and dispatch to the
+;;; correct one based on the value of VAR. This was originally used
+;;; only for strings, hence the name. Renaming it to something more
+;;; generic might not be a bad idea.
+(defmacro string-dispatch ((&rest types) var &body body)
+  (let ((fun (gensym "STRING-DISPATCH-FUN-")))
+    `(flet ((,fun (,var)
+              ,@body))
+       (declare (inline ,fun))
+       (etypecase ,var
+         ,@(loop for type in types
+                 collect `(,type (,fun (the ,type ,var))))))))
+
 ;;; Automate an idiom often found in macros:
 ;;;   (LET ((FOO (GENSYM "FOO"))
 ;;;         (MAX-INDEX (GENSYM "MAX-INDEX-")))
index ed87d4c..37d333c 100644 (file)
       (vector-search sequence2 sequence1)
       (apply #'sb!sequence:search sequence1 sequence2 args))))
 
-(sb!xc:defmacro string-dispatch ((&rest types) var &body body)
-  (let ((fun (gensym "STRING-DISPATCH-FUN-")))
-    `(flet ((,fun (,var)
-              ,@body))
-       (declare (inline ,fun))
-       (etypecase ,var
-         ,@(loop for type in types
-                 collect `(,type (,fun (the ,type ,var))))))))
-
 ;;; FIXME: this was originally in array.lisp; it might be better to
 ;;; put it back there, and make DOSEQUENCE and SEQ-DISPATCH be in
 ;;; a new early-seq.lisp file.
index 55c4dba..ca0fd04 100644 (file)
 ;;; you tweak it, make sure that you compare the disassembly, if not the
 ;;; performance of, the functions implementing string streams
 ;;; (e.g. SB!IMPL::STRING-OUCH).
+(eval-when (:compile-toplevel :load-toplevel :execute)
+  (defun make-replace-transform (saetp sequence-type1 sequence-type2)
+    `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
+                            (,sequence-type1 ,sequence-type2 &rest t)
+                            ,sequence-type1
+                            :node node)
+       ,(cond
+         ((and saetp (valid-bit-bash-saetp-p saetp)) nil)
+         ;; If the sequence types are different, SEQ1 and SEQ2 must
+         ;; be distinct arrays, and we can open code the copy loop.
+         ((not (eql sequence-type1 sequence-type2)) nil)
+         ;; If we're not bit-bashing, only allow cases where we
+         ;; can determine the order of copying up front.  (There
+         ;; are actually more cases we can handle if we know the
+         ;; amount that we're copying, but this handles the
+         ;; common cases.)
+         (t '(unless (= (constant-value-or-lose start1 0)
+                      (constant-value-or-lose start2 0))
+              (give-up-ir1-transform))))
+       `(let* ((len1 (length seq1))
+               (len2 (length seq2))
+               (end1 (or end1 len1))
+               (end2 (or end2 len2))
+               (replace-len1 (- end1 start1))
+               (replace-len2 (- end2 start2)))
+          ,(unless (policy node (= safety 0))
+             `(progn
+                 (unless (<= 0 start1 end1 len1)
+                   (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
+                 (unless (<= 0 start2 end2 len2)
+                   (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
+          ,',(cond
+              ((and saetp (valid-bit-bash-saetp-p saetp))
+               (let* ((n-element-bits (sb!vm:saetp-n-bits saetp))
+                      (bash-function (intern (format nil "UB~D-BASH-COPY"
+                                                     n-element-bits)
+                                             (find-package "SB!KERNEL"))))
+                 `(funcall (function ,bash-function) seq2 start2
+                           seq1 start1 (min replace-len1 replace-len2))))
+              (t
+               ;; We can expand the loop inline here because we
+               ;; would have given up the transform (see above)
+               ;; if we didn't have constant matching start
+               ;; indices.
+               '(do ((i start1 (1+ i))
+                     (j start2 (1+ j))
+                     (end (+ start1
+                             (min replace-len1 replace-len2))))
+                 ((>= i end))
+                 (declare (optimize (insert-array-bounds-checks 0)))
+                 (setf (aref seq1 i) (aref seq2 j)))))
+          seq1))))
+
 (macrolet
     ((define-replace-transforms ()
        (loop for saetp across sb!vm:*specialized-array-element-type-properties*
              for sequence-type = `(simple-array ,(sb!vm:saetp-specifier saetp) (*))
              unless (= (sb!vm:saetp-typecode saetp) sb!vm::simple-array-nil-widetag)
-             collect
-            `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2)
-                                    (,sequence-type ,sequence-type &rest t)
-                                    ,sequence-type
-                                    :node node)
-               ,(cond
-                 ((valid-bit-bash-saetp-p saetp) nil)
-                 ;; If we're not bit-bashing, only allow cases where we
-                 ;; can determine the order of copying up front.  (There
-                 ;; are actually more cases we can handle if we know the
-                 ;; amount that we're copying, but this handles the
-                 ;; common cases.)
-                 (t '(unless (= (constant-value-or-lose start1 0)
-                              (constant-value-or-lose start2 0))
-                      (give-up-ir1-transform))))
-               `(let* ((len1 (length seq1))
-                       (len2 (length seq2))
-                       (end1 (or end1 len1))
-                       (end2 (or end2 len2))
-                       (replace-len1 (- end1 start1))
-                       (replace-len2 (- end2 start2)))
-                  ,(unless (policy node (= safety 0))
-                           `(progn
-                              (unless (<= 0 start1 end1 len1)
-                                (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1))
-                              (unless (<= 0 start2 end2 len2)
-                                (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2))))
-                  ,',(cond
-                      ((valid-bit-bash-saetp-p saetp)
-                       (let* ((n-element-bits (sb!vm:saetp-n-bits saetp))
-                              (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits)
-                                                     (find-package "SB!KERNEL"))))
-                         `(funcall (function ,bash-function) seq2 start2
-                                   seq1 start1 (min replace-len1 replace-len2))))
-                      (t
-                       ;; We can expand the loop inline here because we
-                       ;; would have given up the transform (see above)
-                       ;; if we didn't have constant matching start
-                       ;; indices.
-                       '(do ((i start1 (1+ i))
-                             (end (+ start1
-                                     (min replace-len1 replace-len2))))
-                         ((>= i end))
-                         (declare (optimize (insert-array-bounds-checks 0)))
-                         (setf (aref seq1 i) (aref seq2 i)))))
-                  seq1))
+             collect (make-replace-transform saetp sequence-type sequence-type)
              into forms
-             finally (return `(progn ,@forms)))))
-  (define-replace-transforms))
+             finally (return `(progn ,@forms))))
+     (define-one-transform (sequence-type1 sequence-type2)
+       (make-replace-transform nil sequence-type1 sequence-type2)))
+  (define-one-transform simple-base-string (simple-array character (*)))
+  (define-one-transform (simple-array character (*)) simple-base-string))
 
 ;;; Expand simple cases of UB<SIZE>-BASH-COPY inline.  "simple" is
 ;;; defined as those cases where we are doing word-aligned copies from
                      (return nil)))
              (return index2)))))))
 
-;;; FIXME: It seems as though it should be possible to make a DEFUN
-;;; %CONCATENATE (with a DEFTRANSFORM to translate constant RTYPE to
-;;; CTYPE before calling %CONCATENATE) which is comparably efficient,
-;;; at least once DYNAMIC-EXTENT works.
-;;;
-;;; FIXME: currently KLUDGEed because of bug 188
-;;;
-;;; FIXME: disabled for sb-unicode: probably want it back
-#!-sb-unicode
-(deftransform concatenate ((rtype &rest sequences)
-                           (t &rest (or simple-base-string
-                                        (simple-array nil (*))))
-                           simple-base-string
-                           :policy (< safety 3))
-  (loop for rest-seqs on sequences
-        for n-seq = (gensym "N-SEQ")
-        for n-length = (gensym "N-LENGTH")
-        for start = 0 then next-start
-        for next-start = (gensym "NEXT-START")
-        collect n-seq into args
-        collect `(,n-length (length ,n-seq)) into lets
-        collect n-length into all-lengths
-        collect next-start into starts
-        collect `(if (and (typep ,n-seq '(simple-array nil (*)))
-                          (> ,n-length 0))
-                     (error 'nil-array-accessed-error)
-                     (#.(let* ((i (position 'character sb!kernel::*specialized-array-element-types*))
-                               (saetp (aref sb!vm:*specialized-array-element-type-properties* i))
-                               (n-bits (sb!vm:saetp-n-bits saetp)))
-                          (intern (format nil "UB~D-BASH-COPY" n-bits)
-                                  "SB!KERNEL"))
-                        ,n-seq 0 res ,start ,n-length))
-                into forms
-        collect `(setq ,next-start (+ ,start ,n-length)) into forms
-        finally
-        (return
-          `(lambda (rtype ,@args)
-             (declare (ignore rtype))
-             (let* (,@lets
-                    (res (make-string (the index (+ ,@all-lengths))
-                                      :element-type 'base-char)))
-               (declare (type index ,@all-lengths))
-               (let (,@(mapcar (lambda (name) `(,name 0)) starts))
-                 (declare (type index ,@starts))
-                 ,@forms)
-               res)))))
+
+;;; Open-code CONCATENATE for strings. It would be possible to extend
+;;; this transform to non-strings, but I chose to just do the case that
+;;; should cover 95% of CONCATENATE performance complaints for now.
+;;;   -- JES, 2007-11-17
+(deftransform concatenate ((result-type &rest lvars)
+                           (symbol &rest sequence)
+                           *
+                           :policy (> speed space))
+  (unless (constant-lvar-p result-type)
+    (give-up-ir1-transform))
+  (let* ((element-type (let ((type (lvar-value result-type)))
+                         ;; Only handle the simple result type cases. If
+                         ;; somebody does (CONCATENATE '(STRING 6) ...)
+                         ;; their code won't be optimized, but nobody does
+                         ;; that in practice.
+                         (case type
+                           ((string simple-string) 'character)
+                           ((base-string simple-base-string) 'base-char)
+                           (t (give-up-ir1-transform)))))
+         (vars (loop for x in lvars collect (gensym)))
+         (lvar-values (loop for lvar in lvars
+                            collect (when (constant-lvar-p lvar)
+                                      (lvar-value lvar))))
+         (lengths
+          (loop for value in lvar-values
+                for var in vars
+                collect (if value
+                            (length value)
+                            `(sb!impl::string-dispatch ((simple-array * (*))
+                                                        sequence)
+                                 ,var
+                               (declare (muffle-conditions compiler-note))
+                               (length ,var))))))
+    `(apply
+      (lambda ,vars
+        (declare (ignorable ,@vars))
+        (let* ((.length. (+ ,@lengths))
+               (.pos. 0)
+               (.string. (make-string .length. :element-type ',element-type)))
+          (declare (type index .length. .pos.)
+                   (muffle-conditions compiler-note))
+          ,@(loop for value in lvar-values
+                  for var in vars
+                  collect (if (stringp value)
+                              ;; Fold the array reads for constant arguments
+                              `(progn
+                                 ,@(loop for c across value
+                                         collect `(setf (aref .string.
+                                                              .pos.) ,c)
+                                         collect `(incf .pos.)))
+                              `(sb!impl::string-dispatch
+                                   (#!+sb-unicode
+                                    (simple-array character (*))
+                                    (simple-array base-char (*))
+                                    t)
+                                   ,var
+                                 (replace .string. ,var :start1 .pos.)
+                                 (incf .pos. (length ,var)))))
+          .string.))
+      lvars)))
 \f
 ;;;; CONS accessor DERIVE-TYPE optimizers
 
index 1e05ee6..25fc8e7 100644 (file)
@@ -92,5 +92,5 @@
 (symbol-macrolet ((foo 1))
   (let* ((x (bar (foo)))
          (y (bar (x foo))))
-    (bar (y x foo)))))
+    (bar (y x foo))))
 
index 404f2fe..4cff566 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.11.28"
+"1.0.11.29"