From 260a9146f02374a9cfbd9deb53283ee493f3729f Mon Sep 17 00:00:00 2001 From: Juho Snellman Date: Sun, 18 Nov 2007 04:13:27 +0000 Subject: [PATCH] 1.0.11.29: Faster CONCATENATE on strings * Add a result-type dependent DEFTRANSFORM which open-codes directly to calls to REPLACE. * Constant-fold the array reads for constant string arguments in the transform (intended as a slezy benchmark trick, but actually it looks as if having some literal strings mixed in with variables is pretty common in real world CONCATENATE uses). * Add transforms for REPLACE on mixed SIMPLE-BASE-STRINGS and (SIMPLE-ARRAY CHARACTER (*)) to support. * Speeds up a simple benchmark of concatenating two three-character strings by a factor of 15, and by a factor of 30 when the strings are constant. For a more real-world example, doing DIRECTORY on a large set of files speeds up by 25%. Also: * Fix a broken test (extra close paren) that was uncovered by the write-no-partial-fasls change. --- NEWS | 2 + src/code/primordial-extensions.lisp | 14 +++ src/code/seq.lisp | 9 -- src/compiler/seqtran.lisp | 214 ++++++++++++++++++++--------------- tests/fopcompiler.impure-cload.lisp | 2 +- version.lisp-expr | 2 +- 6 files changed, 138 insertions(+), 105 deletions(-) diff --git a/NEWS b/NEWS index 371216d..22b35fa 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ changes in sbcl-1.0.12 relative to sbcl-1.0.11: concurrent accesses (but not iteration.) See also: SB-EXT:WITH-LOCKED-HASH-TABLE, and SB-EXT:HASH-TABLE-SYNCHRONIZED-P. + * optimization: CONCATENATE on strings is an order of magnitue faster + in code compiled with (> SPEED SPACE) * bug fix: if file compilation is aborted, the partial fasl is now deleted, and COMPILE-FILE returns NIL as the primary value. * bug fix: number of thread safety issues relating to SBCL's internal diff --git a/src/code/primordial-extensions.lisp b/src/code/primordial-extensions.lisp index 087968c..d9d0d0e 100644 --- a/src/code/primordial-extensions.lisp +++ b/src/code/primordial-extensions.lisp @@ -134,6 +134,20 @@ (gensym (format nil "~A[~S]" name block-name)) (gensym name)))) + +;;; Compile a version of BODY for all TYPES, and dispatch to the +;;; correct one based on the value of VAR. This was originally used +;;; only for strings, hence the name. Renaming it to something more +;;; generic might not be a bad idea. +(defmacro string-dispatch ((&rest types) var &body body) + (let ((fun (gensym "STRING-DISPATCH-FUN-"))) + `(flet ((,fun (,var) + ,@body)) + (declare (inline ,fun)) + (etypecase ,var + ,@(loop for type in types + collect `(,type (,fun (the ,type ,var)))))))) + ;;; Automate an idiom often found in macros: ;;; (LET ((FOO (GENSYM "FOO")) ;;; (MAX-INDEX (GENSYM "MAX-INDEX-"))) diff --git a/src/code/seq.lisp b/src/code/seq.lisp index ed87d4c..37d333c 100644 --- a/src/code/seq.lisp +++ b/src/code/seq.lisp @@ -2520,15 +2520,6 @@ (vector-search sequence2 sequence1) (apply #'sb!sequence:search sequence1 sequence2 args)))) -(sb!xc:defmacro string-dispatch ((&rest types) var &body body) - (let ((fun (gensym "STRING-DISPATCH-FUN-"))) - `(flet ((,fun (,var) - ,@body)) - (declare (inline ,fun)) - (etypecase ,var - ,@(loop for type in types - collect `(,type (,fun (the ,type ,var)))))))) - ;;; FIXME: this was originally in array.lisp; it might be better to ;;; put it back there, and make DOSEQUENCE and SEQ-DISPATCH be in ;;; a new early-seq.lisp file. diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index 55c4dba..ca0fd04 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -588,60 +588,71 @@ ;;; you tweak it, make sure that you compare the disassembly, if not the ;;; performance of, the functions implementing string streams ;;; (e.g. SB!IMPL::STRING-OUCH). +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun make-replace-transform (saetp sequence-type1 sequence-type2) + `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2) + (,sequence-type1 ,sequence-type2 &rest t) + ,sequence-type1 + :node node) + ,(cond + ((and saetp (valid-bit-bash-saetp-p saetp)) nil) + ;; If the sequence types are different, SEQ1 and SEQ2 must + ;; be distinct arrays, and we can open code the copy loop. + ((not (eql sequence-type1 sequence-type2)) nil) + ;; If we're not bit-bashing, only allow cases where we + ;; can determine the order of copying up front. (There + ;; are actually more cases we can handle if we know the + ;; amount that we're copying, but this handles the + ;; common cases.) + (t '(unless (= (constant-value-or-lose start1 0) + (constant-value-or-lose start2 0)) + (give-up-ir1-transform)))) + `(let* ((len1 (length seq1)) + (len2 (length seq2)) + (end1 (or end1 len1)) + (end2 (or end2 len2)) + (replace-len1 (- end1 start1)) + (replace-len2 (- end2 start2))) + ,(unless (policy node (= safety 0)) + `(progn + (unless (<= 0 start1 end1 len1) + (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1)) + (unless (<= 0 start2 end2 len2) + (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2)))) + ,',(cond + ((and saetp (valid-bit-bash-saetp-p saetp)) + (let* ((n-element-bits (sb!vm:saetp-n-bits saetp)) + (bash-function (intern (format nil "UB~D-BASH-COPY" + n-element-bits) + (find-package "SB!KERNEL")))) + `(funcall (function ,bash-function) seq2 start2 + seq1 start1 (min replace-len1 replace-len2)))) + (t + ;; We can expand the loop inline here because we + ;; would have given up the transform (see above) + ;; if we didn't have constant matching start + ;; indices. + '(do ((i start1 (1+ i)) + (j start2 (1+ j)) + (end (+ start1 + (min replace-len1 replace-len2)))) + ((>= i end)) + (declare (optimize (insert-array-bounds-checks 0))) + (setf (aref seq1 i) (aref seq2 j))))) + seq1)))) + (macrolet ((define-replace-transforms () (loop for saetp across sb!vm:*specialized-array-element-type-properties* for sequence-type = `(simple-array ,(sb!vm:saetp-specifier saetp) (*)) unless (= (sb!vm:saetp-typecode saetp) sb!vm::simple-array-nil-widetag) - collect - `(deftransform replace ((seq1 seq2 &key (start1 0) (start2 0) end1 end2) - (,sequence-type ,sequence-type &rest t) - ,sequence-type - :node node) - ,(cond - ((valid-bit-bash-saetp-p saetp) nil) - ;; If we're not bit-bashing, only allow cases where we - ;; can determine the order of copying up front. (There - ;; are actually more cases we can handle if we know the - ;; amount that we're copying, but this handles the - ;; common cases.) - (t '(unless (= (constant-value-or-lose start1 0) - (constant-value-or-lose start2 0)) - (give-up-ir1-transform)))) - `(let* ((len1 (length seq1)) - (len2 (length seq2)) - (end1 (or end1 len1)) - (end2 (or end2 len2)) - (replace-len1 (- end1 start1)) - (replace-len2 (- end2 start2))) - ,(unless (policy node (= safety 0)) - `(progn - (unless (<= 0 start1 end1 len1) - (sb!impl::signal-bounding-indices-bad-error seq1 start1 end1)) - (unless (<= 0 start2 end2 len2) - (sb!impl::signal-bounding-indices-bad-error seq2 start2 end2)))) - ,',(cond - ((valid-bit-bash-saetp-p saetp) - (let* ((n-element-bits (sb!vm:saetp-n-bits saetp)) - (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits) - (find-package "SB!KERNEL")))) - `(funcall (function ,bash-function) seq2 start2 - seq1 start1 (min replace-len1 replace-len2)))) - (t - ;; We can expand the loop inline here because we - ;; would have given up the transform (see above) - ;; if we didn't have constant matching start - ;; indices. - '(do ((i start1 (1+ i)) - (end (+ start1 - (min replace-len1 replace-len2)))) - ((>= i end)) - (declare (optimize (insert-array-bounds-checks 0))) - (setf (aref seq1 i) (aref seq2 i))))) - seq1)) + collect (make-replace-transform saetp sequence-type sequence-type) into forms - finally (return `(progn ,@forms))))) - (define-replace-transforms)) + finally (return `(progn ,@forms)))) + (define-one-transform (sequence-type1 sequence-type2) + (make-replace-transform nil sequence-type1 sequence-type2))) + (define-one-transform simple-base-string (simple-array character (*))) + (define-one-transform (simple-array character (*)) simple-base-string)) ;;; Expand simple cases of UB-BASH-COPY inline. "simple" is ;;; defined as those cases where we are doing word-aligned copies from @@ -854,52 +865,67 @@ (return nil))) (return index2))))))) -;;; FIXME: It seems as though it should be possible to make a DEFUN -;;; %CONCATENATE (with a DEFTRANSFORM to translate constant RTYPE to -;;; CTYPE before calling %CONCATENATE) which is comparably efficient, -;;; at least once DYNAMIC-EXTENT works. -;;; -;;; FIXME: currently KLUDGEed because of bug 188 -;;; -;;; FIXME: disabled for sb-unicode: probably want it back -#!-sb-unicode -(deftransform concatenate ((rtype &rest sequences) - (t &rest (or simple-base-string - (simple-array nil (*)))) - simple-base-string - :policy (< safety 3)) - (loop for rest-seqs on sequences - for n-seq = (gensym "N-SEQ") - for n-length = (gensym "N-LENGTH") - for start = 0 then next-start - for next-start = (gensym "NEXT-START") - collect n-seq into args - collect `(,n-length (length ,n-seq)) into lets - collect n-length into all-lengths - collect next-start into starts - collect `(if (and (typep ,n-seq '(simple-array nil (*))) - (> ,n-length 0)) - (error 'nil-array-accessed-error) - (#.(let* ((i (position 'character sb!kernel::*specialized-array-element-types*)) - (saetp (aref sb!vm:*specialized-array-element-type-properties* i)) - (n-bits (sb!vm:saetp-n-bits saetp))) - (intern (format nil "UB~D-BASH-COPY" n-bits) - "SB!KERNEL")) - ,n-seq 0 res ,start ,n-length)) - into forms - collect `(setq ,next-start (+ ,start ,n-length)) into forms - finally - (return - `(lambda (rtype ,@args) - (declare (ignore rtype)) - (let* (,@lets - (res (make-string (the index (+ ,@all-lengths)) - :element-type 'base-char))) - (declare (type index ,@all-lengths)) - (let (,@(mapcar (lambda (name) `(,name 0)) starts)) - (declare (type index ,@starts)) - ,@forms) - res))))) + +;;; Open-code CONCATENATE for strings. It would be possible to extend +;;; this transform to non-strings, but I chose to just do the case that +;;; should cover 95% of CONCATENATE performance complaints for now. +;;; -- JES, 2007-11-17 +(deftransform concatenate ((result-type &rest lvars) + (symbol &rest sequence) + * + :policy (> speed space)) + (unless (constant-lvar-p result-type) + (give-up-ir1-transform)) + (let* ((element-type (let ((type (lvar-value result-type))) + ;; Only handle the simple result type cases. If + ;; somebody does (CONCATENATE '(STRING 6) ...) + ;; their code won't be optimized, but nobody does + ;; that in practice. + (case type + ((string simple-string) 'character) + ((base-string simple-base-string) 'base-char) + (t (give-up-ir1-transform))))) + (vars (loop for x in lvars collect (gensym))) + (lvar-values (loop for lvar in lvars + collect (when (constant-lvar-p lvar) + (lvar-value lvar)))) + (lengths + (loop for value in lvar-values + for var in vars + collect (if value + (length value) + `(sb!impl::string-dispatch ((simple-array * (*)) + sequence) + ,var + (declare (muffle-conditions compiler-note)) + (length ,var)))))) + `(apply + (lambda ,vars + (declare (ignorable ,@vars)) + (let* ((.length. (+ ,@lengths)) + (.pos. 0) + (.string. (make-string .length. :element-type ',element-type))) + (declare (type index .length. .pos.) + (muffle-conditions compiler-note)) + ,@(loop for value in lvar-values + for var in vars + collect (if (stringp value) + ;; Fold the array reads for constant arguments + `(progn + ,@(loop for c across value + collect `(setf (aref .string. + .pos.) ,c) + collect `(incf .pos.))) + `(sb!impl::string-dispatch + (#!+sb-unicode + (simple-array character (*)) + (simple-array base-char (*)) + t) + ,var + (replace .string. ,var :start1 .pos.) + (incf .pos. (length ,var))))) + .string.)) + lvars))) ;;;; CONS accessor DERIVE-TYPE optimizers diff --git a/tests/fopcompiler.impure-cload.lisp b/tests/fopcompiler.impure-cload.lisp index 1e05ee6..25fc8e7 100644 --- a/tests/fopcompiler.impure-cload.lisp +++ b/tests/fopcompiler.impure-cload.lisp @@ -92,5 +92,5 @@ (symbol-macrolet ((foo 1)) (let* ((x (bar (foo))) (y (bar (x foo)))) - (bar (y x foo))))) + (bar (y x foo)))) diff --git a/version.lisp-expr b/version.lisp-expr index 404f2fe..4cff566 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.11.28" +"1.0.11.29" -- 1.7.10.4