From: Christophe Rhodes Date: Wed, 14 Jan 2004 16:53:17 +0000 (+0000) Subject: 0.8.7.15: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=8dd43b84a688fde72f6d957c59f7207d539990f7;p=sbcl.git 0.8.7.15: Benchmark-inspired SEARCH deftransform for simple-base-strings ... Adjust per APD sbcl-devel to allow the type deriver to work out all the types ... put boyer-moore version in compiler-extras in contrib/ Also fix stale-symbols so that it runs ... most stale symbols appear to be within vop-parse and pv-table structures --- diff --git a/contrib/compiler-extras.lisp b/contrib/compiler-extras.lisp index 0bbee06..4e68bd2 100644 --- a/contrib/compiler-extras.lisp +++ b/contrib/compiler-extras.lisp @@ -77,3 +77,76 @@ (aref seq index2)) (incf index2)))))) seq1))) + +;;; Boyer-Moore search for strings. +;;; +;;; TODO: +;;; * START/END keywords +;;; * a literal :TEST #'CHAR= or :TEST #'EQL is OK (also #'EQ) +;;; * fewer hardcoded constants +;;; * :FROM-END +;;; +;;; * investigate whether we can make this work with a hashtable and a +;;; default for "not in pattern" +(deftransform search ((pattern text) + (simple-base-string simple-base-string)) + (unless (constant-lvar-p pattern) + (give-up-ir1-transform)) + (let* ((pattern (lvar-value pattern)) + (bad-character (make-array 256 :element-type 'fixnum :initial-element (length pattern))) + (temp (make-array (length pattern) :element-type 'fixnum)) + (good-suffix (make-array (length pattern) :element-type 'fixnum :initial-element (1- (length pattern))))) + + (dotimes (i (1- (length pattern))) + (setf (aref bad-character (char-code (aref pattern i))) + (- (length pattern) 1 i))) + + (setf (aref temp (1- (length pattern))) (length pattern)) + (loop with g = (1- (length pattern)) + with f = (1- (length pattern)) ; XXXXXX? + for i downfrom (- (length pattern) 2) above 0 + if (and (> i g) + (< (aref temp (- (+ i (length pattern)) 1 f)) (- i g))) + do (setf (aref temp i) (aref temp (- (+ i (length pattern)) 1 f))) + else + do (progn + (when (< i g) + (setf g i)) + (setf f i) + (do () + ((not + (and (>= g 0) + (char= (aref pattern g) + (aref pattern (- (+ g (length pattern)) 1 f)))))) + (decf g)) + (setf (aref temp i) (- f g)))) + + (loop with j = 0 + for i downfrom (1- (length pattern)) to -1 + if (or (= i -1) (= (aref temp i) (1+ i))) + do (do () + ((>= j (- (length pattern) 1 i))) + (when (= (aref good-suffix j) (length pattern)) + (setf (aref good-suffix j) (- (length pattern) 1 i))) + (incf j))) + + (loop for i from 0 below (1- (length pattern)) + do (setf (aref good-suffix (- (length pattern) 1 (aref temp i))) + (- (length pattern) 1 i))) + + `(let ((good-suffix ,good-suffix) + (bad-character ,bad-character)) + (declare (optimize speed (safety 0))) + (block search + (do ((j 0)) + ((> j (- (length text) (length pattern)))) + (declare (fixnum j)) + (do ((i (1- (length pattern)) (1- i))) + ((< i 0) (return-from search j)) + (declare (fixnum i)) + (when (char/= (aref pattern i) (aref text (+ i j))) + (incf j (max (aref good-suffix i) + (+ (- (aref bad-character (char-code (aref text (+ i j)))) + (length pattern)) + (1+ i)))) + (return)))))))) diff --git a/contrib/stale-symbols.lisp b/contrib/stale-symbols.lisp index 1e84555..02823d4 100644 --- a/contrib/stale-symbols.lisp +++ b/contrib/stale-symbols.lisp @@ -38,7 +38,7 @@ ;;; are displayed, but are not stale. It displays the names of ;;; restarts. Worse, it displays the names of CMUCL-internal constants. ;;; These symbols that name constants are not referenced from anywhere -;;; expect the package datastructures because the compiler can +;;; except the package datastructures because the compiler can ;;; substitute their value wherever they're used in the CMUCL source ;;; code, without keeping a reference to the symbol hanging around. ;;; There are also a number of PCL-related symbols that are displayed, @@ -50,9 +50,9 @@ (defun print-stale-reference (obj stream) (cond ((vectorp obj) (format stream "vector (probable package internals)")) - ((sb-c::compiled-debug-function-p obj) - (format stream "#" - (sb-c::compiled-debug-function-name obj))) + ((sb-c::compiled-debug-fun-p obj) + (format stream "#" + (sb-c::compiled-debug-fun-name obj))) (t (format stream "~w" obj)))) diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index b531b36..9e124db 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -712,6 +712,32 @@ sb!vm:n-byte-bits))) string1)) +;;; FIXME: this would be a valid transform for certain excluded cases: +;;; * :TEST 'CHAR= or :TEST #'CHAR= +;;; * :TEST 'EQL or :TEST #'EQL +;;; * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity) +;;; +;;; also, it should be noted that there's nothing much in this +;;; transform (as opposed to the ones for REPLACE and CONCATENATE) +;;; that particularly limits it to SIMPLE-BASE-STRINGs. +(deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2) + (simple-base-string simple-base-string &rest t) + * + :policy (> speed (max space safety))) + `(block search + (let ((end1 (or end1 (length pattern))) + (end2 (or end2 (length text)))) + (do ((index2 start2 (1+ index2))) + ((>= index2 end2) nil) + (when (do ((index1 start1 (1+ index1)) + (index2 index2 (1+ index2))) + ((>= index1 end1) t) + (when (= index2 end2) + (return-from search nil)) + (when (char/= (char pattern index1) (char text index2)) + (return nil))) + (return index2)))))) + ;;; FIXME: It seems as though it should be possible to make a DEFUN ;;; %CONCATENATE (with a DEFTRANSFORM to translate constant RTYPE to ;;; CTYPE before calling %CONCATENATE) which is comparably efficient, diff --git a/version.lisp-expr b/version.lisp-expr index 2dfd7dc..566f174 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.7.14" +"0.8.7.15"