(aref seq index2))
(incf index2))))))
seq1)))
+
+;;; Boyer-Moore search for strings.
+;;;
+;;; TODO:
+;;; * START/END keywords
+;;; * a literal :TEST #'CHAR= or :TEST #'EQL is OK (also #'EQ)
+;;; * fewer hardcoded constants
+;;; * :FROM-END
+;;;
+;;; * investigate whether we can make this work with a hashtable and a
+;;; default for "not in pattern"
+(deftransform search ((pattern text)
+ (simple-base-string simple-base-string))
+ (unless (constant-lvar-p pattern)
+ (give-up-ir1-transform))
+ (let* ((pattern (lvar-value pattern))
+ (bad-character (make-array 256 :element-type 'fixnum :initial-element (length pattern)))
+ (temp (make-array (length pattern) :element-type 'fixnum))
+ (good-suffix (make-array (length pattern) :element-type 'fixnum :initial-element (1- (length pattern)))))
+
+ (dotimes (i (1- (length pattern)))
+ (setf (aref bad-character (char-code (aref pattern i)))
+ (- (length pattern) 1 i)))
+
+ (setf (aref temp (1- (length pattern))) (length pattern))
+ (loop with g = (1- (length pattern))
+ with f = (1- (length pattern)) ; XXXXXX?
+ for i downfrom (- (length pattern) 2) above 0
+ if (and (> i g)
+ (< (aref temp (- (+ i (length pattern)) 1 f)) (- i g)))
+ do (setf (aref temp i) (aref temp (- (+ i (length pattern)) 1 f)))
+ else
+ do (progn
+ (when (< i g)
+ (setf g i))
+ (setf f i)
+ (do ()
+ ((not
+ (and (>= g 0)
+ (char= (aref pattern g)
+ (aref pattern (- (+ g (length pattern)) 1 f))))))
+ (decf g))
+ (setf (aref temp i) (- f g))))
+
+ (loop with j = 0
+ for i downfrom (1- (length pattern)) to -1
+ if (or (= i -1) (= (aref temp i) (1+ i)))
+ do (do ()
+ ((>= j (- (length pattern) 1 i)))
+ (when (= (aref good-suffix j) (length pattern))
+ (setf (aref good-suffix j) (- (length pattern) 1 i)))
+ (incf j)))
+
+ (loop for i from 0 below (1- (length pattern))
+ do (setf (aref good-suffix (- (length pattern) 1 (aref temp i)))
+ (- (length pattern) 1 i)))
+
+ `(let ((good-suffix ,good-suffix)
+ (bad-character ,bad-character))
+ (declare (optimize speed (safety 0)))
+ (block search
+ (do ((j 0))
+ ((> j (- (length text) (length pattern))))
+ (declare (fixnum j))
+ (do ((i (1- (length pattern)) (1- i)))
+ ((< i 0) (return-from search j))
+ (declare (fixnum i))
+ (when (char/= (aref pattern i) (aref text (+ i j)))
+ (incf j (max (aref good-suffix i)
+ (+ (- (aref bad-character (char-code (aref text (+ i j))))
+ (length pattern))
+ (1+ i))))
+ (return))))))))
;;; are displayed, but are not stale. It displays the names of
;;; restarts. Worse, it displays the names of CMUCL-internal constants.
;;; These symbols that name constants are not referenced from anywhere
-;;; expect the package datastructures because the compiler can
+;;; except the package datastructures because the compiler can
;;; substitute their value wherever they're used in the CMUCL source
;;; code, without keeping a reference to the symbol hanging around.
;;; There are also a number of PCL-related symbols that are displayed,
(defun print-stale-reference (obj stream)
(cond ((vectorp obj)
(format stream "vector (probable package internals)"))
- ((sb-c::compiled-debug-function-p obj)
- (format stream "#<compiled-debug-function ~a>"
- (sb-c::compiled-debug-function-name obj)))
+ ((sb-c::compiled-debug-fun-p obj)
+ (format stream "#<compiled-debug-fun ~a>"
+ (sb-c::compiled-debug-fun-name obj)))
(t
(format stream "~w" obj))))
sb!vm:n-byte-bits)))
string1))
+;;; FIXME: this would be a valid transform for certain excluded cases:
+;;; * :TEST 'CHAR= or :TEST #'CHAR=
+;;; * :TEST 'EQL or :TEST #'EQL
+;;; * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity)
+;;;
+;;; also, it should be noted that there's nothing much in this
+;;; transform (as opposed to the ones for REPLACE and CONCATENATE)
+;;; that particularly limits it to SIMPLE-BASE-STRINGs.
+(deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2)
+ (simple-base-string simple-base-string &rest t)
+ *
+ :policy (> speed (max space safety)))
+ `(block search
+ (let ((end1 (or end1 (length pattern)))
+ (end2 (or end2 (length text))))
+ (do ((index2 start2 (1+ index2)))
+ ((>= index2 end2) nil)
+ (when (do ((index1 start1 (1+ index1))
+ (index2 index2 (1+ index2)))
+ ((>= index1 end1) t)
+ (when (= index2 end2)
+ (return-from search nil))
+ (when (char/= (char pattern index1) (char text index2))
+ (return nil)))
+ (return index2))))))
+
;;; FIXME: It seems as though it should be possible to make a DEFUN
;;; %CONCATENATE (with a DEFTRANSFORM to translate constant RTYPE to
;;; CTYPE before calling %CONCATENATE) which is comparably efficient,