0.pre8.107:
[sbcl.git] / src / compiler / seqtran.lisp
index 9792a4b..acaef21 100644 (file)
          (tests `(endp ,v))
          (args-to-fn (if take-car `(car ,v) v))))
 
-      (let ((call `(funcall ,fn . ,(args-to-fn)))
-           (endtest `(or ,@(tests))))
+      (let* ((fn-sym (gensym))  ; for ONCE-ONLY-ish purposes
+            (call `(funcall ,fn-sym . ,(args-to-fn)))
+            (endtest `(or ,@(tests))))
        (ecase accumulate
          (:nconc
           (let ((temp (gensym))
                 (map-result (gensym)))
-            `(let ((,map-result (list nil)))
+            `(let ((,fn-sym ,fn)
+                   (,map-result (list nil)))
                (do-anonymous ((,temp ,map-result) . ,(do-clauses))
                              (,endtest (cdr ,map-result))
                  (setq ,temp (last (nconc ,temp ,call)))))))
          (:list
           (let ((temp (gensym))
                 (map-result (gensym)))
-            `(let ((,map-result (list nil)))
+            `(let ((,fn-sym ,fn)
+                   (,map-result (list nil)))
                (do-anonymous ((,temp ,map-result) . ,(do-clauses))
                              (,endtest (cdr ,map-result))
                  (rplacd ,temp (setq ,temp (list ,call)))))))
          ((nil)
-          `(let ((,n-first ,(first arglists)))
+          `(let ((,fn-sym ,fn)
+                 (,n-first ,(first arglists)))
              (do-anonymous ,(do-clauses)
                            (,endtest ,n-first) ,call))))))))
 
                                result-type-arg-value)))))
     `(lambda (result-type-arg fun ,@seq-names)
        (truly-the ,result-type
-        ,(cond ((policy node (> speed safety))
+        ,(cond ((policy node (< safety 3))
+                ;; ANSI requires the length-related type check only
+                ;; when the SAFETY quality is 3... in other cases, we
+                ;; skip it, because it could be expensive.
                 bare)
                ((not constant-result-type-arg-p)
                 `(sequence-of-checked-length-given-type ,bare
                                                         result-type-arg))
                (t
-                (let ((result-ctype (ir1-transform-specifier-type result-type)))
+                (let ((result-ctype (ir1-transform-specifier-type
+                                     result-type)))
                   (if (array-type-p result-ctype)
-                      (let* ((dims (array-type-dimensions result-ctype))
-                             (dim (first dims)))
-                        (if (eq dim '*)
-                            bare
-                            `(vector-of-checked-length-given-length ,bare
-                                                                    ,dim)))
+                      (let ((dims (array-type-dimensions result-ctype)))
+                        (unless (and (listp dims) (= (length dims) 1))
+                          (give-up-ir1-transform "invalid sequence type"))
+                        (let ((dim (first dims)))
+                          (if (eq dim '*)
+                              bare
+                              `(vector-of-checked-length-given-length ,bare
+                                                                      ,dim))))
+                      ;; FIXME: this is wrong, as not all subtypes of
+                      ;; VECTOR are ARRAY-TYPEs [consider, for
+                      ;; example, (OR (VECTOR T 3) (VECTOR T
+                      ;; 4))]. However, it's difficult to see what we
+                      ;; should put here... maybe we should
+                      ;; GIVE-UP-IR1-TRANSFORM if the type is a
+                      ;; subtype of VECTOR but not an ARRAY-TYPE?
                       bare))))))))
 
+;;; Return a DO loop form which applies FUN to successive elements of
+;;; the sequences. SEQS is a list of continuations and SEQ-NAMES a
+;;; list of the variables bound to those sequences; INTO, if non-NIL,
+;;; names the MAP-INTO target. RESULT and BODY are forms which may use
+;;; FUNCALL-RESULT, the value of the current call to FUN, and INDEX,
+;;; the current vector position (bound only if a vector is involved).
+(defun build-sequence-iterator (seqs seq-names &key result into body)
+  (declare (type list seqs seq-names)
+           (type symbol into))
+  (collect ((bindings)                  ; variable clauses of the DO
+           (declarations)               ; DECLARE specifiers for those variables
+            (vector-lengths)            ; one length form per vector (and INTO)
+            (tests)                     ; termination tests, OR'ed together
+            (places))                   ; argument forms passed to FUN
+    (let ((found-vector-p nil))
+      (flet ((process-vector (length)
+               (unless found-vector-p   ; first vector: create the shared INDEX counter
+                 (setq found-vector-p t)
+                 (bindings `(index 0 (1+ index)))
+                 (declarations `(type index index)))
+               (vector-lengths length)))
+        (loop for seq of-type continuation in seqs
+           for seq-name in seq-names
+           for type = (continuation-type seq)
+           do (cond ((csubtypep type (specifier-type 'list))
+                     (let ((index (gensym "I"))) ; each list gets its own cursor variable
+                       (bindings `(,index ,seq-name (cdr ,index)))
+                       (declarations `(type list ,index))
+                       (places `(car ,index))
+                       (tests `(endp ,index))))
+                    ((csubtypep type (specifier-type 'vector))
+                     (process-vector `(length ,seq-name))
+                     (places `(aref ,seq-name index))) ; all vectors share INDEX
+                    (t                  ; neither provably a list nor a vector
+                     (give-up-ir1-transform
+                      "can't determine sequence argument type"))))
+        (when into                      ; the target's size also bounds the loop
+          (process-vector `(array-dimension ,into 0))))
+      (when found-vector-p
+        (bindings `(length (min ,@(vector-lengths)))) ; stop at the shortest vector
+        (tests `(= index length)))
+      `(do (,@(bindings))
+           ((or ,@(tests)) ,result)
+         (declare ,@(declarations))
+         (let ((funcall-result (funcall fun ,@(places)))) ; FUN is free here; the caller's code binds it
+           (declare (ignorable funcall-result))
+           ,body)))))
+
 ;;; Try to compile %MAP efficiently when we can determine sequence
 ;;; argument types at compile time.
 ;;;
   (unless seqs (abort-ir1-transform "no sequence args"))
   (unless (constant-continuation-p result-type)
     (give-up-ir1-transform "RESULT-TYPE argument not constant"))
-  (labels (;; 1-valued SUBTYPEP, fails unless second value of SUBTYPEP is true
+  (labels ( ;; 1-valued SUBTYPEP, fails unless second value of SUBTYPEP is true
           (fn-1subtypep (fn x y)
             (multiple-value-bind (subtype-p valid-p) (funcall fn x y)
               (if valid-p
                   subtype-p
                   (give-up-ir1-transform
                    "can't analyze sequence type relationship"))))
-          (1subtypep (x y) (fn-1subtypep #'sb!xc:subtypep x y))
-          (1csubtypep (x y) (fn-1subtypep #'csubtypep x y))
-          (seq-supertype (seq)
-            (let ((ctype (continuation-type seq)))
-              (cond ((1csubtypep ctype (specifier-type 'vector)) 'vector)
-                    ((1csubtypep ctype (specifier-type 'list)) 'list)
-                    (t
-                     (give-up-ir1-transform
-                      "can't determine sequence argument type"))))))
+          (1subtypep (x y) (fn-1subtypep #'sb!xc:subtypep x y)))
     (let* ((result-type-value (continuation-value result-type))
           (result-supertype (cond ((null result-type-value) 'null)
                                   ((1subtypep result-type-value 'vector)
                                    'list)
                                   (t
                                    (give-up-ir1-transform
-                                    "can't determine result type"))))
-          (seq-supertypes (mapcar #'seq-supertype seqs)))
+                                    "can't determine result type")))))
       (cond ((and result-type-value (= 1 (length seqs)))
             ;; The consing arity-1 cases can be implemented
             ;; reasonably efficiently as function calls, and the cost
             ;; optimization policy.
             (cond ((subtypep 'list result-type-value)
                    '(apply #'%map-to-list-arity-1 fun seqs))
-                  (;; (This one can be inefficient due to COERCE, but
+                  ( ;; (This one can be inefficient due to COERCE, but
                    ;; the current open-coded implementation has the
                    ;; same problem.)
                    (subtypep result-type-value 'vector)
                             ',result-type-value))
                   (t (bug "impossible (?) sequence type"))))
            (t
-            (let* ((seq-args (make-gensym-list (length seqs)))
-                   (index-bindingoids
-                    (mapcar (lambda (seq-arg seq-supertype)
-                              (let ((i (gensym "I"))) 
-                                (ecase seq-supertype
-                                  (vector `(,i 0 (1+ ,i)))
-                                  (list `(,i ,seq-arg (rest ,i))))))
-                            seq-args seq-supertypes))
-                   (indices (mapcar #'first index-bindingoids))
-                   (index-decls (mapcar (lambda (index seq-supertype)
-                                          `(type ,(ecase seq-supertype
-                                                    (vector 'index)
-                                                    (list 'list))
-                                                 ,index))
-                                        indices seq-supertypes))
-                   (tests (mapcar (lambda (seq-arg seq-supertype index)
-                                    (ecase seq-supertype
-                                      (vector `(>= ,index (length ,seq-arg)))
-                                      (list `(endp ,index))))
-                                  seq-args seq-supertypes indices))
-                   (values (mapcar (lambda (seq-arg seq-supertype index)
-                                     (ecase seq-supertype
-                                       (vector `(aref ,seq-arg ,index))
-                                       (list `(first ,index))))
-                                   seq-args seq-supertypes indices)))
-              (multiple-value-bind (push-dacc final-result)
+            (let* ((seq-args (make-gensym-list (length seqs))))
+              (multiple-value-bind (push-dacc result)
                   (ecase result-supertype
                     (null (values nil nil))
-                    (list (values `(push dacc acc) `(nreverse acc)))
-                    (vector (values `(push dacc acc)
+                    (list (values `(push funcall-result acc)
+                                   `(nreverse acc)))
+                    (vector (values `(push funcall-result acc)
                                     `(coerce (nreverse acc)
                                              ',result-type-value))))
                 ;; (We use the same idiom, of returning a LAMBDA from
                 ;; of the &REST vars.)
                 `(lambda (result-type fun ,@seq-args)
                    (declare (ignore result-type))
-                   (do ((really-fun (%coerce-callable-to-fun fun))
-                        ,@index-bindingoids
-                        (acc nil))
-                   ((or ,@tests)
-                    ,final-result)
-                   (declare ,@index-decls)
-                   (declare (type list acc))
-                   (declare (ignorable acc))
-                   (let ((dacc (funcall really-fun ,@values)))
-                     (declare (ignorable dacc))
-                     ,push-dacc))))))))))
+                    (let ((fun (%coerce-callable-to-fun fun))
+                          (acc nil))
+                      (declare (type list acc))
+                      (declare (ignorable acc))
+                      ,(build-sequence-iterator
+                        seqs seq-args
+                        :result result
+                        :body push-dacc))))))))))
+
+;;; MAP-INTO
+;;;
+;;; Open-code MAP-INTO when the result sequence is known to be a
+;;; vector. The expansion is a LAMBDA taking RESULT, FUN and one
+;;; variable per sequence argument; it runs the loop built by
+;;; BUILD-SEQUENCE-ITERATOR and returns RESULT. BUILD-SEQUENCE-ITERATOR
+;;; gives up the transform if some sequence argument cannot be proven
+;;; to be a list or a vector.
+(deftransform map-into ((result fun &rest seqs)
+                        (vector * &rest *)
+                        *)
+  "open code"
+  ;; MAKE-GENSYM-LIST is the helper the %MAP transform already uses
+  ;; to name its sequence arguments; use it here too instead of a
+  ;; hand-rolled MAPCAR over an ignored argument.
+  (let ((seqs-names (make-gensym-list (length seqs))))
+    `(lambda (result fun ,@seqs-names)
+       ,(build-sequence-iterator
+         seqs seqs-names
+         ;; When the loop ends, INDEX is the number of elements
+         ;; stored; record it in the fill pointer, if there is one.
+         :result '(when (array-has-fill-pointer-p result)
+                   (setf (fill-pointer result) index))
+         ;; Passing INTO makes the iterator bound the loop by
+         ;; (ARRAY-DIMENSION RESULT 0) and bind INDEX.
+         :into 'result
+         :body '(setf (aref result index) funcall-result))
+       result)))
+
 \f
+;;; FIXME: once the confusion over doing transforms with known-complex
+;;; arrays is over, we should also transform calls on (AND (ARRAY
+;;; * (*)) (NOT (SIMPLE-ARRAY * (*)))) objects.
 (deftransform elt ((s i) ((simple-array * (*)) *) *)
   '(aref s i))                         ; S is a one-dimensional simple array: ELT becomes AREF
 
-(deftransform elt ((s i) (list *) *)
+(deftransform elt ((s i) (list *) * :policy (< safety 3)) ; only applied when SAFETY < 3
   '(nth i s))                          ; presumably restricted because NTH returns NIL past the end where ELT should signal -- confirm
 
 (deftransform %setelt ((s i v) ((simple-array * (*)) * *) *)
   '(%aset s i v))                      ; S is a one-dimensional simple array: store with %ASET
 
-(deftransform %setelt ((s i v) (list * *))
+(deftransform %setelt ((s i v) (list * *) * :policy (< safety 3)) ; only applied when SAFETY < 3
   '(setf (car (nthcdr i s)) v))        ; S is a list: store into the car of the I-th cons
 
+(deftransform %check-vector-sequence-bounds ((vector start end)
+                                            (vector * *) *
+                                            :node node)
+  (if (policy node (< safety speed))    ; SPEED outranks SAFETY: no checking at all
+      '(or end (length vector))         ; effective end: END, or the length when END is NIL
+      '(let ((length (length vector)))
+       (if (<= 0 start (or end length) length) ; valid iff 0 <= START <= effective end <= LENGTH
+           (or end length)              ; valid: return the effective end
+           (sb!impl::signal-bounding-indices-bad-error vector start end))))) ; invalid: signal the error
+
 (macrolet ((def (name)
              `(deftransform ,name ((e l &key (test #'eql)) * *
                                   :node node)
 
 ;;; Moved here from generic/vm-tran.lisp to satisfy clisp
 ;;;
-;;; FIXME: It would be good to implement SB!XC:DEFCONSTANT, and use
-;;; use that here, so that the compiler is born knowing this value.
 ;;; FIXME: Add a comment telling whether this holds for all vectors
 ;;; or only for vectors based on simple arrays (non-adjustable, etc.).
 (def!constant vector-data-bit-offset
   (* sb!vm:vector-data-offset sb!vm:n-word-bits)) ; VECTOR-DATA-OFFSET times bits per word (presumably the offset is in words -- confirm)
 
-;;; FIXME: Shouldn't we be testing for legality of
-;;;   * START1, START2, END1, and END2 indices?
-;;;   * size of copied string relative to destination string?
-;;; (Either there should be tests conditional on SAFETY>=SPEED, or
-;;; the transform should be conditional on SPEED>SAFETY.)
-;;;
-;;; FIXME: Also, the transform should probably be dependent on
-;;; SPEED>SPACE.
 (deftransform replace ((string1 string2 &key (start1 0) (start2 0)
                                end1 end2)
-                      (simple-string simple-string &rest t))
+                      (simple-string simple-string &rest t)
+                      *
+                      ;; FIXME: consider replacing this policy test
+                      ;; with some tests for the STARTx and ENDx
+                      ;; indices being valid, conditional on high
+                      ;; SAFETY code.
+                      ;;
+                      ;; FIXME: It turns out that this transform is
+                      ;; critical for the performance of string
+                      ;; streams.  Make this more explicit.
+                      :policy (< (max safety space) 3))
   `(locally
      (declare (optimize (safety 0)))
      (bit-bash-copy string2
                       (find nil)
                       (position nil))
                   (declare (type index index))
-                  (dolist (i sequence (values find position))
+                  (dolist (i sequence
+                           (if (and end (> end index))
+                               (sb!impl::signal-bounding-indices-bad-error
+                                sequence start end)
+                               (values find position)))
                     (let ((key-i (funcall key i)))
                       (when (and end (>= index end))
                         (return (values find position)))
                              :important t)
   "expand inline"
   '(%find-position-if (let ((test-fun (%coerce-callable-to-fun test)))
-                       ;; I'm having difficulty believing I'm
-                       ;; reading it right, but as far as I can see,
-                       ;; the only guidance that ANSI gives for the
-                       ;; order of arguments to asymmetric tests is
-                       ;; the character-set dependent example from
-                       ;; the definition of FIND,
-                       ;;   (find #\d "here are some.." :test #'char>)
-                       ;;     => #\Space
-                       ;; (In ASCII, we have (CHAR> #\d #\SPACE)=>T.)
-                       ;; (Neither the POSITION definition page nor
-                       ;; section 17.2 ("Rules about Test Functions")
-                       ;; seem to consider the possibility of
-                       ;; asymmetry.)
-                       ;;
-                       ;; So, judging from the example, we want to
-                       ;; do (FUNCALL TEST-FUN ITEM I), because
-                       ;; (FUNCALL #'CHAR> #\d #\SPACE)=>T.
-                       ;;
-                       ;; -- WHN (whose attention was drawn to it by
-                       ;;         Alexey Dejneka's bug report/fix)
+                       ;; The order of arguments for asymmetric tests
+                       ;; (e.g. #'<, as opposed to order-independent
+                       ;; tests like #'=) is specified in the spec
+                       ;; section 17.2.1 -- the O/Zi stuff there.
                        (lambda (i)
                          (funcall test-fun item i)))
                      sequence
           (,n-end ,end-arg))
        (with-array-data ((,sequence ,n-sequence :offset-var ,offset)
                         (,start ,start)
-                        (,end (or ,n-end (length ,n-sequence))))
+                        (,end (%check-vector-sequence-bounds
+                               ,n-sequence ,start ,n-end)))
          (block ,block
           (macrolet ((maybe-return ()
                        '(let ((,element (aref ,sequence ,index)))
   (check-inlineability-of-find-position-if sequence from-end)
   '(%find-position-vector-macro item sequence
                                from-end start end key test))
+
+;;; source transforms which reduce the :TEST, :TEST-NOT, and :KEY
+;;; options of FIND, POSITION-IF, etc. to a single function each
+(define-source-transform effective-find-position-test (test test-not)
+  (once-only ((test test)               ; evaluate each argument form only once
+             (test-not test-not))
+    `(cond
+      ((and ,test ,test-not)            ; both supplied: run-time error
+       (error "can't specify both :TEST and :TEST-NOT"))
+      (,test (%coerce-callable-to-fun ,test))
+      (,test-not
+       ;; (Without DYNAMIC-EXTENT, this is potentially horribly
+       ;; inefficient, but since the TEST-NOT option is deprecated
+       ;; anyway, we don't care.)
+       (complement (%coerce-callable-to-fun ,test-not)))
+      (t #'eql))))                      ; neither supplied: the test is EQL
+(define-source-transform effective-find-position-key (key)
+  (once-only ((key key))                ; evaluate the KEY form only once
+    `(if ,key
+        (%coerce-callable-to-fun ,key)
+        #'identity)))                   ; KEY is NIL: the key function is IDENTITY
+
+(macrolet ((define-find-position (fun-name values-index) ; VALUES-INDEX: which value of %FIND-POSITION to return
+            `(deftransform ,fun-name ((item sequence &key
+                                            from-end (start 0) end
+                                            key test test-not))
+               '(nth-value ,values-index
+                           (%find-position item sequence
+                                           from-end start
+                                           end
+                                           (effective-find-position-key key) ; :KEY reduced to one function
+                                           (effective-find-position-test     ; :TEST/:TEST-NOT reduced to one function
+                                            test test-not))))))
+  (define-find-position find 0)         ; FIND returns value 0 of %FIND-POSITION
+  (define-find-position position 1))    ; POSITION returns value 1
+
+(macrolet ((define-find-position-if (fun-name values-index) ; VALUES-INDEX: which value of %FIND-POSITION-IF to return
+            `(deftransform ,fun-name ((predicate sequence &key
+                                                 from-end (start 0)
+                                                 end key))
+               '(nth-value
+                 ,values-index
+                 (%find-position-if (%coerce-callable-to-fun predicate)
+                                    sequence from-end
+                                    start end
+                                    (effective-find-position-key key)))))) ; :KEY reduced to one function
+  (define-find-position-if find-if 0)   ; FIND-IF returns value 0 of %FIND-POSITION-IF
+  (define-find-position-if position-if 1)) ; POSITION-IF returns value 1
+
+;;; the deprecated functions FIND-IF-NOT and POSITION-IF-NOT. We
+;;; didn't bother to worry about optimizing them, except note that on
+;;; Sat, Oct 06, 2001 at 04:22:38PM +0100, Christophe Rhodes wrote on
+;;; sbcl-devel
+;;;
+;;;     My understanding is that while the :test-not argument is
+;;;     deprecated in favour of :test (complement #'foo) because of
+;;;     semantic difficulties (what happens if both :test and :test-not
+;;;     are supplied, etc) the -if-not variants, while officially
+;;;     deprecated, would be undeprecated were X3J13 actually to produce
+;;;     a revised standard, as there are perfectly legitimate idiomatic
+;;;     reasons for allowing the -if-not versions equal status,
+;;;     particularly remove-if-not (== filter).
+;;;
+;;;     This is only an informal understanding, I grant you, but
+;;;     perhaps it's worth optimizing the -if-not versions in the same
+;;;     way as the others?
+;;;
+;;; FIXME: Maybe remove uses of these deprecated functions (and
+;;; definitely of :TEST-NOT) within the implementation of SBCL.
+(macrolet ((define-find-position-if-not (fun-name values-index) ; VALUES-INDEX: which value of %FIND-POSITION-IF-NOT to return
+              `(deftransform ,fun-name ((predicate sequence &key
+                                         from-end (start 0)
+                                         end key))
+                '(nth-value
+                  ,values-index
+                  (%find-position-if-not (%coerce-callable-to-fun predicate)
+                   sequence from-end
+                   start end
+                   (effective-find-position-key key)))))) ; :KEY reduced to one function
+  (define-find-position-if-not find-if-not 0) ; FIND-IF-NOT returns value 0 of %FIND-POSITION-IF-NOT
+  (define-find-position-if-not position-if-not 1)) ; POSITION-IF-NOT returns value 1