From: James M. Lawrence <llmjjmll@gmail.com>
Date: Thu, 17 May 2012 23:16:54 +0000 (-0400)
Subject: fix MAP-INTO performance
X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=151b7b5db692eb7c089e92100df0b037418e8d27;p=sbcl.git

fix MAP-INTO performance

* remove the O(n^2) algorithm for lists

* use (MAP NIL ...) for all sequence types

* avoid unnecessary LENGTH calls

* update fill pointer after mapping succeeds, not before

* add tests for MAP-INTO (there were none!)

* add some WITH-TESTs to tests/map-tests.impure.lisp
---

diff --git a/src/code/seq.lisp b/src/code/seq.lisp
index 3a70741..96a67b3 100644
--- a/src/code/seq.lisp
+++ b/src/code/seq.lisp
@@ -1045,34 +1045,74 @@ many elements are copied."
          first-sequence
          more-sequences))
 
-;;; KLUDGE: MAP has been rewritten substantially since the fork from
-;;; CMU CL in order to give reasonable performance, but this
-;;; implementation of MAP-INTO still has the same problems as the old
-;;; MAP code. Ideally, MAP-INTO should be rewritten to be efficient in
-;;; the same way that the corresponding cases of MAP have been
-;;; rewritten. Instead of doing it now, though, it's easier to wait
-;;; until we have DYNAMIC-EXTENT, at which time it should become
-;;; extremely easy to define a reasonably efficient MAP-INTO in terms
-;;; of (MAP NIL ..). -- WHN 20000920
+;;; Uses the machinery of (MAP NIL ...). For non-vectors we avoid
+;;; computing the length of the result sequence since we can detect
+;;; the end during mapping (if MAP even gets that far).
 (defun map-into (result-sequence function &rest sequences)
-  (let* ((fp-result
-          (and (arrayp result-sequence)
-               (array-has-fill-pointer-p result-sequence)))
-         (len (apply #'min
-                     (if fp-result
-                         (array-dimension result-sequence 0)
-                         (length result-sequence))
-                     (mapcar #'length sequences))))
-
-    (when fp-result
-      (setf (fill-pointer result-sequence) len))
-
-    (let ((really-fun (%coerce-callable-to-fun function)))
-      (dotimes (index len)
-        (setf (elt result-sequence index)
-              (apply really-fun
-                     (mapcar (lambda (seq) (elt seq index))
-                             sequences))))))
+  (declare (truly-dynamic-extent sequences))
+  (let ((really-fun (%coerce-callable-to-fun function)))
+    ;; For each result type, define a mapping function which is
+    ;; responsible for replacing RESULT-SEQUENCE elements and for
+    ;; terminating itself if the end of RESULT-SEQUENCE is reached.
+    ;;
+    ;; The mapping function is defined with the MAP-LAMBDA macrolet,
+    ;; whose syntax matches that of LAMBDA.
+    (macrolet ((map-lambda (params &body body)
+                 `(flet ((f ,params ,@body))
+                    (declare (truly-dynamic-extent #'f))
+                    ;; Note (MAP-INTO SEQ (LAMBDA () ...)) is a
+                    ;; different animal, hence the awkward flip
+                    ;; between MAP and LOOP.
+                    (if sequences
+                        (apply #'map nil #'f sequences)
+                        (loop (f))))))
+      ;; Optimize MAP-LAMBDAs since they are the inner loops. Because
+      ;; we are manually doing bounds checking with known types, turn
+      ;; off safety for vectors and lists but keep it for generic
+      ;; sequences.
+      (etypecase result-sequence
+        (vector
+         (locally (declare (optimize speed (safety 0)))
+           (with-array-data ((data result-sequence) (start) (end)
+                             ;; MAP-INTO ignores fill pointer when mapping
+                             :check-fill-pointer nil)
+             (let ((index start))
+               (declare (type index index))
+               (macrolet ((dispatch ()
+                            `(block mapping
+                               (map-lambda (&rest args)
+                                 (declare (truly-dynamic-extent args))
+                                 (when (eql index end)
+                                   (return-from mapping))
+                                 (setf (aref data index)
+                                       (apply really-fun args))
+                                 (incf index)))))
+                 (typecase data
+                   (simple-vector (dispatch))
+                   (otherwise (dispatch))))
+               (when (array-has-fill-pointer-p result-sequence)
+                 (setf (fill-pointer result-sequence) (- index start)))))))
+        (list
+         (let ((node result-sequence))
+           (declare (type list node))
+           (map-lambda (&rest args)
+             (declare (truly-dynamic-extent args) (optimize speed (safety 0)))
+             (when (null node)
+               (return-from map-into result-sequence))
+             (setf (car node) (apply really-fun args))
+             (setf node (cdr node)))))
+        (sequence
+         (multiple-value-bind (iter limit from-end)
+             (sb!sequence:make-sequence-iterator result-sequence)
+           (map-lambda (&rest args)
+             (declare (truly-dynamic-extent args) (optimize speed))
+             (when (sb!sequence:iterator-endp result-sequence
+                                              iter limit from-end)
+               (return-from map-into result-sequence))
+             (setf (sb!sequence:iterator-element result-sequence iter)
+                   (apply really-fun args))
+             (setf iter (sb!sequence:iterator-step result-sequence
+                                                   iter from-end))))))))
   result-sequence)
 
 ;;;; quantifiers
diff --git a/tests/map-tests.impure.lisp b/tests/map-tests.impure.lisp
index ec999b5..d8a11d4 100644
--- a/tests/map-tests.impure.lisp
+++ b/tests/map-tests.impure.lisp
@@ -11,15 +11,81 @@
 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
 ;;;; more information.
 
+(load "test-util.lisp")
 (load "assertoid.lisp")
 (use-package "ASSERTOID")
 
 ;;; tests of MAP
 ;;; FIXME: Move these into their own file.
-(assertoid (map 'vector #'+ '(1 2 3) '(30 20))
-           :expected-equalp #(31 22))
-(assertoid (map 'list #'+ #(1 2) '(100) #(0) #(100 100))
-           :expected-equal '(201))
+(test-util:with-test (:name :map)
+  (assertoid (map 'vector #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22))
+  (assertoid (map 'list #'+ #(1 2) '(100) #(0) #(100 100))
+             :expected-equal '(201)))
+
+;;; tests of MAP-INTO
+
+(test-util:with-test (:name :map-into)
+  (assertoid (map-into (vector) #'+ '(1 2 3) '(30 20))
+             :expected-equalp #())
+  (assertoid (map-into (vector 99) #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31))
+  (assertoid (map-into (vector 99 88) #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22))
+  (assertoid (map-into (vector 99 88 77) #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22 77))
+
+  (assertoid (map-into (list) #'+ '(1 2 3) '(30 20))
+             :expected-equalp '())
+  (assertoid (map-into (list 99) #'+ '(1 2 3) '(30 20))
+             :expected-equalp '(31))
+  (assertoid (map-into (list 99 88) #'+ '(1 2 3) '(30 20))
+             :expected-equalp '(31 22))
+  (assertoid (map-into (list 99 88 77) #'+ '(1 2 3) '(30 20))
+             :expected-equalp '(31 22 77))
+
+  (assertoid (map-into (vector 99 99 99) (constantly 5))
+             :expected-equalp #(5 5 5))
+  (assertoid (map-into (vector 99 99 99) (let ((x 0)) (lambda () (incf x))))
+             :expected-equalp #(1 2 3))
+
+  (assertoid (map-into (list 99 99 99) (constantly 5))
+             :expected-equalp '(5 5 5))
+  (assertoid (map-into (list 99 99 99) (let ((x 0)) (lambda () (incf x))))
+             :expected-equalp '(1 2 3))
+
+  (assertoid (map-into (make-array 0 :element-type 'fixnum)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #())
+  (assertoid (map-into (make-array 1 :element-type 'fixnum :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31))
+  (assertoid (map-into (make-array 2 :element-type 'fixnum :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22))
+  (assertoid (map-into (make-array 3 :element-type 'fixnum :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22 99))
+
+  (assertoid (map-into (make-array 0 :fill-pointer 0 :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #())
+  (assertoid (map-into (make-array 1 :fill-pointer 0 :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31))
+  (assertoid (map-into (make-array 2 :fill-pointer 0 :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22))
+  (assertoid (map-into (make-array 3 :fill-pointer 0 :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22))
+
+  (assertoid (map-into (make-array 9 :fill-pointer 9 :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22))
+  (assertoid (map-into (make-array 9 :fill-pointer 5 :initial-element 99)
+                       #'+ '(1 2 3) '(30 20))
+             :expected-equalp #(31 22)))
 
 (defmacro with-mapnil-test-fun (fun-name &body body)
   `(let ((reversed-result nil))
@@ -27,15 +93,17 @@
               (push rest reversed-result)))
        ,@body
        (nreverse reversed-result))))
-(assertoid (with-mapnil-test-fun fun
-             (map nil #'fun #(1)))
-           :expected-equal '((1)))
-(assertoid (with-mapnil-test-fun fun
-             (map nil #'fun #() '(1 2 3)))
-           :expected-equal '())
-(assertoid (with-mapnil-test-fun fun
-             (map nil #'fun #(a b c) '(alpha beta) '(aleph beth)))
-           :expected-equal '((a alpha aleph) (b beta beth)))
+
+(test-util:with-test (:name :map-nil)
+  (assertoid (with-mapnil-test-fun fun
+               (map nil #'fun #(1)))
+             :expected-equal '((1)))
+  (assertoid (with-mapnil-test-fun fun
+               (map nil #'fun #() '(1 2 3)))
+             :expected-equal '())
+  (assertoid (with-mapnil-test-fun fun
+               (map nil #'fun #(a b c) '(alpha beta) '(aleph beth)))
+             :expected-equal '((a alpha aleph) (b beta beth))))
 
 ;;; Exercise MAP repeatedly on the same dataset by providing various
 ;;; combinations of sequence type arguments, declarations, and so
@@ -82,7 +150,11 @@
             (arrange
              `(assertoid (map ',result-type ,fun ,@(args-with-type-decls))
                          :expected-equalp (coerce ,result-seq
-                                                  ',result-type)))))
+                                                  ',result-type))))
+          (arrange
+           `(assertoid (map-into (fill (copy-seq ,result-seq) 9999)
+                                 ,fun ,@(args-with-type-decls))
+                       :expected-equalp ,result-seq)))
         (arrange
          `(assertoid (mapcar (lambda (args) (apply #',fun-name args))
                              (with-mapnil-test-fun mtf
@@ -95,17 +167,19 @@
                                     ,@(args-with-type-decls))))
                      :expected-equal (coerce ,result-seq 'list)))))
     `(progn ,@(nreverse reversed-assertoids))))
-(maptest :result-seq '(2 3)
-         :fun-name 1+
-         :arg-seqs (*list-2*)
-         :arg-types (list))
-(maptest :result-seq '(nil nil nil)
-         :fun-name oddp
-         :arg-seqs (*vector-30*)
-         :arg-types (vector))
-(maptest :result-seq '(12 24)
-         :fun-name +
-         :arg-seqs (*list-2* *list-2* *vector-30*)
-         :arg-types (list list vector))
+
+(test-util:with-test (:name :maptest)
+  (maptest :result-seq '(2 3)
+           :fun-name 1+
+           :arg-seqs (*list-2*)
+           :arg-types (list))
+  (maptest :result-seq '(nil nil nil)
+           :fun-name oddp
+           :arg-seqs (*vector-30*)
+           :arg-types (vector))
+  (maptest :result-seq '(12 24)
+           :fun-name +
+           :arg-seqs (*list-2* *list-2* *vector-30*)
+           :arg-types (list list vector)))
 
 ;;; success