0.8.15.13:

author Juho Snellman <jsnell@iki.fi>

Tue, 12 Oct 2004 22:01:39 +0000 (22:01 +0000)

committer Juho Snellman <jsnell@iki.fi>

Tue, 12 Oct 2004 22:01:39 +0000 (22:01 +0000)
author Juho Snellman <jsnell@iki.fi>
Tue, 12 Oct 2004 22:01:39 +0000 (22:01 +0000)
committer Juho Snellman <jsnell@iki.fi>
Tue, 12 Oct 2004 22:01:39 +0000 (22:01 +0000)
diff --git a/NEWS b/NEWS

index 06d43e5..b95d794 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,7 @@ changes in sbcl-0.8.16 relative to sbcl-0.8.15:
      Beane)
    * fixed compiler failure, caused by instrumenting code during
      IR1-optimization.  (Debian bug report #273606 by Gabor Melis)
+  * optimization: added loop analysis and improved register allocation
    * fixed some bugs revealed by Paul Dietz' test suite:
      ** POSITION on displaced vectors with non-zero displacement
         returns the right answer.
diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS

index b0c8f47..5474bd2 100644 (file)
--- a/OPTIMIZATIONS
+++ b/OPTIMIZATIONS
@@ -62,34 +62,6 @@ VOP DATA-VECTOR-SET/SIMPLE-STRING V2!14[EDI] t32[EAX] t30[S2]>t33[CL]
  
  * And why two moves?
  --------------------------------------------------------------------------------
-#6
-09:49:05 <jtra> I have found a case in those where suboptimal code is
-  generate with nested loops, it might be moderately easy to fix that
-09:49:28 <jtra> see
-  http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl
-09:50:30 <jtra> if you add declarations to dotimes, generated code is
-  almost optimal, but most inner loops run out of registers and use
-  memory location for iteration variable
-
-;;; -*- mode: lisp -*-
-;;; http://www.bagley.org/~doug/shootout/
-;;; from Friedrich Dominicus
-
-(defun main ()
-  (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1")))
-        (x 0))
-    (declare (fixnum n)
-             (fixnum x)
-             (optimize (speed 3) (debug 0) (safety 0)))
-    (dotimes (a n)
-      (dotimes (b n)
-        (dotimes (c n)
-          (dotimes (d n)
-            (dotimes (e n)
-              (dotimes (f n)
-                (incf x)))))))
-   (format t "~A~%" x)))
---------------------------------------------------------------------------------
  #8
  (defun foo (d)
    (declare (optimize (speed 3) (safety 0) (debug 0)))
diff --git a/build-order.lisp-expr b/build-order.lisp-expr

index 441e757..0cdd20b 100644 (file)
--- a/build-order.lisp-expr
+++ b/build-order.lisp-expr
@@ -480,6 +480,7 @@
   ("src/compiler/ir1util")
   ("src/compiler/ir1report")
   ("src/compiler/ir1opt")
+ ("src/compiler/loop")
  
   ("src/compiler/ir1final")
   ("src/compiler/array-tran")
diff --git a/src/compiler/loop.lisp b/src/compiler/loop.lisp

new file mode 100644 (file)

index 0000000..c43c056
--- /dev/null
+++ b/src/compiler/loop.lisp
@@ -0,0 +1,218 @@
+;;;; This software is part of the SBCL system. See the README file for
+;;;; more information.
+;;;;
+;;;; This software is derived from the CMU CL system, which was
+;;;; written at Carnegie Mellon University and released into the
+;;;; public domain. The software is in the public domain and is
+;;;; provided with absolutely no warranty. See the COPYING and CREDITS
+;;;; files for more information.
+
+;;; **********************************************************************
+;;;
+;;; Stuff to annotate the flow graph with information about the loops in it.
+;;;
+;;; Written by Rob MacLachlan
+(in-package "SB!C")
+
+;;; FIND-DOMINATORS  --  Internal
+;;;
+;;; Find the set of blocks that dominates each block in COMPONENT.  We
+;;; assume that the DOMINATORS for each block is initially NIL, which
+;;; serves to represent the set of all blocks.  If a block is not
+;;; reachable from an entry point, then its dominators will still be
+;;; NIL when we are done.
+(defun find-dominators (component)
+  (let ((head (loop-head (component-outer-loop component)))
+       changed)
+    (let ((set (make-sset)))
+      (sset-adjoin head set)
+      (setf (block-dominators head) set))
+    (loop
+     (setq changed nil)
+     (do-blocks (block component :tail)
+       (let ((dom (block-dominators block)))
+        (when dom (sset-delete block dom))
+        (dolist (pred (block-pred block))
+          (let ((pdom (block-dominators pred)))
+            (when pdom
+              (if dom
+                  (when (sset-intersection dom pdom)
+                    (setq changed t))
+                  (setq dom (copy-sset pdom) changed t)))))     
+        (setf (block-dominators block) dom)
+        (when dom (sset-adjoin block dom))))
+     (unless changed (return)))))
+
+
+;;; DOMINATES-P  --  Internal
+;;;
+;;;    Return true if BLOCK1 dominates BLOCK2, false otherwise.
+(defun dominates-p (block1 block2)
+  (let ((set (block-dominators block2)))
+    (if set
+       (sset-member block1 set)
+       t)))
+
+;;; LOOP-ANALYZE  --  Interface
+;;;
+;;; Set up the LOOP structures which describe the loops in the flow
+;;; graph for COMPONENT.  We NIL out any existing loop information,
+;;; and then scan through the blocks looking for blocks which are the
+;;; destination of a retreating edge: an edge that goes backward in
+;;; the DFO.  We then create LOOP structures to describe the loops
+;;; that have those blocks as their heads.  If find the head of a
+;;; strange loop, then we do some graph walking to find the other
+;;; segments in the strange loop.  After we have found the loop
+;;; structure, we walk it to initialize the block lists.
+(defun loop-analyze (component)
+  (let ((loop (component-outer-loop component)))
+    (do-blocks (block component :both)
+      (setf (block-loop block) nil))
+    (setf (loop-inferiors loop) ())
+    (setf (loop-blocks loop) nil)
+    (do-blocks (block component)
+      (let ((number (block-number block)))
+       (dolist (pred (block-pred block))
+         (when (<= (block-number pred) number)
+           (when (note-loop-head block component)
+             (clear-flags component)
+             (setf (block-flag block) :good)
+             (dolist (succ (block-succ block))
+               (find-strange-loop-blocks succ block))
+             (find-strange-loop-segments block component))
+           (return)))))
+    (find-loop-blocks (component-outer-loop component))))
+
+
+;;; FIND-LOOP-BLOCKS  --  Internal
+;;;
+;;; This function initializes the block lists for LOOP and the loops
+;;; nested within it.  We recursively descend into the loop nesting
+;;; and place the blocks in the appropriate loop on the way up.  When
+;;; we are done, we scan the blocks looking for exits.  An exit is
+;;; always a block that has a successor which doesn't have a LOOP
+;;; assigned yet, since the target of the exit must be in a superior
+;;; loop.
+;;;
+;;; We find the blocks by doing a forward walk from the head of the
+;;; loop and from any exits of nested loops.  The walks from inferior
+;;; loop exits are necessary because the walks from the head terminate
+;;; when they encounter a block in an inferior loop.
+(defun find-loop-blocks (loop)
+  (dolist (sub-loop (loop-inferiors loop))
+    (find-loop-blocks sub-loop))
+
+  (find-blocks-from-here (loop-head loop) loop)
+  (dolist (sub-loop (loop-inferiors loop))
+    (dolist (exit (loop-exits sub-loop))
+      (dolist (succ (block-succ exit))
+       (find-blocks-from-here succ loop))))
+  
+  (collect ((exits))
+    (dolist (sub-loop (loop-inferiors loop))
+      (dolist (exit (loop-exits sub-loop))
+       (dolist (succ (block-succ exit))
+         (unless (block-loop succ)
+           (exits exit)
+           (return)))))
+    
+    (do ((block (loop-blocks loop) (block-loop-next block)))
+       ((null block))
+      (dolist (succ (block-succ block))
+       (unless (block-loop succ)
+         (exits block)
+         (return))))    
+    (setf (loop-exits loop) (exits))))
+
+
+;;; FIND-BLOCKS-FROM-HERE  --  Internal
+;;;
+;;; This function does a graph walk to find the blocks directly within
+;;; LOOP that can be reached by a forward walk from BLOCK.  If BLOCK
+;;; is already in a loop or is not dominated by the LOOP-HEAD, then we
+;;; return.  Otherwise, we add the block to the BLOCKS for LOOP and
+;;; recurse on its successors.
+(defun find-blocks-from-here (block loop)
+  (when (and (not (block-loop block))
+            (dominates-p (loop-head loop) block))
+    (setf (block-loop block) loop)
+    (shiftf (block-loop-next block) (loop-blocks loop) block)
+    (dolist (succ (block-succ block))
+      (find-blocks-from-here succ loop))))
+
+
+;;; NOTE-LOOP-HEAD  --  Internal
+;;;
+;;; Create a loop structure to describe the loop headed by the block
+;;; HEAD.  If there is one already, just return.  If some retreating
+;;; edge into the head is from a block which isn't dominated by the
+;;; head, then we have the head of a strange loop segment.  We return
+;;; true if HEAD is part of a newly discovered strange loop.
+(defun note-loop-head (head component)
+  (let ((superior (find-superior head (component-outer-loop component))))
+    (unless (eq (loop-head superior) head)
+      (let ((result (make-loop :head head
+                              :kind :natural
+                              :superior superior
+                              :depth (1+ (loop-depth superior))))
+           (number (block-number head)))
+       (push result (loop-inferiors superior))
+       (dolist (pred (block-pred head))
+         (when (<= (block-number pred) number)
+           (if (dominates-p head pred)
+               (push pred (loop-tail result))
+               (setf (loop-kind result) :strange))))
+       (eq (loop-kind result) :strange)))))
+
+
+;;; FIND-SUPERIOR  --  Internal
+;;;
+;;; Find the loop which would be the superior of a loop headed by
+;;; HEAD.  If there is already a loop with that head, then return that
+;;; loop.
+(defun find-superior (head loop)
+  (if (eq (loop-head loop) head)
+      loop
+      (dolist (inferior (loop-inferiors loop) loop)
+       (when (dominates-p (loop-head inferior) head)
+         (return (find-superior head inferior))))))
+
+
+;;; FIND-STRANGE-LOOP-BLOCKS  --  Internal
+;;;
+;;; Do a graph walk to find the blocks in the strange loop which HEAD
+;;; is in.  BLOCK is the block we are currently at and COMPONENT is
+;;; the component we are in.  We do a walk forward from block, using
+;;; only edges which are not back edges.  We return true if there is a
+;;; path from BLOCK to HEAD, false otherwise.  If the BLOCK-FLAG is
+;;; true then we return.  We use two non-null values of FLAG to
+;;; indicate whether a path from the BLOCK back to HEAD was found.
+(defun find-strange-loop-blocks (block head)
+  (let ((flag (block-flag block)))
+    (cond (flag
+          (if (eq flag :good)
+              t
+              nil))
+         (t
+          (setf (block-flag block) :bad)
+          (unless (dominates-p block head)
+            (dolist (succ (block-succ block))
+              (when (find-strange-loop-blocks succ head)
+                (setf (block-flag block) :good))))
+          (eq (block-flag block) :good)))))
+
+;;; FIND-STRANGE-LOOP-SEGMENTS  --  Internal
+;;;
+;;; Do a graph walk to find the segments in the strange loop that has
+;;; BLOCK in it.  We walk forward, looking only at blocks in the loop
+;;; (flagged as :GOOD.)  Each block in the loop that has predecessors
+;;; outside of the loop is the head of a segment.  We enter the LOOP
+;;; structures in COMPONENT.
+(defun find-strange-loop-segments (block component)
+  (when (eq (block-flag block) :good)
+    (setf (block-flag block) :done)
+    (unless (every #'(lambda (x) (member (block-flag x) '(:good :done)))
+                  (block-pred block))
+      (note-loop-head block component))
+    (dolist (succ (block-succ block))
+      (find-strange-loop-segments succ component))))
diff --git a/src/compiler/main.lisp b/src/compiler/main.lisp

index 01d9c6f..0951cc8 100644 (file)
--- a/src/compiler/main.lisp
+++ b/src/compiler/main.lisp
@@ -38,6 +38,9 @@
  (defvar *check-consistency* nil)
  (defvar *all-components*)
  
+;;; Set to NIL to disable loop analysis for register allocation. 
+(defvar *loop-analyze* t)
+
  ;;; Bind this to a stream to capture various internal debugging output.
  (defvar *compiler-trace-output* nil)
  
@@ -523,6 +526,24 @@
  
      (ir1-phases component)
  
+    (when *loop-analyze* 
+      (find-dominators component)
+      (loop-analyze component))
+
+    #|
+    (when (and *loop-analyze* *compiler-trace-output*)
+      (labels ((print-blocks (block)
+                (format *compiler-trace-output* "    ~A~%" block)
+                (when (block-loop-next block)
+                  (print-blocks (block-loop-next block))))
+              (print-loop (loop)
+                (format *compiler-trace-output* "loop=~A~%" loop)
+                (print-blocks (loop-blocks loop))
+                (dolist (l (loop-inferiors loop))
+                  (print-loop l))))
+       (print-loop (component-outer-loop component))))
+    |#
+    
      ;; FIXME: What is MAYBE-MUMBLE for? Do we need it any more?
      (maybe-mumble "env ")
      (physenv-analyze component)
diff --git a/src/compiler/meta-vmdef.lisp b/src/compiler/meta-vmdef.lisp

index 805761f..d464ce4 100644 (file)
--- a/src/compiler/meta-vmdef.lisp
+++ b/src/compiler/meta-vmdef.lisp
@@ -72,7 +72,10 @@
                (/show0 "doing third SETF")
                (setf (finite-sb-live-tns res)
                      (make-array ',size :initial-element nil))
-              (/show0 "doing fourth and final SETF")
+              (/show0 "doing fourth SETF")
+              (setf (finite-sb-always-live-count res)
+                    (make-array ',size :initial-element 0))
+              (/show0 "doing fifth and final SETF")
                (setf (gethash ',name *backend-sb-names*)
                      res)))
  
diff --git a/src/compiler/node.lisp b/src/compiler/node.lisp

index d9ff1eb..4f0ad48 100644 (file)
--- a/src/compiler/node.lisp
+++ b/src/compiler/node.lisp
@@ -227,6 +227,13 @@
    (gen nil)
    (in nil)
    (out nil)
+  ;; Set of all blocks that dominate this block. NIL is interpreted
+  ;; as "all blocks in component". 
+  (dominators nil :type (or null sset))
+  ;; the LOOP that this block belongs to
+  (loop nil :type (or null cloop))
+  ;; next block in the loop.
+  (loop-next nil :type (or null cblock))
    ;; the component this block is in, or NIL temporarily during IR1
    ;; conversion and in deleted blocks
    (component (progn
@@ -378,7 +385,9 @@
    (delete-blocks nil :type list)
    (nlx-info-generated-p nil :type boolean)
    ;; this is filled by physical environment analysis
-  (dx-lvars nil :type list))
+  (dx-lvars nil :type list)
+  ;; The default LOOP in the component.
+  (outer-loop (make-loop :kind :outer :head head) :type cloop))
  (defprinter (component :identity t)
    name
    #!+sb-show id
diff --git a/src/compiler/pack.lisp b/src/compiler/pack.lisp

index a03d156..e28b767 100644 (file)
--- a/src/compiler/pack.lisp
+++ b/src/compiler/pack.lisp
@@ -92,7 +92,7 @@
  ;;;    the bits, otherwise we OR in the local conflict bits.
  ;;; -- If the TN is local, then we just do the block it is local to,
  ;;;    setting always-live and OR'ing in the local conflicts.
-(defun add-location-conflicts (tn sc offset)
+(defun add-location-conflicts (tn sc offset optimize)
    (declare (type tn tn) (type sc sc) (type index offset))
    (let ((confs (tn-global-conflicts tn))
         (sb (sc-sb sc))
@@ -123,7 +123,24 @@
           (let ((num (ir2-block-number (tn-local tn))))
             (setf (sbit loc-live num) 1)
             (bit-ior (the local-tn-bit-vector (svref loc-confs num))
-                    (tn-local-conflicts tn) t))))))))
+                    (tn-local-conflicts tn) t))))
+       ;; Calculating ALWAYS-LIVE-COUNT is moderately expensive, and
+       ;; currently the information isn't used unless (> SPEED
+       ;; COMPILE-SPEED).
+       (when optimize
+         (setf (svref (finite-sb-always-live-count sb) this-offset)
+               (find-location-usage sb this-offset))))))
+  (values))
+
+;; A rought measure of how much a given OFFSET in SB is currently
+;; used. Current implementation counts the amount of blocks where the
+;; offset has been marked as ALWAYS-LIVE.
+(defun find-location-usage (sb offset)
+  (declare (optimize speed))
+  (declare (type sb sb) (type index offset))
+  (let* ((always-live (svref (finite-sb-always-live sb) offset)))
+    (declare (simple-bit-vector always-live))
+    (count 1 always-live)))
  
  ;;; Return the total number of IR2-BLOCKs in COMPONENT.
  (defun ir2-block-count (component)
@@ -144,6 +161,7 @@
        (unless (eq (sb-kind sb) :non-packed)
         (let* ((conflicts (finite-sb-conflicts sb))
                (always-live (finite-sb-always-live sb))
+              (always-live-count (finite-sb-always-live-count sb))
                (max-locs (length conflicts))
                (last-count (finite-sb-last-block-count sb)))
           (unless (zerop max-locs)
@@ -172,7 +190,8 @@
                       (setf (svref conflicts i) new-vec))
                     (setf (svref always-live i)
                           (make-array new-size :element-type 'bit
-                                     :initial-element 0)))))
+                                     :initial-element 0))
+                   (setf (svref always-live-count i) 0))))
                (t
                 (dotimes (i (finite-sb-current-size sb))
                   (declare (type index i))
@@ -181,7 +200,8 @@
                     (dotimes (j last-count)
                       (declare (type index j))
                       (clear-bit-vector (svref conf j))))
-                 (clear-bit-vector (svref always-live i)))))))
+                 (clear-bit-vector (svref always-live i))
+                 (setf (svref always-live-count i) 0))))))
  
           (setf (finite-sb-last-block-count sb) nblocks)
           (setf (finite-sb-current-size sb) (sb-size sb))
@@ -233,6 +253,12 @@
                             :element-type 'bit)))
         (setf (finite-sb-always-live sb) new-live))
  
+      (let ((new-live-count (make-array new-size)))
+       (declare (optimize speed)) ;; FILL deftransform
+       (replace new-live-count (finite-sb-always-live-count sb))
+       (fill new-live-count 0 :start size)
+       (setf (finite-sb-always-live-count sb) new-live-count))
+       
        (let ((new-tns (make-array new-size :initial-element nil)))
         (replace new-tns (finite-sb-live-tns sb))
         (fill (finite-sb-live-tns sb) nil)
@@ -421,7 +447,7 @@
         (setf (tn-save-tn tn) res)
         (setf (tn-save-tn res) tn)
         (setf (tn-sc res) alt)
-       (pack-tn res t)
+       (pack-tn res t nil)
         (return res)))))
  
  ;;; Find the load function for moving from SRC to DEST and emit a
@@ -756,6 +782,40 @@
           ((null ref))
         (incf cost))
        (setf (tn-cost tn) cost))))
+
+;;; Iterate over the normal TNs, storing the depth of the deepest loop
+;;; that the TN is used in TN-LOOP-DEPTH.
+(defun assign-tn-depths (component)
+  (when *loop-analyze* 
+    (do-ir2-blocks (block component)
+      (do ((vop (ir2-block-start-vop block)
+               (vop-next vop)))
+         ((null vop))
+       (flet ((find-all-tns (head-fun)
+                (collect ((tns))
+                  (do ((ref (funcall head-fun vop) (tn-ref-across ref)))
+                      ((null ref))
+                    (tns (tn-ref-tn ref)))
+                  (tns))))
+         (dolist (tn (nconc (find-all-tns #'vop-args)
+                            (find-all-tns #'vop-results)
+                            (find-all-tns #'vop-temps)
+                            ;; What does "references in this VOP
+                            ;; mean"? Probably something that isn't
+                            ;; useful in this context, since these
+                            ;; TN-REFs are linked with TN-REF-NEXT
+                            ;; instead of TN-REF-ACROSS. --JES
+                            ;; 2004-09-11
+                            ;; (find-all-tns #'vop-refs)
+                            ))
+           (setf (tn-loop-depth tn)
+                 (max (tn-loop-depth tn)
+                      (let* ((ir1-block (ir2-block-block (vop-block vop)))
+                             (loop (block-loop ir1-block)))
+                        (if loop
+                            (loop-depth loop)
+                            0))))))))))
+
  \f
  ;;;; load TN packing
  
@@ -1229,68 +1289,52 @@
  ;;;; location selection
  
  ;;; Select some location for TN in SC, returning the offset if we
-;;; succeed, and NIL if we fail. We start scanning at the Last-Offset
-;;; in an attempt to distribute the TNs across all storage.
+;;; succeed, and NIL if we fail.
  ;;;
-;;; We call OFFSET-CONFLICTS-IN-SB directly, rather than using
-;;; CONFLICTS-IN-SC. This allows us to more efficient in packing
-;;; multi-location TNs: we don't have to multiply the number of tests
-;;; by the TN size. This falls out naturally, since we have to be
-;;; aware of TN size anyway so that we don't call CONFLICTS-IN-SC on a
-;;; bogus offset.
+;;; For :UNBOUNDED SCs just find the smallest correctly aligned offset
+;;; where the TN doesn't conflict with the TNs that have already been
+;;; packed. For :FINITE SCs try to pack the TN into the most heavily
+;;; used locations first (as estimated in FIND-LOCATION-USAGE).
  ;;;
-;;; We give up on finding a location after our current pointer has
-;;; wrapped twice. This will result in testing some locations twice in
-;;; the case that we fail, but is simpler than trying to figure out
-;;; the soonest failure point.
-;;;
-;;; We also give up without bothering to wrap if the current size
-;;; isn't large enough to hold a single element of element-size
-;;; without bothering to wrap. If it doesn't fit this iteration, it
-;;; won't fit next.
-;;;
-;;; ### Note that we actually try to pack as many consecutive TNs as
-;;; possible in the same location, since we start scanning at the same
-;;; offset that the last TN was successfully packed in. This is a
-;;; weakening of the scattering hueristic that was put in to prevent
-;;; restricted VOP temps from hogging all of the registers. This way,
-;;; all of these temps probably end up in one register.
-(defun select-location (tn sc &optional use-reserved-locs)
+;;; Historically SELECT-LOCATION tried did the opposite and tried to
+;;; distribute the TNs evenly across the available locations. At least
+;;; on register-starved architectures (x86) this seems to be a bad
+;;; strategy. -- JES 2004-09-11
+(defun select-location (tn sc &key use-reserved-locs optimize)
    (declare (type tn tn) (type sc sc) (inline member))
    (let* ((sb (sc-sb sc))
          (element-size (sc-element-size sc))
          (alignment (sc-alignment sc))
          (align-mask (1- alignment))
-        (size (finite-sb-current-size sb))
-        (start-offset (finite-sb-last-offset sb)))
-    (let ((current-start
-          (logandc2 (the index (+ start-offset align-mask)) align-mask))
-         (wrap-p nil))
-      (declare (type index current-start))
-      (loop
-       (when (> (+ current-start element-size) size)
-         (cond ((or wrap-p (> element-size size))
-                (return nil))
-               (t
-                (setq current-start 0)
-                (setq wrap-p t))))
-
-       (if (or (eq (sb-kind sb) :unbounded)
-               (and (member current-start (sc-locations sc))
-                    (or use-reserved-locs
-                        (not (member current-start
-                                     (sc-reserve-locations sc))))))
-           (dotimes (i element-size
-                       (return-from select-location current-start))
-             (declare (type index i))
-             (let ((offset (+ current-start i)))
-               (when (offset-conflicts-in-sb tn sb offset)
-                 (setq current-start
-                       (logandc2 (the index (+ (the index (1+ offset))
-                                               align-mask))
-                                 align-mask))
-                 (return))))
-           (incf current-start alignment))))))
+        (size (finite-sb-current-size sb)))
+    (flet ((attempt-location (start-offset)
+            (dotimes (i element-size
+                      (return-from select-location start-offset))
+              (declare (type index i))
+              (let ((offset (+ start-offset i)))
+                (when (offset-conflicts-in-sb tn sb offset)
+                  (return (logandc2 (the index (+ (the index (1+ offset))
+                                                  align-mask))
+                                    align-mask)))))))
+      (if (eq (sb-kind sb) :unbounded)
+         (loop with offset = 0
+               until (> (+ offset element-size) size) do
+               (setf offset (attempt-location offset)))        
+         (let ((locations (sc-locations sc)))
+           (when optimize
+             (setf locations
+                   (stable-sort (copy-list locations) #'>
+                                :key (lambda (location-offset)
+                                       (loop for offset from location-offset
+                                             repeat element-size
+                                             maximize (svref
+                                                       (finite-sb-always-live-count sb)
+                                                       offset))))))
+           (dolist (offset locations)
+             (when (or use-reserved-locs
+                       (not (member offset
+                                    (sc-reserve-locations sc))))
+               (attempt-location offset))))))))
  
  ;;; If a save TN, return the saved TN, otherwise return TN. This is
  ;;; useful for getting the conflicts of a TN that might be a save TN.
@@ -1312,7 +1356,7 @@
  ;;; If we are attempting to pack in the SC of the save TN for a TN
  ;;; with a :SPECIFIED-SAVE TN, then we pack in that location, instead
  ;;; of allocating a new stack location.
-(defun pack-tn (tn restricted)
+(defun pack-tn (tn restricted optimize)
    (declare (type tn tn))
    (let* ((original (original-tn tn))
          (fsc (tn-sc tn))
@@ -1322,13 +1366,12 @@
           (when (and save
                      (eq (tn-kind save) :specified-save))
             (tn-sc save))))
-
      (do ((sc fsc (pop alternates)))
         ((null sc)
          (failed-to-pack-error tn restricted))
        (when (eq sc specified-save-sc)
         (unless (tn-offset save)
-         (pack-tn save nil))
+         (pack-tn save nil optimize))
         (setf (tn-offset tn) (tn-offset save))
         (setf (tn-sc tn) (tn-sc save))
         (return))
@@ -1337,17 +1380,16 @@
         (let ((loc (or (find-ok-target-offset original sc)
                        (select-location original sc)
                        (and restricted
-                           (select-location original sc t))
+                           (select-location original sc :use-reserved-locs t))
                        (when (eq (sb-kind (sc-sb sc)) :unbounded)
                          (grow-sc sc)
                          (or (select-location original sc)
                              (error "failed to pack after growing SC?"))))))
           (when loc
-           (add-location-conflicts original sc loc)
+           (add-location-conflicts original sc loc optimize)
             (setf (tn-sc tn) sc)
             (setf (tn-offset tn) loc)
             (return))))))
-
    (values))
  
  ;;; Pack a wired TN, checking that the offset is in bounds for the SB,
@@ -1360,7 +1402,7 @@
  ;;; locations for local call arguments (such as OLD-FP) work, since
  ;;; the caller and callee OLD-FP save locations may conflict when the
  ;;; save locations don't really (due to being in different frames.)
-(defun pack-wired-tn (tn)
+(defun pack-wired-tn (tn optimize)
    (declare (type tn tn))
    (let* ((sc (tn-sc tn))
          (sb (sc-sb sc))
@@ -1410,7 +1452,7 @@
                (conflicts-in-sc original sc offset))
        (error "~S is wired to a location that it conflicts with." tn))
  
-    (add-location-conflicts original sc offset)))
+    (add-location-conflicts original sc offset optimize)))
  
  (defevent repack-block "Repacked a block due to TN unpacking.")
  
@@ -1444,6 +1486,12 @@
                           ;; construction of this SIMPLE-BIT-VECTOR
                           ;; until runtime.
                           #+sb-xc (make-array 0 :element-type 'bit)))
+       (setf (finite-sb-always-live-count sb)
+             (make-array size
+                         :initial-element    
+                         #-sb-xc #*
+                         ;; Ibid
+                         #+sb-xc (make-array 0 :element-type 'fixnum)))
         
         (fill (finite-sb-conflicts sb) nil)
         (setf (finite-sb-conflicts sb)
@@ -1455,11 +1503,26 @@
  
  (defun pack (component)
    (unwind-protect
-       (let ((optimize (policy *lexenv*
-                              (or (>= speed compilation-speed)
-                                  (>= space compilation-speed))))
+       (let ((optimize nil)
              (2comp (component-info component)))
          (init-sb-vectors component)
+
+        ;; Determine whether we want to do more expensive packing by
+        ;; checking whether any blocks in the component have (> SPEED
+        ;; COMPILE-SPEED).
+        ;; 
+        ;; FIXME: This means that a declaration can have a minor
+        ;; effect even outside its scope, and as the packing is done
+        ;; component-globally it'd be tricky to use strict scoping. I
+        ;; think this is still acceptable since it's just a tradeoff
+        ;; between compilation speed and allocation quality and
+        ;; doesn't affect the semantics of the generated code in any
+        ;; way. -- JES 2004-10-06
+        (do-ir2-blocks (block component)
+          (when (policy (block-last (ir2-block-block block))
+                        (> speed compilation-speed))
+            (setf optimize t)
+            (return)))
          
          ;; Call the target functions.
          (do-ir2-blocks (block component)
@@ -1469,49 +1532,66 @@
                (when target-fun
                  (funcall target-fun vop)))))
          
-        
          ;; Pack wired TNs first.
          (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn)))
              ((null tn))
-          (pack-wired-tn tn))
+          (pack-wired-tn tn optimize))
          
          ;; Pack restricted component TNs.
          (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn)))
              ((null tn))
            (when (eq (tn-kind tn) :component)
-            (pack-tn tn t)))
+            (pack-tn tn t optimize)))
          
          ;; Pack other restricted TNs.
          (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn)))
              ((null tn))
            (unless (tn-offset tn)
-            (pack-tn tn t)))
-        
-        ;; Assign costs to normal TNs so we know which ones should always
-        ;; be packed on the stack.
-        (when (and optimize *pack-assign-costs*)
-          (assign-tn-costs component))
-        
-        ;; Pack normal TNs in the order that they appear in the code. This
-        ;; should have some tendency to pack important TNs first, since
-        ;; control analysis favors the drop-through. This should also help
-        ;; targeting, since we will pack the target TN soon after we
-        ;; determine the location of the targeting TN.
-        (do-ir2-blocks (block component)
-          (let ((ltns (ir2-block-local-tns block)))
-            (do ((i (1- (ir2-block-local-tn-count block)) (1- i)))
-                ((minusp i))
-              (declare (fixnum i))
-              (let ((tn (svref ltns i)))
-                (unless (or (null tn) (eq tn :more) (tn-offset tn))
-                  (pack-tn tn nil))))))
+            (pack-tn tn t optimize)))
          
+        ;; Assign costs to normal TNs so we know which ones should
+        ;; always be packed on the stack.
+        (when *pack-assign-costs*
+          (assign-tn-costs component)
+          (assign-tn-depths component))
+
+        ;; Allocate normal TNs, starting with the TNs that are used
+        ;; in deep loops.
+        (collect ((tns))
+          (do-ir2-blocks (block component)
+            (let ((ltns (ir2-block-local-tns block)))
+              (do ((i (1- (ir2-block-local-tn-count block)) (1- i)))
+                  ((minusp i))
+                (declare (fixnum i))
+                (let ((tn (svref ltns i)))
+                  (unless (or (null tn)
+                              (eq tn :more)
+                              (tn-offset tn))
+                    ;; If loop analysis has been disabled we might as
+                    ;; well revert to the old behaviour of just
+                    ;; packing TNs linearly as they appear.
+                    (unless *loop-analyze*
+                      (pack-tn tn nil optimize))
+                    (tns tn))))))
+          (dolist (tn (stable-sort (tns)
+                                   (lambda (a b)
+                                     (cond
+                                       ((> (tn-loop-depth a)
+                                           (tn-loop-depth b))
+                                        t)
+                                       ((= (tn-loop-depth a)
+                                           (tn-loop-depth b))
+                                        (> (tn-cost a) (tn-cost b)))
+                                       (t nil)))))
+            (unless (tn-offset tn)
+              (pack-tn tn nil optimize))))
+          
          ;; Pack any leftover normal TNs. This is to deal with :MORE TNs,
          ;; which could possibly not appear in any local TN map.
          (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn)))
              ((null tn))
            (unless (tn-offset tn)
-            (pack-tn tn nil)))
+            (pack-tn tn nil optimize)))
          
          ;; Do load TN packing and emit saves.
          (let ((*repack-blocks* nil))
@@ -1525,13 +1605,13 @@
                     (pack-load-tns block))))
            (when *repack-blocks*
              (loop
-             (when (zerop (hash-table-count *repack-blocks*)) (return))
-             (maphash (lambda (block v)
-                        (declare (ignore v))
-                        (remhash block *repack-blocks*)
-                        (event repack-block)
-                        (pack-load-tns block))
-                      *repack-blocks*))))
+                (when (zerop (hash-table-count *repack-blocks*)) (return))
+                (maphash (lambda (block v)
+                           (declare (ignore v))
+                           (remhash block *repack-blocks*)
+                           (event repack-block)
+                           (pack-load-tns block))
+                         *repack-blocks*))))
          
          (values))
      (clean-up-pack-structures)))
diff --git a/src/compiler/vop.lisp b/src/compiler/vop.lisp

index 4570b36..bf743c2 100644 (file)
--- a/src/compiler/vop.lisp
+++ b/src/compiler/vop.lisp
@@ -421,6 +421,47 @@
    home
    save-sp
    dynamic-state)
+
+(defstruct (cloop (:print-function print-cloop)
+                 (:conc-name loop-)
+                 (:predicate loop-p)
+                 (:constructor make-loop)
+                 (:copier copy-loop))
+  ;; The kind of loop that this is.  These values are legal:
+  ;;
+  ;;    :OUTER
+  ;;        This is the outermost loop structure, and represents all the
+  ;;        code in a component.
+  ;;
+  ;;    :NATURAL
+  ;;        A normal loop with only one entry.
+  ;;
+  ;;    :STRANGE
+  ;;        A segment of a "strange loop" in a non-reducible flow graph.
+  (kind (required-argument) :type (member :outer :natural :strange))
+  ;; The first and last blocks in the loop.  There may be more than one tail,
+  ;; since there may be multiple back branches to the same head.
+  (head nil :type (or cblock null))
+  (tail nil :type list)
+  ;; A list of all the blocks in this loop or its inferiors that have a
+  ;; successor outside of the loop.
+  (exits nil :type list)
+  ;; The loop that this loop is nested within.  This is null in the outermost
+  ;; loop structure.
+  (superior nil :type (or cloop null))
+  ;; A list of the loops nested directly within this one.
+  (inferiors nil :type list)
+  (depth 0 :type fixnum)
+  ;; The head of the list of blocks directly within this loop.  We must recurse
+  ;; on INFERIORS to find all the blocks.
+  (blocks nil :type (or null cblock)))
+
+(defprinter (cloop :conc-name LOOP-)
+  kind
+  head
+  tail
+  exits
+  depth)
  \f
  ;;;; VOPs and templates
  
@@ -738,6 +779,7 @@
    ;; is set, then the location is in use somewhere in the block, and
    ;; thus has a conflict for always-live TNs.
    (always-live '#() :type simple-vector)
+  (always-live-count '#() :type simple-vector)
    ;; a vector containing the TN currently live in each location in the
    ;; SB, or NIL if the location is unused. This is used during load-tn pack.
    (live-tns '#() :type simple-vector)
@@ -933,7 +975,9 @@
    (cost 0 :type fixnum)
    ;; If a :ENVIRONMENT or :DEBUG-ENVIRONMENT TN, this is the
    ;; physical environment that the TN is live throughout.
-  (physenv nil :type (or physenv null)))
+  (physenv nil :type (or physenv null))
+  ;; The depth of the deepest loop that this TN is used in.
+  (loop-depth 0 :type fixnum))
  (def!method print-object ((tn tn) stream)
    (print-unreadable-object (tn stream :type t)
      ;; KLUDGE: The distinction between PRINT-TN and PRINT-OBJECT on TN is
diff --git a/version.lisp-expr b/version.lisp-expr

index 1e230e8..3f4b407 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
  ;;; checkins which aren't released. (And occasionally for internal
  ;;; versions, especially for internal versions off the main CVS
  ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.8.15.12"
+"0.8.15.13"
author	Juho Snellman <jsnell@iki.fi>
	Tue, 12 Oct 2004 22:01:39 +0000 (22:01 +0000)
committer	Juho Snellman <jsnell@iki.fi>
	Tue, 12 Oct 2004 22:01:39 +0000 (22:01 +0000)
NEWS		patch \| blob \| history
OPTIMIZATIONS		patch \| blob \| history
build-order.lisp-expr		patch \| blob \| history
src/compiler/loop.lisp	[new file with mode: 0644]	patch \| blob
src/compiler/main.lisp		patch \| blob \| history
src/compiler/meta-vmdef.lisp		patch \| blob \| history
src/compiler/node.lisp		patch \| blob \| history
src/compiler/pack.lisp		patch \| blob \| history
src/compiler/vop.lisp		patch \| blob \| history
version.lisp-expr		patch \| blob \| history