From bffa99d35c7d50ac46b9eb7dbe25d1ab1a0e6145 Mon Sep 17 00:00:00 2001 From: Juho Snellman Date: Tue, 12 Oct 2004 22:01:39 +0000 Subject: [PATCH] 0.8.15.13: Port over / reconstruct ancient CMUCL loop analysis code. Improve register allocation: ... Pack TNs that are used in deep loops first, giving them a higher chance of getting allocated in a register. ... Inside loops, pack the most used TNs first. ... When (> SPEED COMPILE-SPEED) attempt to pack the TNs into the most used locations in the SB instead of the earlier behaviour of scattering them into as many locations as possible. This results in tighter allocation / fewer spills. --- NEWS | 1 + OPTIMIZATIONS | 28 ----- build-order.lisp-expr | 1 + src/compiler/loop.lisp | 218 ++++++++++++++++++++++++++++++++ src/compiler/main.lisp | 21 ++++ src/compiler/meta-vmdef.lisp | 5 +- src/compiler/node.lisp | 11 +- src/compiler/pack.lisp | 284 +++++++++++++++++++++++++++--------------- src/compiler/vop.lisp | 46 ++++++- version.lisp-expr | 2 +- 10 files changed, 483 insertions(+), 134 deletions(-) create mode 100644 src/compiler/loop.lisp diff --git a/NEWS b/NEWS index 06d43e5..b95d794 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,7 @@ changes in sbcl-0.8.16 relative to sbcl-0.8.15: Beane) * fixed compiler failure, caused by instrumenting code during IR1-optimization. (Debian bug report #273606 by Gabor Melis) + * optimization: added loop analysis and improved register allocation * fixed some bugs revealed by Paul Dietz' test suite: ** POSITION on displaced vectors with non-zero displacement returns the right answer. diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS index b0c8f47..5474bd2 100644 --- a/OPTIMIZATIONS +++ b/OPTIMIZATIONS @@ -62,34 +62,6 @@ VOP DATA-VECTOR-SET/SIMPLE-STRING V2!14[EDI] t32[EAX] t30[S2]>t33[CL] * And why two moves? -------------------------------------------------------------------------------- -#6 -09:49:05 I have found a case in those where suboptimal code is - generate with nested loops, it might be moderately easy to fix that -09:49:28 see - http://www.bagley.org/~doug/shootout/bench/nestedloop/nestedloop.cmucl -09:50:30 if you add declarations to dotimes, generated code is - almost optimal, but most inner loops run out of registers and use - memory location for iteration variable - -;;; -*- mode: lisp -*- -;;; http://www.bagley.org/~doug/shootout/ -;;; from Friedrich Dominicus - -(defun main () - (let ((n (parse-integer (or (car (last extensions:*command-line-strings*)) "1"))) - (x 0)) - (declare (fixnum n) - (fixnum x) - (optimize (speed 3) (debug 0) (safety 0))) - (dotimes (a n) - (dotimes (b n) - (dotimes (c n) - (dotimes (d n) - (dotimes (e n) - (dotimes (f n) - (incf x))))))) - (format t "~A~%" x))) --------------------------------------------------------------------------------- #8 (defun foo (d) (declare (optimize (speed 3) (safety 0) (debug 0))) diff --git a/build-order.lisp-expr b/build-order.lisp-expr index 441e757..0cdd20b 100644 --- a/build-order.lisp-expr +++ b/build-order.lisp-expr @@ -480,6 +480,7 @@ ("src/compiler/ir1util") ("src/compiler/ir1report") ("src/compiler/ir1opt") + ("src/compiler/loop") ("src/compiler/ir1final") ("src/compiler/array-tran") diff --git a/src/compiler/loop.lisp b/src/compiler/loop.lisp new file mode 100644 index 0000000..c43c056 --- /dev/null +++ b/src/compiler/loop.lisp @@ -0,0 +1,218 @@ +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +;;; ********************************************************************** +;;; +;;; Stuff to annotate the flow graph with information about the loops in it. +;;; +;;; Written by Rob MacLachlan +(in-package "SB!C") + +;;; FIND-DOMINATORS -- Internal +;;; +;;; Find the set of blocks that dominates each block in COMPONENT. We +;;; assume that the DOMINATORS for each block is initially NIL, which +;;; serves to represent the set of all blocks. If a block is not +;;; reachable from an entry point, then its dominators will still be +;;; NIL when we are done. +(defun find-dominators (component) + (let ((head (loop-head (component-outer-loop component))) + changed) + (let ((set (make-sset))) + (sset-adjoin head set) + (setf (block-dominators head) set)) + (loop + (setq changed nil) + (do-blocks (block component :tail) + (let ((dom (block-dominators block))) + (when dom (sset-delete block dom)) + (dolist (pred (block-pred block)) + (let ((pdom (block-dominators pred))) + (when pdom + (if dom + (when (sset-intersection dom pdom) + (setq changed t)) + (setq dom (copy-sset pdom) changed t))))) + (setf (block-dominators block) dom) + (when dom (sset-adjoin block dom)))) + (unless changed (return))))) + + +;;; DOMINATES-P -- Internal +;;; +;;; Return true if BLOCK1 dominates BLOCK2, false otherwise. +(defun dominates-p (block1 block2) + (let ((set (block-dominators block2))) + (if set + (sset-member block1 set) + t))) + +;;; LOOP-ANALYZE -- Interface +;;; +;;; Set up the LOOP structures which describe the loops in the flow +;;; graph for COMPONENT. We NIL out any existing loop information, +;;; and then scan through the blocks looking for blocks which are the +;;; destination of a retreating edge: an edge that goes backward in +;;; the DFO. We then create LOOP structures to describe the loops +;;; that have those blocks as their heads. If find the head of a +;;; strange loop, then we do some graph walking to find the other +;;; segments in the strange loop. After we have found the loop +;;; structure, we walk it to initialize the block lists. +(defun loop-analyze (component) + (let ((loop (component-outer-loop component))) + (do-blocks (block component :both) + (setf (block-loop block) nil)) + (setf (loop-inferiors loop) ()) + (setf (loop-blocks loop) nil) + (do-blocks (block component) + (let ((number (block-number block))) + (dolist (pred (block-pred block)) + (when (<= (block-number pred) number) + (when (note-loop-head block component) + (clear-flags component) + (setf (block-flag block) :good) + (dolist (succ (block-succ block)) + (find-strange-loop-blocks succ block)) + (find-strange-loop-segments block component)) + (return))))) + (find-loop-blocks (component-outer-loop component)))) + + +;;; FIND-LOOP-BLOCKS -- Internal +;;; +;;; This function initializes the block lists for LOOP and the loops +;;; nested within it. We recursively descend into the loop nesting +;;; and place the blocks in the appropriate loop on the way up. When +;;; we are done, we scan the blocks looking for exits. An exit is +;;; always a block that has a successor which doesn't have a LOOP +;;; assigned yet, since the target of the exit must be in a superior +;;; loop. +;;; +;;; We find the blocks by doing a forward walk from the head of the +;;; loop and from any exits of nested loops. The walks from inferior +;;; loop exits are necessary because the walks from the head terminate +;;; when they encounter a block in an inferior loop. +(defun find-loop-blocks (loop) + (dolist (sub-loop (loop-inferiors loop)) + (find-loop-blocks sub-loop)) + + (find-blocks-from-here (loop-head loop) loop) + (dolist (sub-loop (loop-inferiors loop)) + (dolist (exit (loop-exits sub-loop)) + (dolist (succ (block-succ exit)) + (find-blocks-from-here succ loop)))) + + (collect ((exits)) + (dolist (sub-loop (loop-inferiors loop)) + (dolist (exit (loop-exits sub-loop)) + (dolist (succ (block-succ exit)) + (unless (block-loop succ) + (exits exit) + (return))))) + + (do ((block (loop-blocks loop) (block-loop-next block))) + ((null block)) + (dolist (succ (block-succ block)) + (unless (block-loop succ) + (exits block) + (return)))) + (setf (loop-exits loop) (exits)))) + + +;;; FIND-BLOCKS-FROM-HERE -- Internal +;;; +;;; This function does a graph walk to find the blocks directly within +;;; LOOP that can be reached by a forward walk from BLOCK. If BLOCK +;;; is already in a loop or is not dominated by the LOOP-HEAD, then we +;;; return. Otherwise, we add the block to the BLOCKS for LOOP and +;;; recurse on its successors. +(defun find-blocks-from-here (block loop) + (when (and (not (block-loop block)) + (dominates-p (loop-head loop) block)) + (setf (block-loop block) loop) + (shiftf (block-loop-next block) (loop-blocks loop) block) + (dolist (succ (block-succ block)) + (find-blocks-from-here succ loop)))) + + +;;; NOTE-LOOP-HEAD -- Internal +;;; +;;; Create a loop structure to describe the loop headed by the block +;;; HEAD. If there is one already, just return. If some retreating +;;; edge into the head is from a block which isn't dominated by the +;;; head, then we have the head of a strange loop segment. We return +;;; true if HEAD is part of a newly discovered strange loop. +(defun note-loop-head (head component) + (let ((superior (find-superior head (component-outer-loop component)))) + (unless (eq (loop-head superior) head) + (let ((result (make-loop :head head + :kind :natural + :superior superior + :depth (1+ (loop-depth superior)))) + (number (block-number head))) + (push result (loop-inferiors superior)) + (dolist (pred (block-pred head)) + (when (<= (block-number pred) number) + (if (dominates-p head pred) + (push pred (loop-tail result)) + (setf (loop-kind result) :strange)))) + (eq (loop-kind result) :strange))))) + + +;;; FIND-SUPERIOR -- Internal +;;; +;;; Find the loop which would be the superior of a loop headed by +;;; HEAD. If there is already a loop with that head, then return that +;;; loop. +(defun find-superior (head loop) + (if (eq (loop-head loop) head) + loop + (dolist (inferior (loop-inferiors loop) loop) + (when (dominates-p (loop-head inferior) head) + (return (find-superior head inferior)))))) + + +;;; FIND-STRANGE-LOOP-BLOCKS -- Internal +;;; +;;; Do a graph walk to find the blocks in the strange loop which HEAD +;;; is in. BLOCK is the block we are currently at and COMPONENT is +;;; the component we are in. We do a walk forward from block, using +;;; only edges which are not back edges. We return true if there is a +;;; path from BLOCK to HEAD, false otherwise. If the BLOCK-FLAG is +;;; true then we return. We use two non-null values of FLAG to +;;; indicate whether a path from the BLOCK back to HEAD was found. +(defun find-strange-loop-blocks (block head) + (let ((flag (block-flag block))) + (cond (flag + (if (eq flag :good) + t + nil)) + (t + (setf (block-flag block) :bad) + (unless (dominates-p block head) + (dolist (succ (block-succ block)) + (when (find-strange-loop-blocks succ head) + (setf (block-flag block) :good)))) + (eq (block-flag block) :good))))) + +;;; FIND-STRANGE-LOOP-SEGMENTS -- Internal +;;; +;;; Do a graph walk to find the segments in the strange loop that has +;;; BLOCK in it. We walk forward, looking only at blocks in the loop +;;; (flagged as :GOOD.) Each block in the loop that has predecessors +;;; outside of the loop is the head of a segment. We enter the LOOP +;;; structures in COMPONENT. +(defun find-strange-loop-segments (block component) + (when (eq (block-flag block) :good) + (setf (block-flag block) :done) + (unless (every #'(lambda (x) (member (block-flag x) '(:good :done))) + (block-pred block)) + (note-loop-head block component)) + (dolist (succ (block-succ block)) + (find-strange-loop-segments succ component)))) diff --git a/src/compiler/main.lisp b/src/compiler/main.lisp index 01d9c6f..0951cc8 100644 --- a/src/compiler/main.lisp +++ b/src/compiler/main.lisp @@ -38,6 +38,9 @@ (defvar *check-consistency* nil) (defvar *all-components*) +;;; Set to NIL to disable loop analysis for register allocation. +(defvar *loop-analyze* t) + ;;; Bind this to a stream to capture various internal debugging output. (defvar *compiler-trace-output* nil) @@ -523,6 +526,24 @@ (ir1-phases component) + (when *loop-analyze* + (find-dominators component) + (loop-analyze component)) + + #| + (when (and *loop-analyze* *compiler-trace-output*) + (labels ((print-blocks (block) + (format *compiler-trace-output* " ~A~%" block) + (when (block-loop-next block) + (print-blocks (block-loop-next block)))) + (print-loop (loop) + (format *compiler-trace-output* "loop=~A~%" loop) + (print-blocks (loop-blocks loop)) + (dolist (l (loop-inferiors loop)) + (print-loop l)))) + (print-loop (component-outer-loop component)))) + |# + ;; FIXME: What is MAYBE-MUMBLE for? Do we need it any more? (maybe-mumble "env ") (physenv-analyze component) diff --git a/src/compiler/meta-vmdef.lisp b/src/compiler/meta-vmdef.lisp index 805761f..d464ce4 100644 --- a/src/compiler/meta-vmdef.lisp +++ b/src/compiler/meta-vmdef.lisp @@ -72,7 +72,10 @@ (/show0 "doing third SETF") (setf (finite-sb-live-tns res) (make-array ',size :initial-element nil)) - (/show0 "doing fourth and final SETF") + (/show0 "doing fourth SETF") + (setf (finite-sb-always-live-count res) + (make-array ',size :initial-element 0)) + (/show0 "doing fifth and final SETF") (setf (gethash ',name *backend-sb-names*) res))) diff --git a/src/compiler/node.lisp b/src/compiler/node.lisp index d9ff1eb..4f0ad48 100644 --- a/src/compiler/node.lisp +++ b/src/compiler/node.lisp @@ -227,6 +227,13 @@ (gen nil) (in nil) (out nil) + ;; Set of all blocks that dominate this block. NIL is interpreted + ;; as "all blocks in component". + (dominators nil :type (or null sset)) + ;; the LOOP that this block belongs to + (loop nil :type (or null cloop)) + ;; next block in the loop. + (loop-next nil :type (or null cblock)) ;; the component this block is in, or NIL temporarily during IR1 ;; conversion and in deleted blocks (component (progn @@ -378,7 +385,9 @@ (delete-blocks nil :type list) (nlx-info-generated-p nil :type boolean) ;; this is filled by physical environment analysis - (dx-lvars nil :type list)) + (dx-lvars nil :type list) + ;; The default LOOP in the component. + (outer-loop (make-loop :kind :outer :head head) :type cloop)) (defprinter (component :identity t) name #!+sb-show id diff --git a/src/compiler/pack.lisp b/src/compiler/pack.lisp index a03d156..e28b767 100644 --- a/src/compiler/pack.lisp +++ b/src/compiler/pack.lisp @@ -92,7 +92,7 @@ ;;; the bits, otherwise we OR in the local conflict bits. ;;; -- If the TN is local, then we just do the block it is local to, ;;; setting always-live and OR'ing in the local conflicts. -(defun add-location-conflicts (tn sc offset) +(defun add-location-conflicts (tn sc offset optimize) (declare (type tn tn) (type sc sc) (type index offset)) (let ((confs (tn-global-conflicts tn)) (sb (sc-sb sc)) @@ -123,7 +123,24 @@ (let ((num (ir2-block-number (tn-local tn)))) (setf (sbit loc-live num) 1) (bit-ior (the local-tn-bit-vector (svref loc-confs num)) - (tn-local-conflicts tn) t)))))))) + (tn-local-conflicts tn) t)))) + ;; Calculating ALWAYS-LIVE-COUNT is moderately expensive, and + ;; currently the information isn't used unless (> SPEED + ;; COMPILE-SPEED). + (when optimize + (setf (svref (finite-sb-always-live-count sb) this-offset) + (find-location-usage sb this-offset)))))) + (values)) + +;; A rought measure of how much a given OFFSET in SB is currently +;; used. Current implementation counts the amount of blocks where the +;; offset has been marked as ALWAYS-LIVE. +(defun find-location-usage (sb offset) + (declare (optimize speed)) + (declare (type sb sb) (type index offset)) + (let* ((always-live (svref (finite-sb-always-live sb) offset))) + (declare (simple-bit-vector always-live)) + (count 1 always-live))) ;;; Return the total number of IR2-BLOCKs in COMPONENT. (defun ir2-block-count (component) @@ -144,6 +161,7 @@ (unless (eq (sb-kind sb) :non-packed) (let* ((conflicts (finite-sb-conflicts sb)) (always-live (finite-sb-always-live sb)) + (always-live-count (finite-sb-always-live-count sb)) (max-locs (length conflicts)) (last-count (finite-sb-last-block-count sb))) (unless (zerop max-locs) @@ -172,7 +190,8 @@ (setf (svref conflicts i) new-vec)) (setf (svref always-live i) (make-array new-size :element-type 'bit - :initial-element 0))))) + :initial-element 0)) + (setf (svref always-live-count i) 0)))) (t (dotimes (i (finite-sb-current-size sb)) (declare (type index i)) @@ -181,7 +200,8 @@ (dotimes (j last-count) (declare (type index j)) (clear-bit-vector (svref conf j)))) - (clear-bit-vector (svref always-live i))))))) + (clear-bit-vector (svref always-live i)) + (setf (svref always-live-count i) 0)))))) (setf (finite-sb-last-block-count sb) nblocks) (setf (finite-sb-current-size sb) (sb-size sb)) @@ -233,6 +253,12 @@ :element-type 'bit))) (setf (finite-sb-always-live sb) new-live)) + (let ((new-live-count (make-array new-size))) + (declare (optimize speed)) ;; FILL deftransform + (replace new-live-count (finite-sb-always-live-count sb)) + (fill new-live-count 0 :start size) + (setf (finite-sb-always-live-count sb) new-live-count)) + (let ((new-tns (make-array new-size :initial-element nil))) (replace new-tns (finite-sb-live-tns sb)) (fill (finite-sb-live-tns sb) nil) @@ -421,7 +447,7 @@ (setf (tn-save-tn tn) res) (setf (tn-save-tn res) tn) (setf (tn-sc res) alt) - (pack-tn res t) + (pack-tn res t nil) (return res))))) ;;; Find the load function for moving from SRC to DEST and emit a @@ -756,6 +782,40 @@ ((null ref)) (incf cost)) (setf (tn-cost tn) cost)))) + +;;; Iterate over the normal TNs, storing the depth of the deepest loop +;;; that the TN is used in TN-LOOP-DEPTH. +(defun assign-tn-depths (component) + (when *loop-analyze* + (do-ir2-blocks (block component) + (do ((vop (ir2-block-start-vop block) + (vop-next vop))) + ((null vop)) + (flet ((find-all-tns (head-fun) + (collect ((tns)) + (do ((ref (funcall head-fun vop) (tn-ref-across ref))) + ((null ref)) + (tns (tn-ref-tn ref))) + (tns)))) + (dolist (tn (nconc (find-all-tns #'vop-args) + (find-all-tns #'vop-results) + (find-all-tns #'vop-temps) + ;; What does "references in this VOP + ;; mean"? Probably something that isn't + ;; useful in this context, since these + ;; TN-REFs are linked with TN-REF-NEXT + ;; instead of TN-REF-ACROSS. --JES + ;; 2004-09-11 + ;; (find-all-tns #'vop-refs) + )) + (setf (tn-loop-depth tn) + (max (tn-loop-depth tn) + (let* ((ir1-block (ir2-block-block (vop-block vop))) + (loop (block-loop ir1-block))) + (if loop + (loop-depth loop) + 0)))))))))) + ;;;; load TN packing @@ -1229,68 +1289,52 @@ ;;;; location selection ;;; Select some location for TN in SC, returning the offset if we -;;; succeed, and NIL if we fail. We start scanning at the Last-Offset -;;; in an attempt to distribute the TNs across all storage. +;;; succeed, and NIL if we fail. ;;; -;;; We call OFFSET-CONFLICTS-IN-SB directly, rather than using -;;; CONFLICTS-IN-SC. This allows us to more efficient in packing -;;; multi-location TNs: we don't have to multiply the number of tests -;;; by the TN size. This falls out naturally, since we have to be -;;; aware of TN size anyway so that we don't call CONFLICTS-IN-SC on a -;;; bogus offset. +;;; For :UNBOUNDED SCs just find the smallest correctly aligned offset +;;; where the TN doesn't conflict with the TNs that have already been +;;; packed. For :FINITE SCs try to pack the TN into the most heavily +;;; used locations first (as estimated in FIND-LOCATION-USAGE). ;;; -;;; We give up on finding a location after our current pointer has -;;; wrapped twice. This will result in testing some locations twice in -;;; the case that we fail, but is simpler than trying to figure out -;;; the soonest failure point. -;;; -;;; We also give up without bothering to wrap if the current size -;;; isn't large enough to hold a single element of element-size -;;; without bothering to wrap. If it doesn't fit this iteration, it -;;; won't fit next. -;;; -;;; ### Note that we actually try to pack as many consecutive TNs as -;;; possible in the same location, since we start scanning at the same -;;; offset that the last TN was successfully packed in. This is a -;;; weakening of the scattering hueristic that was put in to prevent -;;; restricted VOP temps from hogging all of the registers. This way, -;;; all of these temps probably end up in one register. -(defun select-location (tn sc &optional use-reserved-locs) +;;; Historically SELECT-LOCATION tried did the opposite and tried to +;;; distribute the TNs evenly across the available locations. At least +;;; on register-starved architectures (x86) this seems to be a bad +;;; strategy. -- JES 2004-09-11 +(defun select-location (tn sc &key use-reserved-locs optimize) (declare (type tn tn) (type sc sc) (inline member)) (let* ((sb (sc-sb sc)) (element-size (sc-element-size sc)) (alignment (sc-alignment sc)) (align-mask (1- alignment)) - (size (finite-sb-current-size sb)) - (start-offset (finite-sb-last-offset sb))) - (let ((current-start - (logandc2 (the index (+ start-offset align-mask)) align-mask)) - (wrap-p nil)) - (declare (type index current-start)) - (loop - (when (> (+ current-start element-size) size) - (cond ((or wrap-p (> element-size size)) - (return nil)) - (t - (setq current-start 0) - (setq wrap-p t)))) - - (if (or (eq (sb-kind sb) :unbounded) - (and (member current-start (sc-locations sc)) - (or use-reserved-locs - (not (member current-start - (sc-reserve-locations sc)))))) - (dotimes (i element-size - (return-from select-location current-start)) - (declare (type index i)) - (let ((offset (+ current-start i))) - (when (offset-conflicts-in-sb tn sb offset) - (setq current-start - (logandc2 (the index (+ (the index (1+ offset)) - align-mask)) - align-mask)) - (return)))) - (incf current-start alignment)))))) + (size (finite-sb-current-size sb))) + (flet ((attempt-location (start-offset) + (dotimes (i element-size + (return-from select-location start-offset)) + (declare (type index i)) + (let ((offset (+ start-offset i))) + (when (offset-conflicts-in-sb tn sb offset) + (return (logandc2 (the index (+ (the index (1+ offset)) + align-mask)) + align-mask))))))) + (if (eq (sb-kind sb) :unbounded) + (loop with offset = 0 + until (> (+ offset element-size) size) do + (setf offset (attempt-location offset))) + (let ((locations (sc-locations sc))) + (when optimize + (setf locations + (stable-sort (copy-list locations) #'> + :key (lambda (location-offset) + (loop for offset from location-offset + repeat element-size + maximize (svref + (finite-sb-always-live-count sb) + offset)))))) + (dolist (offset locations) + (when (or use-reserved-locs + (not (member offset + (sc-reserve-locations sc)))) + (attempt-location offset)))))))) ;;; If a save TN, return the saved TN, otherwise return TN. This is ;;; useful for getting the conflicts of a TN that might be a save TN. @@ -1312,7 +1356,7 @@ ;;; If we are attempting to pack in the SC of the save TN for a TN ;;; with a :SPECIFIED-SAVE TN, then we pack in that location, instead ;;; of allocating a new stack location. -(defun pack-tn (tn restricted) +(defun pack-tn (tn restricted optimize) (declare (type tn tn)) (let* ((original (original-tn tn)) (fsc (tn-sc tn)) @@ -1322,13 +1366,12 @@ (when (and save (eq (tn-kind save) :specified-save)) (tn-sc save)))) - (do ((sc fsc (pop alternates))) ((null sc) (failed-to-pack-error tn restricted)) (when (eq sc specified-save-sc) (unless (tn-offset save) - (pack-tn save nil)) + (pack-tn save nil optimize)) (setf (tn-offset tn) (tn-offset save)) (setf (tn-sc tn) (tn-sc save)) (return)) @@ -1337,17 +1380,16 @@ (let ((loc (or (find-ok-target-offset original sc) (select-location original sc) (and restricted - (select-location original sc t)) + (select-location original sc :use-reserved-locs t)) (when (eq (sb-kind (sc-sb sc)) :unbounded) (grow-sc sc) (or (select-location original sc) (error "failed to pack after growing SC?")))))) (when loc - (add-location-conflicts original sc loc) + (add-location-conflicts original sc loc optimize) (setf (tn-sc tn) sc) (setf (tn-offset tn) loc) (return)))))) - (values)) ;;; Pack a wired TN, checking that the offset is in bounds for the SB, @@ -1360,7 +1402,7 @@ ;;; locations for local call arguments (such as OLD-FP) work, since ;;; the caller and callee OLD-FP save locations may conflict when the ;;; save locations don't really (due to being in different frames.) -(defun pack-wired-tn (tn) +(defun pack-wired-tn (tn optimize) (declare (type tn tn)) (let* ((sc (tn-sc tn)) (sb (sc-sb sc)) @@ -1410,7 +1452,7 @@ (conflicts-in-sc original sc offset)) (error "~S is wired to a location that it conflicts with." tn)) - (add-location-conflicts original sc offset))) + (add-location-conflicts original sc offset optimize))) (defevent repack-block "Repacked a block due to TN unpacking.") @@ -1444,6 +1486,12 @@ ;; construction of this SIMPLE-BIT-VECTOR ;; until runtime. #+sb-xc (make-array 0 :element-type 'bit))) + (setf (finite-sb-always-live-count sb) + (make-array size + :initial-element + #-sb-xc #* + ;; Ibid + #+sb-xc (make-array 0 :element-type 'fixnum))) (fill (finite-sb-conflicts sb) nil) (setf (finite-sb-conflicts sb) @@ -1455,11 +1503,26 @@ (defun pack (component) (unwind-protect - (let ((optimize (policy *lexenv* - (or (>= speed compilation-speed) - (>= space compilation-speed)))) + (let ((optimize nil) (2comp (component-info component))) (init-sb-vectors component) + + ;; Determine whether we want to do more expensive packing by + ;; checking whether any blocks in the component have (> SPEED + ;; COMPILE-SPEED). + ;; + ;; FIXME: This means that a declaration can have a minor + ;; effect even outside its scope, and as the packing is done + ;; component-globally it'd be tricky to use strict scoping. I + ;; think this is still acceptable since it's just a tradeoff + ;; between compilation speed and allocation quality and + ;; doesn't affect the semantics of the generated code in any + ;; way. -- JES 2004-10-06 + (do-ir2-blocks (block component) + (when (policy (block-last (ir2-block-block block)) + (> speed compilation-speed)) + (setf optimize t) + (return))) ;; Call the target functions. (do-ir2-blocks (block component) @@ -1469,49 +1532,66 @@ (when target-fun (funcall target-fun vop))))) - ;; Pack wired TNs first. (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn))) ((null tn)) - (pack-wired-tn tn)) + (pack-wired-tn tn optimize)) ;; Pack restricted component TNs. (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) ((null tn)) (when (eq (tn-kind tn) :component) - (pack-tn tn t))) + (pack-tn tn t optimize))) ;; Pack other restricted TNs. (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) ((null tn)) (unless (tn-offset tn) - (pack-tn tn t))) - - ;; Assign costs to normal TNs so we know which ones should always - ;; be packed on the stack. - (when (and optimize *pack-assign-costs*) - (assign-tn-costs component)) - - ;; Pack normal TNs in the order that they appear in the code. This - ;; should have some tendency to pack important TNs first, since - ;; control analysis favors the drop-through. This should also help - ;; targeting, since we will pack the target TN soon after we - ;; determine the location of the targeting TN. - (do-ir2-blocks (block component) - (let ((ltns (ir2-block-local-tns block))) - (do ((i (1- (ir2-block-local-tn-count block)) (1- i))) - ((minusp i)) - (declare (fixnum i)) - (let ((tn (svref ltns i))) - (unless (or (null tn) (eq tn :more) (tn-offset tn)) - (pack-tn tn nil)))))) + (pack-tn tn t optimize))) + ;; Assign costs to normal TNs so we know which ones should + ;; always be packed on the stack. + (when *pack-assign-costs* + (assign-tn-costs component) + (assign-tn-depths component)) + + ;; Allocate normal TNs, starting with the TNs that are used + ;; in deep loops. + (collect ((tns)) + (do-ir2-blocks (block component) + (let ((ltns (ir2-block-local-tns block))) + (do ((i (1- (ir2-block-local-tn-count block)) (1- i))) + ((minusp i)) + (declare (fixnum i)) + (let ((tn (svref ltns i))) + (unless (or (null tn) + (eq tn :more) + (tn-offset tn)) + ;; If loop analysis has been disabled we might as + ;; well revert to the old behaviour of just + ;; packing TNs linearly as they appear. + (unless *loop-analyze* + (pack-tn tn nil optimize)) + (tns tn)))))) + (dolist (tn (stable-sort (tns) + (lambda (a b) + (cond + ((> (tn-loop-depth a) + (tn-loop-depth b)) + t) + ((= (tn-loop-depth a) + (tn-loop-depth b)) + (> (tn-cost a) (tn-cost b))) + (t nil))))) + (unless (tn-offset tn) + (pack-tn tn nil optimize)))) + ;; Pack any leftover normal TNs. This is to deal with :MORE TNs, ;; which could possibly not appear in any local TN map. (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) ((null tn)) (unless (tn-offset tn) - (pack-tn tn nil))) + (pack-tn tn nil optimize))) ;; Do load TN packing and emit saves. (let ((*repack-blocks* nil)) @@ -1525,13 +1605,13 @@ (pack-load-tns block)))) (when *repack-blocks* (loop - (when (zerop (hash-table-count *repack-blocks*)) (return)) - (maphash (lambda (block v) - (declare (ignore v)) - (remhash block *repack-blocks*) - (event repack-block) - (pack-load-tns block)) - *repack-blocks*)))) + (when (zerop (hash-table-count *repack-blocks*)) (return)) + (maphash (lambda (block v) + (declare (ignore v)) + (remhash block *repack-blocks*) + (event repack-block) + (pack-load-tns block)) + *repack-blocks*)))) (values)) (clean-up-pack-structures))) diff --git a/src/compiler/vop.lisp b/src/compiler/vop.lisp index 4570b36..bf743c2 100644 --- a/src/compiler/vop.lisp +++ b/src/compiler/vop.lisp @@ -421,6 +421,47 @@ home save-sp dynamic-state) + +(defstruct (cloop (:print-function print-cloop) + (:conc-name loop-) + (:predicate loop-p) + (:constructor make-loop) + (:copier copy-loop)) + ;; The kind of loop that this is. These values are legal: + ;; + ;; :OUTER + ;; This is the outermost loop structure, and represents all the + ;; code in a component. + ;; + ;; :NATURAL + ;; A normal loop with only one entry. + ;; + ;; :STRANGE + ;; A segment of a "strange loop" in a non-reducible flow graph. + (kind (required-argument) :type (member :outer :natural :strange)) + ;; The first and last blocks in the loop. There may be more than one tail, + ;; since there may be multiple back branches to the same head. + (head nil :type (or cblock null)) + (tail nil :type list) + ;; A list of all the blocks in this loop or its inferiors that have a + ;; successor outside of the loop. + (exits nil :type list) + ;; The loop that this loop is nested within. This is null in the outermost + ;; loop structure. + (superior nil :type (or cloop null)) + ;; A list of the loops nested directly within this one. + (inferiors nil :type list) + (depth 0 :type fixnum) + ;; The head of the list of blocks directly within this loop. We must recurse + ;; on INFERIORS to find all the blocks. + (blocks nil :type (or null cblock))) + +(defprinter (cloop :conc-name LOOP-) + kind + head + tail + exits + depth) ;;;; VOPs and templates @@ -738,6 +779,7 @@ ;; is set, then the location is in use somewhere in the block, and ;; thus has a conflict for always-live TNs. (always-live '#() :type simple-vector) + (always-live-count '#() :type simple-vector) ;; a vector containing the TN currently live in each location in the ;; SB, or NIL if the location is unused. This is used during load-tn pack. (live-tns '#() :type simple-vector) @@ -933,7 +975,9 @@ (cost 0 :type fixnum) ;; If a :ENVIRONMENT or :DEBUG-ENVIRONMENT TN, this is the ;; physical environment that the TN is live throughout. - (physenv nil :type (or physenv null))) + (physenv nil :type (or physenv null)) + ;; The depth of the deepest loop that this TN is used in. + (loop-depth 0 :type fixnum)) (def!method print-object ((tn tn) stream) (print-unreadable-object (tn stream :type t) ;; KLUDGE: The distinction between PRINT-TN and PRINT-OBJECT on TN is diff --git a/version.lisp-expr b/version.lisp-expr index 1e230e8..3f4b407 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.15.12" +"0.8.15.13" -- 1.7.10.4