X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fpack.lisp;h=4fa1d162d5a39665a91e583a1e2a999876b5d7c8;hb=3d544b84f2b7ecd617d220145a775079df6c7919;hp=339903235a18ab4682209b68c9bf084ff786fc76;hpb=7f6e75c553b4465ced41c3640292834d803761eb;p=sbcl.git diff --git a/src/compiler/pack.lisp b/src/compiler/pack.lisp index 3399032..4fa1d16 100644 --- a/src/compiler/pack.lisp +++ b/src/compiler/pack.lisp @@ -11,7 +11,7 @@ ;;;; provided with absolutely no warranty. See the COPYING and CREDITS ;;;; files for more information. -(in-package "SB!C") +(in-package "SB!REGALLOC") ;;; for debugging: some parameters controlling which optimizations we ;;; attempt @@ -24,8 +24,8 @@ ;;;; conflict determination -;;; Return true if the element at the specified offset in SB has a -;;; conflict with TN: +;;; Return true if the element at the specified offset, or in any of +;;; the [size-1] subsequent offsets, in SB has a conflict with TN: ;;; -- If a component-live TN (:COMPONENT kind), then iterate over ;;; all the blocks. If the element at OFFSET is used anywhere in ;;; any of the component's blocks (always-live /= 0), then there @@ -39,44 +39,58 @@ ;;; that block. ;;; -- If TN is local, then we just check for a conflict in the block ;;; it is local to. -(defun offset-conflicts-in-sb (tn sb offset) - (declare (type tn tn) (type finite-sb sb) (type index offset)) +;;; +;;; If there is a conflict, returns the first such conflicting offset. +(defun offset-conflicts-in-sb (tn sb offset &key (size 1)) + (declare (type tn tn) (type finite-sb sb) (type index offset size)) (let ((confs (tn-global-conflicts tn)) - (kind (tn-kind tn))) - (cond - ((eq kind :component) - (let ((loc-live (svref (finite-sb-always-live sb) offset))) - (dotimes (i (ir2-block-count *component-being-compiled*) nil) - (when (/= (sbit loc-live i) 0) - (return t))))) - (confs - (let ((loc-confs (svref (finite-sb-conflicts sb) offset)) - (loc-live (svref (finite-sb-always-live sb) offset))) - (do ((conf confs (global-conflicts-next-tnwise conf))) - ((null conf) - nil) - (let* ((block (global-conflicts-block conf)) - (num (ir2-block-number block))) - (if (eq (global-conflicts-kind conf) :live) - (when (/= (sbit loc-live num) 0) - (return t)) - (when (/= (sbit (svref loc-confs num) - (global-conflicts-number conf)) - 0) - (return t))))))) - (t - (/= (sbit (svref (svref (finite-sb-conflicts sb) offset) - (ir2-block-number (tn-local tn))) - (tn-local-number tn)) - 0))))) + (kind (tn-kind tn)) + (sb-conflicts (finite-sb-conflicts sb)) + (sb-always-live (finite-sb-always-live sb))) + (macrolet ((do-offsets (&body body) + `(loop repeat size + for offset upfrom offset + thereis (progn ,@body)))) + (cond + ((eq kind :component) + (do-offsets + (let ((loc-live (svref sb-always-live offset))) + (dotimes (i (ir2-block-count *component-being-compiled*)) + (when (/= (sbit loc-live i) 0) + (return offset)))))) + (confs + ;; TN is global, iterate over the blocks TN is live in. + (do ((conf confs (global-conflicts-next-tnwise conf))) + ((null conf) + nil) + (let* ((block (global-conflicts-block conf)) + (num (ir2-block-number block))) + (if (eq (global-conflicts-kind conf) :live) + (do-offsets + (let ((loc-live (svref sb-always-live offset))) + (when (/= (sbit loc-live num) 0) + (return-from offset-conflicts-in-sb offset)))) + (do-offsets + (let ((loc-confs (svref sb-conflicts offset))) + (when (/= (sbit (svref loc-confs num) + (global-conflicts-number conf)) + 0) + (return-from offset-conflicts-in-sb offset)))))))) + (t + (do-offsets + (and (/= (sbit (svref (svref sb-conflicts offset) + (ir2-block-number (tn-local tn))) + (tn-local-number tn)) + 0) + offset))))))) ;;; Return true if TN has a conflict in SC at the specified offset. +(declaim (ftype (function (tn sc index) (values (or null index) &optional)) + conflicts-in-sc)) (defun conflicts-in-sc (tn sc offset) (declare (type tn tn) (type sc sc) (type index offset)) - (let ((sb (sc-sb sc))) - (dotimes (i (sc-element-size sc) nil) - (when (offset-conflicts-in-sb tn sb (+ offset i)) - (return t))))) + (offset-conflicts-in-sb tn (sc-sb sc) offset + :size (sc-element-size sc))) ;;; Add TN's conflicts into the conflicts for the location at OFFSET ;;; in SC. We iterate over each location in TN, adding to the @@ -215,24 +229,26 @@ (let* ((sb (sc-sb sc)) (size (finite-sb-current-size sb)) (align-mask (1- (sc-alignment sc))) - (inc (max (sb-size sb) + (inc (max (finite-sb-size-increment sb) (+ (sc-element-size sc) (- (logandc2 (+ size align-mask) align-mask) size)) (- needed-size size))) - (new-size (+ size inc)) + (new-size (let ((align-mask (1- (finite-sb-size-alignment sb)))) + (logandc2 (+ size inc align-mask) align-mask))) (conflicts (finite-sb-conflicts sb)) (block-size (if (zerop (length conflicts)) (ir2-block-count *component-being-compiled*) - (length (the simple-vector (svref conflicts 0)))))) - (declare (type index inc new-size)) + (length (the simple-vector (svref conflicts 0))))) + (padded-size (ash 1 (integer-length (1- new-size))))) + (declare (type index inc new-size padded-size)) (aver (eq (sb-kind sb) :unbounded)) - (when (> new-size (length conflicts)) - (let ((new-conf (make-array new-size))) + (when (> padded-size (length conflicts)) + (let ((new-conf (make-array padded-size))) (replace new-conf conflicts) (do ((i size (1+ i))) - ((= i new-size)) + ((= i padded-size)) (declare (type index i)) (let ((loc-confs (make-array block-size))) (dotimes (j block-size) @@ -243,23 +259,23 @@ (setf (svref new-conf i) loc-confs))) (setf (finite-sb-conflicts sb) new-conf)) - (let ((new-live (make-array new-size))) + (let ((new-live (make-array padded-size))) (replace new-live (finite-sb-always-live sb)) (do ((i size (1+ i))) - ((= i new-size)) + ((= i padded-size)) (setf (svref new-live i) (make-array block-size :initial-element 0 :element-type 'bit))) (setf (finite-sb-always-live sb) new-live)) - (let ((new-live-count (make-array new-size))) + (let ((new-live-count (make-array padded-size))) (declare (optimize speed)) ;; FILL deftransform (replace new-live-count (finite-sb-always-live-count sb)) (fill new-live-count 0 :start size) (setf (finite-sb-always-live-count sb) new-live-count)) - (let ((new-tns (make-array new-size :initial-element nil))) + (let ((new-tns (make-array padded-size :initial-element nil))) (replace new-tns (finite-sb-live-tns sb)) (fill (finite-sb-live-tns sb) nil) (setf (finite-sb-live-tns sb) new-tns))) @@ -598,7 +614,7 @@ (values)) ;;; Load the TN from its save location, allocating one if necessary. -;;; The load is inserted BEFORE the specifier VOP. CONTEXT is a VOP +;;; The load is inserted BEFORE the specified VOP. CONTEXT is a VOP ;;; used to tell which node/block to use for the new VOP. (defun restore-tn (tn before context) (declare (type tn tn) (type (or vop null) before) (type vop context)) @@ -741,65 +757,85 @@ (return t))) (setq block (optimized-emit-saves-block block saves restores))) (setq block (ir2-block-prev block))))) - + ;;; Iterate over the normal TNs, finding the cost of packing on the -;;; stack in units of the number of references. We count all -;;; references as +1, and subtract out REGISTER-SAVE-PENALTY for each -;;; place where we would have to save a register. +;;; stack in units of the number of references. We count all read +;;; references as +1, write references as + *tn-write-cost*, and +;;; subtract out REGISTER-SAVE-PENALTY for each place where we would +;;; have to save a register. +;;; The subtraction reflects the fact that having a value in a +;;; register around a call means that code to spill and unspill must +;;; be inserted. +;;; +;;; The costs also take into account the loop depth at which each +;;; reference occurs: the penalty or cost is incremented by the depth +;;; scaled by *tn-loop-depth-multiplier*. The default (NIL) is to let +;;; this be one more than the max of the cost for reads (1), for write +;;; references and for being live across a call. +(defvar *tn-write-cost* 2) +(defvar *tn-loop-depth-multiplier* nil) + (defun assign-tn-costs (component) - (do-ir2-blocks (block component) - (do ((vop (ir2-block-start-vop block) (vop-next vop))) - ((null vop)) - (when (eq (vop-info-save-p (vop-info vop)) t) - (do-live-tns (tn (vop-save-set vop) block) - (decf (tn-cost tn) *backend-register-save-penalty*))))) - - (do ((tn (ir2-component-normal-tns (component-info component)) - (tn-next tn))) - ((null tn)) - (let ((cost (tn-cost tn))) - (declare (fixnum cost)) - (do ((ref (tn-reads tn) (tn-ref-next ref))) - ((null ref)) - (incf cost)) - (do ((ref (tn-writes tn) (tn-ref-next ref))) - ((null ref)) - (incf cost)) - (setf (tn-cost tn) cost)))) - -;;; Iterate over the normal TNs, storing the depth of the deepest loop -;;; that the TN is used in TN-LOOP-DEPTH. -(defun assign-tn-depths (component) + (let* ((save-penalty *backend-register-save-penalty*) + (write-cost *tn-write-cost*) + (depth-scale (or *tn-loop-depth-multiplier* + (1+ (max 1 write-cost save-penalty))))) + (flet ((vop-depth-cost (vop) + (let ((loop (block-loop + (ir2-block-block + (vop-block vop))))) + (if loop + (* depth-scale (loop-depth loop)) + 0)))) + (do-ir2-blocks (block component) + (do ((vop (ir2-block-start-vop block) (vop-next vop))) + ((null vop)) + (when (eq (vop-info-save-p (vop-info vop)) t) + (let ((penalty (+ save-penalty (vop-depth-cost vop)))) + (do-live-tns (tn (vop-save-set vop) block) + (decf (tn-cost tn) penalty)))))) + + (do ((tn (ir2-component-normal-tns (component-info component)) + (tn-next tn))) + ((null tn)) + (let ((cost (tn-cost tn))) + (declare (fixnum cost)) + (do ((ref (tn-reads tn) (tn-ref-next ref))) + ((null ref)) + (incf cost (1+ (vop-depth-cost (tn-ref-vop ref))))) + (do ((ref (tn-writes tn) (tn-ref-next ref))) + ((null ref)) + (incf cost (+ write-cost (vop-depth-cost (tn-ref-vop ref))))) + (setf (tn-cost tn) cost)))))) + +;;; Iterate over the normal TNs, folding over the depth of the looops +;;; that the TN is used in and storing the result in TN-LOOP-DEPTH. +;;: reducer is the function used to join depth values together. #'max +;;; gives the maximum depth, #'+ the sum. +(defun assign-tn-depths (component &key (reducer #'max)) + (declare (type function reducer)) (when *loop-analyze* - (do-ir2-blocks (block component) - (do ((vop (ir2-block-start-vop block) - (vop-next vop))) - ((null vop)) - (flet ((find-all-tns (head-fun) - (collect ((tns)) - (do ((ref (funcall head-fun vop) (tn-ref-across ref))) - ((null ref)) - (tns (tn-ref-tn ref))) - (tns)))) - (dolist (tn (nconc (find-all-tns #'vop-args) - (find-all-tns #'vop-results) - (find-all-tns #'vop-temps) - ;; What does "references in this VOP - ;; mean"? Probably something that isn't - ;; useful in this context, since these - ;; TN-REFs are linked with TN-REF-NEXT - ;; instead of TN-REF-ACROSS. --JES - ;; 2004-09-11 - ;; (find-all-tns #'vop-refs) - )) - (setf (tn-loop-depth tn) - (max (tn-loop-depth tn) - (let* ((ir1-block (ir2-block-block (vop-block vop))) - (loop (block-loop ir1-block))) - (if loop - (loop-depth loop) - 0)))))))))) - + ;; We only use tn depth for normal TNs + (do ((tn (ir2-component-normal-tns (component-info component)) + (tn-next tn))) + ((null tn)) + (let ((depth 0)) + (declare (type fixnum depth)) + (flet ((frob (ref) + (declare (type (or null tn-ref) ref)) + (do ((ref ref (tn-ref-next ref))) + ((null ref)) + (let* ((vop (tn-ref-vop ref)) + (block (ir2-block-block (vop-block vop))) + (loop (block-loop block))) + (setf depth (funcall reducer + depth + (if loop + (loop-depth loop) + 0))))))) + (frob (tn-reads tn)) + (frob (tn-writes tn)) + (setf (tn-loop-depth tn) depth)))))) ;;;; load TN packing @@ -826,11 +862,21 @@ (let* ((sc (tn-sc tn)) (sb (sc-sb sc))) (when (eq (sb-kind sb) :finite) - (do ((offset (tn-offset tn) (1+ offset)) - (end (+ (tn-offset tn) (sc-element-size sc)))) - ((= offset end)) - (declare (type index offset end)) - (setf (svref (finite-sb-live-tns sb) offset) tn))))) + ;; KLUDGE: we can have "live" TNs that are neither read + ;; to nor written from, due to more aggressive (type- + ;; directed) constant propagation. Such TNs will never + ;; be assigned an offset nor be in conflict with anything. + ;; + ;; Ideally, it seems to me we could make sure these TNs + ;; are never allocated in the first place in + ;; ASSIGN-LAMBDA-VAR-TNS. + (if (tn-offset tn) + (do ((offset (tn-offset tn) (1+ offset)) + (end (+ (tn-offset tn) (sc-element-size sc)))) + ((= offset end)) + (declare (type index offset end)) + (setf (svref (finite-sb-live-tns sb) offset) tn)) + (aver (and (null (tn-reads tn)) (null (tn-writes tn)))))))) (setq *live-block* block) (setq *live-vop* (ir2-block-last-vop block)) @@ -908,8 +954,8 @@ ;;; aren't any TN-REFs to represent the implicit reading of results or ;;; writing of arguments. ;;; -;;; The second bullet corresponds conflicts with temporaries or between -;;; arguments and results. +;;; The second bullet corresponds to conflicts with temporaries or +;;; between arguments and results. ;;; ;;; We consider both the TN-REF-TN and the TN-REF-LOAD-TN (if any) to ;;; be referenced simultaneously and in the same way. This causes @@ -1046,9 +1092,9 @@ ;;; This is called by PACK-LOAD-TN where there isn't any location free ;;; that we can pack into. What we do is move some live TN in one of -;;; the specified SCs to memory, then mark this block all blocks that -;;; reference the TN as needing repacking. If we succeed, we throw to -;;; UNPACKED-TN. If we fail, we return NIL. +;;; the specified SCs to memory, then mark all blocks that reference +;;; the TN as needing repacking. If we succeed, we throw to UNPACKED-TN. +;;; If we fail, we return NIL. ;;; ;;; We can unpack any live TN that appears in the NORMAL-TNs list ;;; (isn't wired or restricted.) We prefer to unpack TNs that are not @@ -1106,7 +1152,7 @@ ;;; if that location is in a SC not allowed by the primitive type. ;;; (The SC must still be allowed by the operand restriction.) This ;;; makes move VOPs more efficient, since we won't do a move from the -;;; stack into a non-descriptor any-reg though a descriptor argument +;;; stack into a non-descriptor any-reg through a descriptor argument ;;; load-TN. This does give targeting some real semantics, making it ;;; not a pure advisory to pack. It allows pack to do some packing it ;;; wouldn't have done before. @@ -1236,24 +1282,27 @@ nil))) ;;; Scan along the target path from TN, looking at readers or writers. -;;; When we find a packed TN, return CHECK-OK-TARGET of that TN. If -;;; there is no target, or if the TN has multiple readers (writers), -;;; then we return NIL. We also always return NIL after 10 iterations -;;; to get around potential circularity problems. +;;; When we find a TN, call CALLEE with that TN, and then resume +;;; walking down that TN's target. As soon as there is no target, or +;;; if the TN has multiple readers (writers), we stop walking the +;;; targetting chain. We also always stop after 10 iterations to get +;;; around potential circularity problems. ;;; -;;; FIXME: (30 minutes of reverse engineering?) It'd be nice to -;;; rewrite the header comment here to explain the interface and its -;;; motivation, and move remarks about implementation details (like -;;; 10!) inside. -(defun find-ok-target-offset (tn sc) - (declare (type tn tn) (type sc sc)) - (flet ((frob-slot (slot-fun) - (declare (type function slot-fun)) - (let ((count 10) +;;; Why the single-reader/writer constraint? As far as I can tell, +;;; this is concerned with straight pipeline of data, e.g. CASTs. In +;;; that case, limiting to chains of length 10 seems to be more than +;;; enough. +(declaim (inline %call-with-target-tns)) +(defun %call-with-target-tns (tn callee + &key (limit 10) (reads t) (writes t)) + (declare (type tn tn) (type function callee) (type index limit)) + (flet ((frob-slot (slot-function) + (declare (type function slot-function)) + (let ((count limit) (current tn)) (declare (type index count)) (loop - (let ((refs (funcall slot-fun current))) + (let ((refs (funcall slot-function current))) (unless (and (plusp count) refs (not (tn-ref-next refs))) @@ -1261,12 +1310,30 @@ (let ((target (tn-ref-target refs))) (unless target (return nil)) (setq current (tn-ref-tn target)) - (when (tn-offset current) - (return (check-ok-target current tn sc))) + (funcall callee current) (decf count))))))) - (declare (inline frob-slot)) ; until DYNAMIC-EXTENT works - (or (frob-slot #'tn-reads) - (frob-slot #'tn-writes)))) + (when reads + (frob-slot #'tn-reads)) + (when writes + (frob-slot #'tn-writes)) + nil)) + +(defmacro do-target-tns ((target-variable source-tn + &rest keys &key limit reads writes) + &body body) + (declare (ignore limit reads writes)) + (let ((callback (gensym "CALLBACK"))) + `(flet ((,callback (,target-variable) + ,@body)) + (declare (dynamic-extent #',callback)) + (%call-with-target-tns ,source-tn #',callback ,@keys)))) + +(defun find-ok-target-offset (tn sc) + (declare (type tn tn) (type sc sc)) + (do-target-tns (target tn) + (awhen (and (tn-offset target) + (check-ok-target target tn sc)) + (return-from find-ok-target-offset it)))) ;;;; location selection @@ -1290,14 +1357,11 @@ (align-mask (1- alignment)) (size (finite-sb-current-size sb))) (flet ((attempt-location (start-offset) - (dotimes (i element-size - (return-from select-location start-offset)) - (declare (type index i)) - (let ((offset (+ start-offset i))) - (when (offset-conflicts-in-sb tn sb offset) - (return (logandc2 (the index (+ (the index (1+ offset)) - align-mask)) - align-mask))))))) + (let ((conflict (conflicts-in-sc tn sc start-offset))) + (if conflict + (logandc2 (+ conflict align-mask 1) + align-mask) + (return-from select-location start-offset))))) (if (eq (sb-kind sb) :unbounded) (loop with offset = 0 until (> (+ offset element-size) size) do @@ -1305,13 +1369,14 @@ (let ((locations (sc-locations sc))) (when optimize (setf locations - (stable-sort (copy-list locations) #'> - :key (lambda (location-offset) - (loop for offset from location-offset - repeat element-size - maximize (svref - (finite-sb-always-live-count sb) - offset)))))) + (schwartzian-stable-sort-list + locations '> + :key (lambda (location-offset) + (loop for offset from location-offset + repeat element-size + maximize (svref + (finite-sb-always-live-count sb) + offset)))))) (dolist (offset locations) (when (or use-reserved-locs (not (member offset @@ -1328,6 +1393,15 @@ ;;;; pack interface +;; Misc. utilities +(declaim (inline unbounded-sc-p)) +(defun unbounded-sc-p (sc) + (eq (sb-kind (sc-sb sc)) :unbounded)) + +(defun unbounded-tn-p (tn) + (unbounded-sc-p (tn-sc tn))) +(declaim (notinline unbounded-sc-p)) + ;;; Attempt to pack TN in all possible SCs, first in the SC chosen by ;;; representation selection, then in the alternate SCs in the order ;;; they were specified in the SC definition. If the TN-COST is @@ -1338,8 +1412,9 @@ ;;; If we are attempting to pack in the SC of the save TN for a TN ;;; with a :SPECIFIED-SAVE TN, then we pack in that location, instead ;;; of allocating a new stack location. -(defun pack-tn (tn restricted optimize) +(defun pack-tn (tn restricted optimize &key (allow-unbounded-sc t)) (declare (type tn tn)) + (aver (not (tn-offset tn))) (let* ((original (original-tn tn)) (fsc (tn-sc tn)) (alternates (unless restricted (sc-alternate-scs fsc))) @@ -1351,19 +1426,23 @@ (do ((sc fsc (pop alternates))) ((null sc) (failed-to-pack-error tn restricted)) + (unless (or allow-unbounded-sc + (not (unbounded-sc-p sc))) + (return nil)) (when (eq sc specified-save-sc) (unless (tn-offset save) (pack-tn save nil optimize)) (setf (tn-offset tn) (tn-offset save)) (setf (tn-sc tn) (tn-sc save)) - (return)) + (return t)) (when (or restricted (not (and (minusp (tn-cost tn)) (sc-save-p sc)))) (let ((loc (or (find-ok-target-offset original sc) - (select-location original sc) + (select-location original sc :optimize optimize) (and restricted - (select-location original sc :use-reserved-locs t)) - (when (eq (sb-kind (sc-sb sc)) :unbounded) + (select-location original sc :use-reserved-locs t + :optimize optimize)) + (when (unbounded-sc-p sc) (grow-sc sc) (or (select-location original sc) (error "failed to pack after growing SC?")))))) @@ -1371,7 +1450,7 @@ (add-location-conflicts original sc loc optimize) (setf (tn-sc tn) sc) (setf (tn-offset tn) loc) - (return)))))) + (return t)))))) (values)) ;;; Pack a wired TN, checking that the offset is in bounds for the SB, @@ -1432,7 +1511,8 @@ (or (= offset 0) (= offset 1)))) (conflicts-in-sc original sc offset)) - (error "~S is wired to a location that it conflicts with." tn)) + (error "~S is wired to location ~D in SC ~A of kind ~S that it conflicts with." + tn offset sc (tn-kind tn))) (add-location-conflicts original sc offset optimize))) @@ -1483,6 +1563,31 @@ (setf (finite-sb-live-tns sb) (make-array size :initial-element nil)))))) +(defun tn-lexical-depth (tn) + (let ((path t)) ; dummy initial value + (labels ((path (lambda) + (do ((acc '()) + (lambda lambda (lambda-parent lambda))) + ((null lambda) acc) + (push lambda acc))) + (register-scope (lambda) + (let ((new-path (path lambda))) + (setf path (if (eql path t) + new-path + (subseq path + 0 (mismatch path new-path)))))) + (walk-tn-refs (ref) + (do ((ref ref (tn-ref-next ref))) + ((or (null ref) + (null path))) + (awhen (vop-node (tn-ref-vop ref)) + (register-scope (lexenv-lambda (node-lexenv it))))))) + (walk-tn-refs (tn-reads tn)) + (walk-tn-refs (tn-writes tn)) + (if (eql path t) + most-positive-fixnum + (length path))))) + (defun pack (component) (unwind-protect (let ((optimize nil) @@ -1514,66 +1619,57 @@ (when target-fun (funcall target-fun vop))))) - ;; Pack wired TNs first. - (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn))) - ((null tn)) - (pack-wired-tn tn optimize)) - - ;; Pack restricted component TNs. - (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) - ((null tn)) - (when (eq (tn-kind tn) :component) - (pack-tn tn t optimize))) - - ;; Pack other restricted TNs. - (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) - ((null tn)) - (unless (tn-offset tn) - (pack-tn tn t optimize))) - - ;; Assign costs to normal TNs so we know which ones should - ;; always be packed on the stack. + ;; Assign costs to normal TNs so we know which ones should always + ;; be packed on the stack, and which are important not to spill. (when *pack-assign-costs* - (assign-tn-costs component) - (assign-tn-depths component)) - - ;; Allocate normal TNs, starting with the TNs that are used - ;; in deep loops. - (collect ((tns)) - (do-ir2-blocks (block component) - (let ((ltns (ir2-block-local-tns block))) - (do ((i (1- (ir2-block-local-tn-count block)) (1- i))) - ((minusp i)) - (declare (fixnum i)) - (let ((tn (svref ltns i))) - (unless (or (null tn) - (eq tn :more) - (tn-offset tn)) - ;; If loop analysis has been disabled we might as - ;; well revert to the old behaviour of just - ;; packing TNs linearly as they appear. - (unless *loop-analyze* - (pack-tn tn nil optimize)) - (tns tn)))))) - (dolist (tn (stable-sort (tns) - (lambda (a b) - (cond - ((> (tn-loop-depth a) - (tn-loop-depth b)) - t) - ((= (tn-loop-depth a) - (tn-loop-depth b)) - (> (tn-cost a) (tn-cost b))) - (t nil))))) - (unless (tn-offset tn) - (pack-tn tn nil optimize)))) - - ;; Pack any leftover normal TNs. This is to deal with :MORE TNs, - ;; which could possibly not appear in any local TN map. - (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) - ((null tn)) - (unless (tn-offset tn) - (pack-tn tn nil optimize))) + (assign-tn-costs component)) + + ;; Actually allocate registers for most TNs. After this, only + ;; :normal tns may be left unallocated (or TNs :restricted to + ;; an unbounded SC). + (pack-greedy component 2comp optimize) + + ;; Pack any leftover normal/restricted TN that is not already + ;; allocated to a finite SC, or TNs that do not appear in any + ;; local TN map (e.g. :MORE TNs). Since we'll likely be + ;; allocating on the stack, first allocate TNs that are + ;; associated with code at shallow lexical depths: this will + ;; allocate long live ranges (i.e. TNs with more conflicts) + ;; first, and hopefully minimise stack fragmentation. + ;; Component TNs are a degenerate case: they are always live. + (let ((component-tns '()) + (contiguous-tns '()) + (tns '())) + (flet ((register-tn (tn) + (unless (tn-offset tn) + (case (tn-kind tn) + (:component + (push tn component-tns)) + ((:environment :debug-environment) + (push tn contiguous-tns)) + (t + (push tn tns)))))) + (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) + ((null tn)) + ;; by this time, restricted TNs must either be + ;; allocated in the right SC or unbounded + (aver (or (tn-offset tn) (unbounded-tn-p tn))) + (register-tn tn)) + (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) + ((null tn)) + (register-tn tn))) + (flet ((pack-tns (tns &optional in-order) + (dolist (tn (if in-order + tns + (schwartzian-stable-sort-list + tns #'< :key #'tn-lexical-depth))) + (unless (tn-offset tn) + (pack-tn tn nil optimize))))) + ;; first pack TNs that are known to have simple live + ;; ranges (contiguous lexical scopes) + (pack-tns component-tns t) + (pack-tns contiguous-tns) + (pack-tns tns))) ;; Do load TN packing and emit saves. (let ((*repack-blocks* nil)) @@ -1595,3 +1691,63 @@ (values)) (clean-up-pack-structures))) + +(defun pack-greedy (component 2comp optimize) + (declare (type component component) + (type ir2-component 2comp)) + ;; Pack wired TNs first. + (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn))) + ((null tn)) + (pack-wired-tn tn optimize)) + + ;; Then, pack restricted TNs, ones that are live over the whole + ;; component first (they cause no fragmentation). Sort by TN cost + ;; to help important TNs get good targeting. + (collect ((component) + (normal)) + (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) + ((null tn)) + (unless (or (tn-offset tn) (unbounded-tn-p tn)) + (if (eq :component (tn-kind tn)) + (component tn) + (normal tn)))) + (flet ((pack-tns (tns) + (dolist (tn (stable-sort tns #'> :key #'tn-cost)) + (pack-tn tn t optimize)))) + (pack-tns (component)) + (pack-tns (normal)))) + + (cond ((and *loop-analyze* *pack-assign-costs*) + ;; Allocate normal TNs, starting with the TNs that are + ;; heavily used in deep loops (which is taken into account in + ;; TN spill costs). Only allocate in finite SCs (i.e. not on + ;; the stack). + (collect ((tns)) + (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) + ((null tn)) + (unless (or (tn-offset tn) + (eq (tn-kind tn) :more) + (unbounded-tn-p tn) + (and (sc-save-p (tn-sc tn)) ; SC caller-save, but TN + (minusp (tn-cost tn)))) ; lives over many calls + (tns tn))) + (dolist (tn (stable-sort (tns) #'> :key #'tn-cost)) + (unless (tn-offset tn) + ;; if it can't fit in a bounded SC, the final pass will + ;; take care of stack packing. + (pack-tn tn nil optimize :allow-unbounded-sc nil))))) + (t + ;; If loop analysis has been disabled we might as well revert + ;; to the old behaviour of just packing TNs linearly as they + ;; appear. + (do-ir2-blocks (block component) + (let ((ltns (ir2-block-local-tns block))) + (do ((i (1- (ir2-block-local-tn-count block)) (1- i))) + ((minusp i)) + (declare (fixnum i)) + (let ((tn (svref ltns i))) + (unless (or (null tn) + (eq tn :more) + (tn-offset tn) + (unbounded-tn-p tn)) + (pack-tn tn nil optimize :allow-unbounded-sc nil)))))))))