X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fpack.lisp;h=4fa1d162d5a39665a91e583a1e2a999876b5d7c8;hb=3d544b84f2b7ecd617d220145a775079df6c7919;hp=451f83e620ec3438049511c94827788cd896e107;hpb=cf96bb4253ddc05188f4a45aead5f0497339ad17;p=sbcl.git diff --git a/src/compiler/pack.lisp b/src/compiler/pack.lisp index 451f83e..4fa1d16 100644 --- a/src/compiler/pack.lisp +++ b/src/compiler/pack.lisp @@ -11,7 +11,7 @@ ;;;; provided with absolutely no warranty. See the COPYING and CREDITS ;;;; files for more information. -(in-package "SB!C") +(in-package "SB!REGALLOC") ;;; for debugging: some parameters controlling which optimizations we ;;; attempt @@ -757,7 +757,7 @@ (return t))) (setq block (optimized-emit-saves-block block saves restores))) (setq block (ir2-block-prev block))))) - + ;;; Iterate over the normal TNs, finding the cost of packing on the ;;; stack in units of the number of references. We count all read ;;; references as +1, write references as + *tn-write-cost*, and @@ -766,29 +766,47 @@ ;;; The subtraction reflects the fact that having a value in a ;;; register around a call means that code to spill and unspill must ;;; be inserted. +;;; +;;; The costs also take into account the loop depth at which each +;;; reference occurs: the penalty or cost is incremented by the depth +;;; scaled by *tn-loop-depth-multiplier*. The default (NIL) is to let +;;; this be one more than the max of the cost for reads (1), for write +;;; references and for being live across a call. (defvar *tn-write-cost* 2) -(defun assign-tn-costs (component) - (let ((save-penalty *backend-register-save-penalty*)) - (do-ir2-blocks (block component) - (do ((vop (ir2-block-start-vop block) (vop-next vop))) - ((null vop)) - (when (eq (vop-info-save-p (vop-info vop)) t) - (do-live-tns (tn (vop-save-set vop) block) - (decf (tn-cost tn) save-penalty)))))) +(defvar *tn-loop-depth-multiplier* nil) - (let ((write-cost *tn-write-cost*)) - (do ((tn (ir2-component-normal-tns (component-info component)) - (tn-next tn))) - ((null tn)) - (let ((cost (tn-cost tn))) - (declare (fixnum cost)) - (do ((ref (tn-reads tn) (tn-ref-next ref))) - ((null ref)) - (incf cost)) - (do ((ref (tn-writes tn) (tn-ref-next ref))) - ((null ref)) - (incf cost write-cost)) - (setf (tn-cost tn) cost))))) +(defun assign-tn-costs (component) + (let* ((save-penalty *backend-register-save-penalty*) + (write-cost *tn-write-cost*) + (depth-scale (or *tn-loop-depth-multiplier* + (1+ (max 1 write-cost save-penalty))))) + (flet ((vop-depth-cost (vop) + (let ((loop (block-loop + (ir2-block-block + (vop-block vop))))) + (if loop + (* depth-scale (loop-depth loop)) + 0)))) + (do-ir2-blocks (block component) + (do ((vop (ir2-block-start-vop block) (vop-next vop))) + ((null vop)) + (when (eq (vop-info-save-p (vop-info vop)) t) + (let ((penalty (+ save-penalty (vop-depth-cost vop)))) + (do-live-tns (tn (vop-save-set vop) block) + (decf (tn-cost tn) penalty)))))) + + (do ((tn (ir2-component-normal-tns (component-info component)) + (tn-next tn))) + ((null tn)) + (let ((cost (tn-cost tn))) + (declare (fixnum cost)) + (do ((ref (tn-reads tn) (tn-ref-next ref))) + ((null ref)) + (incf cost (1+ (vop-depth-cost (tn-ref-vop ref))))) + (do ((ref (tn-writes tn) (tn-ref-next ref))) + ((null ref)) + (incf cost (+ write-cost (vop-depth-cost (tn-ref-vop ref))))) + (setf (tn-cost tn) cost)))))) ;;; Iterate over the normal TNs, folding over the depth of the looops ;;; that the TN is used in and storing the result in TN-LOOP-DEPTH. @@ -1375,6 +1393,15 @@ ;;;; pack interface +;; Misc. utilities +(declaim (inline unbounded-sc-p)) +(defun unbounded-sc-p (sc) + (eq (sb-kind (sc-sb sc)) :unbounded)) + +(defun unbounded-tn-p (tn) + (unbounded-sc-p (tn-sc tn))) +(declaim (notinline unbounded-sc-p)) + ;;; Attempt to pack TN in all possible SCs, first in the SC chosen by ;;; representation selection, then in the alternate SCs in the order ;;; they were specified in the SC definition. If the TN-COST is @@ -1387,6 +1414,7 @@ ;;; of allocating a new stack location. (defun pack-tn (tn restricted optimize &key (allow-unbounded-sc t)) (declare (type tn tn)) + (aver (not (tn-offset tn))) (let* ((original (original-tn tn)) (fsc (tn-sc tn)) (alternates (unless restricted (sc-alternate-scs fsc))) @@ -1399,7 +1427,7 @@ ((null sc) (failed-to-pack-error tn restricted)) (unless (or allow-unbounded-sc - (neq (sb-kind (sc-sb sc)) :unbounded)) + (not (unbounded-sc-p sc))) (return nil)) (when (eq sc specified-save-sc) (unless (tn-offset save) @@ -1410,10 +1438,11 @@ (when (or restricted (not (and (minusp (tn-cost tn)) (sc-save-p sc)))) (let ((loc (or (find-ok-target-offset original sc) - (select-location original sc) + (select-location original sc :optimize optimize) (and restricted - (select-location original sc :use-reserved-locs t)) - (when (eq (sb-kind (sc-sb sc)) :unbounded) + (select-location original sc :use-reserved-locs t + :optimize optimize)) + (when (unbounded-sc-p sc) (grow-sc sc) (or (select-location original sc) (error "failed to pack after growing SC?")))))) @@ -1537,9 +1566,10 @@ (defun tn-lexical-depth (tn) (let ((path t)) ; dummy initial value (labels ((path (lambda) - (nreverse (loop while lambda - collect lambda - do (setf lambda (lambda-parent lambda))))) + (do ((acc '()) + (lambda lambda (lambda-parent lambda))) + ((null lambda) acc) + (push lambda acc))) (register-scope (lambda) (let ((new-path (path lambda))) (setf path (if (eql path t) @@ -1548,11 +1578,10 @@ 0 (mismatch path new-path)))))) (walk-tn-refs (ref) (do ((ref ref (tn-ref-next ref))) - ((null ref)) - (binding* ((node (vop-node (tn-ref-vop ref)) - :exit-if-null)) - (register-scope (lexenv-lambda - (node-lexenv node))))))) + ((or (null ref) + (null path))) + (awhen (vop-node (tn-ref-vop ref)) + (register-scope (lexenv-lambda (node-lexenv it))))))) (walk-tn-refs (tn-reads tn)) (walk-tn-refs (tn-writes tn)) (if (eql path t) @@ -1590,87 +1619,55 @@ (when target-fun (funcall target-fun vop))))) - ;; Pack wired TNs first. - (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn))) - ((null tn)) - (pack-wired-tn tn optimize)) - - ;; Pack restricted component TNs. - (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) - ((null tn)) - (when (eq (tn-kind tn) :component) - (pack-tn tn t optimize))) - - ;; Pack other restricted TNs. - (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) - ((null tn)) - (unless (tn-offset tn) - (pack-tn tn t optimize))) - - ;; Assign costs to normal TNs so we know which ones should - ;; always be packed on the stack. + ;; Assign costs to normal TNs so we know which ones should always + ;; be packed on the stack, and which are important not to spill. (when *pack-assign-costs* - (assign-tn-costs component) - (assign-tn-depths component)) - - ;; Allocate normal TNs, starting with the TNs that are used - ;; in deep loops. Only allocate in finite SCs (i.e. not on - ;; the stack). - (collect ((tns)) - (do-ir2-blocks (block component) - (let ((ltns (ir2-block-local-tns block))) - (do ((i (1- (ir2-block-local-tn-count block)) (1- i))) - ((minusp i)) - (declare (fixnum i)) - (let ((tn (svref ltns i))) - (unless (or (null tn) - (eq tn :more) - (tn-offset tn)) - ;; If loop analysis has been disabled we might as - ;; well revert to the old behaviour of just - ;; packing TNs linearly as they appear. - (unless *loop-analyze* - (pack-tn tn nil optimize :allow-unbounded-sc nil)) - (tns tn)))))) - (dolist (tn (stable-sort (tns) - (lambda (a b) - (cond - ((> (tn-loop-depth a) - (tn-loop-depth b)) - t) - ((= (tn-loop-depth a) - (tn-loop-depth b)) - (> (tn-cost a) (tn-cost b))) - (t nil))))) - (unless (tn-offset tn) - (pack-tn tn nil optimize :allow-unbounded-sc nil)))) - - ;; Pack any leftover normal TNs that could not be allocated - ;; to finite SCs, or TNs that do not appear in any local TN - ;; map (e.g. :MORE TNs). Since we'll likely be allocating - ;; on the stack, first allocate TNs that are associated with - ;; code at shallow lexical depths: this will allocate long - ;; live ranges (i.e. TNs with more conflicts) first, and - ;; hopefully minimise stack fragmentation. - ;; - ;; Collect in reverse order to give priority to older TNs. - (let ((contiguous-tns '()) + (assign-tn-costs component)) + + ;; Actually allocate registers for most TNs. After this, only + ;; :normal tns may be left unallocated (or TNs :restricted to + ;; an unbounded SC). + (pack-greedy component 2comp optimize) + + ;; Pack any leftover normal/restricted TN that is not already + ;; allocated to a finite SC, or TNs that do not appear in any + ;; local TN map (e.g. :MORE TNs). Since we'll likely be + ;; allocating on the stack, first allocate TNs that are + ;; associated with code at shallow lexical depths: this will + ;; allocate long live ranges (i.e. TNs with more conflicts) + ;; first, and hopefully minimise stack fragmentation. + ;; Component TNs are a degenerate case: they are always live. + (let ((component-tns '()) + (contiguous-tns '()) (tns '())) - (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) - ((null tn)) - (unless (tn-offset tn) - (let ((key (cons tn (tn-lexical-depth tn)))) - (if (memq (tn-kind tn) '(:environment :debug-environment - :component)) - (push key contiguous-tns) - (push key tns))))) - (flet ((pack-tns (tns) - (dolist (tn (stable-sort tns #'< :key #'cdr)) - (let ((tn (car tn))) - (unless (tn-offset tn) - (pack-tn tn nil optimize)))))) - ;; first pack TNs that are known to have simple - ;; live ranges (contiguous lexical scopes) + (flet ((register-tn (tn) + (unless (tn-offset tn) + (case (tn-kind tn) + (:component + (push tn component-tns)) + ((:environment :debug-environment) + (push tn contiguous-tns)) + (t + (push tn tns)))))) + (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) + ((null tn)) + ;; by this time, restricted TNs must either be + ;; allocated in the right SC or unbounded + (aver (or (tn-offset tn) (unbounded-tn-p tn))) + (register-tn tn)) + (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) + ((null tn)) + (register-tn tn))) + (flet ((pack-tns (tns &optional in-order) + (dolist (tn (if in-order + tns + (schwartzian-stable-sort-list + tns #'< :key #'tn-lexical-depth))) + (unless (tn-offset tn) + (pack-tn tn nil optimize))))) + ;; first pack TNs that are known to have simple live + ;; ranges (contiguous lexical scopes) + (pack-tns component-tns t) (pack-tns contiguous-tns) (pack-tns tns))) @@ -1694,3 +1691,63 @@ (values)) (clean-up-pack-structures))) + +(defun pack-greedy (component 2comp optimize) + (declare (type component component) + (type ir2-component 2comp)) + ;; Pack wired TNs first. + (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn))) + ((null tn)) + (pack-wired-tn tn optimize)) + + ;; Then, pack restricted TNs, ones that are live over the whole + ;; component first (they cause no fragmentation). Sort by TN cost + ;; to help important TNs get good targeting. + (collect ((component) + (normal)) + (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn))) + ((null tn)) + (unless (or (tn-offset tn) (unbounded-tn-p tn)) + (if (eq :component (tn-kind tn)) + (component tn) + (normal tn)))) + (flet ((pack-tns (tns) + (dolist (tn (stable-sort tns #'> :key #'tn-cost)) + (pack-tn tn t optimize)))) + (pack-tns (component)) + (pack-tns (normal)))) + + (cond ((and *loop-analyze* *pack-assign-costs*) + ;; Allocate normal TNs, starting with the TNs that are + ;; heavily used in deep loops (which is taken into account in + ;; TN spill costs). Only allocate in finite SCs (i.e. not on + ;; the stack). + (collect ((tns)) + (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) + ((null tn)) + (unless (or (tn-offset tn) + (eq (tn-kind tn) :more) + (unbounded-tn-p tn) + (and (sc-save-p (tn-sc tn)) ; SC caller-save, but TN + (minusp (tn-cost tn)))) ; lives over many calls + (tns tn))) + (dolist (tn (stable-sort (tns) #'> :key #'tn-cost)) + (unless (tn-offset tn) + ;; if it can't fit in a bounded SC, the final pass will + ;; take care of stack packing. + (pack-tn tn nil optimize :allow-unbounded-sc nil))))) + (t + ;; If loop analysis has been disabled we might as well revert + ;; to the old behaviour of just packing TNs linearly as they + ;; appear. + (do-ir2-blocks (block component) + (let ((ltns (ir2-block-local-tns block))) + (do ((i (1- (ir2-block-local-tn-count block)) (1- i))) + ((minusp i)) + (declare (fixnum i)) + (let ((tn (svref ltns i))) + (unless (or (null tn) + (eq tn :more) + (tn-offset tn) + (unbounded-tn-p tn)) + (pack-tn tn nil optimize :allow-unbounded-sc nil)))))))))