;;;; provided with absolutely no warranty. See the COPYING and CREDITS
;;;; files for more information.
-(in-package "SB!C")
+(in-package "SB!REGALLOC")
;;; for debugging: some parameters controlling which optimizations we
;;; attempt
(kind (tn-kind tn))
(sb-conflicts (finite-sb-conflicts sb))
(sb-always-live (finite-sb-always-live sb)))
- (macrolet ((do-offsets (&body body)
- `(loop repeat size
- for offset upfrom offset
+ (macrolet ((do-offsets ((var) &body body)
+ `(loop for ,var upfrom offset
+ repeat size
thereis (progn ,@body))))
(cond
((eq kind :component)
- (do-offsets
- (let ((loc-live (svref sb-always-live offset)))
+ (do-offsets (offset-iter)
+ (let ((loc-live (svref sb-always-live offset-iter)))
(dotimes (i (ir2-block-count *component-being-compiled*))
(when (/= (sbit loc-live i) 0)
- (return offset))))))
+ (return offset-iter))))))
(confs
;; TN is global, iterate over the blocks TN is live in.
(do ((conf confs (global-conflicts-next-tnwise conf)))
(let* ((block (global-conflicts-block conf))
(num (ir2-block-number block)))
(if (eq (global-conflicts-kind conf) :live)
- (do-offsets
- (let ((loc-live (svref sb-always-live offset)))
+ (do-offsets (offset-iter)
+ (let ((loc-live (svref sb-always-live offset-iter)))
(when (/= (sbit loc-live num) 0)
- (return-from offset-conflicts-in-sb offset))))
- (do-offsets
- (let ((loc-confs (svref sb-conflicts offset)))
+ (return-from offset-conflicts-in-sb offset-iter))))
+ (do-offsets (offset-iter)
+ (let ((loc-confs (svref sb-conflicts offset-iter)))
(when (/= (sbit (svref loc-confs num)
(global-conflicts-number conf))
0)
- (return-from offset-conflicts-in-sb offset))))))))
+ (return-from offset-conflicts-in-sb offset-iter))))))))
(t
- (do-offsets
- (and (/= (sbit (svref (svref sb-conflicts offset)
+ (do-offsets (offset-iter)
+ (and (/= (sbit (svref (svref sb-conflicts offset-iter)
(ir2-block-number (tn-local tn)))
(tn-local-number tn))
0)
- offset)))))))
+ offset-iter)))))))
;;; Return true if TN has a conflict in SC at the specified offset.
(declaim (ftype (function (tn sc index) (values (or null index) &optional))
(return t)))
(setq block (optimized-emit-saves-block block saves restores)))
(setq block (ir2-block-prev block)))))
-
+\f
;;; Iterate over the normal TNs, finding the cost of packing on the
;;; stack in units of the number of references. We count all read
;;; references as +1, write references as + *tn-write-cost*, and
;;; The subtraction reflects the fact that having a value in a
;;; register around a call means that code to spill and unspill must
;;; be inserted.
+;;;
+;;; The costs also take into account the loop depth at which each
+;;; reference occurs: the penalty or cost is incremented by the depth
+;;; scaled by *tn-loop-depth-multiplier*. The default (NIL) is to let
+;;; this be one more than the max of the cost for reads (1), for write
+;;; references and for being live across a call.
(defvar *tn-write-cost* 2)
-(defun assign-tn-costs (component)
- (let ((save-penalty *backend-register-save-penalty*))
- (do-ir2-blocks (block component)
- (do ((vop (ir2-block-start-vop block) (vop-next vop)))
- ((null vop))
- (when (eq (vop-info-save-p (vop-info vop)) t)
- (do-live-tns (tn (vop-save-set vop) block)
- (decf (tn-cost tn) save-penalty))))))
+(defvar *tn-loop-depth-multiplier* nil)
- (let ((write-cost *tn-write-cost*))
- (do ((tn (ir2-component-normal-tns (component-info component))
- (tn-next tn)))
- ((null tn))
- (let ((cost (tn-cost tn)))
- (declare (fixnum cost))
- (do ((ref (tn-reads tn) (tn-ref-next ref)))
- ((null ref))
- (incf cost))
- (do ((ref (tn-writes tn) (tn-ref-next ref)))
- ((null ref))
- (incf cost write-cost))
- (setf (tn-cost tn) cost)))))
+(defun assign-tn-costs (component)
+ (let* ((save-penalty *backend-register-save-penalty*)
+ (write-cost *tn-write-cost*)
+ (depth-scale (or *tn-loop-depth-multiplier*
+ (1+ (max 1 write-cost save-penalty)))))
+ (flet ((vop-depth-cost (vop)
+ (let ((loop (block-loop
+ (ir2-block-block
+ (vop-block vop)))))
+ (if loop
+ (* depth-scale (loop-depth loop))
+ 0))))
+ (do-ir2-blocks (block component)
+ (do ((vop (ir2-block-start-vop block) (vop-next vop)))
+ ((null vop))
+ (when (eq (vop-info-save-p (vop-info vop)) t)
+ (let ((penalty (+ save-penalty (vop-depth-cost vop))))
+ (do-live-tns (tn (vop-save-set vop) block)
+ (decf (tn-cost tn) penalty))))))
+
+ (do ((tn (ir2-component-normal-tns (component-info component))
+ (tn-next tn)))
+ ((null tn))
+ (let ((cost (tn-cost tn)))
+ (declare (fixnum cost))
+ (do ((ref (tn-reads tn) (tn-ref-next ref)))
+ ((null ref))
+ (incf cost (1+ (vop-depth-cost (tn-ref-vop ref)))))
+ (do ((ref (tn-writes tn) (tn-ref-next ref)))
+ ((null ref))
+ (incf cost (+ write-cost (vop-depth-cost (tn-ref-vop ref)))))
+ (setf (tn-cost tn) cost))))))
;;; Iterate over the normal TNs, folding over the depth of the looops
;;; that the TN is used in and storing the result in TN-LOOP-DEPTH.
most-positive-fixnum
(length path)))))
+(declaim (type (member :iterative :greedy :adaptive)
+ *register-allocation-method*))
+(defvar *register-allocation-method* :adaptive)
+
+(declaim (ftype function pack-greedy pack-iterative))
+
(defun pack (component)
(unwind-protect
(let ((optimize nil)
+ (speed-3 nil)
(2comp (component-info component)))
(init-sb-vectors component)
;; checking whether any blocks in the component have (> SPEED
;; COMPILE-SPEED).
;;
+ ;; Also, determine if any such block also declares (speed 3),
+ ;; in which case :adaptive register allocation will switch to
+ ;; the iterative Chaitin-Briggs spilling/coloring algorithm.
+ ;;
;; FIXME: This means that a declaration can have a minor
;; effect even outside its scope, and as the packing is done
;; component-globally it'd be tricky to use strict scoping. I
;; doesn't affect the semantics of the generated code in any
;; way. -- JES 2004-10-06
(do-ir2-blocks (block component)
- (when (policy (block-last (ir2-block-block block))
- (> speed compilation-speed))
- (setf optimize t)
- (return)))
+ (let ((block (block-last (ir2-block-block block))))
+ (when (policy block (> speed compilation-speed))
+ (setf optimize t)
+ (when (policy block (= speed 3))
+ (setf speed-3 t)
+ (return)))))
;; Call the target functions.
(do-ir2-blocks (block component)
(when target-fun
(funcall target-fun vop)))))
- ;; Pack wired TNs first.
- (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn)))
- ((null tn))
- (pack-wired-tn tn optimize))
-
- ;; Pack restricted component TNs.
- (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn)))
- ((null tn))
- (when (eq (tn-kind tn) :component)
- (pack-tn tn t optimize)))
-
- ;; Pack other restricted TNs.
- (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn)))
- ((null tn))
- (unless (tn-offset tn)
- (pack-tn tn t optimize)))
-
- ;; Assign costs to normal TNs so we know which ones should
- ;; always be packed on the stack.
+ ;; Assign costs to normal TNs so we know which ones should always
+ ;; be packed on the stack, and which are important not to spill.
(when *pack-assign-costs*
- (assign-tn-costs component)
- (assign-tn-depths component))
-
- ;; Allocate normal TNs, starting with the TNs that are used
- ;; in deep loops. Only allocate in finite SCs (i.e. not on
- ;; the stack).
- (collect ((tns))
- (do-ir2-blocks (block component)
- (let ((ltns (ir2-block-local-tns block)))
- (do ((i (1- (ir2-block-local-tn-count block)) (1- i)))
- ((minusp i))
- (declare (fixnum i))
- (let ((tn (svref ltns i)))
- (unless (or (null tn)
- (eq tn :more)
- (tn-offset tn))
- ;; If loop analysis has been disabled we might as
- ;; well revert to the old behaviour of just
- ;; packing TNs linearly as they appear.
- (unless *loop-analyze*
- (pack-tn tn nil optimize :allow-unbounded-sc nil))
- (tns tn))))))
- (dolist (tn (stable-sort (tns)
- (lambda (a b)
- (cond
- ((> (tn-loop-depth a)
- (tn-loop-depth b))
- t)
- ((= (tn-loop-depth a)
- (tn-loop-depth b))
- (> (tn-cost a) (tn-cost b)))
- (t nil)))))
- (unless (tn-offset tn)
- (pack-tn tn nil optimize :allow-unbounded-sc nil))))
-
- ;; Pack any leftover normal TNs that could not be allocated
- ;; to finite SCs, or TNs that do not appear in any local TN
- ;; map (e.g. :MORE TNs). Since we'll likely be allocating
- ;; on the stack, first allocate TNs that are associated with
- ;; code at shallow lexical depths: this will allocate long
- ;; live ranges (i.e. TNs with more conflicts) first, and
- ;; hopefully minimise stack fragmentation.
- ;;
- ;; Collect in reverse order to give priority to older TNs.
- (let ((contiguous-tns '())
+ (assign-tn-costs component))
+
+ ;; Actually allocate registers for most TNs. After this, only
+ ;; :normal tns may be left unallocated (or TNs :restricted to
+ ;; an unbounded SC).
+ (funcall (ecase *register-allocation-method*
+ (:greedy #'pack-greedy)
+ (:iterative #'pack-iterative)
+ (:adaptive (if speed-3 #'pack-iterative #'pack-greedy)))
+ component 2comp optimize)
+
+ ;; Pack any leftover normal/restricted TN that is not already
+ ;; allocated to a finite SC, or TNs that do not appear in any
+ ;; local TN map (e.g. :MORE TNs). Since we'll likely be
+ ;; allocating on the stack, first allocate TNs that are
+ ;; associated with code at shallow lexical depths: this will
+ ;; allocate long live ranges (i.e. TNs with more conflicts)
+ ;; first, and hopefully minimise stack fragmentation.
+ ;; Component TNs are a degenerate case: they are always live.
+ (let ((component-tns '())
+ (contiguous-tns '())
(tns '()))
- (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn)))
- ((null tn))
- (unless (tn-offset tn)
- (let ((key (cons tn (tn-lexical-depth tn))))
- (if (memq (tn-kind tn) '(:environment :debug-environment
- :component))
- (push key contiguous-tns)
- (push key tns)))))
- (flet ((pack-tns (tns)
- (dolist (tn (stable-sort tns #'< :key #'cdr))
- (let ((tn (car tn)))
- (unless (tn-offset tn)
- (pack-tn tn nil optimize))))))
- ;; first pack TNs that are known to have simple
- ;; live ranges (contiguous lexical scopes)
+ (flet ((register-tn (tn)
+ (unless (tn-offset tn)
+ (case (tn-kind tn)
+ (:component
+ (push tn component-tns))
+ ((:environment :debug-environment)
+ (push tn contiguous-tns))
+ (t
+ (push tn tns))))))
+ (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn)))
+ ((null tn))
+ ;; by this time, restricted TNs must either be
+ ;; allocated in the right SC or unbounded
+ (aver (or (tn-offset tn) (unbounded-tn-p tn)))
+ (register-tn tn))
+ (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn)))
+ ((null tn))
+ (register-tn tn)))
+ (flet ((pack-tns (tns &optional in-order)
+ (dolist (tn (if in-order
+ tns
+ (schwartzian-stable-sort-list
+ tns #'< :key #'tn-lexical-depth)))
+ (unless (tn-offset tn)
+ (pack-tn tn nil optimize)))))
+ ;; first pack TNs that are known to have simple live
+ ;; ranges (contiguous lexical scopes)
+ (pack-tns component-tns t)
(pack-tns contiguous-tns)
(pack-tns tns)))
(values))
(clean-up-pack-structures)))
+
+(defun pack-greedy (component 2comp optimize)
+ (declare (type component component)
+ (type ir2-component 2comp))
+ ;; Pack wired TNs first.
+ (do ((tn (ir2-component-wired-tns 2comp) (tn-next tn)))
+ ((null tn))
+ (pack-wired-tn tn optimize))
+
+ ;; Then, pack restricted TNs, ones that are live over the whole
+ ;; component first (they cause no fragmentation). Sort by TN cost
+ ;; to help important TNs get good targeting.
+ (collect ((component)
+ (normal))
+ (do ((tn (ir2-component-restricted-tns 2comp) (tn-next tn)))
+ ((null tn))
+ (unless (or (tn-offset tn) (unbounded-tn-p tn))
+ (if (eq :component (tn-kind tn))
+ (component tn)
+ (normal tn))))
+ (flet ((pack-tns (tns)
+ (dolist (tn (stable-sort tns #'> :key #'tn-cost))
+ (pack-tn tn t optimize))))
+ (pack-tns (component))
+ (pack-tns (normal))))
+
+ (cond ((and *loop-analyze* *pack-assign-costs*)
+ ;; Allocate normal TNs, starting with the TNs that are
+ ;; heavily used in deep loops (which is taken into account in
+ ;; TN spill costs). Only allocate in finite SCs (i.e. not on
+ ;; the stack).
+ (collect ((tns))
+ (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn)))
+ ((null tn))
+ (unless (or (tn-offset tn)
+ (eq (tn-kind tn) :more)
+ (unbounded-tn-p tn)
+ (and (sc-save-p (tn-sc tn)) ; SC caller-save, but TN
+ (minusp (tn-cost tn)))) ; lives over many calls
+ (tns tn)))
+ (dolist (tn (stable-sort (tns) #'> :key #'tn-cost))
+ (unless (tn-offset tn)
+ ;; if it can't fit in a bounded SC, the final pass will
+ ;; take care of stack packing.
+ (pack-tn tn nil optimize :allow-unbounded-sc nil)))))
+ (t
+ ;; If loop analysis has been disabled we might as well revert
+ ;; to the old behaviour of just packing TNs linearly as they
+ ;; appear.
+ (do-ir2-blocks (block component)
+ (let ((ltns (ir2-block-local-tns block)))
+ (do ((i (1- (ir2-block-local-tn-count block)) (1- i)))
+ ((minusp i))
+ (declare (fixnum i))
+ (let ((tn (svref ltns i)))
+ (unless (or (null tn)
+ (eq tn :more)
+ (tn-offset tn)
+ (unbounded-tn-p tn))
+ (pack-tn tn nil optimize :allow-unbounded-sc nil)))))))))