From ba39d165a0bb6fabba6d6feb9b6fb88ae4d544ff Mon Sep 17 00:00:00 2001 From: Paul Khuong Date: Mon, 24 Jun 2013 23:24:05 -0400 Subject: [PATCH] Pack (mostly) stack TNs according to lexical scope information Packing TNs from shallow scopes before more deeply nested one is a perfect elimination order when the live ranges span the full scope (the interference graph is a comparability graph). Use that as a heuristic, and do that for TNs that are known to have such simple live ranges before the rest: this ensures that bad TNs don't mess everything up. The result is much tighter stack allocation (most of the effect comes from initialising stack frames at a smaller size, and growing less aggressively), and fewer long-lived stray references. Incidentally: fix catch block packing on win32, solving lp#1072739 --- NEWS | 5 +++ src/compiler/pack.lisp | 77 ++++++++++++++++++++++++++++++++++++++-------- tests/compiler.pure.lisp | 50 ++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index 684a691..e578a61 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,15 @@ ;;;; -*- coding: utf-8; fill-column: 78 -*- changes relative to sbcl-1.1.9: * enhancement: ASDF has been updated to 3.0.2. + * optimization: stack frames are packed more efficiently on x86oids, which + ought to reduce the frequency of Methuselahn conservative references (it + certainly helps with gc.impure.lisp / BUG-936304 on x86). * bug fix: Compiling potential modularic arithmetic forms does not cause type errors when some integer types lack lower or upper bounds. (lp#1199127) * bug fix: Non-trivial modular arithmetic forms are always cut to the right bitwidth before being used in a non-modular context. (lp#1199428) + * bug fix: Multiple catch/unwind blocks in a single function are now + allocated in the right stack order on win32. (lp#1072739) changes in sbcl-1.1.9 relative to sbcl-1.1.8: * new feature: the contrib SB-GMP links with libgmp at runtime to speed diff --git a/src/compiler/pack.lisp b/src/compiler/pack.lisp index 51ebd36..b30b86d 100644 --- a/src/compiler/pack.lisp +++ b/src/compiler/pack.lisp @@ -1351,7 +1351,7 @@ ;;; If we are attempting to pack in the SC of the save TN for a TN ;;; with a :SPECIFIED-SAVE TN, then we pack in that location, instead ;;; of allocating a new stack location. -(defun pack-tn (tn restricted optimize) +(defun pack-tn (tn restricted optimize &key (allow-unbounded-sc t)) (declare (type tn tn)) (let* ((original (original-tn tn)) (fsc (tn-sc tn)) @@ -1364,12 +1364,15 @@ (do ((sc fsc (pop alternates))) ((null sc) (failed-to-pack-error tn restricted)) + (unless (or allow-unbounded-sc + (neq (sb-kind (sc-sb sc)) :unbounded)) + (return nil)) (when (eq sc specified-save-sc) (unless (tn-offset save) (pack-tn save nil optimize)) (setf (tn-offset tn) (tn-offset save)) (setf (tn-sc tn) (tn-sc save)) - (return)) + (return t)) (when (or restricted (not (and (minusp (tn-cost tn)) (sc-save-p sc)))) (let ((loc (or (find-ok-target-offset original sc) @@ -1384,7 +1387,7 @@ (add-location-conflicts original sc loc optimize) (setf (tn-sc tn) sc) (setf (tn-offset tn) loc) - (return)))))) + (return t)))))) (values)) ;;; Pack a wired TN, checking that the offset is in bounds for the SB, @@ -1496,6 +1499,31 @@ (setf (finite-sb-live-tns sb) (make-array size :initial-element nil)))))) +(defun tn-lexical-depth (tn) + (let ((path t)) ; dummy initial value + (labels ((path (lambda) + (nreverse (loop while lambda + collect lambda + do (setf lambda (lambda-parent lambda))))) + (register-scope (lambda) + (let ((new-path (path lambda))) + (setf path (if (eql path t) + new-path + (subseq path + 0 (mismatch path new-path)))))) + (walk-tn-refs (ref) + (do ((ref ref (tn-ref-next ref))) + ((null ref)) + (binding* ((node (vop-node (tn-ref-vop ref)) + :exit-if-null)) + (register-scope (lexenv-lambda + (node-lexenv node))))))) + (walk-tn-refs (tn-reads tn)) + (walk-tn-refs (tn-writes tn)) + (if (eql path t) + most-positive-fixnum + (length path))))) + (defun pack (component) (unwind-protect (let ((optimize nil) @@ -1551,7 +1579,8 @@ (assign-tn-depths component)) ;; Allocate normal TNs, starting with the TNs that are used - ;; in deep loops. + ;; in deep loops. Only allocate in finite SCs (i.e. not on + ;; the stack). (collect ((tns)) (do-ir2-blocks (block component) (let ((ltns (ir2-block-local-tns block))) @@ -1566,7 +1595,7 @@ ;; well revert to the old behaviour of just ;; packing TNs linearly as they appear. (unless *loop-analyze* - (pack-tn tn nil optimize)) + (pack-tn tn nil optimize :allow-unbounded-sc nil)) (tns tn)))))) (dolist (tn (stable-sort (tns) (lambda (a b) @@ -1579,14 +1608,36 @@ (> (tn-cost a) (tn-cost b))) (t nil))))) (unless (tn-offset tn) - (pack-tn tn nil optimize)))) - - ;; Pack any leftover normal TNs. This is to deal with :MORE TNs, - ;; which could possibly not appear in any local TN map. - (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) - ((null tn)) - (unless (tn-offset tn) - (pack-tn tn nil optimize))) + (pack-tn tn nil optimize :allow-unbounded-sc nil)))) + + ;; Pack any leftover normal TNs that could not be allocated + ;; to finite SCs, or TNs that do not appear in any local TN + ;; map (e.g. :MORE TNs). Since we'll likely be allocating + ;; on the stack, first allocate TNs that are associated with + ;; code at shallow lexical depths: this will allocate long + ;; live ranges (i.e. TNs with more conflicts) first, and + ;; hopefully minimise stack fragmentation. + ;; + ;; Collect in reverse order to give priority to older TNs. + (let ((contiguous-tns '()) + (tns '())) + (do ((tn (ir2-component-normal-tns 2comp) (tn-next tn))) + ((null tn)) + (unless (tn-offset tn) + (let ((key (cons tn (tn-lexical-depth tn)))) + (if (memq (tn-kind tn) '(:environment :debug-environment + :component)) + (push key contiguous-tns) + (push key tns))))) + (flet ((pack-tns (tns) + (dolist (tn (stable-sort tns #'< :key #'cdr)) + (let ((tn (car tn))) + (unless (tn-offset tn) + (pack-tn tn nil optimize)))))) + ;; first pack TNs that are known to have simple + ;; live ranges (contiguous lexical scopes) + (pack-tns contiguous-tns) + (pack-tns tns))) ;; Do load TN packing and emit saves. (let ((*repack-blocks* nil)) diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp index 9e955c1..aa8861d 100644 --- a/tests/compiler.pure.lisp +++ b/tests/compiler.pure.lisp @@ -4719,3 +4719,53 @@ (b -3745511761)) (assert (eql (funcall f1 a b) (funcall f2 a b)))))) + +;; win32 is very specific about the order in which catch blocks +;; must be allocated on the stack +(with-test (:name :bug-121581169) + (let ((f (compile nil + `(lambda () + (STRING= + (LET ((% 23)) + (WITH-OUTPUT-TO-STRING (G13908) + (PRINC + (LET () + (DECLARE (OPTIMIZE (SB-EXT:INHIBIT-WARNINGS 3))) + (HANDLER-CASE + (WITH-OUTPUT-TO-STRING (G13909) (PRINC %A%B% G13909) G13909) + (UNBOUND-VARIABLE NIL + (HANDLER-CASE + (WITH-OUTPUT-TO-STRING (G13914) + (PRINC %A%B% G13914) + (PRINC "" G13914) + G13914) + (UNBOUND-VARIABLE NIL + (HANDLER-CASE + (WITH-OUTPUT-TO-STRING (G13913) + (PRINC %A%B G13913) + (PRINC "%" G13913) + G13913) + (UNBOUND-VARIABLE NIL + (HANDLER-CASE + (WITH-OUTPUT-TO-STRING (G13912) + (PRINC %A% G13912) + (PRINC "b%" G13912) + G13912) + (UNBOUND-VARIABLE NIL + (HANDLER-CASE + (WITH-OUTPUT-TO-STRING (G13911) + (PRINC %A G13911) + (PRINC "%b%" G13911) + G13911) + (UNBOUND-VARIABLE NIL + (HANDLER-CASE + (WITH-OUTPUT-TO-STRING (G13910) + (PRINC % G13910) + (PRINC "a%b%" G13910) + G13910) + (UNBOUND-VARIABLE NIL + (ERROR "Interpolation error in \"%a%b%\" +")))))))))))))) + G13908))) + "23a%b%"))))) + (assert (funcall f)))) -- 1.7.10.4