src/compiler/physenvanal.lisp

   1 ;;;; This file implements the environment analysis phase for the
   2 ;;;; compiler. This phase annotates IR1 with a hierarchy of environment
   3 ;;;; structures, determining the physical environment in which each
   4 ;;;; LAMBDA allocates its variables and finding what values are closed
   5 ;;;; over by each physical environment.
   6
   7 ;;;; This software is part of the SBCL system. See the README file for
   8 ;;;; more information.
   9 ;;;;
  10 ;;;; This software is derived from the CMU CL system, which was
  11 ;;;; written at Carnegie Mellon University and released into the
  12 ;;;; public domain. The software is in the public domain and is
  13 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
  14 ;;;; files for more information.
  15
  16 (in-package "SB!C")
  17
  18 ;;; Do environment analysis on the code in COMPONENT. This involves
  19 ;;; various things:
  20 ;;;  1. Make a PHYSENV structure for each non-LET LAMBDA, assigning
  21 ;;;     the LAMBDA-PHYSENV for all LAMBDAs.
  22 ;;;  2. Find all values that need to be closed over by each
  23 ;;;     physical environment.
  24 ;;;  3. Scan the blocks in the component closing over non-local-exit
  25 ;;;     continuations.
  26 ;;;  4. Delete all non-top-level functions with no references. This
  27 ;;;     should only get functions with non-NULL kinds, since normal
  28 ;;;     functions are deleted when their references go to zero.
  29 (defun physenv-analyze (component)
  30   (declare (type component component))
  31   (aver (every (lambda (x)
  32                  (eq (functional-kind x) :deleted))
  33                (component-new-functionals component)))
  34   (setf (component-new-functionals component) ())
  35   (dolist (clambda (component-lambdas component))
  36     (reinit-lambda-physenv clambda))
  37   (mapc #'add-lambda-vars-and-let-vars-to-closures
  38         (component-lambdas component))
  39
  40   (find-non-local-exits component)
  41   (find-cleanup-points component)
  42   (tail-annotate component)
  43
  44   (dolist (fun (component-lambdas component))
  45     (when (null (leaf-refs fun))
  46       (let ((kind (functional-kind fun)))
  47         (unless (or (eq kind :toplevel)
  48                     (functional-has-external-references-p fun))
  49           (aver (member kind '(:optional :cleanup :escape)))
  50           (setf (functional-kind fun) nil)
  51           (delete-functional fun)))))
  52
  53   (values))
  54
  55 ;;; This is to be called on a COMPONENT with top level LAMBDAs before
  56 ;;; the compilation of the associated non-top-level code to detect
  57 ;;; closed over top level variables. We just do COMPUTE-CLOSURE on all
  58 ;;; the lambdas. This will pre-allocate environments for all the
  59 ;;; functions with closed-over top level variables. The post-pass will
  60 ;;; use the existing structure, rather than allocating a new one. We
  61 ;;; return true if we discover any possible closure vars.
  62 (defun pre-physenv-analyze-toplevel (component)
  63   (declare (type component component))
  64   (let ((found-it nil))
  65     (dolist (lambda (component-lambdas component))
  66       (when (add-lambda-vars-and-let-vars-to-closures lambda)
  67         (setq found-it t)))
  68     found-it))
  69
  70 ;;; This is like old CMU CL PRE-ENVIRONMENT-ANALYZE-TOPLEVEL, except
  71 ;;;   (1) It's been brought into the post-0.7.0 world where the property
  72 ;;;       HAS-EXTERNAL-REFERENCES-P is orthogonal to the property of
  73 ;;;       being specialized/optimized for locall at top level.
  74 ;;;   (2) There's no return value, since we don't care whether we
  75 ;;;       find any possible closure variables.
  76 ;;;
  77 ;;; I wish I could find an explanation of why
  78 ;;; PRE-ENVIRONMENT-ANALYZE-TOPLEVEL is important. The old CMU CL
  79 ;;; comments said
  80 ;;;     Called on component with top level lambdas before the
  81 ;;;     compilation of the associated non-top-level code to detect
  82 ;;;     closed over top level variables. We just do COMPUTE-CLOSURE on
  83 ;;;     all the lambdas. This will pre-allocate environments for all
  84 ;;;     the functions with closed-over top level variables. The
  85 ;;;     post-pass will use the existing structure, rather than
  86 ;;;     allocating a new one. We return true if we discover any
  87 ;;;     possible closure vars.
  88 ;;; But that doesn't seem to explain either why it's important to do
  89 ;;; this for top level lambdas, or why it's important to do it only
  90 ;;; for top level lambdas instead of just doing it indiscriminately
  91 ;;; for all lambdas. I do observe that when it's not done, compiler
  92 ;;; assertions occasionally fail. My tentative hypothesis for why it's
  93 ;;; important to do it is that other environment analysis expects to
  94 ;;; bottom out on the outermost enclosing thing, and (insert
  95 ;;; mysterious reason here) it's important to set up bottomed-out-here
  96 ;;; environments before anything else. I haven't been able to guess
  97 ;;; why it's important to do it selectively instead of
  98 ;;; indiscriminately. -- WHN 2001-11-10
  99 (defun preallocate-physenvs-for-toplevelish-lambdas (component)
 100   (dolist (clambda (component-lambdas component))
 101     (when (lambda-toplevelish-p clambda)
 102       (add-lambda-vars-and-let-vars-to-closures clambda)))
 103   (values))
 104
 105 ;;; If CLAMBDA has a PHYSENV, return it, otherwise assign an empty one
 106 ;;; and return that.
 107 (defun get-lambda-physenv (clambda)
 108   (declare (type clambda clambda))
 109   (let ((homefun (lambda-home clambda)))
 110     (or (lambda-physenv homefun)
 111         (let ((res (make-physenv :lambda homefun)))
 112           (setf (lambda-physenv homefun) res)
 113           ;; All the LETLAMBDAs belong to HOMEFUN, and share the same
 114           ;; PHYSENV. Thus, (1) since HOMEFUN's PHYSENV was NIL,
 115           ;; theirs should be NIL too, and (2) since we're modifying
 116           ;; HOMEFUN's PHYSENV, we should modify theirs, too.
 117           (dolist (letlambda (lambda-lets homefun))
 118             (aver (eql (lambda-home letlambda) homefun))
 119             (aver (null (lambda-physenv letlambda)))
 120             (setf (lambda-physenv letlambda) res))
 121           res))))
 122
 123 ;;; If FUN has no physical environment, assign one, otherwise clean up
 124 ;;; the old physical environment, removing/flagging variables that
 125 ;;; have no sets or refs. If a var has no references, we remove it
 126 ;;; from the closure. If it has no sets, we clear the INDIRECT flag.
 127 ;;; This is necessary because pre-analysis is done before
 128 ;;; optimization.
 129 (defun reinit-lambda-physenv (fun)
 130   (let ((old (lambda-physenv (lambda-home fun))))
 131     (cond (old
 132            (setf (physenv-closure old)
 133                  (delete-if (lambda (x)
 134                               (and (lambda-var-p x)
 135                                    (null (leaf-refs x))))
 136                             (physenv-closure old)))
 137            (flet ((clear (fun)
 138                     (dolist (var (lambda-vars fun))
 139                       (unless (lambda-var-sets var)
 140                         (setf (lambda-var-indirect var) nil)))))
 141              (clear fun)
 142              (map nil #'clear (lambda-lets fun))))
 143           (t
 144            (get-lambda-physenv fun))))
 145   (values))
 146
 147 ;;; Get NODE's environment, assigning one if necessary.
 148 (defun get-node-physenv (node)
 149   (declare (type node node))
 150   (get-lambda-physenv (node-home-lambda node)))
 151
 152 ;;; private guts of ADD-LAMBDA-VARS-AND-LET-VARS-TO-CLOSURES
 153 ;;;
 154 ;;; This is the old CMU CL COMPUTE-CLOSURE, which only works on
 155 ;;; LAMBDA-VARS directly, not on the LAMBDA-VARS of LAMBDA-LETS. It
 156 ;;; seems never to be valid to use this operation alone, so in SBCL,
 157 ;;; it's private, and the public interface,
 158 ;;; ADD-LAMBDA-VARS-AND-LET-VARS-TO-CLOSURES, always runs over all the
 159 ;;; variables, not only the LAMBDA-VARS of CLAMBDA itself but also
 160 ;;; the LAMBDA-VARS of CLAMBDA's LAMBDA-LETS.
 161 (defun %add-lambda-vars-to-closures (clambda)
 162   (let ((physenv (get-lambda-physenv clambda))
 163         (did-something nil))
 164     (note-unreferenced-vars clambda)
 165     (dolist (var (lambda-vars clambda))
 166       (dolist (ref (leaf-refs var))
 167         (let ((ref-physenv (get-node-physenv ref)))
 168           (unless (eq ref-physenv physenv)
 169             (when (lambda-var-sets var)
 170               (setf (lambda-var-indirect var) t))
 171             (setq did-something t)
 172             (close-over var ref-physenv physenv))))
 173       (dolist (set (basic-var-sets var))
 174
 175         ;; Variables which are set but never referenced can be
 176         ;; optimized away, and closing over them here would just
 177         ;; interfere with that. (In bug 147, it *did* interfere with
 178         ;; that, causing confusion later. This UNLESS solves that
 179         ;; problem, but I (WHN) am not 100% sure it's best to solve
 180         ;; the problem this way instead of somehow solving it
 181         ;; somewhere upstream and just doing (AVER (LEAF-REFS VAR))
 182         ;; here.)
 183         (unless (null (leaf-refs var))
 184
 185           (let ((set-physenv (get-node-physenv set)))
 186             (unless (eq set-physenv physenv)
 187               (setf did-something t
 188                     (lambda-var-indirect var) t)
 189               (close-over var set-physenv physenv))))))
 190     did-something))
 191
 192 ;;; Find any variables in CLAMBDA -- either directly in LAMBDA-VARS or
 193 ;;; in the LAMBDA-VARS of elements of LAMBDA-LETS -- with references
 194 ;;; outside of the home environment and close over them. If a
 195 ;;; closed-over variable is set, then we set the INDIRECT flag so that
 196 ;;; we will know the closed over value is really a pointer to the
 197 ;;; value cell. We also warn about unreferenced variables here, just
 198 ;;; because it's a convenient place to do it. We return true if we
 199 ;;; close over anything.
 200 (defun add-lambda-vars-and-let-vars-to-closures (clambda)
 201   (declare (type clambda clambda))
 202   (let ((did-something nil))
 203     (when (%add-lambda-vars-to-closures clambda)
 204       (setf did-something t))
 205     (dolist (lambda-let (lambda-lets clambda))
 206       ;; There's no need to recurse through full COMPUTE-CLOSURE
 207       ;; here, since LETS only go one layer deep.
 208       (aver (null (lambda-lets lambda-let)))
 209       (when (%add-lambda-vars-to-closures lambda-let)
 210         (setf did-something t)))
 211     did-something))
 212
 213 ;;; Make sure that THING is closed over in REF-PHYSENV and in all
 214 ;;; PHYSENVs for the functions that reference REF-PHYSENV's function
 215 ;;; (not just calls). HOME-PHYSENV is THING's home environment. When we
 216 ;;; reach the home environment, we stop propagating the closure.
 217 (defun close-over (thing ref-physenv home-physenv)
 218   (declare (type physenv ref-physenv home-physenv))
 219   (let ((flooded-physenvs nil))
 220     (named-let flood ((flooded-physenv ref-physenv))
 221       (unless (or (eql flooded-physenv home-physenv)
 222                   (member flooded-physenv flooded-physenvs))
 223         (push flooded-physenv flooded-physenvs)
 224         (pushnew thing (physenv-closure flooded-physenv))
 225         (dolist (ref (leaf-refs (physenv-lambda flooded-physenv)))
 226           (flood (get-node-physenv ref))))))
 227   (values))
 228 \f
 229 ;;;; non-local exit
 230
 231 ;;; Insert the entry stub before the original exit target, and add a
 232 ;;; new entry to the PHYSENV-NLX-INFO. The %NLX-ENTRY call in the
 233 ;;; stub is passed the NLX-INFO as an argument so that the back end
 234 ;;; knows what entry is being done.
 235 ;;;
 236 ;;; The link from the EXIT block to the entry stub is changed to be a
 237 ;;; link to the component head. Similarly, the EXIT block is linked to
 238 ;;; the component tail. This leaves the entry stub reachable, but
 239 ;;; makes the flow graph less confusing to flow analysis.
 240 ;;;
 241 ;;; If a CATCH or an UNWIND-protect, then we set the LEXENV for the
 242 ;;; last node in the cleanup code to be the enclosing environment, to
 243 ;;; represent the fact that the binding was undone as a side effect of
 244 ;;; the exit. This will cause a lexical exit to be broken up if we are
 245 ;;; actually exiting the scope (i.e. a BLOCK), and will also do any
 246 ;;; other cleanups that may have to be done on the way.
 247 (defun insert-nlx-entry-stub (exit env)
 248   (declare (type physenv env) (type exit exit))
 249   (let* ((exit-block (node-block exit))
 250          (next-block (first (block-succ exit-block)))
 251          (cleanup (entry-cleanup (exit-entry exit)))
 252          (info (make-nlx-info :cleanup cleanup
 253                               :continuation (node-cont exit)))
 254          (entry (exit-entry exit))
 255          (new-block (insert-cleanup-code exit-block next-block
 256                                          entry
 257                                          `(%nlx-entry ',info)
 258                                          (entry-cleanup entry)))
 259          (component (block-component new-block)))
 260     (unlink-blocks exit-block new-block)
 261     (link-blocks exit-block (component-tail component))
 262     (link-blocks (component-head component) new-block)
 263
 264     (setf (nlx-info-target info) new-block)
 265     (push info (physenv-nlx-info env))
 266     (push info (cleanup-nlx-info cleanup))
 267     (when (member (cleanup-kind cleanup) '(:catch :unwind-protect))
 268       (setf (node-lexenv (block-last new-block))
 269             (node-lexenv entry))))
 270
 271   (values))
 272
 273 ;;; Do stuff necessary to represent a non-local exit from the node
 274 ;;; EXIT into ENV. This is called for each non-local exit node, of
 275 ;;; which there may be several per exit continuation. This is what we
 276 ;;; do:
 277 ;;; -- If there isn't any NLX-INFO entry in the environment, make
 278 ;;;    an entry stub, otherwise just move the exit block link to
 279 ;;;    the component tail.
 280 ;;; -- Close over the NLX-INFO in the exit environment.
 281 ;;; -- If the exit is from an :ESCAPE function, then substitute a
 282 ;;;    constant reference to NLX-INFO structure for the escape
 283 ;;;    function reference. This will cause the escape function to
 284 ;;;    be deleted (although not removed from the DFO.)  The escape
 285 ;;;    function is no longer needed, and we don't want to emit code
 286 ;;;    for it. We then also change the %NLX-ENTRY call to use the
 287 ;;;    NLX continuation so that there will be a use to represent
 288 ;;;    the NLX use.
 289 (defun note-non-local-exit (env exit)
 290   (declare (type physenv env) (type exit exit))
 291   (let ((entry (exit-entry exit))
 292         (cont (node-cont exit))
 293         (exit-fun (node-home-lambda exit)))
 294     (if (find-nlx-info entry cont)
 295         (let ((block (node-block exit)))
 296           (aver (= (length (block-succ block)) 1))
 297           (unlink-blocks block (first (block-succ block)))
 298           (link-blocks block (component-tail (block-component block))))
 299         (insert-nlx-entry-stub exit env))
 300     (let ((info (find-nlx-info entry cont)))
 301       (aver info)
 302       (close-over info (node-physenv exit) env)
 303       (when (eq (functional-kind exit-fun) :escape)
 304         (mapc (lambda (x)
 305                 (setf (node-derived-type x) *wild-type*))
 306               (leaf-refs exit-fun))
 307         (substitute-leaf (find-constant info) exit-fun)
 308         (let ((node (block-last (nlx-info-target info))))
 309           (delete-continuation-use node)
 310           (add-continuation-use node (nlx-info-continuation info))))))
 311   (values))
 312
 313 ;;; Iterate over the EXITs in COMPONENT, calling NOTE-NON-LOCAL-EXIT
 314 ;;; when we find a block that ends in a non-local EXIT node. We also
 315 ;;; ensure that all EXIT nodes are either non-local or degenerate by
 316 ;;; calling IR1-OPTIMIZE-EXIT on local exits. This makes life simpler
 317 ;;; for later phases.
 318 (defun find-non-local-exits (component)
 319   (declare (type component component))
 320   (dolist (lambda (component-lambdas component))
 321     (dolist (entry (lambda-entries lambda))
 322       (dolist (exit (entry-exits entry))
 323         (let ((target-physenv (node-physenv entry)))
 324           (if (eq (node-physenv exit) target-physenv)
 325               (maybe-delete-exit exit)
 326               (note-non-local-exit target-physenv exit))))))
 327   (values))
 328 \f
 329 ;;;; cleanup emission
 330
 331 ;;; Zoom up the cleanup nesting until we hit CLEANUP1, accumulating
 332 ;;; cleanup code as we go. When we are done, convert the cleanup code
 333 ;;; in an implicit MV-PROG1. We have to force local call analysis of
 334 ;;; new references to UNWIND-PROTECT cleanup functions. If we don't
 335 ;;; actually have to do anything, then we don't insert any cleanup
 336 ;;; code. (FIXME: There's some confusion here, left over from CMU CL
 337 ;;; comments. CLEANUP1 isn't mentioned in the code of this function.
 338 ;;; It is in code elsewhere, but if the comments for this function
 339 ;;; mention it they should explain the relationship to the other code.)
 340 ;;;
 341 ;;; If we do insert cleanup code, we check that BLOCK1 doesn't end in
 342 ;;; a "tail" local call.
 343 ;;;
 344 ;;; We don't need to adjust the ending cleanup of the cleanup block,
 345 ;;; since the cleanup blocks are inserted at the start of the DFO, and
 346 ;;; are thus never scanned.
 347 (defun emit-cleanups (block1 block2)
 348   (declare (type cblock block1 block2))
 349   (collect ((code)
 350             (reanalyze-funs))
 351     (let ((cleanup2 (block-start-cleanup block2)))
 352       (do ((cleanup (block-end-cleanup block1)
 353                     (node-enclosing-cleanup (cleanup-mess-up cleanup))))
 354           ((eq cleanup cleanup2))
 355         (let* ((node (cleanup-mess-up cleanup))
 356                (args (when (basic-combination-p node)
 357                        (basic-combination-args node))))
 358           (ecase (cleanup-kind cleanup)
 359             (:special-bind
 360              (code `(%special-unbind ',(continuation-value (first args)))))
 361             (:catch
 362              (code `(%catch-breakup)))
 363             (:unwind-protect
 364              (code `(%unwind-protect-breakup))
 365              (let ((fun (ref-leaf (continuation-use (second args)))))
 366                (reanalyze-funs fun)
 367                (code `(%funcall ,fun))))
 368             ((:block :tagbody)
 369              (dolist (nlx (cleanup-nlx-info cleanup))
 370                (code `(%lexical-exit-breakup ',nlx)))))))
 371
 372       (when (code)
 373         (aver (not (node-tail-p (block-last block1))))
 374         (insert-cleanup-code block1 block2
 375                              (block-last block1)
 376                              `(progn ,@(code)))
 377         (dolist (fun (reanalyze-funs))
 378           (locall-analyze-fun-1 fun)))))
 379
 380   (values))
 381
 382 ;;; Loop over the blocks in COMPONENT, calling EMIT-CLEANUPS when we
 383 ;;; see a successor in the same environment with a different cleanup.
 384 ;;; We ignore the cleanup transition if it is to a cleanup enclosed by
 385 ;;; the current cleanup, since in that case we are just messing up the
 386 ;;; environment, hence this is not the place to clean it.
 387 (defun find-cleanup-points (component)
 388   (declare (type component component))
 389   (do-blocks (block1 component)
 390     (let ((env1 (block-physenv block1))
 391           (cleanup1 (block-end-cleanup block1)))
 392       (dolist (block2 (block-succ block1))
 393         (when (block-start block2)
 394           (let ((env2 (block-physenv block2))
 395                 (cleanup2 (block-start-cleanup block2)))
 396             (unless (or (not (eq env2 env1))
 397                         (eq cleanup1 cleanup2)
 398                         (and cleanup2
 399                              (eq (node-enclosing-cleanup
 400                                   (cleanup-mess-up cleanup2))
 401                                  cleanup1)))
 402               (emit-cleanups block1 block2)))))))
 403   (values))
 404
 405 ;;; Mark optimizable tail-recursive uses of function result
 406 ;;; continuations with the corresponding TAIL-SET.
 407 (defun tail-annotate (component)
 408   (declare (type component component))
 409   (dolist (fun (component-lambdas component))
 410     (let ((ret (lambda-return fun)))
 411       ;; Nodes whose type is NIL (i.e. don't return) such as calls to
 412       ;; ERROR are never annotated as TAIL-P, in order to preserve
 413       ;; debugging information.
 414       ;;
 415       ;; FIXME: It might be better to add another DEFKNOWN property
 416       ;; (e.g. NO-TAIL-RECURSION) and use it for error-handling
 417       ;; functions like ERROR, instead of spreading this special case
 418       ;; net so widely.
 419       (when ret
 420         (let ((result (return-result ret)))
 421           (do-uses (use result)
 422             (when (and (policy use merge-tail-calls)
 423                        (immediately-used-p result use)
 424                        (or (not (eq (node-derived-type use) *empty-type*))
 425                            (not (basic-combination-p use))
 426                            (eq (basic-combination-kind use) :local)))
 427               (setf (node-tail-p use) t)))))))
 428   (values))