experimental/compiler.lisp

   1 ;;; compiler.lisp ---
   2
   3 ;; Copyright (C) 2013 David Vazquez
   4
   5 ;; JSCL is free software: you can redistribute it and/or
   6 ;; modify it under the terms of the GNU General Public License as
   7 ;; published by the Free Software Foundation, either version 3 of the
   8 ;; License, or (at your option) any later version.
   9 ;;
  10 ;; JSCL is distributed in the hope that it will be useful, but
  11 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 ;; General Public License for more details.
  14 ;;
  15 ;; You should have received a copy of the GNU General Public License
  16 ;; along with JSCL.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 (defpackage :jscl
  19   (:use :cl))
  20
  21 (in-package :jscl)
  22
  23 ;;;; Utilities
  24 ;;;;
  25 ;;;; Random Common Lisp code useful to use here and there.
  26
  27 (defmacro with-gensyms ((&rest vars) &body body)
  28   `(let ,(mapcar (lambda (var) `(,var (gensym ,(concatenate 'string (string var) "-")))) vars)
  29      ,@body))
  30
  31 (defun singlep (x)
  32   (and (consp x) (null (cdr x))))
  33
  34 (defun unlist (x)
  35   (assert (singlep x))
  36   (first x))
  37
  38 (defun generic-printer (x stream)
  39   (print-unreadable-object (x stream :type t :identity t)))
  40
  41 ;;; A generic counter mechanism. IDs are used generally for debugging
  42 ;;; purposes. You can bind *counter-alist* to NIL to reset the
  43 ;;; counters in a dynamic extent.
  44 (defvar *counter-alist* nil)
  45 (defun generate-id (class)
  46   (let ((e (assoc class *counter-alist*)))
  47     (if e
  48         (incf (cdr e))
  49         (prog1 1
  50           (push (cons class 1) *counter-alist*)))))
  51
  52
  53 ;;;; Intermediate representation structures
  54 ;;;;
  55 ;;;; This intermediate representation (IR) is a simplified version of
  56 ;;;; the first intermediate representation what you will find if you
  57 ;;;; have a look to the source code of SBCL. Some terminology is also
  58 ;;;; used, but other is changed, so be careful if you assume you know
  59 ;;;; what it is because you know the name.
  60 ;;;;
  61 ;;;; Computations are represented by `node'.  Nodes are grouped
  62 ;;;; sequencially into `basic-block'. It is a plain representation
  63 ;;;; rather than a nested one. Computations take data and produce a
  64 ;;;; value. Both data transfer are represented by `lvar'.
  65
  66 (defstruct leaf)
  67
  68 ;;; A (lexical) variable. Special variables has not a special
  69 ;;; representation in the IR. They are handled by the primitive
  70 ;;; functions `%symbol-function' and `%symbol-value'.
  71 (defstruct (var (:include leaf))
  72   ;; The symbol which names this variable in the source code.
  73   name)
  74
  75 ;;; A literal Lisp object. It usually comes from a quoted expression.
  76 (defstruct (constant (:include leaf))
  77   ;; The object itself.
  78   value)
  79
  80 ;;; A lambda expression. Why do we name it `functional'? Well,
  81 ;;; function is reserved by the ANSI, isn't it?
  82 (defstruct (functional (:include leaf) (:print-object generic-printer))
  83   ;; The symbol which names this function in the source code or null
  84   ;; if we do not know or it is an anonymous function.
  85   name
  86   arguments
  87   return-lvar
  88   component)
  89
  90 ;;; An abstract place where the result of a computation is stored and
  91 ;;; it can be referenced from other nodes, so lvars are responsible
  92 ;;; for keeping the necessary information of the nested structure of
  93 ;;; the code in this plain representation.
  94 (defstruct lvar
  95   (id (generate-id 'lvar)))
  96
  97 ;;; A base structure for every single computation. Most of the
  98 ;;; computations are valued.
  99 (defstruct (node (:print-object generic-printer))
 100   ;; The next and the prev slots are the next nodes and the previous
 101   ;; node in the basic block sequence respectively.
 102   next prev
 103   ;; Lvar which stands for the result of the computation of this node.
 104   lvar)
 105
 106 ;;; Sentinel nodes in the basic block sequence of nodes.
 107 (defstruct (block-entry (:include node)))
 108 (defstruct (block-exit (:include node)))
 109
 110 ;;; A reference to a leaf (variable, constant and functions). The
 111 ;;; meaning of this node is leaving the leaf into the lvar of the
 112 ;;; node.
 113 (defstruct (ref (:include node))
 114   leaf)
 115
 116 ;;; An assignation of the LVAR VALUE into the var VARIABLE.
 117 (defstruct (assignment (:include node))
 118   variable
 119   value)
 120
 121 ;;; A base node to function calls with a list of lvar as ARGUMENTS.
 122 (defstruct (combination (:include node) (:constructor))
 123   arguments)
 124
 125 ;;; A function call to the ordinary Lisp function in the lvar FUNCTION.
 126 (defstruct (call (:include combination))
 127   function)
 128
 129 ;;; A function call to the primitive FUNCTION.
 130 (defstruct (primitive-call (:include combination))
 131   function)
 132
 133
 134 ;;; A conditional branch. If the LVAR is not NIL, then we will jump to
 135 ;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By
 136 ;;; definition, a conditional must appear at the end of a basic block.
 137 (defstruct (conditional (:include node))
 138   test
 139   consequent
 140   alternative)
 141
 142
 143
 144 ;;; Blocks are `basic block`. Basic blocks are organized as a control
 145 ;;; flow graph with some more information in omponents.
 146 (defstruct (basic-block
 147              (:conc-name "BLOCK-")
 148              (:constructor make-block)
 149              (:predicate block-p)
 150              (:print-object generic-printer))
 151   (id (generate-id 'basic-block))
 152   ;; List of successors and predecessors of this basic block. They are
 153   ;; null only for deleted blocks and component's entry and exit.
 154   succ pred
 155   ;; The sentinel nodes of the sequence.
 156   entry exit
 157   ;; The component where the basic block belongs to.
 158   component
 159   ;; A bit-vector representating the set of dominators. See the
 160   ;; function `compute-dominators' to know how to use it properly.
 161   dominators%
 162   ;; Arbitrary data which could be necessary to keep during IR
 163   ;; processing.
 164   data)
 165
 166 ;;; Sentinel nodes in the control flow graph of basic blocks.
 167 (defstruct (component-entry (:include basic-block)))
 168 (defstruct (component-exit (:include basic-block)))
 169
 170 ;;; Return T if B is an empty basic block and NIL otherwise.
 171 (defun empty-block-p (b)
 172   (block-exit-p (node-next (block-entry b))))
 173
 174 (defun boundary-block-p (block)
 175   (or (component-entry-p block)
 176       (component-exit-p block)))
 177
 178 ;;; Iterate across the nodes in a basic block forward.
 179 (defmacro do-nodes
 180     ((node block &optional result &key include-sentinel-p) &body body)
 181   `(do ((,node ,(if include-sentinel-p
 182                     `(block-entry ,block)
 183                     `(node-next (block-entry ,block)))
 184                (node-next ,node)))
 185        (,(if include-sentinel-p
 186              `(null ,node)
 187              `(block-exit-p ,node))
 188         ,result)
 189      ,@body))
 190
 191 ;;; Iterate across the nodes in a basic block backward.
 192 (defmacro do-nodes-backward
 193     ((node block &optional result &key include-sentinel-p) &body body)
 194   `(do ((,node ,(if include-sentinel-p
 195                     `(block-exit ,block)
 196                     `(node-prev (block-entry ,block)))
 197                (node-prev ,node)))
 198        (,(if include-sentinel-p
 199              `(null ,node)
 200              `(block-entry-p ,node))
 201         ,result)
 202      ,@body))
 203
 204 ;;; Link FROM and TO nodes together. FROM and TO must belong to the
 205 ;;; same basic block and appear in such order. The nodes between FROM
 206 ;;; and TO are discarded.
 207 (defun link-nodes (from to)
 208   (setf (node-next from) to
 209         (node-prev to) from)
 210   (values))
 211
 212
 213 ;;; Components are connected pieces of the control flow graph of
 214 ;;; basic blocks with some additional information. Components have
 215 ;;; well-defined entry and exit nodes. It is the toplevel
 216 ;;; organizational entity in the compiler. The IR translation result
 217 ;;; is accumulated into components incrementally.
 218 (defstruct (component (:print-object generic-printer))
 219   (id (generate-id 'component))
 220   name
 221   entry
 222   exit
 223   functions
 224   blocks)
 225
 226 ;;; The current component.
 227 (defvar *component*)
 228
 229 ;;; Create a new fresh empty basic block in the current component.
 230 (defun make-empty-block ()
 231   (let ((entry (make-block-entry))
 232         (exit (make-block-exit)))
 233     (link-nodes entry exit)
 234     (let ((block (make-block :entry entry :exit exit :component *component*)))
 235       (push block (component-blocks *component*))
 236       block)))
 237
 238 ;;; Create a new component with an empty basic block, ready to start
 239 ;;; conversion to IR. It returns the component and the basic block as
 240 ;;; multiple values.
 241 (defun make-empty-component (&optional name)
 242   (let ((*component* (make-component :name name)))
 243     (let ((entry (make-component-entry :component *component*))
 244           (exit (make-component-exit :component *component*))
 245           (block (make-empty-block)))
 246       (setf (block-succ entry) (list block)
 247             (block-pred exit)  (list block)
 248             (block-succ block) (list exit)
 249             (block-pred block) (list entry)
 250             (component-entry *component*) entry
 251             (component-exit  *component*) exit)
 252       (values *component* block))))
 253
 254 ;;; A few consistency checks in the IR useful for catching bugs.
 255 (defun check-ir-consistency (&optional (component *component*))
 256   (with-simple-restart (continue "Continue execution")
 257     (dolist (block (component-blocks component))
 258       (dolist (succ (block-succ block))
 259         (unless (find block (block-pred succ))
 260           (error "The block `~S' does not belong to the predecessors list of the its successor `~S'"
 261                  block succ))
 262         (unless (or (boundary-block-p succ) (find succ (component-blocks component)))
 263           (error "Block `~S' is reachable from its predecessor `~S' but it is not in the component `~S'"
 264                  succ block component)))
 265       (dolist (pred (block-pred block))
 266         (unless (find block (block-succ pred))
 267           (error "The block `~S' does not belong to the successors' list of its predecessor `~S'"
 268                  block pred))
 269         (unless (or (boundary-block-p pred) (find pred (component-blocks component)))
 270           (error "Block `~S' is reachable from its sucessor `~S' but it is not in the component `~S'"
 271                  pred block component))))))
 272
 273 ;;; Prepare a new component with a current empty block ready to start
 274 ;;; IR conversion bound in the current cursor. BODY is evaluated and
 275 ;;; the value of the last form is returned.
 276 (defmacro with-component-compilation ((&optional name) &body body)
 277   (with-gensyms (block)
 278     `(multiple-value-bind (*component* ,block)
 279          (make-empty-component ,name)
 280        (let ((*cursor* (cursor :block ,block)))
 281          ,@body))))
 282
 283 ;;; Call function for each reachable block in component in
 284 ;;; post-order. The consequences are unspecified if a block is
 285 ;;; FUNCTION modifies a block which has not been processed yet.
 286 (defun map-postorder-blocks (function component)
 287   (let ((seen nil))
 288     (labels ((compute-from (block)
 289                (unless (or (component-exit-p block) (find block seen))
 290                  (push block seen)
 291                  (dolist (successor (block-succ block))
 292                    (unless (component-exit-p block)
 293                      (compute-from successor)))
 294                  (funcall function block))))
 295       (compute-from (unlist (block-succ (component-entry component))))
 296       nil)))
 297
 298 ;;; Change all the predecessors of BLOCK to precede NEW-BLOCK
 299 ;;; instead. As consequence, BLOCK becomes unreachable.
 300 (defun replace-block (block new-block)
 301   (let ((predecessors (block-pred block)))
 302     (setf (block-pred block) nil)
 303     (dolist (pred predecessors)
 304       (pushnew pred (block-pred new-block))
 305       (setf (block-succ pred) (substitute new-block block (block-succ pred)))
 306       (unless (component-entry-p pred)
 307         (let ((last-node (node-prev (block-exit pred))))
 308           (when (conditional-p last-node)
 309             (macrolet ((replacef (place)
 310                          `(setf ,place (if (eq block ,place) new-block ,place))))
 311               (replacef (conditional-consequent last-node))
 312               (replacef (conditional-alternative last-node)))))))))
 313
 314 (defun delete-block (block)
 315   (when (boundary-block-p block)
 316     (error "Cannot delete entry or exit basic blocks."))
 317   (unless (singlep (block-succ block))
 318     (error "Cannot delete a basic block with multiple successors."))
 319   (let ((successor (unlist (block-succ block))))
 320     (replace-block block successor)
 321     ;; At this point, block is unreachable, however we could have
 322     ;; backreferences to it from its successors. Let's get rid of
 323     ;; them.
 324     (setf (block-pred successor) (remove block (block-pred successor)))
 325     (setf (block-succ block) nil)))
 326
 327
 328 ;;;; Cursors
 329 ;;;;
 330 ;;;; A cursor is a point between two nodes in some basic block in the
 331 ;;;; IR representation where manipulations can take place, similarly
 332 ;;;; to the cursors in text editing.
 333 ;;;;
 334 ;;;; Cursors cannot point to special component's entry and exit basic
 335 ;;;; blocks or after a conditional node. Conveniently, the `cursor'
 336 ;;;; function will signal an error if the cursor is not positioned
 337 ;;;; correctly, so the rest of the code does not need to check once
 338 ;;;; and again.
 339
 340 (defstruct cursor
 341   block next)
 342
 343 ;;; The current cursor. It is the default cursor for many functions
 344 ;;; which work on cursors.
 345 (defvar *cursor*)
 346
 347 ;;; Return the current basic block. It is to say, the basic block
 348 ;;; where the current cursor is pointint.
 349 (defun current-block ()
 350   (cursor-block *cursor*))
 351
 352 ;;; Create a cursor which points to the basic block BLOCK. If omitted,
 353 ;;; then the current block is used.
 354 ;;;
 355 ;;; The keywords AFTER and BEFORE specify the cursor will point after (or
 356 ;;; before) that node respectively. If none is specified, the cursor is
 357 ;;; created before the exit node in BLOCK. An error is signaled if both
 358 ;;; keywords are specified inconsistently, or if the nodes do not belong
 359 ;;; to BLOCK.
 360 ;;;
 361 ;;; AFTER and BEFORE could also be the special values :ENTRY and :EXIT,
 362 ;;; which stand for the entry and exit nodes of the block respectively.
 363 (defun cursor (&key (block (current-block))
 364                  (before nil before-p)
 365                  (after nil after-p))
 366   (when (boundary-block-p block)
 367     (error "Invalid cursor on special entry/exit basic block."))
 368   ;; Handle special values :ENTRY and :EXIT.
 369   (flet ((node-designator (x)
 370            (case x
 371              (:entry (block-entry block))
 372              (:exit  (block-exit block))
 373              (t x))))
 374     (setq before (node-designator before))
 375     (setq after  (node-designator after)))
 376   (let* ((next (or before (and after (node-next after)) (block-exit block)))
 377          (cursor (make-cursor :block block :next next)))
 378     (flet ((out-of-range-cursor ()
 379              (error "Out of range cursor."))
 380            (ambiguous-cursor ()
 381              (error "Ambiguous cursor specified between two non-adjacent nodes.")))
 382       (when (conditional-p (node-prev next))
 383         (error "Invalid cursor after conditional node."))
 384       (when (or (null next) (block-entry-p next))
 385         (out-of-range-cursor))
 386       (when (and before-p after-p (not (eq after before)))
 387         (ambiguous-cursor))
 388       (do-nodes-backward (node block (out-of-range-cursor) :include-sentinel-p t)
 389         (when (eq next node) (return))))
 390     cursor))
 391
 392 ;;; Accept a cursor specification just as described in `cursor'
 393 ;;; describing a position in the IR and modify destructively the
 394 ;;; current cursor to point there.
 395 (defun set-cursor (&rest cursor-spec)
 396   (let ((newcursor (apply #'cursor cursor-spec)))
 397     (setf (cursor-block *cursor*) (cursor-block newcursor))
 398     (setf (cursor-next *cursor*) (cursor-next newcursor))
 399     *cursor*))
 400
 401 ;;; Insert NODE at cursor.
 402 (defun insert-node (node &optional (cursor *cursor*))
 403   (link-nodes (node-prev (cursor-next cursor)) node)
 404   (link-nodes node (cursor-next cursor))
 405   t)
 406
 407 ;;; Split the block at CURSOR. The cursor will point to the end of the
 408 ;;; first basic block. Return the three basic blocks as multiple
 409 ;;; values.
 410 (defun split-block (&optional (cursor *cursor*))
 411   ;; <aaaaa|zzzzz>  ==>  <aaaaa|>--<zzzzz>
 412   (let* ((block (cursor-block cursor))
 413          (newexit (make-block-exit))
 414          (newentry (make-block-entry))
 415          (exit (block-exit block))
 416          (newblock (make-block :entry newentry
 417                                :exit exit
 418                                :pred (list block)
 419                                :succ (block-succ block)
 420                                :component *component*)))
 421     (insert-node newexit)
 422     (insert-node newentry)
 423     (setf (node-next newexit)  nil)
 424     (setf (node-prev newentry) nil)
 425     (setf (block-exit block) newexit)
 426     (setf (block-succ block) (list newblock))
 427     (dolist (succ (block-succ newblock))
 428       (setf (block-pred succ) (substitute newblock block (block-pred succ))))
 429     (set-cursor :block block :before newexit)
 430     (push newblock (component-blocks *component*))
 431     newblock))
 432
 433 ;;; Split the block at CURSOR if it is in the middle of it. The cursor
 434 ;;; will point to the end of the first basic block. Return the three
 435 ;;; basic blocks as multiple values.
 436 (defun maybe-split-block (&optional (cursor *cursor*))
 437   ;; If we are converting IR into the end of the basic block, it's
 438   ;; fine, we don't need to do anything.
 439   (unless (block-exit-p (cursor-next cursor))
 440     (split-block cursor)))
 441
 442
 443 ;;;; Lexical environment
 444 ;;;;
 445 ;;;; It keeps an association between names and the IR entities. It is
 446 ;;;; used to guide the translation from the Lisp source code to the
 447 ;;;; intermediate representation.
 448
 449 (defstruct binding
 450   name namespace type value)
 451
 452 (defvar *lexenv* nil)
 453
 454 (defun find-binding (name namespace)
 455   (find-if (lambda (b)
 456              (and (eq (binding-name b) name)
 457                   (eq (binding-namespace b) namespace)))
 458            *lexenv*))
 459
 460 (defun push-binding (name namespace value &optional type)
 461   (push (make-binding :name name
 462                       :namespace namespace
 463                       :type type
 464                       :value value)
 465         *lexenv*))
 466
 467
 468 ;;;; IR Translation
 469 ;;;;
 470 ;;;; This code covers the translation from Lisp source code to the
 471 ;;;; intermediate representation. The main entry point function to do
 472 ;;;; that is the `ir-convert' function, which dispatches to IR
 473 ;;;; translators. This function ss intended to do the initial
 474 ;;;; conversion as well as insert new IR code during optimizations.
 475
 476 ;;; A alist of IR translator functions.
 477 (defvar *ir-translator* nil)
 478
 479 ;;; Define a IR translator for NAME. LAMBDA-LIST is used to
 480 ;;; destructure the arguments of the form. Calling the local function
 481 ;;; `result-lvar' you can get the LVAR where the compilation of the
 482 ;;; expression should store the result of the evaluation.
 483 ;;;
 484 ;;; The cursor is granted to be at the end of a basic block with a
 485 ;;; unique successor, and so it should be when the translator returns.
 486 (defmacro define-ir-translator (name lambda-list &body body)
 487   (check-type name symbol)
 488   (let ((fname (intern (format nil "IR-CONVERT-~a" (string name)))))
 489     (with-gensyms (result form)
 490       `(progn
 491          (defun ,fname (,form ,result)
 492            (flet ((result-lvar () ,result))
 493              (declare (ignorable (function result-lvar)))
 494              (destructuring-bind ,lambda-list ,form
 495                ,@body)))
 496          (push (cons ',name #',fname) *ir-translator*)))))
 497
 498 ;;; Return the unique successor of the current block. If it is not
 499 ;;; unique signal an error.
 500 (defun next-block ()
 501   (unlist (block-succ (current-block))))
 502
 503 ;;; Set the next block of the current one.
 504 (defun (setf next-block) (new-value)
 505   (let ((block (current-block)))
 506     (dolist (succ (block-succ block))
 507       (setf (block-pred succ) (remove block (block-pred succ))))
 508     (setf (block-succ block) (list new-value))
 509     (push block (block-pred new-value))
 510     new-value))
 511
 512 (defun ir-convert-constant (form result)
 513   (let* ((leaf (make-constant :value form)))
 514     (insert-node (make-ref :leaf leaf :lvar result))))
 515
 516 (define-ir-translator quote (form)
 517   (ir-convert-constant form (result-lvar)))
 518
 519 (define-ir-translator setq (variable value)
 520   (let ((b (find-binding variable 'variable)))
 521     (cond
 522       (b
 523        (let ((var (make-var :name variable))
 524              (value-lvar (make-lvar)))
 525          (ir-convert value value-lvar)
 526          (let ((assign (make-assignment :variable var :value value-lvar :lvar (result-lvar))))
 527            (insert-node assign))))
 528       (t
 529        (ir-convert `(set ',variable ,value) (result-lvar))))))
 530
 531 (define-ir-translator progn (&body body)
 532   (mapc #'ir-convert (butlast body))
 533   (ir-convert (car (last body)) (result-lvar)))
 534
 535 (define-ir-translator if (test then &optional else)
 536   ;; It is the schema of how the basic blocks will look like
 537   ;;
 538   ;;              / ..then.. \
 539   ;;  <aaaaXX> --<            >-- <|> -- <zzzz>
 540   ;;              \ ..else.. /
 541   ;;
 542   ;; Note that is important to leave the cursor in an empty basic
 543   ;; block, as zzz could be the exit basic block of the component,
 544   ;; which is an invalid position for a cursor.
 545   (let ((test-lvar (make-lvar))
 546         (then-block (make-empty-block))
 547         (else-block (make-empty-block))
 548         (join-block (make-empty-block)))
 549     (ir-convert test test-lvar)
 550     (insert-node (make-conditional :test test-lvar :consequent then-block :alternative else-block))
 551     (let* ((block (current-block))
 552            (tail-block (next-block)))
 553       ;; Link together the different created basic blocks.
 554       (setf (block-succ block)      (list else-block then-block)
 555             (block-pred else-block) (list block)
 556             (block-pred then-block) (list block)
 557             (block-succ then-block) (list join-block)
 558             (block-succ else-block) (list join-block)
 559             (block-pred join-block) (list else-block then-block)
 560             (block-succ join-block) (list tail-block)
 561             (block-pred tail-block) (substitute join-block block (block-pred tail-block))))
 562     ;; Convert he consequent and alternative forms and update cursor.
 563     (ir-convert then (result-lvar) (cursor :block then-block))
 564     (ir-convert else (result-lvar) (cursor :block else-block))
 565     (set-cursor :block join-block)))
 566
 567 (define-ir-translator block (name &body body)
 568   (let ((new (split-block)))
 569     (push-binding name 'block (cons (next-block) (result-lvar)))
 570     (ir-convert `(progn ,@body) (result-lvar))
 571     (set-cursor :block new)))
 572
 573 (define-ir-translator return-from (name &optional value)
 574   (let ((binding
 575          (or (find-binding name 'block)
 576              (error "Tried to return from unknown block `~S' name" name))))
 577     (destructuring-bind (jump-block . lvar)
 578         (binding-value binding)
 579       (ir-convert value lvar)
 580       (setf (next-block) jump-block)
 581       ;; This block is really unreachable, even if the following code
 582       ;; is labelled in a tagbody, as tagbody will create a new block
 583       ;; for each label. However, we have to leave the cursor
 584       ;; somewhere to convert new input.
 585       (let ((dummy (make-empty-block)))
 586         (set-cursor :block dummy)))))
 587
 588 (define-ir-translator tagbody (&rest statements)
 589   (flet ((go-tag-p (x)
 590            (or (integerp x) (symbolp x))))
 591     (let* ((tags (remove-if-not #'go-tag-p statements))
 592            (tag-blocks nil))
 593       ;; Create a chain of basic blocks for the tags, recording each
 594       ;; block in a alist in TAG-BLOCKS.
 595       (let ((*cursor* *cursor*))
 596         (dolist (tag tags)
 597           (setq *cursor* (cursor :block (split-block)))
 598           (push-binding tag 'tag (current-block))
 599           (if (assoc tag tag-blocks)
 600               (error "Duplicated tag `~S' in tagbody." tag)
 601               (push (cons tag (current-block)) tag-blocks))))
 602       ;; Convert the statements into the correct block.
 603       (dolist (stmt statements)
 604         (if (go-tag-p stmt)
 605             (set-cursor :block (cdr (assoc stmt tag-blocks)))
 606             (ir-convert stmt))))))
 607
 608 (define-ir-translator go (label)
 609   (let ((tag-binding
 610          (or (find-binding label 'tag)
 611              (error "Unable to jump to the label `~S'" label))))
 612     (setf (next-block) (binding-value tag-binding))
 613     ;; Unreachable block.
 614     (let ((dummy (make-empty-block)))
 615       (set-cursor :block dummy))))
 616
 617
 618 (defun ir-convert-functoid (result name arguments &rest body)
 619   (let ((component)
 620         (return-lvar (make-lvar)))
 621     (with-component-compilation (name)
 622       (ir-convert `(progn ,@body) return-lvar)
 623       (ir-normalize)
 624       (setq component *component*))
 625     (let ((functional
 626            (make-functional
 627             :name name
 628             :arguments arguments
 629             :component component
 630             :return-lvar return-lvar)))
 631       (push functional (component-functions *component*))
 632       (insert-node (make-ref :leaf functional :lvar result)))))
 633
 634 (define-ir-translator function (name)
 635   (if (atom name)
 636       (ir-convert `(symbol-function ,name) (result-lvar))
 637       (ecase (car name)
 638         ((lambda named-lambda)
 639          (let ((desc (cdr name)))
 640            (when (eq 'lambda (car name))
 641              (push nil desc))
 642            (apply #'ir-convert-functoid (result-lvar) desc)))
 643         (setf))))
 644
 645 (defun ir-convert-var (form result)
 646   (let ((binds (find-binding form 'variable)))
 647     (if binds
 648         (insert-node (make-ref :leaf (binding-value binds) :lvar result))
 649         (ir-convert `(symbol-value ',form) result))))
 650
 651 (defun ir-convert-call (form result)
 652   (destructuring-bind (function &rest args) form
 653     (let ((func-lvar (make-lvar))
 654           (args-lvars nil))
 655       ;; Argument list
 656       (dolist (arg args)
 657         (let ((arg-lvar (make-lvar)))
 658           (push arg-lvar args-lvars)
 659           (ir-convert arg arg-lvar)))
 660       (setq args-lvars (reverse args-lvars))
 661       ;; Funcall
 662       (if (find-primitive function)
 663           (insert-node (make-primitive-call
 664                         :function (find-primitive function)
 665                         :arguments args-lvars
 666                         :lvar result))
 667           (progn
 668             (ir-convert `(symbol-function ,function) func-lvar)
 669             (insert-node (make-call :function func-lvar
 670                                     :arguments args-lvars
 671                                     :lvar result)))))))
 672
 673 ;;; Convert the Lisp expression FORM, it may create new basic
 674 ;;; blocks. RESULT is the lvar representing the result of the
 675 ;;; computation or null if the value should be discarded. The IR is
 676 ;;; inserted at *CURSOR*.
 677 (defun ir-convert (form &optional result (*cursor* *cursor*))
 678   ;; Rebinding the lexical environment here we make sure that the
 679   ;; lexical information introduced by FORM is just available for
 680   ;; subforms.
 681   (let ((*lexenv* *lexenv*))
 682     ;; Possibly create additional blocks in order to make sure the
 683     ;; cursor is at end the end of a basic block.
 684     (maybe-split-block)
 685     (cond
 686       ((atom form)
 687        (cond
 688          ((symbolp form)
 689           (ir-convert-var form result))
 690          (t
 691           (ir-convert-constant form result))))
 692       (t
 693        (destructuring-bind (op &rest args) form
 694          (let ((translator (cdr (assoc op *ir-translator*))))
 695            (if translator
 696                (funcall translator args result)
 697                (ir-convert-call form result))))))
 698     (values)))
 699
 700
 701 ;;;; IR Normalization
 702 ;;;;
 703 ;;;; IR as generated by `ir-convert' or after some transformations is
 704 ;;;; not appropiated. Here, we remove unreachable and empty blocks and
 705 ;;;; coallesce blocks when it is possible.
 706
 707 ;;; Try to coalesce BLOCK with the successor if it is unique and block
 708 ;;; is its unique predecessor.
 709 (defun maybe-coalesce-block (block)
 710   (when (singlep (block-succ block))
 711     (let ((succ (first (block-succ block))))
 712       (when (and (not (component-exit-p succ)) (singlep (block-pred succ)))
 713         (link-nodes (node-prev (block-exit block))
 714                     (node-next (block-entry succ)))
 715         (setf (block-exit block) (block-exit succ))
 716         (setf (block-succ block) (block-succ succ))
 717         (dolist (next (block-succ succ))
 718           (setf (block-pred next) (substitute block succ (block-pred next))))
 719         (setf (block-succ succ) nil
 720               (block-pred succ) nil)
 721         t))))
 722
 723 ;;; Normalize a component. This function must be called after a batch
 724 ;;; of modifications to the flowgraph of the component to make sure it
 725 ;;; is a valid input for the possible optimizations and the backend.
 726 (defun ir-normalize (&optional (component *component*))
 727   ;; Initialize blocks as unreachables and remove empty basic blocks.
 728   (dolist (block (component-blocks component))
 729     (setf (block-data block) 'unreachable))
 730   ;; Coalesce and mark blocks as reachable.
 731   (map-postorder-blocks
 732    (lambda (block)
 733      (maybe-coalesce-block block)
 734      (setf (block-data block) 'reachable))
 735    component)
 736   (let ((block-list nil))
 737     (dolist (block (component-blocks component))
 738       (cond
 739         ;; If the block is unreachable, but it is predeces a reachable
 740         ;; one, then break the link between them. So we discard it
 741         ;; from the flowgraph.
 742         ((eq (block-data block) 'unreachable)
 743          (setf (block-succ block) nil)
 744          (dolist (succ (block-succ block))
 745            (when (eq (block-data succ) 'reachable)
 746              (remove block (block-pred succ)))))
 747         ;; Delete empty blocks
 748         ((empty-block-p block)
 749          (delete-block block))
 750         ;; The rest of blocks remain in the component.
 751         (t
 752          (push block block-list))))
 753     (setf (component-blocks component) block-list))
 754   (check-ir-consistency))
 755
 756
 757 ;;;; IR Analysis
 758 ;;;;
 759 ;;;; Once IR conversion has been finished. We do some analysis of the
 760 ;;;; component to produce information which is useful for both
 761 ;;;; optimizations and code generation. Indeed, we provide some
 762 ;;;; abstractions to use this information.
 763
 764 (defun compute-reverse-post-order (component)
 765   (let ((output nil))
 766     (flet ((add-block-to-list (block)
 767              (push block output)))
 768       (map-postorder-blocks #'add-block-to-list component))
 769     (setf (component-blocks component) output)))
 770
 771 ;;; Iterate across blocks in COMPONENT in reverse post order.
 772 (defmacro do-blocks-forward ((block component &optional result) &body body)
 773   `(dolist (,block (component-blocks ,component) ,result)
 774      ,@body))
 775 ;;; Iterate across blocks in COMPONENT in post order.
 776 (defmacro do-blocks-backward ((block component &optional result) &body body)
 777   `(dolist (,block (reverse (component-blocks ,component)) ,result)
 778      ,@body))
 779
 780
 781 (defun compute-dominators (component)
 782   ;; Initialize the dominators of the entry to the component to be
 783   ;; empty and the power set of the set of blocks for proper basic
 784   ;; blocks in the component.
 785   (let ((n (length (component-blocks component))))
 786     ;; The component entry special block has not predecessors in the
 787     ;; set of (proper) basic blocks.
 788     (setf (block-dominators% (component-entry component))
 789           (make-array n :element-type 'bit :initial-element 0))
 790     (dolist (block (component-blocks component))
 791       (setf (block-dominators% block) (make-array n :element-type 'bit :initial-element 1))))
 792   ;; Iterate across the blocks in the component removing non domintors
 793   ;; until it reaches a fixed point.
 794   (do ((i 0 0)
 795        (iteration 0 (1+ iteration))
 796        (changes t))
 797       ((not changes))
 798     (setf changes nil)
 799     (do-blocks-forward (block component)
 800       (let ((new (reduce #'bit-and (mapcar #'block-dominators% (block-pred block)))))
 801         (format t "Dominators for ~a is ~S~%" (block-id block) new)
 802         (setf (aref new i) 1)
 803         (setf changes (or changes (not (equal new (block-dominators% block)))))
 804         (setf (block-dominators% block) new)
 805         (incf i)))))
 806
 807
 808 ;;;; IR Debugging
 809 ;;;;
 810 ;;;; This section provides a function `/print' which write a textual
 811 ;;;; representation of a component to the standard output. Also, a
 812 ;;;; `/ir' macro is provided, which takes a form, convert it to IR and
 813 ;;;; then print the component as above.  They are useful commands if
 814 ;;;; you are hacking the front-end of the compiler.
 815 ;;;;
 816
 817 (defun format-block-name (block)
 818   (cond
 819     ((eq block (unlist (block-succ (component-entry (block-component block)))))
 820      (format nil "ENTRY-~a" (component-id (block-component block))))
 821     ((component-exit-p block)
 822      (format nil "EXIT-~a" (component-id (block-component block))))
 823     (t
 824      (format nil "BLOCK ~a" (block-id block)))))
 825
 826
 827 (defun print-node (node)
 828   (when (node-lvar node)
 829     (format t "$~a = " (lvar-id (node-lvar node))))
 830   (cond
 831     ((ref-p node)
 832      (let ((leaf (ref-leaf node)))
 833        (cond
 834          ((var-p leaf)
 835           (format t "~a" (var-name leaf)))
 836          ((constant-p leaf)
 837           (format t "'~s" (constant-value leaf)))
 838          ((functional-p leaf)
 839           (format t "#<function ~a>" (functional-name leaf))))))
 840     ((assignment-p node)
 841      (format t "set ~a $~a"
 842              (var-name (assignment-variable node))
 843              (lvar-id (assignment-value node))))
 844     ((primitive-call-p node)
 845      (format t "primitive ~a" (primitive-name (primitive-call-function node)))
 846      (dolist (arg (primitive-call-arguments node))
 847        (format t " $~a" (lvar-id arg))))
 848     ((call-p node)
 849      (format t "call $~a" (lvar-id (call-function node)))
 850      (dolist (arg (call-arguments node))
 851        (format t " $~a" (lvar-id arg))))
 852     ((conditional-p node)
 853      (format t "if $~a then ~a else ~a~%"
 854              (lvar-id (conditional-test node))
 855              (format-block-name (conditional-consequent node))
 856              (format-block-name (conditional-alternative node))))
 857     (t
 858      (error "`print-node' does not support printing ~S as a node." node)))
 859   (terpri))
 860
 861 (defun print-block (block)
 862   (write-line (format-block-name block))
 863   (do-nodes (node block)
 864     (print-node node))
 865   (when (singlep (block-succ block))
 866     (format t "GO ~a~%~%" (format-block-name (unlist (block-succ block))))))
 867
 868 (defun /print (component &optional (stream *standard-output*))
 869   (format t ";;; COMPONENT ~a (~a) ~%~%" (component-name component) (component-id component))
 870   (let ((*standard-output* stream))
 871     (do-blocks-forward (block component)
 872       (print-block block)))
 873   (format t ";;; END COMPONENT ~a ~%~%" (component-name component))
 874   (let ((*standard-output* stream))
 875     (dolist (func (component-functions component))
 876       (/print (functional-component func)))))
 877
 878 ;;; Translate FORM into IR and print a textual repreresentation of the
 879 ;;; component.
 880 (defun convert-toplevel-and-print (form)
 881   (let ((*counter-alist* nil))
 882     (with-component-compilation ('toplevel)
 883       (ir-convert form (make-lvar :id "out"))
 884       (ir-normalize)
 885       (compute-reverse-post-order *component*)
 886       (/print *component*)
 887       *component*)))
 888
 889 (defmacro /ir (form)
 890   `(convert-toplevel-and-print ',form))
 891
 892
 893
 894 ;;;; Primitives
 895 ;;;;
 896 ;;;; Primitive functions are a set of functions provided by the
 897 ;;;; compiler. They cannot usually be written in terms of other
 898 ;;;; functions. When the compiler tries to compile a function call, it
 899 ;;;; looks for a primitive function firstly, and if it is found and
 900 ;;;; the declarations allow it, a primitive call is inserted in the
 901 ;;;; IR. The back-end of the compiler knows how to compile primitive
 902 ;;;; calls.
 903 ;;;;
 904
 905 (defvar *primitive-function-table* nil)
 906
 907 (defstruct primitive
 908   name)
 909
 910 (defmacro define-primitive (name args &body body)
 911   (declare (ignore args body))
 912   `(push (make-primitive :name ',name)
 913          *primitive-function-table*))
 914
 915 (defun find-primitive (name)
 916   (find name *primitive-function-table* :key #'primitive-name))
 917
 918 (define-primitive symbol-function (symbol))
 919 (define-primitive symbol-value (symbol))
 920 (define-primitive set (symbol value))
 921 (define-primitive fset (symbol value))
 922
 923 (define-primitive + (&rest numbers))
 924 (define-primitive - (number &rest other-numbers))
 925
 926 (define-primitive consp (x))
 927 (define-primitive cons (x y))
 928 (define-primitive car (x))
 929 (define-primitive cdr (x))
 930
 931
 932 ;;; compiler.lisp ends here