3 ;; Copyright (C) 2013 David Vazquez
5 ;; JSCL is free software: you can redistribute it and/or
6 ;; modify it under the terms of the GNU General Public License as
7 ;; published by the Free Software Foundation, either version 3 of the
8 ;; License, or (at your option) any later version.
10 ;; JSCL is distributed in the hope that it will be useful, but
11 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; General Public License for more details.
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with JSCL. If not, see <http://www.gnu.org/licenses/>.
25 ;;;; Random Common Lisp code useful to use here and there.
27 (defmacro with-gensyms ((&rest vars) &body body)
28 `(let ,(mapcar (lambda (var) `(,var (gensym ,(string var)))) vars)
32 (and (consp x) (null (cdr x))))
39 ;;;; Intermediate representation structures
41 ;;;; This intermediate representation (IR) is a simplified version of
42 ;;;; the first intermediate representation that you will find if you
43 ;;;; have a look at the source code of SBCL. Some of its terminology is
44 ;;;; reused, but some is changed, so be careful if you assume you know
45 ;;;; what something is just because you know the name.
47 ;;;; Computations are represented by `node'. Nodes are grouped
48 ;;;; sequentially into `basic-block'. It is a plain representation
49 ;;;; rather than a nested one. Computations take data and produce a
50 ;;;; value. Both kinds of data transfer are represented by `lvar'.
54 ;;; A (lexical) variable. Special variables have no special
55 ;;; representation in the IR. They are handled by the primitive
56 ;;; functions `%symbol-function' and `%symbol-value'.
57 (defstruct (var (:include leaf))
58 ;; The symbol which names this variable in the source code.
61 ;;; A literal Lisp object. It usually comes from a quoted expression.
62 (defstruct (constant (:include leaf))
66 ;;; A lambda expression. Why do we name it `functional'? Well,
67 ;;; function is reserved by the ANSI, isn't it?
68 (defstruct (functional (:include leaf))
69 ;; The symbol which names this function in the source code or null
70 ;; if we do not know or it is an anonymous function.
76 ;;; An abstract place where the result of a computation is stored and
77 ;;; it can be referenced from other nodes, so lvars are responsible
78 ;;; for keeping the necessary information of the nested structure of
79 ;;; the code in this plain representation.
83 ;;; A base structure for every single computation. Most of the
84 ;;; computations are valued.
86 ;; The next and the prev slots are the next nodes and the previous
87 ;; node in the basic block sequence respectively.
89 ;; Lvar which stands for the result of the computation of this node.
;;; Sentinel node stored in the entry slot of a basic block: the node
;;; sequence of the block starts right after it. It includes `node',
;;; so it carries the same slots as any other node.
(defstruct (block-entry
             (:include node)))
;;; Sentinel node stored in the exit slot of a basic block: the node
;;; sequence of the block ends right before it. It includes `node'.
(defstruct (block-exit
             (:include node)))
96 ;;; A reference to a leaf (variable, constant and functions). The
97 ;;; meaning of this node is leaving the leaf into the lvar of the
99 (defstruct (ref (:include node))
102 ;;; An assignment of the lvar VALUE to the var VARIABLE.
103 (defstruct (assignment (:include node))
107 ;;; Call the lvar FUNCTION with a list of lvars as ARGUMENTS.
108 (defstruct (call (:include node))
112 ;;; A conditional branch. If the LVAR is not NIL, then we will jump to
113 ;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By
114 ;;; definition, a conditional must appear at the end of a basic block.
115 (defstruct (conditional (:include node))
121 ;;; Blocks are `basic-block's. Basic blocks are organized as a control
122 ;;; flow graph, with some more information kept in components.
123 (defstruct (basic-block
124 (:conc-name "BLOCK-")
125 (:constructor make-block)
126 (:predicate block-p))
128 ;; List of successors and predecessors of this basic block.
130 ;; The sentinel nodes of the sequence.
;;; Sentinel basic block which stands for the entry of a component in
;;; the control flow graph. It includes `basic-block', so the block
;;; accessors apply to it as well.
(defstruct (component-entry
             (:include basic-block)))
;;; Sentinel basic block which stands for the exit of a component in
;;; the control flow graph. It includes `basic-block'.
(defstruct (component-exit
             (:include basic-block)))
;;; Build and return a new basic block which contains no nodes: its
;;; entry sentinel is linked directly to its exit sentinel.
(defun make-empty-block ()
  (let ((head (make-block-entry))
        (tail (make-block-exit)))
    ;; Chain the two sentinels to each other in both directions.
    (setf (node-next head) tail)
    (setf (node-prev tail) head)
    (make-block :entry head :exit tail)))
;;; Return true if the basic block B has no nodes, that is, if the
;;; node following its entry sentinel is already the exit sentinel.
;;; Return NIL otherwise.
(defun empty-block-p (b)
  (let* ((entry (block-entry b))
         (first-node (node-next entry)))
    (block-exit-p first-node)))
149 ;;; Iterate across the nodes in a basic block forward.
151 ((node block &optional result &key include-sentinel-p) &body body)
152 `(do ((,node ,(if include-sentinel-p
153 `(block-entry ,block)
154 `(node-next (block-entry ,block)))
156 (,(if include-sentinel-p
158 `(block-exit-p ,node))
162 ;;; Iterate across the nodes in a basic block backward.
163 (defmacro do-nodes-backward
164 ((node block &optional result &key include-sentinel-p) &body body)
165 `(do ((,node ,(if include-sentinel-p
167 `(node-prev (block-entry ,block)))
169 (,(if include-sentinel-p
171 `(block-entry-p ,node))
175 ;;; Link FROM and TO nodes together. FROM and TO must belong to the
176 ;;; same basic block and appear in such order. The nodes between FROM
177 ;;; and TO are discarded.
178 (defun link-nodes (from to)
179 (setf (node-next from) to
187 ;;;; A cursor is a point between two nodes in some basic block in the
188 ;;;; IR representation where manipulations can take place, similarly
189 ;;;; to the cursors in text editing.
191 ;;;; Cursors cannot point to special component's entry and exit basic
192 ;;;; blocks or after a conditional node. Conveniently, the `cursor'
193 ;;;; function will signal an error if the cursor is not positioned
194 ;;;; correctly, so the rest of the code does not need to check once
200 ;;; The current cursor. It is the default cursor for many functions
201 ;;; which work on cursors.
;;; Return the current basic block, that is to say, the basic block
;;; where the current cursor (`*cursor*') is pointing.
(defun current-block ()
  (let ((here *cursor*))
    (cursor-block here)))
209 ;;; Create a cursor which points to the basic block BLOCK. If omitted,
210 ;;; then the current block is used.
212 ;;; The keywords AFTER and BEFORE specify the cursor will point after (or
213 ;;; before) that node respectively. If none is specified, the cursor is
214 ;;; created before the exit node in BLOCK. An error is signaled if both
215 ;;; keywords are specified inconsistently, or if the nodes do not belong
218 ;;; AFTER and BEFORE could also be the special values :ENTRY and :EXIT,
219 ;;; which stand for the entry and exit nodes of the block respectively.
220 (defun cursor (&key (block (current-block))
221 (before nil before-p)
223 (when (or (component-entry-p block) (component-exit-p block))
224 (error "Invalid cursor on special entry/exit basic block."))
225 ;; Handle special values :ENTRY and :EXIT.
226 (flet ((node-designator (x)
228 (:entry (block-entry block))
229 (:exit (block-exit block))
231 (setq before (node-designator before))
232 (setq after (node-designator after)))
233 (let* ((next (or before (and after (node-next after)) (block-exit block)))
234 (cursor (make-cursor :block block :next next)))
235 (flet ((out-of-range-cursor ()
236 (error "Out of range cursor."))
238 (error "Ambiguous cursor specified between two non-adjacent nodes.")))
239 (when (conditional-p (node-prev next))
240 (error "Invalid cursor after conditional node."))
241 (when (or (null next) (block-entry-p next))
242 (out-of-range-cursor))
243 (when (and before-p after-p (not (eq after before)))
245 (do-nodes-backward (node block (out-of-range-cursor) :include-sentinel-p t)
246 (when (eq next node) (return))))
249 ;;; Accept a cursor specification just as described in `cursor'
250 ;;; describing a position in the IR and modify destructively the
251 ;;; current cursor to point there.
252 (defun set-cursor (&rest cursor-spec)
253 (let ((newcursor (apply #'cursor cursor-spec)))
254 (setf (cursor-block *cursor*) (cursor-block newcursor))
255 (setf (cursor-next *cursor*) (cursor-next newcursor))
258 ;;; Insert NODE at cursor.
259 (defun insert-node (node &optional (cursor *cursor*))
261 (link-nodes (node-prev (cursor-next cursor)) node)
262 (link-nodes node (cursor-next cursor))
265 ;;; Split the block at CURSOR. The cursor will point to the end of the
266 ;;; first basic block. Return the three basic blocks as multiple
268 (defun split-block (&optional (cursor *cursor*))
269 ;; <aaaaa|zzzzz> ==> <aaaaa|>--<zzzzz>
270 (let* ((block (cursor-block cursor))
271 (newexit (make-block-exit))
272 (newentry (make-block-entry))
273 (exit (block-exit block))
274 (newblock (make-block :entry newentry
277 :succ (block-succ block))))
278 (insert-node newexit)
279 (insert-node newentry)
280 (setf (node-next newexit) nil)
281 (setf (node-prev newentry) nil)
282 (setf (block-exit block) newexit)
283 (setf (block-succ block) (list newblock))
284 (dolist (succ (block-succ newblock))
285 (setf (block-pred succ) (substitute newblock block (block-pred succ))))
286 (set-cursor :block block :before newexit)
;;; Split the block at CURSOR, but only if the cursor is in the middle
;;; of it. When the node next to CURSOR is the exit sentinel, the
;;; cursor is already at the end of its basic block: nothing is done
;;; and NIL is returned. Otherwise, the values of `split-block' are
;;; returned.
(defun maybe-split-block (&optional (cursor *cursor*))
  (if (block-exit-p (cursor-next cursor))
      ;; Converting IR at the end of a basic block is fine as it is.
      nil
      (split-block cursor)))
301 ;;;; Components are connected pieces of the control flow graph of
302 ;;;; basic blocks with some additional information. Components have
303 ;;;; well-defined entry and exit nodes. It is the toplevel
304 ;;;; organizational entity in the compiler. The IR translation result
305 ;;;; is accumulated into components incrementally.
306 (defstruct (component #-jscl (:print-object print-component))
;;; Create a new component whose control flow graph is just
;;;
;;;    <component entry> -- <empty basic block> -- <component exit>
;;;
;;; ready to start the conversion to IR. Return two values: the
;;; component and the empty basic block.
(defun make-empty-component ()
  (let* ((entry (make-component-entry))
         (body (make-empty-block))
         (exit (make-component-exit)))
    ;; Edge from the component entry to the empty block.
    (setf (block-succ entry) (list body))
    (setf (block-pred body) (list entry))
    ;; Edge from the empty block to the component exit.
    (setf (block-succ body) (list exit))
    (setf (block-pred exit) (list body))
    (values (make-component :entry entry :exit exit)
            body)))
323 ;;; Return the list of blocks in COMPONENT, conveniently sorted.
324 (defun component-blocks (component)
327 (labels ((compute-rdfo-from (block)
328 (unless (or (component-exit-p block) (find block seen))
330 (dolist (successor (block-succ block))
331 (unless (component-exit-p block)
332 (compute-rdfo-from successor)))
333 (push block output))))
334 (compute-rdfo-from (unlist (block-succ (component-entry component))))
337 ;;; Iterate across different blocks in COMPONENT.
338 (defmacro do-blocks ((block component &optional result) &body body)
339 `(dolist (,block (component-blocks ,component) ,result)
342 (defmacro do-blocks-backward ((block component &optional result) &body body)
343 `(dolist (,block (reverse (component-blocks ,component)) ,result)
347 ;;; A few consistency checks in the IR useful for catching bugs.
348 (defun check-ir-consistency (component)
349 (with-simple-restart (continue "Continue execution")
350 (do-blocks (block component)
351 (dolist (succ (block-succ block))
352 (unless (find block (block-pred succ))
353 (error "The block `~S' does not belong to the predecessors list of the its successor `~S'"
356 (dolist (pred (block-pred block))
357 (unless (find block (block-succ pred))
358 (error "The block `~S' does not belong to the successors' list of its predecessor `~S'"
360 (block-id pred)))))))
363 ;;;; Lexical environment
365 ;;;; It keeps an association between names and the IR entities. It is
366 ;;;; used to guide the translation from the Lisp source code to the
367 ;;;; intermediate representation.
370 name namespace type value)
374 (defun find-binding (name namespace)
376 (and (eq (binding-name b) name)
377 (eq (binding-namespace b) namespace)))
380 (defun push-binding (name namespace value &optional type)
381 (push (make-binding :name name
390 ;;;; This code covers the translation from Lisp source code to the
391 ;;;; intermediate representation. The main entry point function to do
392 ;;;; that is the `ir-convert' function, which dispatches to IR
393 ;;;; translators. This function is intended to do the initial
394 ;;;; conversion as well as insert new IR code during optimizations.
396 ;;;; The function `ir-complete' will coalesce basic blocks in a
397 ;;;; component to generate proper maximal basic blocks.
399 ;;; The current component. We accumulate the results of the IR
400 ;;; conversion in this component.
;;; An alist of IR translators. Each entry is a cons whose car is the
;;; name of an operator and whose cdr is its translator function.
(defvar *ir-translator* '())
406 ;;; Define an IR translator for NAME. LAMBDA-LIST is used to
407 ;;; destructure the arguments of the form. Calling the local function
408 ;;; `result-lvar' you can get the LVAR where the compilation of the
409 ;;; expression should store the result of the evaluation.
411 ;;; The cursor is guaranteed to be at the end of a basic block with a
412 ;;; unique successor, and so it should be when the translator returns.
413 (defmacro define-ir-translator (name lambda-list &body body)
414 (check-type name symbol)
415 (let ((fname (intern (format nil "IR-CONVERT-~a" (string name))))
419 (defun ,fname (,form ,result)
420 (flet ((result-lvar () ,result))
421 (declare (ignorable (function result-lvar)))
422 (destructuring-bind ,lambda-list ,form
424 (push (cons ',name #',fname) *ir-translator*))))
426 ;;; Return the unique successor of the current block. If it is not
427 ;;; unique signal an error.
429 (unlist (block-succ (current-block))))
431 ;;; Set the next block of the current one.
432 (defun (setf next-block) (new-value)
433 (let ((block (current-block)))
434 (dolist (succ (block-succ block))
435 (setf (block-pred succ) (remove block (block-pred succ))))
436 (setf (block-succ block) (list new-value))
437 (push block (block-pred new-value))
;;; Convert the literal object FORM: insert at the current cursor a
;;; `ref' node to a new `constant' leaf whose value is FORM. RESULT is
;;; the lvar of the inserted node.
(defun ir-convert-constant (form result)
  (insert-node
   (make-ref :leaf (make-constant :value form)
             :lvar result)))
;;; (quote FORM): the quoted object FORM is converted as a constant
;;; whose reference is stored in the result lvar.
(define-ir-translator quote (form)
  (let ((target (result-lvar)))
    (ir-convert-constant form target)))
;;; (setq VARIABLE VALUE): convert VALUE into a fresh lvar and then
;;; insert an `assignment' node which stores that lvar into a var
;;; named VARIABLE. The lvar of the assignment node is the result
;;; lvar.
(define-ir-translator setq (variable value)
  (let* ((target (make-var :name variable))
         (source (make-lvar)))
    ;; The IR which computes VALUE must precede the assignment node.
    (ir-convert value source)
    (insert-node
     (make-assignment :variable target
                      :value source
                      :lvar (result-lvar)))))
;;; (progn . BODY): every form of BODY but the last one is converted
;;; without a result lvar, so its value is discarded. The last form is
;;; converted into the result lvar.
(define-ir-translator progn (&body body)
  (dolist (subform (butlast body))
    (ir-convert subform))
  (ir-convert (first (last body)) (result-lvar)))
(define-ir-translator if (test then &optional else)
  ;; Shape of the control flow graph once the form is converted:
  ;;
  ;;                / <then-block> \
  ;;   <current> --<                >-- <join-block> -- <tail-block>
  ;;                \ <else-block> /
  ;;
  ;; where <current> ends with the conditional node and <tail-block>
  ;; is the former successor of <current>.
  ;;
  ;; Note that it is important to leave the cursor in the empty
  ;; JOIN-BLOCK: TAIL-BLOCK could be the exit basic block of the
  ;; component, which is an invalid position for a cursor.
  (let ((condition (make-lvar))
        (then-block (make-empty-block))
        (else-block (make-empty-block))
        (join-block (make-empty-block)))
    (ir-convert test condition)
    (insert-node (make-conditional :test condition
                                   :consequent then-block
                                   :alternative else-block))
    (let ((branch-block (current-block))
          (tail-block (next-block)))
      ;; The branching block jumps to one of the two arms.
      (setf (block-succ branch-block) (list else-block then-block))
      (setf (block-pred else-block) (list branch-block))
      (setf (block-pred then-block) (list branch-block))
      ;; Both arms meet again in the join block.
      (setf (block-succ then-block) (list join-block))
      (setf (block-succ else-block) (list join-block))
      (setf (block-pred join-block) (list else-block then-block))
      ;; The join block takes the place of the branching block as a
      ;; predecessor of the old successor.
      (setf (block-succ join-block) (list tail-block))
      (setf (block-pred tail-block)
            (substitute join-block branch-block (block-pred tail-block))))
    ;; Convert the consequent and the alternative forms, each one into
    ;; its own block, and update the cursor.
    (ir-convert then (result-lvar) (cursor :block then-block))
    (ir-convert else (result-lvar) (cursor :block else-block))
    (set-cursor :block join-block)))
;;; (block NAME . BODY): bind NAME in the `block' namespace to a cons
;;; of the successor of the current block and the result lvar, and
;;; then convert BODY as an implicit progn into the result lvar.
(define-ir-translator block (name &body body)
  (let ((exit-point (cons (next-block) (result-lvar))))
    (push-binding name 'block exit-point))
  (ir-convert (cons 'progn body) (result-lvar)))
494 (define-ir-translator return-from (name &optional value)
496 (or (find-binding name 'block)
497 (error "Tried to return from unknown block `~S' name" name))))
498 (destructuring-bind (jump-block . lvar)
499 (binding-value binding)
500 (ir-convert value lvar)
501 (setf (next-block) jump-block)
502 ;; This block is really unreachable, even if the following code
503 ;; is labelled in a tagbody, as tagbody will create a new block
504 ;; for each label. However, we have to leave the cursor
505 ;; somewhere to convert new input.
506 (let ((dummy (make-empty-block)))
507 (set-cursor :block dummy)))))
509 (define-ir-translator tagbody (&rest statements)
511 (or (integerp x) (symbolp x))))
512 (let* ((tags (remove-if-not #'go-tag-p statements))
514 ;; Create a chain of basic blocks for the tags, recording each
515 ;; block in a alist in TAG-BLOCKS.
516 (let ((*cursor* *cursor*))
518 (set-cursor :block (split-block))
519 (push-binding tag 'tag (current-block))
520 (if (assoc tag tag-blocks)
521 (error "Duplicated tag `~S' in tagbody." tag)
522 (push (cons tag (current-block)) tag-blocks))))
523 ;; Convert the statements into the correct block.
524 (dolist (stmt statements)
526 (set-cursor :block (cdr (assoc stmt tag-blocks)))
527 (ir-convert stmt))))))
529 (define-ir-translator go (label)
531 (or (find-binding label 'tag)
532 (error "Unable to jump to the label `~S'" label))))
533 (setf (next-block) (binding-value tag-binding))
534 ;; Unreachable block.
535 (let ((dummy (make-empty-block)))
536 (set-cursor :block dummy))))
;;; Convert a reference to the variable named FORM: insert at the
;;; current cursor a `ref' node to a new `var' leaf whose name is
;;; FORM. RESULT is the lvar of the inserted node.
(defun ir-convert-var (form result)
  (insert-node
   (make-ref :leaf (make-var :name form)
             :lvar result)))
543 (defun ir-convert-call (form result)
544 (destructuring-bind (function &rest args) form
545 (let ((func-lvar (make-lvar))
547 (when (symbolp function)
548 (ir-convert `(%symbol-function ,function) func-lvar))
550 (let ((arg-lvar (make-lvar)))
551 (push arg-lvar args-lvars)
552 (ir-convert arg arg-lvar)))
553 (setq args-lvars (reverse args-lvars))
554 (let ((call (make-call :function func-lvar :arguments args-lvars :lvar result)))
555 (insert-node call)))))
557 ;;; Convert the Lisp expression FORM, it may create new basic
558 ;;; blocks. RESULT is the lvar representing the result of the
559 ;;; computation or null if the value should be discarded. The IR is
560 ;;; inserted at *CURSOR*.
561 (defun ir-convert (form &optional result (*cursor* *cursor*))
562 ;; Rebinding the lexical environment here we make sure that the
563 ;; lexical information introduced by FORM is just available for
565 (let ((*lexenv* *lexenv*))
566 ;; Possibly create additional blocks in order to make sure the
567 ;; cursor is at end the end of a basic block.
573 (ir-convert-var form result))
575 (ir-convert-constant form result))))
577 (destructuring-bind (op &rest args) form
578 (let ((translator (cdr (assoc op *ir-translator*))))
580 (funcall translator args result)
581 (ir-convert-call form result))))))
585 ;;; Prepare a new component with a current empty block ready to start
586 ;;; IR conversion bound in the current cursor. BODY is evaluated and
587 ;;; the value of the last form is returned.
588 (defmacro with-component-compilation (&body body)
589 (let ((block (gensym)))
590 `(multiple-value-bind (*component* ,block)
591 (make-empty-component)
592 (let ((*cursor* (cursor :block ,block))
;;; Change all the predecessors of BLOCK to precede NEW-BLOCK instead.
;;;
;;; The predecessors of BLOCK are added to the predecessors of
;;; NEW-BLOCK, and BLOCK is replaced by NEW-BLOCK in the successors of
;;; each one of them. If such a predecessor ends with a conditional
;;; node, the branches of the conditional which jump to BLOCK are
;;; redirected to NEW-BLOCK too.
(defun replace-block (block new-block)
  (let ((preds (block-pred block)))
    (setf (block-pred new-block)
          (union (block-pred new-block) preds))
    (dolist (pred preds)
      (setf (block-succ pred)
            (substitute new-block block (block-succ pred)))
      ;; The component entry is skipped when looking for a trailing
      ;; conditional node.
      (unless (component-entry-p pred)
        (let ((tail (node-prev (block-exit pred))))
          (when (conditional-p tail)
            (when (eq block (conditional-consequent tail))
              (setf (conditional-consequent tail) new-block))
            (when (eq block (conditional-alternative tail))
              (setf (conditional-alternative tail) new-block))))))))
;;; Delete the empty basic block BLOCK from the control flow graph.
;;; The predecessors of BLOCK are redirected to its successor, which
;;; must be unique (see `unlist').
;;;
;;; An error is signaled if BLOCK is the entry or the exit basic block
;;; of a component, or if BLOCK is not empty.
(defun delete-empty-block (block)
  (when (or (component-entry-p block) (component-exit-p block))
    (error "Cannot delete entry or exit basic blocks."))
  (unless (empty-block-p block)
    (error "Block `~S' is not empty!" (block-id block)))
  (let ((succ (unlist (block-succ block))))
    ;; BLOCK is going away, so it must not remain in the predecessors
    ;; of its successor. `replace-block' only adds the predecessors
    ;; of BLOCK to that list; a stale entry would keep the list from
    ;; being a singleton and prevent `maybe-coalesce-block' from
    ;; coalescing the successor later.
    (setf (block-pred succ) (remove block (block-pred succ)))
    (replace-block block succ)))
617 ;;; Try to coalesce BLOCK with the successor if it is unique and block
618 ;;; is its unique predecessor.
619 (defun maybe-coalesce-block (block)
620 (when (singlep (block-succ block))
621 (let ((succ (first (block-succ block))))
622 (when (and (not (component-exit-p succ)) (singlep (block-pred succ)))
623 (link-nodes (node-prev (block-exit block))
624 (node-next (block-entry succ)))
625 (setf (block-succ block) (block-succ succ))
626 (dolist (next (block-succ succ))
627 (setf (block-pred next) (substitute block succ (block-pred next))))
;;; Tidy up the control flow graph of COMPONENT, which defaults to the
;;; current component. The blocks are visited in the order given by
;;; `do-blocks-backward'; each one is coalesced with its successor
;;; when `maybe-coalesce-block' allows it, and deleted if it is empty
;;; afterwards.
(defun ir-complete (&optional (component *component*))
  (do-blocks-backward (current component)
    (maybe-coalesce-block current)
    (if (empty-block-p current)
        (delete-empty-block current))))
639 (defun print-node (node)
640 (when (node-lvar node)
641 (format t "~a = " (lvar-id (node-lvar node))))
644 (let ((leaf (ref-leaf node)))
647 (format t "~a" (var-name leaf)))
649 (format t "'~s" (constant-value leaf)))
651 (format t "#<function ~a at ~a>"
652 (functional-name leaf)
653 (functional-entry-point leaf))))))
655 (format t "set ~a ~a"
656 (var-name (assignment-variable node))
657 (lvar-id (assignment-value node))))
659 (format t "call ~a" (lvar-id (call-function node)))
660 (dolist (arg (call-arguments node))
661 (format t " ~a" (lvar-id arg))))
662 ((conditional-p node)
663 (format t "if ~a ~a ~a"
664 (lvar-id (conditional-test node))
665 (block-id (conditional-consequent node))
666 (block-id (conditional-alternative node))))
668 (error "`print-node' does not support printing ~S as a node." node)))
671 (defun print-block (block)
672 (flet ((block-name (block)
674 ((and (singlep (block-pred block))
675 (component-entry-p (unlist (block-pred block))))
677 ((component-exit-p block)
679 (t (string (block-id block))))))
680 (format t "BLOCK ~a:~%" (block-name block))
681 (do-nodes (node block)
683 (when (singlep (block-succ block))
684 (format t "GO ~a~%" (block-name (first (block-succ block)))))
;;; Print every basic block of COMPONENT with `print-block'. The
;;; output goes to STREAM, which defaults to the standard output, by
;;; rebinding `*standard-output*' around the printing.
(defun print-component (component &optional (stream *standard-output*))
  (let ((*standard-output* stream))
    (do-blocks (each component)
      (print-block each))))
;;; Translate FORM into IR within a fresh component compilation and
;;; print a textual representation of the resulting component. The
;;; value of FORM is stored in an lvar whose id is "$out". Unless
;;; COMPLETE is NIL, `ir-complete' is run on the component before it
;;; is checked for consistency and printed.
(defun describe-ir (form &optional (complete t))
  (with-component-compilation
    (let ((out (make-lvar :id "$out")))
      (ir-convert form out))
    (if complete
        (ir-complete))
    (check-ir-consistency *component*)
    (print-component *component*)))
704 ;;; compiler.lisp ends here