3 ;; copyright (C) 2013 David Vazquez
5 ;; JSCL is free software: you can redistribute it and/or
6 ;; modify it under the terms of the GNU General Public License as
7 ;; published by the Free Software Foundation, either version 3 of the
8 ;; License, or (at your option) any later version.
10 ;; JSCL is distributed in the hope that it will be useful, but
11 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; General Public License for more details.
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with JSCL. If not, see <http://www.gnu.org/licenses/>.
25 (defmacro with-gensyms ((&rest vars) &body body)
26 `(let ,(mapcar (lambda (var) `(,var (gensym ,(string var)))) vars)
30 (and (consp x) (null (cdr x))))
36 ;;;; Lexical environment
38 ;;;; The Lexical environment comprises a list of bindings, which
39 ;;;; associates information to symbols. It tracks lexical variables,
40 ;;;; tags, local declarations and many other information in order to
41 ;;;; guide the compiler.
44 name type value declarations)
50 ;;;; Intermediate representation
52 ;;;; This intermediate representation (IR) is a simplified version of
53 ;;;; first intermediate representation what you will find if you have
54 ;;;; a you have the source code of SBCL. Some terminology is also
55 ;;;; used, but other is changed, so be careful if you assume you know
56 ;;;; what it is because you know the name.
59 ;;; A leaf stands for a leaf in the tree of computations. Lexical
60 ;;; variables, constants and literal functions are leafs. Leafs are
61 ;;; not nodes itself, a `ref' node will stands for putting a leaf into
62 ;;; a lvar, which can be used in computations.
65 ;;; Reference a lexical variable. Special variables have not a
66 ;;; representation in IR. They are handled via the primitive functions
67 ;;; `%symbol-function' and `%symbol-value'.
68 (defstruct (var (:include leaf))
69 ;; Name is the symbol used to identify this variable in the lexical
73 ;;; A constant value, mostly from a quoted form, but maybe introduced
74 ;;; in some pass of the compiler.
75 (defstruct (constant (:include leaf))
78 ;;; A literal function. Why do we use `functional' as name? Well,
79 ;;; function is taken, isn't it?
80 (defstruct (functional (:include leaf))
81 ;; The symbol which names this function in the source code.
83 ;; A list of lvars which are bound to the argument values in a call
86 ;; LVAR which contains the return values of the function.
88 ;; The basic block which contain the code which be executed firstly
89 ;; when you call this function.
93 ;;; Used to transfer data between the computations in the intermediate
94 ;;; representation. Each node is valued into a LVar. And nodes which
95 ;;; use resulting values from other nodes use such LVar.
99 ;;; A computation node. It represents a simple computation in the
100 ;;; intermediate representation. Nodes are grouped in basic blocks,
101 ;;; which are delimited by the special nodes `block-entry' and
102 ;;; `block-exit'. Resulting value of the node is stored in LVAR, which it
103 ;;; could be null if the value is discarded.
109 ;;; Sentinel nodes. No computation really, but they make easier to
110 ;;; manipulate the doubly linked-list.
111 (defstruct (block-entry (:include node)))
112 (defstruct (block-exit (:include node)))
114 ;;; A reference to a leaf.
115 (defstruct (ref (:include node))
118 ;;; An assignation of the LVAR VALUE into the var VARIABLE.
119 (defstruct (assignment (:include node))
123 ;;; Call the lvar FUNCTION with a list of lvars as ARGUMENTS.
124 (defstruct (call (:include node))
128 ;;; A conditional branch. If the LVAR is not NIL, then we will jump to
129 ;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By
130 ;;; definition, a conditional must appear at the end of a basic block.
131 (defstruct (conditional (:include node))
136 ;;; Blocks are `basic block', which is a maximal sequence of nodes
137 ;;; with an entry point and an exit. Basic blocks are organized as a
138 ;;; control flow graph with some more information in omponents.
139 (defstruct (basic-block
140 (:conc-name "BLOCK-")
141 (:constructor make-block)
142 (:predicate block-p))
149 (defstruct (component-entry (:include basic-block)))
150 (defstruct (component-exit (:include basic-block)))
152 (defun make-empty-block ()
153 (let ((entry (make-block-entry))
154 (exit (make-block-exit)))
155 (setf (node-next entry) exit
156 (node-prev exit) entry)
157 (make-block :entry entry :exit exit)))
159 (defun empty-block-p (b)
160 (block-exit-p (node-next (block-entry b))))
163 ((node block &optional result &key include-sentinel-p) &body body)
164 `(do ((,node ,(if include-sentinel-p
165 `(block-entry ,block)
166 `(node-next (block-entry ,block)))
168 (,(if include-sentinel-p
170 `(block-exit-p ,node))
174 (defmacro do-nodes-backward
175 ((node block &optional result &key include-sentinel-p) &body body)
176 `(do ((,node ,(if include-sentinel-p
178 `(node-prev (block-entry ,block)))
180 (,(if include-sentinel-p
182 `(block-entry-p ,node))
186 ;;; Link FROM and TO nodes together. FROM and TO must belong to the
187 ;;; same basic block and appear in such order. The nodes between FROM
188 ;;; and TO are discarded.
189 (defun link-nodes (from to)
190 (setf (node-next from) to
194 ;;; Components are connected pieces of the control flow graph with
195 ;;; some additional information. Components have well-defined entry
196 ;;; and exit nodes. They also track what basic blocks we have and
197 ;;; other useful information. It is the toplevel organizational entity
198 ;;; in the compiler. The IR translation result is accumulated into
199 ;;; components incrementally.
200 (defstruct (component #-jscl (:print-object print-component))
204 ;;; Create a new component with sentinel nodes and an empty basic
205 ;;; block, ready to start conversion to IR. It returns the component
206 ;;; and the basic block as multiple values.
207 (defun make-empty-component ()
208 (let ((entry (make-component-entry))
209 (block (make-empty-block))
210 (exit (make-component-exit)))
211 (setf (block-succ entry) (list block)
212 (block-pred exit) (list block)
213 (block-succ block) (list exit)
214 (block-pred block) (list entry))
215 (values (make-component :entry entry :exit exit) block)))
217 ;;; Return the list of blocks in COMPONENT.
218 (defun component-blocks (component)
220 (labels ((compute-rdfo-from (block)
221 (unless (or (component-exit-p block) (find block output))
222 (dolist (successor (block-succ block))
223 (unless (component-exit-p block)
224 (compute-rdfo-from successor)))
225 (push block output))))
226 (compute-rdfo-from (unlist (block-succ (component-entry component))))
229 ;;; Iterate across different blocks in COMPONENT.
230 (defmacro do-blocks ((block component &optional result) &body body)
231 `(dolist (,block (component-blocks ,component) ,result)
234 ;;; A few consistency checks in the IR useful for catching bugs.
235 (defun check-ir-consistency (component)
236 (with-simple-restart (continue "Continue execution")
237 (do-blocks (block component)
238 (dolist (succ (block-succ block))
239 (unless (find block (block-pred succ))
240 (error "The block `~S' does not belong to the predecessors list of the its successor `~S'"
243 (dolist (pred (block-pred block))
244 (unless (find block (block-succ pred))
245 (error "The block `~S' does not belong to the successors' list of its predecessor `~S'"
247 (block-id pred)))))))
249 (defun delete-empty-block (block)
250 (when (or (component-entry-p block) (component-exit-p block))
251 (error "Cannot delete entry or exit basic blocks."))
252 (unless (empty-block-p block)
253 (error "Block `~S' is not empty!" (block-id block)))
254 (let ((succ (unlist (block-succ block))))
255 (setf (block-pred succ) (remove block (block-pred succ)))
256 (dolist (pred (block-pred block))
257 (setf (block-succ pred) (substitute succ block (block-succ pred)))
258 (pushnew pred (block-pred succ)))))
260 ;;; Try to coalesce BLOCK with the successor if it is unique and block
261 ;;; is its unique predecessor.
262 (defun maybe-coalesce-block (block)
263 (when (singlep (block-succ block))
264 (let ((succ (first (block-succ block))))
265 (when (and (singlep (block-pred succ)) (not (component-exit-p succ)))
266 (link-nodes (node-prev (block-exit block)) (node-next (block-entry succ)))
267 (setf (block-succ block) (block-succ succ))
268 (dolist (next (block-succ succ))
269 (setf (block-pred next) (substitute block succ (block-pred next))))
272 (defun finish-component (component)
273 (do-blocks (block component)
274 (if (empty-block-p block)
275 (delete-empty-block block)
276 (maybe-coalesce-block block))))
280 ;;; The current component. We accumulate the results of the IR
281 ;;; conversion in this component.
284 ;;; Prepare a new component with a current empty block ready to start
285 ;;; IR conversion bound in the current cursor. BODY is evaluated and
286 ;;; the value of the last form is returned.
287 (defmacro with-component-compilation (&body body)
288 (let ((block (gensym)))
289 `(multiple-value-bind (*component* ,block)
290 (make-empty-component)
291 (with-cursor (:block ,block)
294 ;;; A cursor stands for a point between two nodes in some basic block
295 ;;; in the IR representation where manipulations can take place,
296 ;;; similarly to the cursors in text editing.
300 ;;; The current cursor. It is the point where IR manipulations act by
301 ;;; default. Particularly, newly converted IR code is inserted here.
304 ;;; Create a cursor which pointsto the basic block BLOCK. If omitted,
305 ;;; then the current block is used.
307 ;;; The keywords AFTER and BEFORE specify the cursor will point after
308 ;;; or before that node respectively. If none is specified, the cursor
309 ;;; is created before the exit node in BLOCK. An error is signaled if
310 ;;; both keywords are specified inconsistently, or if the nodes do not
313 ;;; The special values :ENTRY and :EXIT stand for the entry and exit
314 ;;; nodes of the block respectively.
315 (defun cursor (&key (block (cursor-block *cursor*))
316 (before nil before-p)
318 ;; Handle special values :ENTRY and :EXIT.
319 (flet ((node-designator (x)
321 (:entry (block-entry block))
322 (:exit (block-exit block))
324 (setq before (node-designator before))
325 (setq after (node-designator after)))
326 (let* ((next (or before (and after (node-next after)) (block-exit block)))
327 (cursor (make-cursor :block block :next next)))
328 (flet ((out-of-range-cursor ()
329 (error "Out of range cursor."))
331 (error "Ambiguous cursor specified between two non-adjacent nodes.")))
332 (when (or (null next) (block-entry-p next))
333 (out-of-range-cursor))
334 (when (and before-p after-p (not (eq after before)))
336 (do-nodes-backward (node block (out-of-range-cursor) :include-sentinel-p t)
337 (when (eq next node) (return))))
340 (defun set-cursor (&rest cursor-spec)
341 (let ((newcursor (apply #'cursor cursor-spec)))
342 (setf (cursor-block *cursor*) (cursor-block newcursor))
343 (setf (cursor-next *cursor*) (cursor-next newcursor))
346 ;;; Create and bind the current cursor. The cursor specification is
347 ;;; the same as described in the function `create-cursor'.
348 (defmacro with-cursor ((&rest cursor-spec) &body body)
349 `(let* ((*cursor* (cursor ,@cursor-spec)))
352 (defun end-of-block-p (&optional (cursor *cursor*))
353 (block-exit-p (cursor-next cursor)))
355 ;;; Insert NODE at cursor.
356 (defun insert-node (node &optional (cursor *cursor*))
357 (link-nodes (node-prev (cursor-next cursor)) node)
358 (link-nodes node (cursor-next cursor))
361 ;;; Split the block CURSOR points in two basic blocks, returning the
362 ;;; new basic block. The cursor is kept to point at the end of shrunk
364 (defun split-block (&optional (cursor *cursor*))
365 (let* ((block (cursor-block cursor))
366 (newexit (make-block-exit))
367 (newentry (make-block-entry))
368 (exit (block-exit block))
369 (newblock (make-block :entry newentry
372 :succ (block-succ block))))
373 (insert-node newexit)
374 (insert-node newentry)
375 (setf (node-next newexit) nil)
376 (setf (node-prev newentry) nil)
377 (setf (block-exit block) newexit)
378 (setf (block-succ block) (list newblock))
379 (dolist (succ (block-succ newblock))
380 (setf (block-pred succ) (substitute newblock block (block-pred succ))))
381 (set-cursor :block block :before newexit)
385 ;;; A alist of IR translator functions.
386 (defvar *ir-translator* nil)
388 ;;; Define a IR translator for NAME.
389 (defmacro define-ir-translator (name lambda-list &body body)
390 (check-type name symbol)
391 (let ((fname (intern (format nil "IR-CONVERT-~a" (string name))))
395 (defun ,fname (,form ,result)
396 (flet ((result-lvar () ,result))
397 (destructuring-bind ,lambda-list ,form
399 (push (cons ',name #',fname) *ir-translator*))))
401 (defun ir-convert-constant (form result)
402 (let* ((leaf (make-constant :value form)))
403 (insert-node (make-ref :leaf leaf :lvar result))))
405 (define-ir-translator quote (form)
406 (ir-convert-constant form (result-lvar)))
408 (define-ir-translator setq (variable value)
409 (let ((var (make-var :name variable))
410 (value-lvar (make-lvar)))
411 (ir-convert value value-lvar)
412 (let ((assign (make-assignment :variable var :value value-lvar :lvar (result-lvar))))
413 (insert-node assign))))
415 (define-ir-translator progn (&body body)
416 (dolist (form (butlast body))
418 (ir-convert (car (last body)) (result-lvar)))
420 (define-ir-translator if (test then &optional else)
421 (when (conditional-p (cursor-next *cursor*))
422 (error "Impossible to insert a conditional after another conditional."))
423 ;; Split the basic block if we are in the middle of one.
424 (unless (end-of-block-p) (split-block))
425 (let ((test-lvar (make-lvar))
426 (then-block (make-empty-block))
427 (else-block (make-empty-block))
428 (join-block (make-empty-block)))
429 (ir-convert test test-lvar)
430 (insert-node (make-conditional :test test-lvar :consequent then-block :alternative else-block))
431 (let* ((block (cursor-block *cursor*))
432 (tail-block (unlist (block-succ block))))
433 ;; Link together the different created basic blocks.
434 (setf (block-succ block) (list else-block then-block)
435 (block-pred else-block) (list block)
436 (block-pred then-block) (list block)
437 (block-succ then-block) (list join-block)
438 (block-succ else-block) (list join-block)
439 (block-pred join-block) (list else-block then-block)
440 (block-succ join-block) (list tail-block)
441 (block-pred tail-block) (substitute join-block block (block-pred tail-block))))
442 ;; Convert he consequent and alternative forms and update cursor.
443 (ir-convert then (result-lvar) (cursor :block then-block))
444 (ir-convert else (result-lvar) (cursor :block else-block))
445 (set-cursor :block join-block)))
448 (defun ir-convert-var (form result)
449 (let* ((leaf (make-var :name form))
450 (ref (make-ref :leaf leaf :lvar result)))
453 (defun ir-convert-call (form result)
454 (destructuring-bind (function &rest args) form
455 (let ((func-lvar (make-lvar))
457 (when (symbolp function)
458 (ir-convert `(%symbol-function ,function) func-lvar))
460 (let ((arg-lvar (make-lvar)))
461 (push arg-lvar args-lvars)
462 (ir-convert arg arg-lvar)))
463 (setq args-lvars (reverse args-lvars))
464 (let ((call (make-call :function func-lvar :arguments args-lvars :lvar result)))
465 (insert-node call)))))
467 ;;; Convert the Lisp expression FORM into IR before the NEXT node, it
468 ;;; may create new basic blocks into the current component. During the
469 ;;; initial IR conversion, The NEXT node is the EXIT node of the
470 ;;; current basic block, but optimizations could call it to insert IR
472 (defun ir-convert (form &optional result (*cursor* *cursor*))
477 (ir-convert-var form result))
479 (ir-convert-constant form result))))
481 (destructuring-bind (op &rest args) form
482 (let ((translator (cdr (assoc op *ir-translator*))))
484 (funcall translator args result)
485 (ir-convert-call form result))))))
491 (defun print-node (node)
492 (when (node-lvar node)
493 (format t "~a = " (lvar-id (node-lvar node))))
496 (let ((leaf (ref-leaf node)))
499 (format t "~a" (var-name leaf)))
501 (format t "'~a" (constant-value leaf)))
503 (format t "#<function ~a at ~a>"
504 (functional-name leaf)
505 (functional-entry-point leaf))))))
507 (format t "set ~a ~a"
508 (var-name (assignment-variable node))
509 (lvar-id (assignment-value node))))
511 (format t "call ~a" (lvar-id (call-function node)))
512 (dolist (arg (call-arguments node))
513 (format t " ~a" (lvar-id arg))))
514 ((conditional-p node)
515 (format t "if ~a ~a ~a"
516 (lvar-id (conditional-test node))
517 (block-id (conditional-consequent node))
518 (block-id (conditional-alternative node))))
520 (error "`print-node' does not support printing ~S as a node." node)))
523 (defun print-block (block)
524 (flet ((block-name (block)
526 ((and (singlep (block-pred block))
527 (component-entry-p (unlist (block-pred block))))
529 ((component-exit-p block)
531 (t (string (block-id block))))))
532 (format t "BLOCK ~a:~%" (block-name block))
533 (do-nodes (node block)
535 (when (singlep (block-succ block))
536 (format t "GO ~a~%" (block-name (first (block-succ block)))))
539 (defun print-component (component &optional (stream *standard-output*))
540 (let ((*standard-output* stream))
541 (do-blocks (block component)
542 (print-block block))))
544 ;;; Translate FORM into IR and print a textual repreresentation of the
546 (defun describe-ir (form)
547 (with-component-compilation
548 (ir-convert form (make-lvar :id "$out"))
549 (finish-component *component*)
550 (check-ir-consistency *component*)
551 (print-component *component*)))
554 ;;; compiler.lisp ends here