;;; compiler.lisp ---
;; Copyright (C) 2013 David Vazquez
;; JSCL is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; JSCL is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with JSCL. If not, see .
(defpackage :jscl
(:use :cl))
(in-package :jscl)
;;;; Utilities
;;;;
;;;; Random Common Lisp code useful to use here and there.
(defmacro with-gensyms ((&rest vars) &body body)
`(let ,(mapcar (lambda (var) `(,var (gensym ,(string var)))) vars)
,@body))
(defun singlep (x)
(and (consp x) (null (cdr x))))
(defun unlist (x)
(assert (singlep x))
(first x))
;;;; Intermediate representation structures
;;;;
;;;; This intermediate representation (IR) is a simplified version of
;;;; the first intermediate representation what you will find if you
;;;; have a look to the source code of SBCL. Some terminology is also
;;;; used, but other is changed, so be careful if you assume you know
;;;; what it is because you know the name.
;;;;
;;;; Computations are represented by `node'. Nodes are grouped
;;;; sequencially into `basic-block'. It is a plain representation
;;;; rather than a nested one. Computations take data and produce a
;;;; value. Both data transfer are represented by `lvar'.
(defstruct leaf)
;;; A (lexical) variable. Special variables has not a special
;;; representation in the IR. They are handled by the primitive
;;; functions `%symbol-function' and `%symbol-value'.
(defstruct (var (:include leaf))
;; The symbol which names this variable in the source code.
name)
;;; A literal Lisp object. It usually comes from a quoted expression.
(defstruct (constant (:include leaf))
;; The object itself.
value)
;;; A lambda expression. Why do we name it `functional'? Well,
;;; function is reserved by the ANSI, isn't it?
(defstruct (functional (:include leaf))
;; The symbol which names this function in the source code or null
;; if we do not know or it is an anonymous function.
name
arguments
return-lvar
entry-point)
;;; An abstract place where the result of a computation is stored and
;;; it can be referenced from other nodes, so lvars are responsible
;;; for keeping the necessary information of the nested structure of
;;; the code in this plain representation.
(defstruct lvar
(id (gensym "$")))
;;; A base structure for every single computation. Most of the
;;; computations are valued.
(defstruct node
;; The next and the prev slots are the next nodes and the previous
;; node in the basic block sequence respectively.
next prev
;; Lvar which stands for the result of the computation of this node.
lvar)
;;; Sentinel nodes in the basic block sequence of nodes.
(defstruct (block-entry (:include node)))
(defstruct (block-exit (:include node)))
;;; A reference to a leaf (variable, constant and functions). The
;;; meaning of this node is leaving the leaf into the lvar of the
;;; node.
(defstruct (ref (:include node))
leaf)
;;; An assignation of the LVAR VALUE into the var VARIABLE.
(defstruct (assignment (:include node))
variable
value)
;;; Call the lvar FUNCTION with a list of lvars as ARGUMENTS.
(defstruct (call (:include node))
function
arguments)
;;; A conditional branch. If the LVAR is not NIL, then we will jump to
;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By
;;; definition, a conditional must appear at the end of a basic block.
(defstruct (conditional (:include node))
test
consequent
alternative)
;;; Blocks are `basic block`. Basic blocks are organized as a control
;;; flow graph with some more information in omponents.
(defstruct (basic-block
(:conc-name "BLOCK-")
(:constructor make-block)
(:predicate block-p))
(id (gensym "L"))
;; List of successors and predecessors of this basic block.
succ pred
;; The sentinel nodes of the sequence.
entry exit)
;;; Sentinel nodes in the control flow graph of basic blocks.
(defstruct (component-entry (:include basic-block)))
(defstruct (component-exit (:include basic-block)))
;;; Return a fresh empty basic block.
(defun make-empty-block ()
(let ((entry (make-block-entry))
(exit (make-block-exit)))
(setf (node-next entry) exit
(node-prev exit) entry)
(make-block :entry entry :exit exit)))
;;; Return T if B is an empty basic block and NIL otherwise.
(defun empty-block-p (b)
(block-exit-p (node-next (block-entry b))))
;;; Iterate across the nodes in a basic block forward.
(defmacro do-nodes
((node block &optional result &key include-sentinel-p) &body body)
`(do ((,node ,(if include-sentinel-p
`(block-entry ,block)
`(node-next (block-entry ,block)))
(node-next ,node)))
(,(if include-sentinel-p
`(null ,node)
`(block-exit-p ,node))
,result)
,@body))
;;; Iterate across the nodes in a basic block backward.
(defmacro do-nodes-backward
((node block &optional result &key include-sentinel-p) &body body)
`(do ((,node ,(if include-sentinel-p
`(block-exit ,block)
`(node-prev (block-entry ,block)))
(node-prev ,node)))
(,(if include-sentinel-p
`(null ,node)
`(block-entry-p ,node))
,result)
,@body))
;;; Link FROM and TO nodes together. FROM and TO must belong to the
;;; same basic block and appear in such order. The nodes between FROM
;;; and TO are discarded.
(defun link-nodes (from to)
(setf (node-next from) to
(node-prev to) from)
(values))
;;;; Cursors
;;;;
;;;; A cursor is a point between two nodes in some basic block in the
;;;; IR representation where manipulations can take place, similarly
;;;; to the cursors in text editing.
;;;;
;;;; Cursors cannot point to special component's entry and exit basic
;;;; blocks or after a conditional node. Conveniently, the `cursor'
;;;; function will signal an error if the cursor is not positioned
;;;; correctly, so the rest of the code does not need to check once
;;;; and again.
(defstruct cursor
block next)
;;; The current cursor. It is the default cursor for many functions
;;; which work on cursors.
(defvar *cursor*)
;;; Return the current basic block. It is to say, the basic block
;;; where the current cursor is pointint.
(defun current-block ()
(cursor-block *cursor*))
;;; Create a cursor which points to the basic block BLOCK. If omitted,
;;; then the current block is used.
;;;
;;; The keywords AFTER and BEFORE specify the cursor will point after (or
;;; before) that node respectively. If none is specified, the cursor is
;;; created before the exit node in BLOCK. An error is signaled if both
;;; keywords are specified inconsistently, or if the nodes do not belong
;;; to BLOCK.
;;;
;;; AFTER and BEFORE could also be the special values :ENTRY and :EXIT,
;;; which stand for the entry and exit nodes of the block respectively.
(defun cursor (&key (block (current-block))
(before nil before-p)
(after nil after-p))
(when (or (component-entry-p block) (component-exit-p block))
(error "Invalid cursor on special entry/exit basic block."))
;; Handle special values :ENTRY and :EXIT.
(flet ((node-designator (x)
(case x
(:entry (block-entry block))
(:exit (block-exit block))
(t x))))
(setq before (node-designator before))
(setq after (node-designator after)))
(let* ((next (or before (and after (node-next after)) (block-exit block)))
(cursor (make-cursor :block block :next next)))
(flet ((out-of-range-cursor ()
(error "Out of range cursor."))
(ambiguous-cursor ()
(error "Ambiguous cursor specified between two non-adjacent nodes.")))
(when (conditional-p (node-prev next))
(error "Invalid cursor after conditional node."))
(when (or (null next) (block-entry-p next))
(out-of-range-cursor))
(when (and before-p after-p (not (eq after before)))
(ambiguous-cursor))
(do-nodes-backward (node block (out-of-range-cursor) :include-sentinel-p t)
(when (eq next node) (return))))
cursor))
;;; Accept a cursor specification just as described in `cursor'
;;; describing a position in the IR and modify destructively the
;;; current cursor to point there.
(defun set-cursor (&rest cursor-spec)
(let ((newcursor (apply #'cursor cursor-spec)))
(setf (cursor-block *cursor*) (cursor-block newcursor))
(setf (cursor-next *cursor*) (cursor-next newcursor))
*cursor*))
;;; Insert NODE at cursor.
(defun insert-node (node &optional (cursor *cursor*))
;; After if? wrong!
(link-nodes (node-prev (cursor-next cursor)) node)
(link-nodes node (cursor-next cursor))
t)
;;; Split the block at CURSOR. The cursor will point to the end of the
;;; first basic block. Return the three basic blocks as multiple
;;; values.
(defun split-block (&optional (cursor *cursor*))
;; ==> --
(let* ((block (cursor-block cursor))
(newexit (make-block-exit))
(newentry (make-block-entry))
(exit (block-exit block))
(newblock (make-block :entry newentry
:exit exit
:pred (list block)
:succ (block-succ block))))
(insert-node newexit)
(insert-node newentry)
(setf (node-next newexit) nil)
(setf (node-prev newentry) nil)
(setf (block-exit block) newexit)
(setf (block-succ block) (list newblock))
(dolist (succ (block-succ newblock))
(setf (block-pred succ) (substitute newblock block (block-pred succ))))
(set-cursor :block block :before newexit)
newblock))
;;; Split the block at CURSOR if it is in the middle of it. The cursor
;;; will point to the end of the first basic block. Return the three
;;; basic blocks as multiple values.
(defun maybe-split-block (&optional (cursor *cursor*))
;; If we are converting IR into the end of the basic block, it's
;; fine, we don't need to do anything.
(unless (block-exit-p (cursor-next cursor))
(split-block cursor)))
;;;; Components
;;;;
;;;; Components are connected pieces of the control flow graph of
;;;; basic blocks with some additional information. Components have
;;;; well-defined entry and exit nodes. It is the toplevel
;;;; organizational entity in the compiler. The IR translation result
;;;; is accumulated into components incrementally.
(defstruct (component #-jscl (:print-object print-component))
entry
exit)
;;; Create a new component with an empty basic block, ready to start
;;; conversion to IR. It returns the component and the basic block as
;;; multiple values.
(defun make-empty-component ()
(let ((entry (make-component-entry))
(block (make-empty-block))
(exit (make-component-exit)))
(setf (block-succ entry) (list block)
(block-pred exit) (list block)
(block-succ block) (list exit)
(block-pred block) (list entry))
(values (make-component :entry entry :exit exit) block)))
;;; Return the list of blocks in COMPONENT, conveniently sorted.
(defun component-blocks (component)
(let ((output nil))
(labels ((compute-rdfo-from (block)
(unless (or (component-exit-p block) (find block output))
(dolist (successor (block-succ block))
(unless (component-exit-p block)
(compute-rdfo-from successor)))
(push block output))))
(compute-rdfo-from (unlist (block-succ (component-entry component))))
output)))
;;; Iterate across different blocks in COMPONENT.
(defmacro do-blocks ((block component &optional result) &body body)
`(dolist (,block (component-blocks ,component) ,result)
,@body))
;;; A few consistency checks in the IR useful for catching bugs.
(defun check-ir-consistency (component)
(with-simple-restart (continue "Continue execution")
(do-blocks (block component)
(dolist (succ (block-succ block))
(unless (find block (block-pred succ))
(error "The block `~S' does not belong to the predecessors list of the its successor `~S'"
(block-id block)
(block-id succ))))
(dolist (pred (block-pred block))
(unless (find block (block-succ pred))
(error "The block `~S' does not belong to the successors' list of its predecessor `~S'"
(block-id block)
(block-id pred)))))))
;;;; Lexical environment
;;;;
;;;; It keeps an association between names and the IR entities. It is
;;;; used to guide the translation from the Lisp source code to the
;;;; intermediate representation.
(defstruct binding
name namespace type value)
(defvar *lexenv*)
(defun find-binding (name namespace)
(find-if (lambda (b)
(and (eq (binding-name b) name)
(eq (binding-namespace b) namespace)))
*lexenv*))
(defun push-binding (name namespace value &optional type)
(push (make-binding :name name
:namespace namespace
:type type
:value value)
*lexenv*))
;;;; IR Translation
;;;;
;;;; This code covers the translation from Lisp source code to the
;;;; intermediate representation. The main entry point function to do
;;;; that is the `ir-convert' function, which dispatches to IR
;;;; translators. This function ss intended to do the initial
;;;; conversion as well as insert new IR code during optimizations.
;;;;
;;;; The function `ir-complete' will coalesce basic blocks in a
;;;; component to generate proper maximal basic blocks.
;;; The current component. We accumulate the results of the IR
;;; conversion in this component.
(defvar *component*)
;;; A alist of IR translator functions.
(defvar *ir-translator* nil)
;;; Define a IR translator for NAME. LAMBDA-LIST is used to
;;; destructure the arguments of the form. Calling the local function
;;; `result-lvar' you can get the LVAR where the compilation of the
;;; expression should store the result of the evaluation.
;;;
;;; The cursor is granted to be at the end of a basic block with a
;;; unique successor, and so it should be when the translator returns.
(defmacro define-ir-translator (name lambda-list &body body)
(check-type name symbol)
(let ((fname (intern (format nil "IR-CONVERT-~a" (string name))))
(result (gensym))
(form (gensym)))
`(progn
(defun ,fname (,form ,result)
(flet ((result-lvar () ,result))
(declare (ignorable (function result-lvar)))
(destructuring-bind ,lambda-list ,form
,@body)))
(push (cons ',name #',fname) *ir-translator*))))
;;; Return the unique successor of the current block. If it is not
;;; unique signal an error.
(defun next-block ()
(unlist (block-succ (current-block))))
;;; Set the next block of the current one.
(defun (setf next-block) (new-value)
(let ((block (current-block))
(next (next-block)))
(setf (block-pred next) (remove block (block-pred next)))
(setf (block-succ block) (list new-value))
(push block (block-pred new-value))
new-value))
(defun ir-convert-constant (form result)
(let* ((leaf (make-constant :value form)))
(insert-node (make-ref :leaf leaf :lvar result))))
(define-ir-translator quote (form)
(ir-convert-constant form (result-lvar)))
(define-ir-translator setq (variable value)
(let ((var (make-var :name variable))
(value-lvar (make-lvar)))
(ir-convert value value-lvar)
(let ((assign (make-assignment :variable var :value value-lvar :lvar (result-lvar))))
(insert-node assign))))
(define-ir-translator progn (&body body)
(mapc #'ir-convert (butlast body))
(ir-convert (car (last body)) (result-lvar)))
(define-ir-translator if (test then &optional else)
;; It is the schema of how the basic blocks will look like
;;
;; / ..then.. \
;; -- => --< >-- <|> --
;; \ ..else.. /
;;
;; Note that is important to leave the cursor in an empty basic
;; block, as zzz could be the exit basic block of the component,
;; which is an invalid position for a cursor.
(let ((test-lvar (make-lvar))
(then-block (make-empty-block))
(else-block (make-empty-block))
(join-block (make-empty-block)))
(ir-convert test test-lvar)
(insert-node (make-conditional :test test-lvar :consequent then-block :alternative else-block))
(let* ((block (current-block))
(tail-block (next-block)))
;; Link together the different created basic blocks.
(setf (block-succ block) (list else-block then-block)
(block-pred else-block) (list block)
(block-pred then-block) (list block)
(block-succ then-block) (list join-block)
(block-succ else-block) (list join-block)
(block-pred join-block) (list else-block then-block)
(block-succ join-block) (list tail-block)
(block-pred tail-block) (substitute join-block block (block-pred tail-block))))
;; Convert he consequent and alternative forms and update cursor.
(ir-convert then (result-lvar) (cursor :block then-block))
(ir-convert else (result-lvar) (cursor :block else-block))
(set-cursor :block join-block)))
(define-ir-translator block (name &body body)
(push-binding name 'block (cons (next-block) (result-lvar)))
(ir-convert `(progn ,@body) (result-lvar)))
(define-ir-translator return-from (name &optional value)
(let ((binding
(or (find-binding name 'block)
(error "Tried to return from unknown block `~S' name" name))))
(destructuring-bind (jump-block . lvar)
(binding-value binding)
(ir-convert value lvar)
(let ((new (split-block)))
(setf (next-block) jump-block)
(set-cursor :block new)))))
(defun ir-convert-var (form result)
(let* ((leaf (make-var :name form)))
(insert-node (make-ref :leaf leaf :lvar result))))
(defun ir-convert-call (form result)
(destructuring-bind (function &rest args) form
(let ((func-lvar (make-lvar))
(args-lvars nil))
(when (symbolp function)
(ir-convert `(%symbol-function ,function) func-lvar))
(dolist (arg args)
(let ((arg-lvar (make-lvar)))
(push arg-lvar args-lvars)
(ir-convert arg arg-lvar)))
(setq args-lvars (reverse args-lvars))
(let ((call (make-call :function func-lvar :arguments args-lvars :lvar result)))
(insert-node call)))))
;;; Convert the Lisp expression FORM into IR before the NEXT node, it
;;; may create new basic blocks into the current component. RESULT is
;;; the lvar representing the result of the computation or null if the
;;; value should be discarded. The IR is inserted at *CURSOR*.
(defun ir-convert (form &optional result (*cursor* *cursor*))
;; Rebinding the lexical environment here we make sure that the
;; lexical information introduced by FORM is just available for
;; subforms.
(let ((*lexenv* *lexenv*))
;; Possibly create additional blocks in order to make sure the
;; cursor is at end the end of a basic block.
(maybe-split-block)
(cond
((atom form)
(cond
((symbolp form)
(ir-convert-var form result))
(t
(ir-convert-constant form result))))
(t
(destructuring-bind (op &rest args) form
(let ((translator (cdr (assoc op *ir-translator*))))
(if translator
(funcall translator args result)
(ir-convert-call form result))))))
(values)))
;;; Prepare a new component with a current empty block ready to start
;;; IR conversion bound in the current cursor. BODY is evaluated and
;;; the value of the last form is returned.
(defmacro with-component-compilation (&body body)
(let ((block (gensym)))
`(multiple-value-bind (*component* ,block)
(make-empty-component)
(let ((*cursor* (cursor :block ,block))
(*lexenv* nil))
,@body))))
(defun delete-empty-block (block)
(when (or (component-entry-p block) (component-exit-p block))
(error "Cannot delete entry or exit basic blocks."))
(unless (empty-block-p block)
(error "Block `~S' is not empty!" (block-id block)))
(let ((succ (unlist (block-succ block))))
(setf (block-pred succ) (remove block (block-pred succ)))
(dolist (pred (block-pred block))
(setf (block-succ pred) (substitute succ block (block-succ pred)))
(pushnew pred (block-pred succ)))))
;;; Try to coalesce BLOCK with the successor if it is unique and block
;;; is its unique predecessor.
(defun maybe-coalesce-block (block)
(when (singlep (block-succ block))
(let ((succ (first (block-succ block))))
(when (and (singlep (block-pred succ))
(not (component-exit-p succ)))
(link-nodes (node-prev (block-exit block))
(node-next (block-entry succ)))
(setf (block-succ block) (block-succ succ))
(dolist (next (block-succ succ))
(setf (block-pred next) (substitute block succ (block-pred next))))
t))))
(defun ir-complete (&optional (component *component*))
(do-blocks (block component)
(if (empty-block-p block)
(delete-empty-block block)
(maybe-coalesce-block block))))
;;; IR Debugging
(defun print-node (node)
(when (node-lvar node)
(format t "~a = " (lvar-id (node-lvar node))))
(cond
((ref-p node)
(let ((leaf (ref-leaf node)))
(cond
((var-p leaf)
(format t "~a" (var-name leaf)))
((constant-p leaf)
(format t "'~a" (constant-value leaf)))
((functional-p leaf)
(format t "#"
(functional-name leaf)
(functional-entry-point leaf))))))
((assignment-p node)
(format t "set ~a ~a"
(var-name (assignment-variable node))
(lvar-id (assignment-value node))))
((call-p node)
(format t "call ~a" (lvar-id (call-function node)))
(dolist (arg (call-arguments node))
(format t " ~a" (lvar-id arg))))
((conditional-p node)
(format t "if ~a ~a ~a"
(lvar-id (conditional-test node))
(block-id (conditional-consequent node))
(block-id (conditional-alternative node))))
(t
(error "`print-node' does not support printing ~S as a node." node)))
(terpri))
(defun print-block (block)
(flet ((block-name (block)
(cond
((and (singlep (block-pred block))
(component-entry-p (unlist (block-pred block))))
"ENTRY")
((component-exit-p block)
"EXIT")
(t (string (block-id block))))))
(format t "BLOCK ~a:~%" (block-name block))
(do-nodes (node block)
(print-node node))
(when (singlep (block-succ block))
(format t "GO ~a~%" (block-name (first (block-succ block)))))
(terpri)))
(defun print-component (component &optional (stream *standard-output*))
(let ((*standard-output* stream))
(do-blocks (block component)
(print-block block))))
;;; Translate FORM into IR and print a textual repreresentation of the
;;; component.
(defun describe-ir (form)
(with-component-compilation
(ir-convert form (make-lvar :id "$out"))
(ir-complete)
(check-ir-consistency *component*)
(print-component *component*)))
;;; compiler.lisp ends here