;;; compiler.lisp ---
;; copyright (C) 2013 David Vazquez
;; JSCL is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; JSCL is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with JSCL. If not, see .
(defpackage :jscl
(:use :cl))
(in-package :jscl)
;;;; Utils
(defun singlep (x)
(and (consp x) (null (cdr x))))
(defun unlist (x)
(assert (singlep x))
(first x))
;;;; Intermediate representation
;;;;
;;;; This intermediate representation (IR) is a simplified version of
;;;; first intermediate representation what you will find if you have
;;;; a you have the source code of SBCL. Some terminology is also
;;;; used, but other is changed, so be careful if you assume you know
;;;; what it is because you know the name.
;;;;
;;; A leaf stands for leaf in the tree of computations. Lexical
;;; variables, constants and literal functions are leafs. Leafs are
;;; not nodes itself, a `ref' node will stands for putting a leaf into
;;; a lvar, which can be used in computations.
(defstruct leaf)
;;; Reference a lexical variable. Special variables have not a
;;; representation in IR. They are handled via the primitive functions
;;; `%symbol-function' and `%symbol-value'.
(defstruct (var (:include leaf))
;; Name is the symbol used to identify this variable in the lexical
;; environment.
name)
;;; A constant value, mostly from a quoted form, but maybe introduced
;;; in some pass of the compiler.
(defstruct (constant (:include leaf))
value)
;;; A literal function. Why do we use `functional' as name? Well,
;;; function is taken, isn't it?
(defstruct (functional (:include leaf))
;; The symbol which names this function in the source code.
name
;; A list of lvars which are bound to the argument values in a call
;; to this function.
arguments
;; LVAR which contains the return values of the function.
return-lvar
;; The basic block which contain the code which be executed firstly
;; when you call this function.
entry-point)
;;; Used to transfer data between the computations in the intermediate
;;; representation. Each node is valued into a LVar. And nodes which
;;; use resulting values from other nodes use such LVar.
(defstruct lvar
(id (gensym "$")))
;;; A computation node. It represents a simple computation in the
;;; intermediate representation. Nodes are grouped in basic blocks,
;;; which are delimited by the special nodes `block-entry' and
;;; `block-exit'. Resulting value of the node is stored in LVAR, which it
;;; could be null if the value is discarded.
(defstruct node
next
prev
lvar)
;;; Sentinel nodes. No computation really, but they make easier to
;;; manipulate the doubly linked-list.
(defstruct (block-entry (:include node)))
(defstruct (block-exit (:include node)))
;;; A reference to a leaf.
(defstruct (ref (:include node))
leaf)
;;; An assignation of the LVAR VALUE into the var VARIABLE.
(defstruct (assignment (:include node))
variable
value)
;;; Call the lvar FUNCTION with a list of lvars as ARGUMENTS.
(defstruct (call (:include node))
function
arguments)
;;; A conditional branch. If the LVAR is not NIL, then we will jump to
;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By
;;; definition, a conditional must appear at the end of a basic block.
(defstruct (conditional (:include node))
test
consequent
alternative)
;;; BBlock stands for `basic block', which is a maximal sequence of
;;; nodes with an entry point and an exit. Basic blocks are organized
;;; as a control flow graph with some more information in omponents.
(defstruct bblock
(id (gensym "L"))
succ
pred
entry
exit)
(defstruct (component-entry (:include bblock)))
(defstruct (component-exit (:include bblock)))
(defun make-empty-bblock ()
(let ((entry (make-block-entry))
(exit (make-block-exit)))
(setf (node-next entry) exit
(node-prev exit) entry)
(make-bblock :entry entry :exit exit)))
(defun empty-block-p (b)
(block-exit-p (node-next (bblock-entry b))))
(defmacro do-nodes ((node block &optional result) &body body)
(check-type node symbol)
`(do ((,node (node-next (bblock-entry ,block)) (node-next ,node)))
((block-exit-p ,node) ,result)
,@body))
;;; Link FROM and TO nodes together. FROM and TO must belong to the
;;; same basic block and appear in such order. The nodes between FROM
;;; and TO are discarded.
(defun link-nodes (from to)
(setf (node-next from) to
(node-prev to) from)
(values))
;;; Insert NODE before NEXT.
(defun insert-node-before (next node)
(link-nodes (node-prev next) node)
(link-nodes node next))
;;; Components are connected pieces of the control flow graph with
;;; some additional information. Components have well-defined entry
;;; and exit nodes. They also track what basic blocks we have and
;;; other useful information. It is the toplevel organizational entity
;;; in the compiler. The IR translation result is accumulated into
;;; components incrementally.
(defstruct (component
#-jscl (:print-object print-component))
entry
exit
blocks)
;;; Create a new component, compromised of the sentinel nodes and a
;;; empty basic block, ready to start conversion to IR. It returnes
;;; the component and the basic block as multiple values.
(defun make-empty-component ()
(let ((entry (make-component-entry))
(bblock (make-empty-bblock))
(exit (make-component-exit)))
(setf (bblock-succ entry) (list bblock)
(bblock-pred exit) (list bblock)
(bblock-succ bblock) (list exit)
(bblock-pred bblock) (list entry))
(values (make-component :entry entry :exit exit) bblock)))
;;; Delete an empty block. It is the same as a jump to an
;;; uncondiditonal jump.
(defun delete-empty-block (block)
(when (or (component-entry-p block) (component-exit-p block))
(error "Cannot delete entry or exit basic blocks."))
(unless (empty-block-p block)
(error "Block `~S' is not empty!" (bblock-id block)))
(assert (singlep (bblock-succ block)))
(let ((successor (first (bblock-succ block))))
(dolist (pred (bblock-pred block))
(setf (bblock-succ pred)
(substitute successor block (bblock-succ pred)))
(pushnew pred (bblock-pred successor)))))
(defun finish-component (&optional (component *component*))
(dolist (blk (bblock-pred (component-exit component)))
(when (empty-block-p blk)
(delete-empty-block blk))))
;;; IR Translation
;;; The current component. We accumulate the results of the IR
;;; conversion in this component.
(defvar *component*)
;;; The current block in the current component. IR conversion usually
;;; append nodes to this block. Branching instructions will modify
;;; this variable.
(defvar *bblock*)
;;; Prepare a new component with a current empty content block ready
;;; to start IR conversion. Then BODY is evaluated and the value of
;;; the last form is returned.
(defmacro with-component-compilation (&body body)
`(multiple-value-bind (*component* *bblock*)
(make-empty-component)
,@body))
;;; The Lexical environment is compromised of a list of bindings,
;;; which associates information to symbols. It tracks lexical
;;; variables, tags, local declarations and many other information in
;;; order to guide the compiler.
(defstruct binding
name type value declarations)
(defstruct lexenv
bindings)
;;; A alist of IR translator functions.
(defvar *ir-translator* nil)
;;; Define a IR translator for NAME.
(defmacro define-ir-translator (name (next result) lambda-list &body body)
(let ((fname (intern (format nil "IR-CONVERT-~a" (string name))))
(form (gensym)))
(check-type name symbol)
(check-type next symbol)
`(progn
(defun ,fname (,form ,next ,result)
(destructuring-bind ,lambda-list ,form
,@body))
(push (cons ',name #',fname) *ir-translator*))))
(defun ir-convert-constant (form next result)
(let* ((leaf (make-constant :value form))
(ref (make-ref :leaf leaf :lvar result)))
(insert-node-before next ref)))
(define-ir-translator quote (next result) (form)
(ir-convert-constant form next result))
(define-ir-translator setq (next result) (variable value)
(let ((var (make-var :name variable))
(value-lvar (make-lvar)))
(ir-convert value next value-lvar)
(let ((assign (make-assignment :variable var :value value-lvar :lvar result)))
(insert-node-before next assign))))
;;; Split BLOCK in two basic blocks. BLOCK ends just before BLOCK. A
;;; new block is created starting at NODE until the exit of the
;;; original block. The successors of BLOCK become the successors of
;;; the new block.
(defun split-basic-block-before (node block)
(let ((exit (node-prev (bblock-exit block)))
(newexit (make-block-exit))
(newentry (make-block-entry))
newblock)
(insert-node-before node newentry)
(insert-node-before newentry newexit)
(setf (node-next newexit) nil)
(setf (node-prev newentry) nil)
(setf (bblock-exit block) newexit)
(setq newblock (make-bblock :entry newentry :exit exit))
(rotatef (bblock-succ block) (bblock-succ newblock))
newblock))
(define-ir-translator if (next result) (test then &optional else)
(let ((test-lvar (make-lvar))
(then-block (make-empty-bblock))
(else-block (make-empty-bblock))
(join-block (make-empty-bblock)))
;; Convert the test into the current basic block.
(ir-convert test next test-lvar)
(let ((cond (make-conditional :test test-lvar :consequent then-block :alternative else-block)))
(insert-node-before next cond))
;; If we are not at the end of the content block, split it.
(unless (block-exit-p next)
(setq join-block (split-basic-block-before next *bblock*)))
(dolist (succ (bblock-succ *bblock*))
(setf (bblock-pred succ) (substitute join-block *bblock* (bblock-pred succ))))
(psetf (bblock-succ *bblock*) (list then-block else-block)
(bblock-pred else-block) (list *bblock*)
(bblock-pred then-block) (list *bblock*)
(bblock-succ then-block) (list join-block)
(bblock-succ else-block) (list join-block)
(bblock-pred join-block) (list then-block else-block)
(bblock-succ join-block) (bblock-succ *bblock*))
(let ((*bblock* then-block))
(ir-convert then (bblock-exit then-block) result))
(let ((*bblock* else-block))
(ir-convert else (bblock-exit else-block) result))
(setq *bblock* join-block)))
(defun ir-convert-var (form next result)
(let* ((leaf (make-var :name form))
(ref (make-ref :leaf leaf :lvar result)))
(insert-node-before next ref)))
(defun ir-convert-call (form next result)
(destructuring-bind (function &rest args) form
(let ((func-lvar (make-lvar))
(args-lvars nil))
(when (symbolp function)
(ir-convert `(%symbol-function ,function) next func-lvar))
(dolist (arg args)
(push (make-lvar) args-lvars)
(ir-convert arg next (first args-lvars)))
(setq args-lvars (reverse args-lvars))
(let ((call (make-call :function func-lvar :arguments args-lvars :lvar result)))
(insert-node-before next call)))))
;;; Convert the Lisp expression FORM into IR before the NEXT node, it
;;; may create new basic blocks into the current component. During the
;;; initial IR conversion, The NEXT node is the EXIT node of the
;;; current basic block, but optimizations could call it to insert IR
;;; code somewhere.
(defun ir-convert (form next result)
(when (block-entry-p next)
(error "Can't insert IR before the entry node."))
(cond
((atom form)
(cond
((symbolp form)
(ir-convert-var form next result))
(t
(ir-convert-constant form next result))))
(t
(destructuring-bind (op &rest args) form
(let ((translator (cdr (assoc op *ir-translator*))))
(if translator
(funcall translator args next result)
(ir-convert-call form next result))))))
(values))
(defun compute-dfo (component)
(or (component-blocks component)
(let ((output nil))
(labels ((compute-dfo-from (block)
(unless (or (component-exit-p block) (find block output))
(dolist (successor (bblock-succ block))
(unless (component-exit-p block)
(compute-dfo-from successor)))
(push block output))))
(compute-dfo-from (unlist (bblock-succ (component-entry component))))
(setf (component-blocks component) output)))))
(defmacro do-blocks ((bblock component &optional result) &body body)
`(dolist (,bblock (compute-dfo ,component) ,result)
,@body))
;;; IR Debugging
(defun print-node (node)
(when (node-lvar node)
(format t "~a = " (lvar-id (node-lvar node))))
(cond
((ref-p node)
(let ((leaf (ref-leaf node)))
(cond
((var-p leaf)
(format t "~a" (var-name leaf)))
((constant-p leaf)
(format t "'~a" (constant-value leaf)))
((functional-p leaf)
(format t "#"
(functional-name leaf)
(functional-entry-point leaf))))))
((assignment-p node)
(format t "set ~a ~a"
(var-name (assignment-variable node))
(lvar-id (assignment-value node))))
((call-p node)
(format t "call ~a" (lvar-id (call-function node)))
(dolist (arg (call-arguments node))
(format t " ~a" (lvar-id arg))))
((conditional-p node)
(format t "if ~a ~a ~a"
(lvar-id (conditional-test node))
(bblock-id (conditional-consequent node))
(bblock-id (conditional-alternative node))))
(t
(error "`print-node' does not support printing ~S as a node." node)))
(terpri))
(defun print-bblock (block)
(flet ((bblock-name (block)
(cond
((and (singlep (bblock-pred block))
(component-entry-p (bblock-pred block)))
"ENTRY")
((component-exit-p block)
"EXIT")
(t (string (bblock-id block))))))
(format t "BLOCK ~a:~%" (bblock-name block))
(do-nodes (node block)
(print-node node))
(when (singlep (bblock-succ block))
(format t "GO ~a~%" (bblock-name (first (bblock-succ block)))))
(terpri)))
(defun print-component (component &optional (stream *standard-output*))
(let ((*standard-output* stream))
(do-blocks (block component)
(print-bblock block))))
(defun check-ir-consistency (&optional (component *component*))
(with-simple-restart (continue "Continue execution")
(do-blocks (block component)
(dolist (succ (bblock-succ block))
(unless (find block (bblock-pred succ))
(error "The block `~S' does not belong to the predecessors list of the its successor `~S'"
(bblock-id block)
(bblock-id succ))))
(dolist (pred (bblock-pred block))
(unless (find block (bblock-succ pred))
(error "The block `~S' does not belong to the successors' list of its predecessor `~S'"
(bblock-id block)
(bblock-id pred)))))))
;;; Translate FORM into IR and print a textual repreresentation of the
;;; component.
(defun describe-ir (form)
(with-component-compilation
(ir-convert form (bblock-exit *bblock*) (make-lvar :id "$out"))
(finish-component)
(check-ir-consistency)
(print-component *component*)))
;;; compiler.lisp ends here