X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=experimental%2Fcompiler.lisp;h=11fc29384981ad0441ffac40b1289d219dc2d243;hb=1b2aff657ae8ecf10efe5365e7dc3fe8523c6ebf;hp=df40867adf3e4365480a443c468bbab961bf936d;hpb=b1e6df4cc0e0c5162cc1c85a7977ae6f669b7a5f;p=jscl.git diff --git a/experimental/compiler.lisp b/experimental/compiler.lisp index df40867..11fc293 100644 --- a/experimental/compiler.lisp +++ b/experimental/compiler.lisp @@ -1,6 +1,6 @@ ;;; compiler.lisp --- -;; copyright (C) 2013 David Vazquez +;; Copyright (C) 2013 David Vazquez ;; JSCL is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -20,7 +20,13 @@ (in-package :jscl) -;;;; Utils +;;;; Utilities +;;;; +;;;; Random Common Lisp code useful to use here and there. + +(defmacro with-gensyms ((&rest vars) &body body) + `(let ,(mapcar (lambda (var) `(,var (gensym ,(concatenate 'string (string var) "-")))) vars) + ,@body)) (defun singlep (x) (and (consp x) (null (cdr x)))) @@ -29,71 +35,81 @@ (assert (singlep x)) (first x)) -;;;; Intermediate representation +(defun generic-printer (x stream) + (print-unreadable-object (x stream :type t :identity t))) + +;;; A generic counter mechanism. IDs are used generally for debugging +;;; purposes. You can bind *counter-alist* to NIL to reset the +;;; counters in a dynamic extent. +(defvar *counter-alist* nil) +(defun generate-id (class) + (let ((e (assoc class *counter-alist*))) + (if e + (incf (cdr e)) + (prog1 1 + (push (cons class 1) *counter-alist*))))) + + +;;;; Intermediate representation structures ;;;; ;;;; This intermediate representation (IR) is a simplified version of -;;;; first intermediate representation what you will find if you have -;;;; a you have the source code of SBCL. Some terminology is also +;;;; the first intermediate representation what you will find if you +;;;; have a look to the source code of SBCL. Some terminology is also ;;;; used, but other is changed, so be careful if you assume you know ;;;; what it is because you know the name. ;;;; +;;;; Computations are represented by `node'. Nodes are grouped +;;;; sequencially into `basic-block'. It is a plain representation +;;;; rather than a nested one. Computations take data and produce a +;;;; value. Both data transfer are represented by `lvar'. -;;; A leaf stands for leaf in the tree of computations. Lexical -;;; variables, constants and literal functions are leafs. Leafs are -;;; not nodes itself, a `ref' node will stands for putting a leaf into -;;; a lvar, which can be used in computations. (defstruct leaf) -;;; Reference a lexical variable. Special variables have not a -;;; representation in IR. They are handled via the primitive functions -;;; `%symbol-function' and `%symbol-value'. +;;; A (lexical) variable. Special variables has not a special +;;; representation in the IR. They are handled by the primitive +;;; functions `%symbol-function' and `%symbol-value'. (defstruct (var (:include leaf)) - ;; Name is the symbol used to identify this variable in the lexical - ;; environment. + ;; The symbol which names this variable in the source code. name) -;;; A constant value, mostly from a quoted form, but maybe introduced -;;; in some pass of the compiler. +;;; A literal Lisp object. It usually comes from a quoted expression. (defstruct (constant (:include leaf)) + ;; The object itself. value) -;;; A literal function. Why do we use `functional' as name? Well, -;;; function is taken, isn't it? -(defstruct (functional (:include leaf)) - ;; The symbol which names this function in the source code. +;;; A lambda expression. Why do we name it `functional'? Well, +;;; function is reserved by the ANSI, isn't it? +(defstruct (functional (:include leaf) (:print-object generic-printer)) + ;; The symbol which names this function in the source code or null + ;; if we do not know or it is an anonymous function. name - ;; A list of lvars which are bound to the argument values in a call - ;; to this function. arguments - ;; LVAR which contains the return values of the function. return-lvar - ;; The basic block which contain the code which be executed firstly - ;; when you call this function. - entry-point) - + component) -;;; Used to transfer data between the computations in the intermediate -;;; representation. Each node is valued into a LVar. And nodes which -;;; use resulting values from other nodes use such LVar. +;;; An abstract place where the result of a computation is stored and +;;; it can be referenced from other nodes, so lvars are responsible +;;; for keeping the necessary information of the nested structure of +;;; the code in this plain representation. (defstruct lvar - (id (gensym "$"))) - -;;; A computation node. It represents a simple computation in the -;;; intermediate representation. Nodes are grouped in basic blocks, -;;; which are delimited by the special nodes `block-entry' and -;;; `block-exit'. Resulting value of the node is stored in LVAR, which it -;;; could be null if the value is discarded. -(defstruct node - next - prev + (id (generate-id 'lvar))) + +;;; A base structure for every single computation. Most of the +;;; computations are valued. +(defstruct (node (:print-object generic-printer)) + ;; The next and the prev slots are the next nodes and the previous + ;; node in the basic block sequence respectively. + next prev + ;; Lvar which stands for the result of the computation of this node. lvar) -;;; Sentinel nodes. No computation really, but they make easier to -;;; manipulate the doubly linked-list. +;;; Sentinel nodes in the basic block sequence of nodes. (defstruct (block-entry (:include node))) (defstruct (block-exit (:include node))) -;;; A reference to a leaf. +;;; A reference to a leaf (variable, constant and functions). The +;;; meaning of this node is leaving the leaf into the lvar of the +;;; node. (defstruct (ref (:include node)) leaf) @@ -102,11 +118,19 @@ variable value) -;;; Call the lvar FUNCTION with a list of lvars as ARGUMENTS. -(defstruct (call (:include node)) - function +;;; A base node to function calls with a list of lvar as ARGUMENTS. +(defstruct (combination (:include node) (:constructor)) arguments) +;;; A function call to the ordinary Lisp function in the lvar FUNCTION. +(defstruct (call (:include combination)) + function) + +;;; A function call to the primitive FUNCTION. +(defstruct (primitive-call (:include combination)) + function) + + ;;; A conditional branch. If the LVAR is not NIL, then we will jump to ;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By ;;; definition, a conditional must appear at the end of a basic block. @@ -116,33 +140,68 @@ alternative) -;;; BBlock stands for `basic block', which is a maximal sequence of -;;; nodes with an entry point and an exit. Basic blocks are organized -;;; as a control flow graph with some more information in omponents. -(defstruct bblock - (id (gensym "L")) - succ - pred - entry - exit) - -(defstruct (component-entry (:include bblock))) -(defstruct (component-exit (:include bblock))) - -(defun make-empty-bblock () - (let ((entry (make-block-entry)) - (exit (make-block-exit))) - (setf (node-next entry) exit - (node-prev exit) entry) - (make-bblock :entry entry :exit exit))) +;;; Blocks are `basic block`. Basic blocks are organized as a control +;;; flow graph with some more information in omponents. +(defstruct (basic-block + (:conc-name "BLOCK-") + (:constructor make-block) + (:predicate block-p) + (:print-object generic-printer)) + (id (generate-id 'basic-block)) + ;; List of successors and predecessors of this basic block. They are + ;; null only for deleted blocks and component's entry and exit. + succ pred + ;; The sentinel nodes of the sequence. + entry exit + ;; The component where the basic block belongs to. + component + ;; The order in the reverse post ordering of the blocks. + order + ;; A bit-vector representing the set of dominators. See the function + ;; `compute-dominators' to know how to use it properly. + dominators% + ;; Arbitrary data which could be necessary to keep during IR + ;; processing. + data) + +;;; Sentinel nodes in the control flow graph of basic blocks. +(defstruct (component-entry (:include basic-block))) +(defstruct (component-exit (:include basic-block))) + +;;; Return T if B is an empty basic block and NIL otherwise. (defun empty-block-p (b) - (block-exit-p (node-next (bblock-entry b)))) + (or (boundary-block-p b) + (block-exit-p (node-next (block-entry b))))) + +(defun boundary-block-p (block) + (or (component-entry-p block) + (component-exit-p block))) + +;;; Iterate across the nodes in a basic block forward. +(defmacro do-nodes + ((node block &optional result &key include-sentinel-p) &body body) + `(do ((,node ,(if include-sentinel-p + `(block-entry ,block) + `(node-next (block-entry ,block))) + (node-next ,node))) + (,(if include-sentinel-p + `(null ,node) + `(block-exit-p ,node)) + ,result) + ,@body)) -(defmacro do-nodes ((node block &optional result) &body body) - (check-type node symbol) - `(do ((,node (node-next (bblock-entry ,block)) (node-next ,node))) - ((block-exit-p ,node) ,result) +;;; Iterate across the nodes in a basic block backward. +(defmacro do-nodes-backward + ((node block &optional result &key include-sentinel-p) &body body) + `(do ((,node ,(if include-sentinel-p + `(block-exit ,block) + `(node-prev (block-entry ,block))) + (node-prev ,node))) + (,(if include-sentinel-p + `(null ,node) + `(block-entry-p ,node)) + ,result) ,@body)) ;;; Link FROM and TO nodes together. FROM and TO must belong to the @@ -153,225 +212,691 @@ (node-prev to) from) (values)) -;;; Insert NODE before NEXT. -(defun insert-node-before (next node) - (link-nodes (node-prev next) node) - (link-nodes node next)) - -;;; Components are connected pieces of the control flow graph with -;;; some additional information. Components have well-defined entry -;;; and exit nodes. They also track what basic blocks we have and -;;; other useful information. It is the toplevel organizational entity -;;; in the compiler. The IR translation result is accumulated into -;;; components incrementally. -(defstruct (component - #-jscl (:print-object print-component)) +;;; Components are connected pieces of the control flow graph of +;;; basic blocks with some additional information. Components have +;;; well-defined entry and exit nodes. It is the toplevel +;;; organizational entity in the compiler. The IR translation result +;;; is accumulated into components incrementally. +(defstruct (component (:print-object generic-printer)) + (id (generate-id 'component)) + name entry exit + functions + ;; TODO: Replace with a flags slot for indicate what + ;; analysis/transformations have been carried out. + reverse-post-order-p blocks) -;;; Create a new component, compromised of the sentinel nodes and a -;;; empty basic block, ready to start conversion to IR. It returnes -;;; the component and the basic block as multiple values. -(defun make-empty-component () - (let ((entry (make-component-entry)) - (bblock (make-empty-bblock)) - (exit (make-component-exit))) - (setf (bblock-succ entry) (list bblock) - (bblock-pred exit) (list bblock) - (bblock-succ bblock) (list exit) - (bblock-pred bblock) (list entry)) - (values (make-component :entry entry :exit exit) bblock))) - -;;; Delete an empty block. It is the same as a jump to an -;;; uncondiditonal jump. -(defun delete-empty-block (block) - (when (or (component-entry-p block) (component-exit-p block)) - (error "Cannot delete entry or exit basic blocks.")) - (unless (empty-block-p block) - (error "Block `~S' is not empty!" (bblock-id block))) - (assert (singlep (bblock-succ block))) - (let ((successor (first (bblock-succ block)))) - (dolist (pred (bblock-pred block)) - (setf (bblock-succ pred) - (substitute successor block (bblock-succ pred))) - (pushnew pred (bblock-pred successor))))) - -(defun finish-component (&optional (component *component*)) - (dolist (blk (bblock-pred (component-exit component))) - (when (empty-block-p blk) - (delete-empty-block blk)))) - -;;; IR Translation - -;;; The current component. We accumulate the results of the IR -;;; conversion in this component. +;;; The current component. (defvar *component*) -;;; The current block in the current component. IR conversion usually -;;; append nodes to this block. Branching instructions will modify -;;; this variable. -(defvar *bblock*) - -;;; Prepare a new component with a current empty content block ready -;;; to start IR conversion. Then BODY is evaluated and the value of -;;; the last form is returned. -(defmacro with-component-compilation (&body body) - `(multiple-value-bind (*component* *bblock*) - (make-empty-component) - ,@body)) +;;; Create a new fresh empty basic block in the current component. +(defun make-empty-block () + (let ((entry (make-block-entry)) + (exit (make-block-exit))) + (link-nodes entry exit) + (let ((block (make-block :entry entry :exit exit :component *component*))) + (push block (component-blocks *component*)) + block))) + +;;; Create a new component with an empty basic block, ready to start +;;; conversion to IR. It returns the component and the basic block as +;;; multiple values. +(defun make-empty-component (&optional name) + (let ((*component* (make-component :name name))) + (let ((entry (make-component-entry :component *component*)) + (exit (make-component-exit :component *component*)) + (block (make-empty-block))) + (push entry (component-blocks *component*)) + (push exit (component-blocks *component*)) + (setf (block-succ entry) (list block) + (block-pred exit) (list block) + (block-succ block) (list exit) + (block-pred block) (list entry) + (component-entry *component*) entry + (component-exit *component*) exit) + (values *component* block)))) + +;;; A few consistency checks in the IR useful for catching bugs. +(defun check-ir-consistency (&optional (component *component*)) + (with-simple-restart (continue "Continue execution") + (dolist (block (component-blocks component)) + (dolist (succ (block-succ block)) + (unless (find block (block-pred succ)) + (error "The block `~S' does not belong to the predecessors list of the its successor `~S'" + block succ)) + (unless (or (boundary-block-p succ) (find succ (component-blocks component))) + (error "Block `~S' is reachable from its predecessor `~S' but it is not in the component `~S'" + succ block component))) + (dolist (pred (block-pred block)) + (unless (find block (block-succ pred)) + (error "The block `~S' does not belong to the successors' list of its predecessor `~S'" + block pred)) + (unless (or (boundary-block-p pred) (find pred (component-blocks component))) + (error "Block `~S' is reachable from its sucessor `~S' but it is not in the component `~S'" + pred block component)))))) + +;;; Prepare a new component with a current empty block ready to start +;;; IR conversion bound in the current cursor. BODY is evaluated and +;;; the value of the last form is returned. +(defmacro with-component-compilation ((&optional name) &body body) + (with-gensyms (block) + `(multiple-value-bind (*component* ,block) + (make-empty-component ,name) + (let ((*cursor* (cursor :block ,block))) + ,@body)))) + +;;; Call function for each reachable block in component in +;;; post-order. The consequences are unspecified if a block is +;;; FUNCTION modifies a block which has not been processed yet. +(defun map-postorder-blocks (function component) + (let ((seen nil)) + (labels ((compute-from (block) + (unless (find block seen) + (push block seen) + (dolist (successor (block-succ block)) + (unless (component-exit-p block) + (compute-from successor))) + (funcall function block)))) + (compute-from (component-entry component)) + nil))) + +;;; Change all the predecessors of BLOCK to precede NEW-BLOCK +;;; instead. As consequence, BLOCK becomes unreachable. +(defun replace-block (block new-block) + (let ((predecessors (block-pred block))) + (setf (block-pred block) nil) + (dolist (pred predecessors) + (pushnew pred (block-pred new-block)) + (setf (block-succ pred) (substitute new-block block (block-succ pred))) + (unless (component-entry-p pred) + (let ((last-node (node-prev (block-exit pred)))) + (when (conditional-p last-node) + (macrolet ((replacef (place) + `(setf ,place (if (eq block ,place) new-block ,place)))) + (replacef (conditional-consequent last-node)) + (replacef (conditional-alternative last-node))))))))) + +(defun delete-block (block) + (when (boundary-block-p block) + (error "Cannot delete entry or exit basic blocks.")) + (unless (null (cdr (block-succ block))) + (error "Cannot delete a basic block with multiple successors.")) + ;; If the block has not successors, then it is already deleted. So + ;; just skip it. + (when (block-succ block) + (let ((successor (unlist (block-succ block)))) + (replace-block block successor) + ;; At this point, block is unreachable, however we could have + ;; backreferences to it from its successors. Let's get rid of + ;; them. + (setf (block-pred successor) (remove block (block-pred successor))) + (setf (block-succ block) nil)))) + + +;;;; Cursors +;;;; +;;;; A cursor is a point between two nodes in some basic block in the +;;;; IR representation where manipulations can take place, similarly +;;;; to the cursors in text editing. +;;;; +;;;; Cursors cannot point to special component's entry and exit basic +;;;; blocks or after a conditional node. Conveniently, the `cursor' +;;;; function will signal an error if the cursor is not positioned +;;;; correctly, so the rest of the code does not need to check once +;;;; and again. + +(defstruct cursor + block next) + +;;; The current cursor. It is the default cursor for many functions +;;; which work on cursors. +(defvar *cursor*) + +;;; Return the current basic block. It is to say, the basic block +;;; where the current cursor is pointint. +(defun current-block () + (cursor-block *cursor*)) + +;;; Create a cursor which points to the basic block BLOCK. If omitted, +;;; then the current block is used. +;;; +;;; The keywords AFTER and BEFORE specify the cursor will point after (or +;;; before) that node respectively. If none is specified, the cursor is +;;; created before the exit node in BLOCK. An error is signaled if both +;;; keywords are specified inconsistently, or if the nodes do not belong +;;; to BLOCK. +;;; +;;; AFTER and BEFORE could also be the special values :ENTRY and :EXIT, +;;; which stand for the entry and exit nodes of the block respectively. +(defun cursor (&key (block (current-block)) + (before nil before-p) + (after nil after-p)) + (when (boundary-block-p block) + (error "Invalid cursor on special entry/exit basic block.")) + ;; Handle special values :ENTRY and :EXIT. + (flet ((node-designator (x) + (case x + (:entry (block-entry block)) + (:exit (block-exit block)) + (t x)))) + (setq before (node-designator before)) + (setq after (node-designator after))) + (let* ((next (or before (and after (node-next after)) (block-exit block))) + (cursor (make-cursor :block block :next next))) + (flet ((out-of-range-cursor () + (error "Out of range cursor.")) + (ambiguous-cursor () + (error "Ambiguous cursor specified between two non-adjacent nodes."))) + (when (conditional-p (node-prev next)) + (error "Invalid cursor after conditional node.")) + (when (or (null next) (block-entry-p next)) + (out-of-range-cursor)) + (when (and before-p after-p (not (eq after before))) + (ambiguous-cursor)) + (do-nodes-backward (node block (out-of-range-cursor) :include-sentinel-p t) + (when (eq next node) (return)))) + cursor)) + +;;; Accept a cursor specification just as described in `cursor' +;;; describing a position in the IR and modify destructively the +;;; current cursor to point there. +(defun set-cursor (&rest cursor-spec) + (let ((newcursor (apply #'cursor cursor-spec))) + (setf (cursor-block *cursor*) (cursor-block newcursor)) + (setf (cursor-next *cursor*) (cursor-next newcursor)) + *cursor*)) + +;;; Insert NODE at cursor. +(defun insert-node (node &optional (cursor *cursor*)) + (link-nodes (node-prev (cursor-next cursor)) node) + (link-nodes node (cursor-next cursor)) + t) + +;;; Split the block at CURSOR. The cursor will point to the end of the +;;; first basic block. Return the three basic blocks as multiple +;;; values. +(defun split-block (&optional (cursor *cursor*)) + ;; ==> -- + (let* ((block (cursor-block cursor)) + (newexit (make-block-exit)) + (newentry (make-block-entry)) + (exit (block-exit block)) + (newblock (make-block :entry newentry + :exit exit + :pred (list block) + :succ (block-succ block) + :component *component*))) + (insert-node newexit) + (insert-node newentry) + (setf (node-next newexit) nil) + (setf (node-prev newentry) nil) + (setf (block-exit block) newexit) + (setf (block-succ block) (list newblock)) + (dolist (succ (block-succ newblock)) + (setf (block-pred succ) (substitute newblock block (block-pred succ)))) + (set-cursor :block block :before newexit) + (push newblock (component-blocks *component*)) + newblock)) + +;;; Split the block at CURSOR if it is in the middle of it. The cursor +;;; will point to the end of the first basic block. Return the three +;;; basic blocks as multiple values. +(defun maybe-split-block (&optional (cursor *cursor*)) + ;; If we are converting IR into the end of the basic block, it's + ;; fine, we don't need to do anything. + (unless (block-exit-p (cursor-next cursor)) + (split-block cursor))) + + +;;;; Lexical environment +;;;; +;;;; It keeps an association between names and the IR entities. It is +;;;; used to guide the translation from the Lisp source code to the +;;;; intermediate representation. -;;; The Lexical environment is compromised of a list of bindings, -;;; which associates information to symbols. It tracks lexical -;;; variables, tags, local declarations and many other information in -;;; order to guide the compiler. (defstruct binding - name type value declarations) + name namespace type value) + +(defvar *lexenv* nil) -(defstruct lexenv - bindings) +(defun find-binding (name namespace) + (find-if (lambda (b) + (and (eq (binding-name b) name) + (eq (binding-namespace b) namespace))) + *lexenv*)) + +(defun push-binding (name namespace value &optional type) + (push (make-binding :name name + :namespace namespace + :type type + :value value) + *lexenv*)) + + +;;;; IR Translation +;;;; +;;;; This code covers the translation from Lisp source code to the +;;;; intermediate representation. The main entry point function to do +;;;; that is the `ir-convert' function, which dispatches to IR +;;;; translators. This function ss intended to do the initial +;;;; conversion as well as insert new IR code during optimizations. ;;; A alist of IR translator functions. (defvar *ir-translator* nil) -;;; Define a IR translator for NAME. -(defmacro define-ir-translator (name (next result) lambda-list &body body) - (let ((fname (intern (format nil "IR-CONVERT-~a" (string name)))) - (form (gensym))) - (check-type name symbol) - (check-type next symbol) - `(progn - (defun ,fname (,form ,next ,result) - (destructuring-bind ,lambda-list ,form - ,@body)) - (push (cons ',name #',fname) *ir-translator*)))) - - -(defun ir-convert-constant (form next result) - (let* ((leaf (make-constant :value form)) - (ref (make-ref :leaf leaf :lvar result))) - (insert-node-before next ref))) - -(define-ir-translator quote (next result) (form) - (ir-convert-constant form next result)) - -(define-ir-translator setq (next result) (variable value) - (let ((var (make-var :name variable)) - (value-lvar (make-lvar))) - (ir-convert value next value-lvar) - (let ((assign (make-assignment :variable var :value value-lvar :lvar result))) - (insert-node-before next assign)))) - -;;; Split BLOCK in two basic blocks. BLOCK ends just before BLOCK. A -;;; new block is created starting at NODE until the exit of the -;;; original block. The successors of BLOCK become the successors of -;;; the new block. -(defun split-basic-block-before (node block) - (let ((exit (node-prev (bblock-exit block))) - (newexit (make-block-exit)) - (newentry (make-block-entry)) - newblock) - (insert-node-before node newentry) - (insert-node-before newentry newexit) - (setf (node-next newexit) nil) - (setf (node-prev newentry) nil) - (setf (bblock-exit block) newexit) - (setq newblock (make-bblock :entry newentry :exit exit)) - (rotatef (bblock-succ block) (bblock-succ newblock)) - newblock)) - -(define-ir-translator if (next result) (test then &optional else) +;;; Define a IR translator for NAME. LAMBDA-LIST is used to +;;; destructure the arguments of the form. Calling the local function +;;; `result-lvar' you can get the LVAR where the compilation of the +;;; expression should store the result of the evaluation. +;;; +;;; The cursor is granted to be at the end of a basic block with a +;;; unique successor, and so it should be when the translator returns. +(defmacro define-ir-translator (name lambda-list &body body) + (check-type name symbol) + (let ((fname (intern (format nil "IR-CONVERT-~a" (string name))))) + (with-gensyms (result form) + `(progn + (defun ,fname (,form ,result) + (flet ((result-lvar () ,result)) + (declare (ignorable (function result-lvar))) + (destructuring-bind ,lambda-list ,form + ,@body))) + (push (cons ',name #',fname) *ir-translator*))))) + +;;; Return the unique successor of the current block. If it is not +;;; unique signal an error. +(defun next-block () + (unlist (block-succ (current-block)))) + +;;; Set the next block of the current one. +(defun (setf next-block) (new-value) + (let ((block (current-block))) + (dolist (succ (block-succ block)) + (setf (block-pred succ) (remove block (block-pred succ)))) + (setf (block-succ block) (list new-value)) + (push block (block-pred new-value)) + new-value)) + +(defun ir-convert-constant (form result) + (let* ((leaf (make-constant :value form))) + (insert-node (make-ref :leaf leaf :lvar result)))) + +(define-ir-translator quote (form) + (ir-convert-constant form (result-lvar))) + +(define-ir-translator setq (variable value) + (let ((b (find-binding variable 'variable))) + (cond + (b + (let ((var (make-var :name variable)) + (value-lvar (make-lvar))) + (ir-convert value value-lvar) + (let ((assign (make-assignment :variable var :value value-lvar :lvar (result-lvar)))) + (insert-node assign)))) + (t + (ir-convert `(set ',variable ,value) (result-lvar)))))) + +(define-ir-translator progn (&body body) + (mapc #'ir-convert (butlast body)) + (ir-convert (car (last body)) (result-lvar))) + +(define-ir-translator if (test then &optional else) + ;; It is the schema of how the basic blocks will look like + ;; + ;; / ..then.. \ + ;; --< >-- <|> -- + ;; \ ..else.. / + ;; + ;; Note that is important to leave the cursor in an empty basic + ;; block, as zzz could be the exit basic block of the component, + ;; which is an invalid position for a cursor. (let ((test-lvar (make-lvar)) - (then-block (make-empty-bblock)) - (else-block (make-empty-bblock)) - (join-block (make-empty-bblock))) - ;; Convert the test into the current basic block. - (ir-convert test next test-lvar) - (let ((cond (make-conditional :test test-lvar :consequent then-block :alternative else-block))) - (insert-node-before next cond)) - ;; If we are not at the end of the content block, split it. - (unless (block-exit-p next) - (setq join-block (split-basic-block-before next *bblock*))) - (dolist (succ (bblock-succ *bblock*)) - (setf (bblock-pred succ) (substitute join-block *bblock* (bblock-pred succ)))) - (psetf (bblock-succ *bblock*) (list then-block else-block) - (bblock-pred else-block) (list *bblock*) - (bblock-pred then-block) (list *bblock*) - (bblock-succ then-block) (list join-block) - (bblock-succ else-block) (list join-block) - (bblock-pred join-block) (list then-block else-block) - (bblock-succ join-block) (bblock-succ *bblock*)) - (let ((*bblock* then-block)) - (ir-convert then (bblock-exit then-block) result)) - (let ((*bblock* else-block)) - (ir-convert else (bblock-exit else-block) result)) - (setq *bblock* join-block))) - - -(defun ir-convert-var (form next result) - (let* ((leaf (make-var :name form)) - (ref (make-ref :leaf leaf :lvar result))) - (insert-node-before next ref))) - -(defun ir-convert-call (form next result) + (then-block (make-empty-block)) + (else-block (make-empty-block)) + (join-block (make-empty-block))) + (ir-convert test test-lvar) + (insert-node (make-conditional :test test-lvar :consequent then-block :alternative else-block)) + (let* ((block (current-block)) + (tail-block (next-block))) + ;; Link together the different created basic blocks. + (setf (block-succ block) (list else-block then-block) + (block-pred else-block) (list block) + (block-pred then-block) (list block) + (block-succ then-block) (list join-block) + (block-succ else-block) (list join-block) + (block-pred join-block) (list else-block then-block) + (block-succ join-block) (list tail-block) + (block-pred tail-block) (substitute join-block block (block-pred tail-block)))) + ;; Convert he consequent and alternative forms and update cursor. + (ir-convert then (result-lvar) (cursor :block then-block)) + (ir-convert else (result-lvar) (cursor :block else-block)) + (set-cursor :block join-block))) + +(define-ir-translator block (name &body body) + (let ((new (split-block))) + (push-binding name 'block (cons (next-block) (result-lvar))) + (ir-convert `(progn ,@body) (result-lvar)) + (set-cursor :block new))) + +(define-ir-translator return-from (name &optional value) + (let ((binding + (or (find-binding name 'block) + (error "Tried to return from unknown block `~S' name" name)))) + (destructuring-bind (jump-block . lvar) + (binding-value binding) + (ir-convert value lvar) + (setf (next-block) jump-block) + ;; This block is really unreachable, even if the following code + ;; is labelled in a tagbody, as tagbody will create a new block + ;; for each label. However, we have to leave the cursor + ;; somewhere to convert new input. + (let ((dummy (make-empty-block))) + (set-cursor :block dummy))))) + +(define-ir-translator tagbody (&rest statements) + (flet ((go-tag-p (x) + (or (integerp x) (symbolp x)))) + (let* ((tags (remove-if-not #'go-tag-p statements)) + (tag-blocks nil)) + ;; Create a chain of basic blocks for the tags, recording each + ;; block in a alist in TAG-BLOCKS. + (let ((*cursor* *cursor*)) + (dolist (tag tags) + (setq *cursor* (cursor :block (split-block))) + (push-binding tag 'tag (current-block)) + (if (assoc tag tag-blocks) + (error "Duplicated tag `~S' in tagbody." tag) + (push (cons tag (current-block)) tag-blocks)))) + ;; Convert the statements into the correct block. + (dolist (stmt statements) + (if (go-tag-p stmt) + (set-cursor :block (cdr (assoc stmt tag-blocks))) + (ir-convert stmt)))))) + +(define-ir-translator go (label) + (let ((tag-binding + (or (find-binding label 'tag) + (error "Unable to jump to the label `~S'" label)))) + (setf (next-block) (binding-value tag-binding)) + ;; Unreachable block. + (let ((dummy (make-empty-block))) + (set-cursor :block dummy)))) + + +(defun ir-convert-functoid (result name arguments &rest body) + (let ((component) + (return-lvar (make-lvar))) + (with-component-compilation (name) + (ir-convert `(progn ,@body) return-lvar) + (ir-normalize) + (setq component *component*)) + (let ((functional + (make-functional + :name name + :arguments arguments + :component component + :return-lvar return-lvar))) + (push functional (component-functions *component*)) + (insert-node (make-ref :leaf functional :lvar result))))) + +(define-ir-translator function (name) + (if (atom name) + (ir-convert `(symbol-function ,name) (result-lvar)) + (ecase (car name) + ((lambda named-lambda) + (let ((desc (cdr name))) + (when (eq 'lambda (car name)) + (push nil desc)) + (apply #'ir-convert-functoid (result-lvar) desc))) + (setf)))) + +(defun ir-convert-var (form result) + (let ((binds (find-binding form 'variable))) + (if binds + (insert-node (make-ref :leaf (binding-value binds) :lvar result)) + (ir-convert `(symbol-value ',form) result)))) + +(defun ir-convert-call (form result) (destructuring-bind (function &rest args) form (let ((func-lvar (make-lvar)) (args-lvars nil)) - (when (symbolp function) - (ir-convert `(%symbol-function ,function) next func-lvar)) + ;; Argument list (dolist (arg args) - (push (make-lvar) args-lvars) - (ir-convert arg next (first args-lvars))) + (let ((arg-lvar (make-lvar))) + (push arg-lvar args-lvars) + (ir-convert arg arg-lvar))) (setq args-lvars (reverse args-lvars)) - (let ((call (make-call :function func-lvar :arguments args-lvars :lvar result))) - (insert-node-before next call))))) - - -;;; Convert the Lisp expression FORM into IR before the NEXT node, it -;;; may create new basic blocks into the current component. During the -;;; initial IR conversion, The NEXT node is the EXIT node of the -;;; current basic block, but optimizations could call it to insert IR -;;; code somewhere. -(defun ir-convert (form next result) - (when (block-entry-p next) - (error "Can't insert IR before the entry node.")) - (cond - ((atom form) - (cond - ((symbolp form) - (ir-convert-var form next result)) - (t - (ir-convert-constant form next result)))) - (t - (destructuring-bind (op &rest args) form - (let ((translator (cdr (assoc op *ir-translator*)))) - (if translator - (funcall translator args next result) - (ir-convert-call form next result)))))) - (values)) + ;; Funcall + (if (find-primitive function) + (insert-node (make-primitive-call + :function (find-primitive function) + :arguments args-lvars + :lvar result)) + (progn + (ir-convert `(symbol-function ,function) func-lvar) + (insert-node (make-call :function func-lvar + :arguments args-lvars + :lvar result))))))) + +;;; Convert the Lisp expression FORM, it may create new basic +;;; blocks. RESULT is the lvar representing the result of the +;;; computation or null if the value should be discarded. The IR is +;;; inserted at *CURSOR*. +(defun ir-convert (form &optional result (*cursor* *cursor*)) + ;; Rebinding the lexical environment here we make sure that the + ;; lexical information introduced by FORM is just available for + ;; subforms. + (let ((*lexenv* *lexenv*)) + ;; Possibly create additional blocks in order to make sure the + ;; cursor is at end the end of a basic block. + (maybe-split-block) + (cond + ((atom form) + (cond + ((symbolp form) + (ir-convert-var form result)) + (t + (ir-convert-constant form result)))) + (t + (destructuring-bind (op &rest args) form + (let ((translator (cdr (assoc op *ir-translator*)))) + (if translator + (funcall translator args result) + (ir-convert-call form result)))))) + (values))) + + +;;;; IR Normalization +;;;; +;;;; IR as generated by `ir-convert' or after some transformations is +;;;; not appropiated. Here, we remove unreachable and empty blocks and +;;;; coallesce blocks when it is possible. + +;;; Try to coalesce BLOCK with the successor if it is unique and block +;;; is its unique predecessor. +(defun maybe-coalesce-block (block) + (when (and (singlep (block-succ block)) (not (component-entry-p block))) + (let ((succ (first (block-succ block)))) + (when (and (not (component-exit-p succ)) (singlep (block-pred succ))) + (link-nodes (node-prev (block-exit block)) + (node-next (block-entry succ))) + (setf (block-exit block) (block-exit succ)) + (setf (block-succ block) (block-succ succ)) + (dolist (next (block-succ succ)) + (setf (block-pred next) (substitute block succ (block-pred next)))) + (setf (block-succ succ) nil + (block-pred succ) nil) + t)))) + +;;; Normalize a component. This function must be called after a batch +;;; of modifications to the flowgraph of the component to make sure it +;;; is a valid input for the possible optimizations and the backend. +(defun ir-normalize (&optional (component *component*)) + ;; Initialize blocks as unreachables and remove empty basic blocks. + (dolist (block (component-blocks component)) + (setf (block-data block) 'unreachable)) + ;; Coalesce and mark blocks as reachable. + (map-postorder-blocks #'maybe-coalesce-block component) + (map-postorder-blocks (lambda (block) + (setf (block-data block) 'reachable)) + component) + (let ((block-list nil)) + (dolist (block (component-blocks component)) + (cond + ;; If the block is unreachable, but it is predeces a reachable + ;; one, then break the link between them. So we discard it + ;; from the flowgraph. + ((eq (block-data block) 'unreachable) + (dolist (succ (block-succ block)) + (when (eq (block-data succ) 'reachable) + (setf (block-pred succ) (remove block (block-pred succ))))) + (setf (block-succ block) nil)) + ;; Delete empty blocks + ((and (empty-block-p block) + (not (boundary-block-p block)) + ;; We cannot delete a block if it is its own successor, + ;; even thought it is empty. + (not (member block (block-succ block)))) + (delete-block block)) + ;; The rest of blocks remain in the component. + (t + (push block block-list)))) + (setf (component-blocks component) block-list)) + (check-ir-consistency)) + + +;;;; IR Analysis +;;;; +;;;; Once IR conversion has been finished. We do some analysis of the +;;;; component to produce information which is useful for both +;;;; optimizations and code generation. Indeed, we provide some +;;;; abstractions to use this information. + +(defun compute-reverse-post-order (component) + (let ((output nil) + (index (length (component-blocks component)))) + (flet ((add-block-to-list (block) + (push block output) + (setf (block-order block) (decf index)))) + (map-postorder-blocks #'add-block-to-list component)) + (setf (component-reverse-post-order-p component) t) + (setf (component-blocks component) output))) + + +(defmacro do-blocks% ((block component &optional reverse ends result) &body body) + (with-gensyms (g!component g!blocks) + `(let* ((,g!component ,component) + (,g!blocks ,(if reverse + `(reverse (component-blocks ,g!component)) + `(component-blocks ,g!component)))) + ;; Do we have the information available? + (unless (component-reverse-post-order-p ,g!component) + (error "Reverse post order was not computed yet.")) + (dolist (,block ,(if (member ends '(:head :both)) + `,g!blocks + `(cdr ,g!blocks)) + ,result) + ,@(if (member ends '(:tail :both)) + nil + `((if (component-exit-p ,block) (return)))) + ,@body)))) + +;;; Iterate across blocks in COMPONENT in reverse post order. +(defmacro do-blocks-forward ((block component &optional ends result) &body body) + `(do-blocks% (,block ,component nil ,ends ,result) + ,@body)) -(defun compute-dfo (component) - (or (component-blocks component) - (let ((output nil)) - (labels ((compute-dfo-from (block) - (unless (or (component-exit-p block) (find block output)) - (dolist (successor (bblock-succ block)) - (unless (component-exit-p block) - (compute-dfo-from successor))) - (push block output)))) - (compute-dfo-from (unlist (bblock-succ (component-entry component)))) - (setf (component-blocks component) output))))) - -(defmacro do-blocks ((bblock component &optional result) &body body) - `(dolist (,bblock (compute-dfo ,component) ,result) +;;; Iterate across blocks in COMPONENT in reverse post order. +(defmacro do-blocks-backward ((block component &optional ends result) &body body) + `(do-blocks% (,block (reverse ,component) t ,ends ,result) ,@body)) -;;; IR Debugging + +(defun compute-dominators (component) + ;; Initialize the dominators of the entry to the component to be + ;; empty and the power set of the set of blocks for proper basic + ;; blocks in the component. + (let ((n (length (component-blocks component)))) + ;; The component entry special block has not predecessors in the + ;; set of (proper) basic blocks. + (setf (block-dominators% (component-entry component)) + (make-array n :element-type 'bit :initial-element 0)) + (setf (aref (block-dominators% (component-entry component)) 0) 1) + (do-blocks-forward (block component :tail) + (setf (block-dominators% block) (make-array n :element-type 'bit :initial-element 1)))) + ;; Iterate across the blocks in the component removing non domintors + ;; until it reaches a fixed point. + (do ((i 1 1) + (changes t)) + ((not changes)) + (setf changes nil) + (do-blocks-forward (block component :tail) + ;; We compute the new set of dominators for this iteration in a + ;; fresh set NEW-DOMINATORS. So we do NOT modify the old + ;; dominators. It is important because the block could predeces + ;; itself. Indeed, it allows us to check if the set of + ;; dominators changed. + (let* ((predecessors (block-pred block)) + (new-dominators (copy-seq (block-dominators% (first predecessors))))) + (dolist (pred (rest predecessors)) + (bit-and new-dominators (block-dominators% pred) t)) + (setf (aref new-dominators i) 1) + (unless changes + (setq changes (not (equal (block-dominators% block) new-dominators)))) + (setf (block-dominators% block) new-dominators) + (incf i))))) + +;;; Return T if BLOCK1 dominates BLOCK2, else return NIL. +(defun dominate-p (block1 block2) + (let ((order (block-order block1))) + (= 1 (aref (block-dominators% block2) order)))) + +;;; Check if BLOCK is a loop header. It is to say if it dominates one +;;; of its predecessors. +(defun loop-header-p (block) + (some (lambda (pred) (dominate-p block pred)) + (block-pred block))) + +;;; This function duplicates the block in component for each input +;;; edge. A technique useful to make a general flowgraph reducible. +(defun node-splitting (block) + (let ((predecessors (block-pred block))) + (when predecessors + (setf (block-pred block) (list (car predecessors))) + (dolist (pred (cdr predecessors)) + (let ((newblock (copy-basic-block block))) + (setf (block-id newblock) (generate-id 'basic-block)) + (push newblock (component-blocks (block-component block))) + (setf (block-pred newblock) (list pred)) + (setf (block-succ pred) (substitute newblock block (block-succ pred)))))))) + + + +;;;; IR Debugging +;;;; +;;;; This section provides a function `/print' which write a textual +;;;; representation of a component to the standard output. Also, a +;;;; `/ir' macro is provided, which takes a form, convert it to IR and +;;;; then print the component as above. They are useful commands if +;;;; you are hacking the front-end of the compiler. +;;;; + +(defun format-block-name (block) + (cond + ((eq block (unlist (block-succ (component-entry (block-component block))))) + (format nil "ENTRY-~a" (component-id (block-component block)))) + ((component-exit-p block) + (format nil "EXIT-~a" (component-id (block-component block)))) + (t + (format nil "BLOCK ~a" (block-id block))))) + (defun print-node (node) (when (node-lvar node) - (format t "~a = " (lvar-id (node-lvar node)))) + (format t "$~a = " (lvar-id (node-lvar node)))) (cond ((ref-p node) (let ((leaf (ref-leaf node))) @@ -379,71 +904,103 @@ ((var-p leaf) (format t "~a" (var-name leaf))) ((constant-p leaf) - (format t "'~a" (constant-value leaf))) + (format t "'~s" (constant-value leaf))) ((functional-p leaf) - (format t "#" - (functional-name leaf) - (functional-entry-point leaf)))))) + (format t "#" (functional-name leaf)))))) ((assignment-p node) - (format t "set ~a ~a" + (format t "set ~a $~a" (var-name (assignment-variable node)) (lvar-id (assignment-value node)))) + ((primitive-call-p node) + (format t "primitive ~a" (primitive-name (primitive-call-function node))) + (dolist (arg (primitive-call-arguments node)) + (format t " $~a" (lvar-id arg)))) ((call-p node) - (format t "call ~a" (lvar-id (call-function node))) + (format t "call $~a" (lvar-id (call-function node))) (dolist (arg (call-arguments node)) - (format t " ~a" (lvar-id arg)))) + (format t " $~a" (lvar-id arg)))) ((conditional-p node) - (format t "if ~a ~a ~a" + (format t "if $~a then ~a else ~a~%" (lvar-id (conditional-test node)) - (bblock-id (conditional-consequent node)) - (bblock-id (conditional-alternative node)))) + (format-block-name (conditional-consequent node)) + (format-block-name (conditional-alternative node)))) (t (error "`print-node' does not support printing ~S as a node." node))) (terpri)) -(defun print-bblock (block) - (flet ((bblock-name (block) - (cond - ((and (singlep (bblock-pred block)) - (component-entry-p (bblock-pred block))) - "ENTRY") - ((component-exit-p block) - "EXIT") - (t (string (bblock-id block)))))) - (format t "BLOCK ~a:~%" (bblock-name block)) - (do-nodes (node block) - (print-node node)) - (when (singlep (bblock-succ block)) - (format t "GO ~a~%" (bblock-name (first (bblock-succ block))))) - (terpri))) - -(defun print-component (component &optional (stream *standard-output*)) +(defun print-block (block) + (write-string (format-block-name block)) + (if (loop-header-p block) + (write-line " [LOOP_HEADER]") + (terpri)) + (do-nodes (node block) + (print-node node)) + (when (singlep (block-succ block)) + (format t "GO ~a~%~%" (format-block-name (unlist (block-succ block)))))) + +(defun /print (component &optional (stream *standard-output*)) + (format t ";;; COMPONENT ~a (~a) ~%~%" (component-name component) (component-id component)) (let ((*standard-output* stream)) - (do-blocks (block component) - (print-bblock block)))) - -(defun check-ir-consistency (&optional (component *component*)) - (with-simple-restart (continue "Continue execution") - (do-blocks (block component) - (dolist (succ (bblock-succ block)) - (unless (find block (bblock-pred succ)) - (error "The block `~S' does not belong to the predecessors list of the its successor `~S'" - (bblock-id block) - (bblock-id succ)))) - (dolist (pred (bblock-pred block)) - (unless (find block (bblock-succ pred)) - (error "The block `~S' does not belong to the successors' list of its predecessor `~S'" - (bblock-id block) - (bblock-id pred))))))) + (do-blocks-forward (block component) + (print-block block))) + (format t ";;; END COMPONENT ~a ~%~%" (component-name component)) + (let ((*standard-output* stream)) + (dolist (func (component-functions component)) + (/print (functional-component func))))) ;;; Translate FORM into IR and print a textual repreresentation of the ;;; component. -(defun describe-ir (form) - (with-component-compilation - (ir-convert form (bblock-exit *bblock*) (make-lvar :id "$out")) - (finish-component) - (check-ir-consistency) - (print-component *component*))) +(defun convert-toplevel-and-print (form) + (let ((*counter-alist* nil)) + (with-component-compilation ('toplevel) + (ir-convert form (make-lvar :id "out")) + (ir-normalize) + (compute-reverse-post-order *component*) + (compute-dominators *component*) + (/print *component*) + *component*))) + +(defmacro /ir (form) + `(convert-toplevel-and-print ',form)) + + + +;;;; Primitives +;;;; +;;;; Primitive functions are a set of functions provided by the +;;;; compiler. They cannot usually be written in terms of other +;;;; functions. When the compiler tries to compile a function call, it +;;;; looks for a primitive function firstly, and if it is found and +;;;; the declarations allow it, a primitive call is inserted in the +;;;; IR. The back-end of the compiler knows how to compile primitive +;;;; calls. +;;;; + +(defvar *primitive-function-table* nil) + +(defstruct primitive + name) + +(defmacro define-primitive (name args &body body) + (declare (ignore args body)) + `(push (make-primitive :name ',name) + *primitive-function-table*)) + +(defun find-primitive (name) + (find name *primitive-function-table* :key #'primitive-name)) + +(define-primitive symbol-function (symbol)) +(define-primitive symbol-value (symbol)) +(define-primitive set (symbol value)) +(define-primitive fset (symbol value)) + +(define-primitive + (&rest numbers)) +(define-primitive - (number &rest other-numbers)) + +(define-primitive consp (x)) +(define-primitive cons (x y)) +(define-primitive car (x)) +(define-primitive cdr (x)) ;;; compiler.lisp ends here