From: David Vázquez Date: Fri, 10 May 2013 18:16:59 +0000 (+0100) Subject: Beginnings of the new compiler X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=b1e6df4cc0e0c5162cc1c85a7977ae6f669b7a5f;p=jscl.git Beginnings of the new compiler WIP: Intermediate representation (IR) data structures and conversion --- diff --git a/experimental/compiler.lisp b/experimental/compiler.lisp new file mode 100644 index 0000000..df40867 --- /dev/null +++ b/experimental/compiler.lisp @@ -0,0 +1,449 @@ +;;; compiler.lisp --- + +;; copyright (C) 2013 David Vazquez + +;; JSCL is free software: you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation, either version 3 of the +;; License, or (at your option) any later version. +;; +;; JSCL is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with JSCL. If not, see . + +(defpackage :jscl + (:use :cl)) + +(in-package :jscl) + +;;;; Utils + +(defun singlep (x) + (and (consp x) (null (cdr x)))) + +(defun unlist (x) + (assert (singlep x)) + (first x)) + +;;;; Intermediate representation +;;;; +;;;; This intermediate representation (IR) is a simplified version of +;;;; first intermediate representation what you will find if you have +;;;; a you have the source code of SBCL. Some terminology is also +;;;; used, but other is changed, so be careful if you assume you know +;;;; what it is because you know the name. +;;;; + +;;; A leaf stands for leaf in the tree of computations. Lexical +;;; variables, constants and literal functions are leafs. Leafs are +;;; not nodes itself, a `ref' node will stands for putting a leaf into +;;; a lvar, which can be used in computations. +(defstruct leaf) + +;;; Reference a lexical variable. Special variables have not a +;;; representation in IR. They are handled via the primitive functions +;;; `%symbol-function' and `%symbol-value'. +(defstruct (var (:include leaf)) + ;; Name is the symbol used to identify this variable in the lexical + ;; environment. + name) + +;;; A constant value, mostly from a quoted form, but maybe introduced +;;; in some pass of the compiler. +(defstruct (constant (:include leaf)) + value) + +;;; A literal function. Why do we use `functional' as name? Well, +;;; function is taken, isn't it? +(defstruct (functional (:include leaf)) + ;; The symbol which names this function in the source code. + name + ;; A list of lvars which are bound to the argument values in a call + ;; to this function. + arguments + ;; LVAR which contains the return values of the function. + return-lvar + ;; The basic block which contain the code which be executed firstly + ;; when you call this function. + entry-point) + + +;;; Used to transfer data between the computations in the intermediate +;;; representation. Each node is valued into a LVar. And nodes which +;;; use resulting values from other nodes use such LVar. +(defstruct lvar + (id (gensym "$"))) + +;;; A computation node. It represents a simple computation in the +;;; intermediate representation. Nodes are grouped in basic blocks, +;;; which are delimited by the special nodes `block-entry' and +;;; `block-exit'. Resulting value of the node is stored in LVAR, which it +;;; could be null if the value is discarded. +(defstruct node + next + prev + lvar) + +;;; Sentinel nodes. No computation really, but they make easier to +;;; manipulate the doubly linked-list. +(defstruct (block-entry (:include node))) +(defstruct (block-exit (:include node))) + +;;; A reference to a leaf. +(defstruct (ref (:include node)) + leaf) + +;;; An assignation of the LVAR VALUE into the var VARIABLE. +(defstruct (assignment (:include node)) + variable + value) + +;;; Call the lvar FUNCTION with a list of lvars as ARGUMENTS. +(defstruct (call (:include node)) + function + arguments) + +;;; A conditional branch. If the LVAR is not NIL, then we will jump to +;;; the basic block CONSEQUENT, jumping to ALTERNATIVE otherwise. By +;;; definition, a conditional must appear at the end of a basic block. +(defstruct (conditional (:include node)) + test + consequent + alternative) + + +;;; BBlock stands for `basic block', which is a maximal sequence of +;;; nodes with an entry point and an exit. Basic blocks are organized +;;; as a control flow graph with some more information in omponents. +(defstruct bblock + (id (gensym "L")) + succ + pred + entry + exit) + +(defstruct (component-entry (:include bblock))) +(defstruct (component-exit (:include bblock))) + +(defun make-empty-bblock () + (let ((entry (make-block-entry)) + (exit (make-block-exit))) + (setf (node-next entry) exit + (node-prev exit) entry) + (make-bblock :entry entry :exit exit))) + +(defun empty-block-p (b) + (block-exit-p (node-next (bblock-entry b)))) + +(defmacro do-nodes ((node block &optional result) &body body) + (check-type node symbol) + `(do ((,node (node-next (bblock-entry ,block)) (node-next ,node))) + ((block-exit-p ,node) ,result) + ,@body)) + +;;; Link FROM and TO nodes together. FROM and TO must belong to the +;;; same basic block and appear in such order. The nodes between FROM +;;; and TO are discarded. +(defun link-nodes (from to) + (setf (node-next from) to + (node-prev to) from) + (values)) + +;;; Insert NODE before NEXT. +(defun insert-node-before (next node) + (link-nodes (node-prev next) node) + (link-nodes node next)) + + +;;; Components are connected pieces of the control flow graph with +;;; some additional information. Components have well-defined entry +;;; and exit nodes. They also track what basic blocks we have and +;;; other useful information. It is the toplevel organizational entity +;;; in the compiler. The IR translation result is accumulated into +;;; components incrementally. +(defstruct (component + #-jscl (:print-object print-component)) + entry + exit + blocks) + +;;; Create a new component, compromised of the sentinel nodes and a +;;; empty basic block, ready to start conversion to IR. It returnes +;;; the component and the basic block as multiple values. +(defun make-empty-component () + (let ((entry (make-component-entry)) + (bblock (make-empty-bblock)) + (exit (make-component-exit))) + (setf (bblock-succ entry) (list bblock) + (bblock-pred exit) (list bblock) + (bblock-succ bblock) (list exit) + (bblock-pred bblock) (list entry)) + (values (make-component :entry entry :exit exit) bblock))) + +;;; Delete an empty block. It is the same as a jump to an +;;; uncondiditonal jump. +(defun delete-empty-block (block) + (when (or (component-entry-p block) (component-exit-p block)) + (error "Cannot delete entry or exit basic blocks.")) + (unless (empty-block-p block) + (error "Block `~S' is not empty!" (bblock-id block))) + (assert (singlep (bblock-succ block))) + (let ((successor (first (bblock-succ block)))) + (dolist (pred (bblock-pred block)) + (setf (bblock-succ pred) + (substitute successor block (bblock-succ pred))) + (pushnew pred (bblock-pred successor))))) + +(defun finish-component (&optional (component *component*)) + (dolist (blk (bblock-pred (component-exit component))) + (when (empty-block-p blk) + (delete-empty-block blk)))) + +;;; IR Translation + +;;; The current component. We accumulate the results of the IR +;;; conversion in this component. +(defvar *component*) + +;;; The current block in the current component. IR conversion usually +;;; append nodes to this block. Branching instructions will modify +;;; this variable. +(defvar *bblock*) + +;;; Prepare a new component with a current empty content block ready +;;; to start IR conversion. Then BODY is evaluated and the value of +;;; the last form is returned. +(defmacro with-component-compilation (&body body) + `(multiple-value-bind (*component* *bblock*) + (make-empty-component) + ,@body)) + +;;; The Lexical environment is compromised of a list of bindings, +;;; which associates information to symbols. It tracks lexical +;;; variables, tags, local declarations and many other information in +;;; order to guide the compiler. +(defstruct binding + name type value declarations) + +(defstruct lexenv + bindings) + +;;; A alist of IR translator functions. +(defvar *ir-translator* nil) + +;;; Define a IR translator for NAME. +(defmacro define-ir-translator (name (next result) lambda-list &body body) + (let ((fname (intern (format nil "IR-CONVERT-~a" (string name)))) + (form (gensym))) + (check-type name symbol) + (check-type next symbol) + `(progn + (defun ,fname (,form ,next ,result) + (destructuring-bind ,lambda-list ,form + ,@body)) + (push (cons ',name #',fname) *ir-translator*)))) + + +(defun ir-convert-constant (form next result) + (let* ((leaf (make-constant :value form)) + (ref (make-ref :leaf leaf :lvar result))) + (insert-node-before next ref))) + +(define-ir-translator quote (next result) (form) + (ir-convert-constant form next result)) + +(define-ir-translator setq (next result) (variable value) + (let ((var (make-var :name variable)) + (value-lvar (make-lvar))) + (ir-convert value next value-lvar) + (let ((assign (make-assignment :variable var :value value-lvar :lvar result))) + (insert-node-before next assign)))) + +;;; Split BLOCK in two basic blocks. BLOCK ends just before BLOCK. A +;;; new block is created starting at NODE until the exit of the +;;; original block. The successors of BLOCK become the successors of +;;; the new block. +(defun split-basic-block-before (node block) + (let ((exit (node-prev (bblock-exit block))) + (newexit (make-block-exit)) + (newentry (make-block-entry)) + newblock) + (insert-node-before node newentry) + (insert-node-before newentry newexit) + (setf (node-next newexit) nil) + (setf (node-prev newentry) nil) + (setf (bblock-exit block) newexit) + (setq newblock (make-bblock :entry newentry :exit exit)) + (rotatef (bblock-succ block) (bblock-succ newblock)) + newblock)) + +(define-ir-translator if (next result) (test then &optional else) + (let ((test-lvar (make-lvar)) + (then-block (make-empty-bblock)) + (else-block (make-empty-bblock)) + (join-block (make-empty-bblock))) + ;; Convert the test into the current basic block. + (ir-convert test next test-lvar) + (let ((cond (make-conditional :test test-lvar :consequent then-block :alternative else-block))) + (insert-node-before next cond)) + ;; If we are not at the end of the content block, split it. + (unless (block-exit-p next) + (setq join-block (split-basic-block-before next *bblock*))) + (dolist (succ (bblock-succ *bblock*)) + (setf (bblock-pred succ) (substitute join-block *bblock* (bblock-pred succ)))) + (psetf (bblock-succ *bblock*) (list then-block else-block) + (bblock-pred else-block) (list *bblock*) + (bblock-pred then-block) (list *bblock*) + (bblock-succ then-block) (list join-block) + (bblock-succ else-block) (list join-block) + (bblock-pred join-block) (list then-block else-block) + (bblock-succ join-block) (bblock-succ *bblock*)) + (let ((*bblock* then-block)) + (ir-convert then (bblock-exit then-block) result)) + (let ((*bblock* else-block)) + (ir-convert else (bblock-exit else-block) result)) + (setq *bblock* join-block))) + + +(defun ir-convert-var (form next result) + (let* ((leaf (make-var :name form)) + (ref (make-ref :leaf leaf :lvar result))) + (insert-node-before next ref))) + +(defun ir-convert-call (form next result) + (destructuring-bind (function &rest args) form + (let ((func-lvar (make-lvar)) + (args-lvars nil)) + (when (symbolp function) + (ir-convert `(%symbol-function ,function) next func-lvar)) + (dolist (arg args) + (push (make-lvar) args-lvars) + (ir-convert arg next (first args-lvars))) + (setq args-lvars (reverse args-lvars)) + (let ((call (make-call :function func-lvar :arguments args-lvars :lvar result))) + (insert-node-before next call))))) + + +;;; Convert the Lisp expression FORM into IR before the NEXT node, it +;;; may create new basic blocks into the current component. During the +;;; initial IR conversion, The NEXT node is the EXIT node of the +;;; current basic block, but optimizations could call it to insert IR +;;; code somewhere. +(defun ir-convert (form next result) + (when (block-entry-p next) + (error "Can't insert IR before the entry node.")) + (cond + ((atom form) + (cond + ((symbolp form) + (ir-convert-var form next result)) + (t + (ir-convert-constant form next result)))) + (t + (destructuring-bind (op &rest args) form + (let ((translator (cdr (assoc op *ir-translator*)))) + (if translator + (funcall translator args next result) + (ir-convert-call form next result)))))) + (values)) + +(defun compute-dfo (component) + (or (component-blocks component) + (let ((output nil)) + (labels ((compute-dfo-from (block) + (unless (or (component-exit-p block) (find block output)) + (dolist (successor (bblock-succ block)) + (unless (component-exit-p block) + (compute-dfo-from successor))) + (push block output)))) + (compute-dfo-from (unlist (bblock-succ (component-entry component)))) + (setf (component-blocks component) output))))) + +(defmacro do-blocks ((bblock component &optional result) &body body) + `(dolist (,bblock (compute-dfo ,component) ,result) + ,@body)) + +;;; IR Debugging + +(defun print-node (node) + (when (node-lvar node) + (format t "~a = " (lvar-id (node-lvar node)))) + (cond + ((ref-p node) + (let ((leaf (ref-leaf node))) + (cond + ((var-p leaf) + (format t "~a" (var-name leaf))) + ((constant-p leaf) + (format t "'~a" (constant-value leaf))) + ((functional-p leaf) + (format t "#" + (functional-name leaf) + (functional-entry-point leaf)))))) + ((assignment-p node) + (format t "set ~a ~a" + (var-name (assignment-variable node)) + (lvar-id (assignment-value node)))) + ((call-p node) + (format t "call ~a" (lvar-id (call-function node))) + (dolist (arg (call-arguments node)) + (format t " ~a" (lvar-id arg)))) + ((conditional-p node) + (format t "if ~a ~a ~a" + (lvar-id (conditional-test node)) + (bblock-id (conditional-consequent node)) + (bblock-id (conditional-alternative node)))) + (t + (error "`print-node' does not support printing ~S as a node." node))) + (terpri)) + +(defun print-bblock (block) + (flet ((bblock-name (block) + (cond + ((and (singlep (bblock-pred block)) + (component-entry-p (bblock-pred block))) + "ENTRY") + ((component-exit-p block) + "EXIT") + (t (string (bblock-id block)))))) + (format t "BLOCK ~a:~%" (bblock-name block)) + (do-nodes (node block) + (print-node node)) + (when (singlep (bblock-succ block)) + (format t "GO ~a~%" (bblock-name (first (bblock-succ block))))) + (terpri))) + +(defun print-component (component &optional (stream *standard-output*)) + (let ((*standard-output* stream)) + (do-blocks (block component) + (print-bblock block)))) + +(defun check-ir-consistency (&optional (component *component*)) + (with-simple-restart (continue "Continue execution") + (do-blocks (block component) + (dolist (succ (bblock-succ block)) + (unless (find block (bblock-pred succ)) + (error "The block `~S' does not belong to the predecessors list of the its successor `~S'" + (bblock-id block) + (bblock-id succ)))) + (dolist (pred (bblock-pred block)) + (unless (find block (bblock-succ pred)) + (error "The block `~S' does not belong to the successors' list of its predecessor `~S'" + (bblock-id block) + (bblock-id pred))))))) + +;;; Translate FORM into IR and print a textual repreresentation of the +;;; component. +(defun describe-ir (form) + (with-component-compilation + (ir-convert form (bblock-exit *bblock*) (make-lvar :id "$out")) + (finish-component) + (check-ir-consistency) + (print-component *component*))) + + +;;; compiler.lisp ends here