X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fread.lisp;h=6e9eb68cc751bcb7e6d6c411e08e43bbfcdc061b;hb=5bccb7e18630391bfb2e65a39627899daf4d2edf;hp=d642be0c68d24e2724690f5d3dafb7c769a91c21;hpb=b8ba637d2f859227622422089345d29a3c46c3f7;p=jscl.git diff --git a/src/read.lisp b/src/read.lisp index d642be0..6e9eb68 100644 --- a/src/read.lisp +++ b/src/read.lisp @@ -3,25 +3,73 @@ ;; Copyright (C) 2012, 2013 David Vazquez ;; Copyright (C) 2012 Raimon Grau -;; This program is free software: you can redistribute it and/or +;; JSCL is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; -;; This program is distributed in the hope that it will be useful, but +;; JSCL is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License -;; along with this program. If not, see . +;; along with JSCL. If not, see . +(/debug "loading read.lisp!") ;;;; Reader ;;; The Lisp reader, parse strings and return Lisp objects. The main ;;; entry points are `ls-read' and `ls-read-from-string'. +;;; #= / ## implementation + +;; For now associations label->object are kept in a plist +;; May be it makes sense to use a vector instead if speed +;; is considered a problem with many labelled objects +(defvar *labelled-objects* nil) + +(defun new-labelled-objects-table () + (setf *labelled-objects* nil)) + +(defun find-labelled-object (id) + (assoc id *labelled-objects*)) + +(defun add-labelled-object (id value) + (push (cons id value) *labelled-objects*)) + +;; A unique value used to mark in the labelled objects +;; table an object that is being constructed +;; (e.g. #1# while reading elements of "#1=(#1# #1# #1#)") +(defvar *future-value* (make-symbol "future")) + +;; A unique value used to mark temporary values that will +;; be replaced when fixups are run. +(defvar *fixup-value* (make-symbol "fixup")) + +;; Fixup locations keeps a list of conses where the CAR +;; is a callable to be called with the value of the object +;; associated to label stored in CDR once reading is completed +(defvar *fixup-locations* nil) + +(defun fixup-backrefs () + (while *fixup-locations* + (let* ((fixup (pop *fixup-locations*)) + (callable (car fixup)) + (cell (find-labelled-object (cdr fixup)))) + (if cell + (funcall callable (cdr cell)) + (error "Internal error in fixup-backrefs: object #~S# not found" + (cdr fixup)))))) + +;; A function that will need to return a fixup callback +;; for the object that is being read. The returned callback will +;; be called with the result of reading. +(defvar *make-fixup-function* + (lambda () + (error "Internal error in fixup creation during read"))) + (defun make-string-stream (string) (cons string 0)) @@ -45,7 +93,7 @@ (setq ch (%peek-char stream))))) (defun terminalp (ch) - (or (null ch) (whitespacep ch) (char= #\) ch) (char= #\( ch))) + (or (null ch) (whitespacep ch) (char= #\" ch) (char= #\) ch) (char= #\( ch))) (defun read-until (stream func) (let ((string "") @@ -57,6 +105,28 @@ (setq ch (%peek-char stream))) string)) +(defun read-escaped-until (stream func) + (let ((string "") + (ch (%peek-char stream)) + (multi-escape nil)) + (while (and ch (or multi-escape (not (funcall func ch)))) + (cond + ((char= ch #\|) + (if multi-escape + (setf multi-escape nil) + (setf multi-escape t))) + ((char= ch #\\) + (%read-char stream) + (setf ch (%peek-char stream)) + (setf string (concat string "\\" (string ch)))) + (t + (if multi-escape + (setf string (concat string "\\" (string ch))) + (setf string (concat string (string ch)))))) + (%read-char stream) + (setf ch (%peek-char stream))) + string)) + (defun skip-whitespaces-and-comments (stream) (let (ch) (skip-whitespaces stream) @@ -66,23 +136,55 @@ (skip-whitespaces stream) (setq ch (%peek-char stream))))) -(defun %read-list (stream) +(defun discard-char (stream expected) + (let ((ch (%read-char stream))) + (when (null ch) + (error "End of file when character ~S was expected." expected)) + (unless (char= ch expected) + (error "Character ~S was found but ~S was expected." ch expected)))) + +(defun %read-list (stream &optional (eof-error-p t) eof-value) (skip-whitespaces-and-comments stream) (let ((ch (%peek-char stream))) (cond ((null ch) (error "Unspected EOF")) ((char= ch #\)) - (%read-char stream) + (discard-char stream #\)) nil) - ((char= ch #\.) - (%read-char stream) - (prog1 (ls-read-1 stream) - (skip-whitespaces-and-comments stream) - (unless (char= (%read-char stream) #\)) - (error "')' was expected.")))) (t - (cons (ls-read-1 stream) (%read-list stream)))))) + (let* ((cell (cons nil nil)) + (*make-fixup-function* (lambda () + (lambda (obj) + (rplaca cell obj)))) + (eof (gensym)) + (next (ls-read stream nil eof t))) + (rplaca cell next) + (skip-whitespaces-and-comments stream) + (cond + ((eq next eof) + (discard-char stream #\)) + nil) + (t + (if (char= (%peek-char stream) #\.) + (progn + (discard-char stream #\.) + (if (terminalp (%peek-char stream)) + (let ((*make-fixup-function* (lambda () + (lambda (obj) + (rplacd cell obj))))) + ;; Dotted pair notation + (rplacd cell (ls-read stream eof-error-p eof-value t)) + (skip-whitespaces-and-comments stream) + (let ((ch (%peek-char stream))) + (if (or (null ch) (char= #\) ch)) + (discard-char stream #\)) + (error "Multiple objects following . in a list")))) + (let ((token (concat "." (read-escaped-until stream #'terminalp)))) + (rplacd cell (cons (interpret-token token) + (%read-list stream eof-error-p eof-value)))))) + (rplacd cell (%read-list stream eof-error-p eof-value))) + cell))))))) (defun read-string (stream) (let ((string "") @@ -97,32 +199,122 @@ (setq ch (%read-char stream))) string)) -(defun read-sharp (stream) +(defun read-sharp (stream &optional eof-error-p eof-value) (%read-char stream) - (ecase (%read-char stream) - (#\' - (list 'function (ls-read-1 stream))) - (#\( (list-to-vector (%read-list stream))) - (#\: (make-symbol (string-upcase (read-until stream #'terminalp)))) - (#\\ - (let ((cname - (concat (string (%read-char stream)) - (read-until stream #'terminalp)))) - (cond - ((string= cname "space") (char-code #\space)) - ((string= cname "tab") (char-code #\tab)) - ((string= cname "newline") (char-code #\newline)) - (t (char-code (char cname 0)))))) - (#\+ - (let ((feature (read-until stream #'terminalp))) + (let ((ch (%read-char stream))) + (case ch + (#\' + (list 'function (ls-read stream eof-error-p eof-value t))) + (#\( + (do ((elements nil) + (result nil) + (index 0 (1+ index))) + ((progn (skip-whitespaces-and-comments stream) + (or (null (%peek-char stream)) + (char= (%peek-char stream) #\)))) + (discard-char stream #\)) + (setf result (make-array index)) + (dotimes (i index) + (aset result (decf index) (pop elements))) + result) + (let* ((ix index) ; Can't just use index: the same var would be captured in all fixups + (*make-fixup-function* (lambda () + (lambda (obj) + (aset result ix obj)))) + (eof (gensym)) + (value (ls-read stream nil eof t))) + (push value elements)))) + (#\: + (make-symbol + (unescape-token + (string-upcase-noescaped + (read-escaped-until stream #'terminalp))))) + (#\\ + (let ((cname + (concat (string (%read-char stream)) + (read-until stream #'terminalp)))) + (cond + ((string= cname "space") #\space) + ((string= cname "tab") #\tab) + ((string= cname "newline") #\newline) + (t (char cname 0))))) + ((#\+ #\-) + (let ((feature (let ((symbol (ls-read stream eof-error-p eof-value t))) + (unless (symbolp symbol) + (error "Invalid feature ~S" symbol)) + (intern (string symbol) "KEYWORD")))) + (if (eql (char= ch #\+) + (and (find feature *features*) t)) + (ls-read stream eof-error-p eof-value t) + (prog2 (ls-read stream) + (ls-read stream eof-error-p eof-value t))))) + ((#\J #\j) + (unless (char= (%peek-char stream) #\:) + (error "FFI descriptor must start with a semicolon.")) + (let ((descriptor (subseq (read-until stream #'terminalp) 1)) + (subdescriptors nil)) + (do* ((start 0 (1+ end)) + (end (position #\: descriptor :start start) + (position #\: descriptor :start start))) + ((null end) + (push (subseq descriptor start) subdescriptors) + `(oget *root* ,@(reverse subdescriptors))) + (push (subseq descriptor start end) subdescriptors)))) + (otherwise (cond - ((string= feature "common-lisp") - (ls-read-1 stream) ;ignore - (ls-read-1 stream)) - ((string= feature "ecmalisp") - (ls-read-1 stream)) + ((and ch (digit-char-p ch)) + (let ((id (digit-char-p ch))) + (while (and (%peek-char stream) + (digit-char-p (%peek-char stream))) + (setf id (+ (* id 10) (digit-char-p (%read-char stream))))) + (ecase (%peek-char stream) + (#\= + (%read-char stream) + (if (find-labelled-object id) + (error "Duplicated label #~S=" id) + (progn + (add-labelled-object id *future-value*) + (let ((obj (ls-read stream eof-error-p eof-value t))) + ;; FIXME: somehow the more natural + ;; (setf (cdr (find-labelled-object id)) obj) + ;; doesn't work + (rplacd (find-labelled-object id) obj) + obj)))) + (#\# + (%read-char stream) + (let ((cell (find-labelled-object id))) + (if cell + (if (eq (cdr cell) *future-value*) + (progn + (push (cons (funcall *make-fixup-function*) + id) + *fixup-locations*) + *fixup-value*) + (cdr cell)) + (error "Invalid labelled object #~S#" id))))))) (t - (error "Unknown reader form."))))))) + (error "Invalid dispatch character after #"))))))) + +(defun unescape-token (x) + (let ((result "")) + (dotimes (i (length x)) + (unless (char= (char x i) #\\) + (setq result (concat result (string (char x i)))))) + result)) + +(defun string-upcase-noescaped (s) + (let ((result "") + (last-escape nil)) + (dotimes (i (length s)) + (let ((ch (char s i))) + (if last-escape + (progn + (setf last-escape nil) + (setf result (concat result (string ch)))) + (if (char= ch #\\) + (setf last-escape t) + (setf result (concat result (string-upcase (string ch)))))))) + result)) ;;; Parse a string of the form NAME, PACKAGE:NAME or ;;; PACKAGE::NAME and return the name. If the string is of the @@ -134,34 +326,57 @@ (setq index 0) (while (and (< index size) (not (char= (char string index) #\:))) + (when (char= (char string index) #\\) + (incf index)) (incf index)) (cond ;; No package prefix ((= index size) (setq name string) - (setq package *package*) + (setq package (package-name *package*)) (setq internalp t)) (t ;; Package prefix (if (zerop index) (setq package "KEYWORD") - (setq package (string-upcase (subseq string 0 index)))) + (setq package (string-upcase-noescaped (subseq string 0 index)))) (incf index) (when (char= (char string index) #\:) (setq internalp t) (incf index)) (setq name (subseq string index)))) ;; Canonalize symbol name and package - (when (not (eq package "JS")) - (setq name (string-upcase name))) + (setq name (if (string= package "JS") + (setq name (unescape-token name)) + (setq name (string-upcase-noescaped name)))) (setq package (find-package package)) - ;; TODO: PACKAGE:SYMBOL should signal error if SYMBOL is not an - ;; external symbol from PACKAGE. (if (or internalp (eq package (find-package "KEYWORD")) (eq package (find-package "JS"))) (intern name package) - (find-symbol name package)))) + (multiple-value-bind (symbol external) + (find-symbol name package) + (if (eq external :external) + symbol + (error "The symbol `~S' is not external in the package ~S." name package)))))) + +(defun read-integer (string) + (let ((sign 1) + (number nil) + (size (length string))) + (dotimes (i size) + (let ((elt (char string i))) + (cond + ((digit-char-p elt) + (setq number (+ (* (or number 0) 10) (digit-char-p elt)))) + ((zerop i) + (case elt + (#\+ nil) + (#\- (setq sign -1)) + (t (return-from read-integer)))) + ((and (= i (1- size)) (char= elt #\.)) nil) + (t (return-from read-integer))))) + (and number (* sign number)))) (defun read-float (string) (block nil @@ -182,13 +397,12 @@ (incf index))) (unless (< index size) (return)) ;; Optional integer part - (let ((value (digit-char-p (char string index)))) - (when value - (setq integer-part t) - (while (and (< index size) - (setq value (digit-char-p (char string index)))) - (setq number (+ (* number 10) value)) - (incf index)))) + (awhen (digit-char-p (char string index)) + (setq integer-part t) + (while (and (< index size) + (setq it (digit-char-p (char string index)))) + (setq number (+ (* number 10) it)) + (incf index))) (unless (< index size) (return)) ;; Decimal point is mandatory if there's no integer part (unless (or integer-part (char= #\. (char string index))) (return)) @@ -196,14 +410,13 @@ (when (char= #\. (char string index)) (incf index) (unless (< index size) (return)) - (let ((value (digit-char-p (char string index)))) - (when value - (setq fractional-part t) - (while (and (< index size) - (setq value (digit-char-p (char string index)))) - (setq number (+ (* number 10) value)) - (setq divisor (* divisor 10)) - (incf index))))) + (awhen (digit-char-p (char string index)) + (setq fractional-part t) + (while (and (< index size) + (setq it (digit-char-p (char string index)))) + (setq number (+ (* number 10) it)) + (setq divisor (* divisor 10)) + (incf index)))) ;; Either left or right part of the dot must be present (unless (or integer-part fractional-part) (return)) ;; Exponent is mandatory if there is no fractional part @@ -211,8 +424,7 @@ ;; Optional exponent part (when (< index size) ;; Exponent-marker - (unless (member (string-upcase (string (char string index))) - '("E" "S" "F" "D" "L")) + (unless (find (char-upcase (char string index)) "ESFDL") (return)) (incf index) (unless (< index size) (return)) @@ -231,8 +443,8 @@ (incf index)))) (unless (= index size) (return)) ;; Everything went ok, we have a float - (/ (* sign (expt 10 (* exponent-sign exponent)) number) divisor)))) - + ;; XXX: Use FLOAT when implemented. + (/ (* sign (expt 10.0 (* exponent-sign exponent)) number) divisor 1.0)))) (defun !parse-integer (string junk-allow) (block nil @@ -270,51 +482,66 @@ (values (* sign value) index) (values nil index))))) -#+ecmalisp +#+jscl (defun parse-integer (string &key junk-allowed) (multiple-value-bind (num index) (!parse-integer string junk-allowed) - (when num - (values num index) - (error "junk detected.")))) + (if num + (values num index) + (error "Junk detected.")))) -(defvar *eof* (gensym)) -(defun ls-read-1 (stream) - (skip-whitespaces-and-comments stream) - (let ((ch (%peek-char stream))) - (cond - ((or (null ch) (char= ch #\))) - *eof*) - ((char= ch #\() - (%read-char stream) - (%read-list stream)) - ((char= ch #\') - (%read-char stream) - (list 'quote (ls-read-1 stream))) - ((char= ch #\`) - (%read-char stream) - (list 'backquote (ls-read-1 stream))) - ((char= ch #\") - (%read-char stream) - (read-string stream)) - ((char= ch #\,) - (%read-char stream) - (if (eql (%peek-char stream) #\@) - (progn (%read-char stream) (list 'unquote-splicing (ls-read-1 stream))) - (list 'unquote (ls-read-1 stream)))) - ((char= ch #\#) - (read-sharp stream)) - (t - (let ((string (read-until stream #'terminalp))) - (or (values (!parse-integer string nil)) - (read-float string) - (read-symbol string))))))) -(defun ls-read (stream &optional (eof-error-p t) eof-value) - (let ((x (ls-read-1 stream))) - (if (eq x *eof*) - (if eof-error-p (error "EOF") eof-value) - x))) +(defun interpret-token (string) + (or (read-integer string) + (read-float string) + (read-symbol string))) + +(defun ls-read (stream &optional (eof-error-p t) eof-value recursive-p) + (let ((save-labelled-objects *labelled-objects*) + (save-fixup-locations *fixup-locations*)) + (unless recursive-p + (setf *fixup-locations* nil) + (setf *labelled-objects* (new-labelled-objects-table))) + (prog1 + (progn + (skip-whitespaces-and-comments stream) + (let ((ch (%peek-char stream))) + (cond + ((or (null ch) (char= ch #\))) + (if eof-error-p + (error "End of file") + eof-value)) + ((char= ch #\() + (%read-char stream) + (%read-list stream eof-error-p eof-value)) + ((char= ch #\') + (%read-char stream) + (list 'quote (ls-read stream eof-error-p eof-value t))) + ((char= ch #\`) + (%read-char stream) + (list 'backquote (ls-read stream eof-error-p eof-value t))) + ((char= ch #\") + (%read-char stream) + (read-string stream)) + ((char= ch #\,) + (%read-char stream) + (if (eql (%peek-char stream) #\@) + (progn (%read-char stream) (list 'unquote-splicing + (ls-read stream eof-error-p eof-value t))) + (list 'unquote (ls-read stream eof-error-p eof-value t)))) + ((char= ch #\#) + (read-sharp stream eof-error-p eof-value)) + (t + (let ((string (read-escaped-until stream #'terminalp))) + (interpret-token string)))))) + (unless recursive-p + (fixup-backrefs) + (setf *labelled-objects* save-labelled-objects) + (setf *fixup-locations* save-fixup-locations))))) (defun ls-read-from-string (string &optional (eof-error-p t) eof-value) (ls-read (make-string-stream string) eof-error-p eof-value)) + +#+jscl +(defun read-from-string (string &optional (eof-errorp t) eof-value) + (ls-read-from-string string eof-errorp eof-value))