From ed53de3c94faddfdc7447b3d61fef821c250e8d1 Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Wed, 11 Nov 2009 17:10:40 +0000 Subject: [PATCH] 1.0.32.19: Support for :REPLACEMENT external-format / encoding modifier For a given keyword :foo naming an encoding, allow an external-format (:foo :replacement ) such that any stream or octet coding errors are automatically treated by using the as replacement instead. To do this, wrap each of the functions in the external-format object named by the keyword with a function establishing handlers for the exceptional conditions. At the moment, the output restarts for c-string external format conversion are not implemented (so handle specific condition types like STREAM-FOOCODING-ERROR, not general FOOCODING-ERROR). --- NEWS | 5 ++ package-data-list.lisp-expr | 2 +- src/code/deftypes-for-target.lisp | 5 ++ src/code/fd-stream.lisp | 122 +++++++++++++++++++++++++++++-------- src/code/octets.lisp | 7 +-- src/code/stream.lisp | 2 +- src/code/target-c-call.lisp | 1 - src/compiler/fndb.lisp | 6 +- version.lisp-expr | 2 +- 9 files changed, 116 insertions(+), 36 deletions(-) diff --git a/NEWS b/NEWS index 58ec040..4ebd920 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,11 @@ changes relative to sbcl-1.0.32: transformations. ** improvement: restarts for providing replacement input/output on coding errors for fd-stream external formats. + ** improvement: where : is a keyword corresponding to an + external format the system supports, it is now possible to specify + (: :replacement ) as an external format which will + automatically substitute on encoding or decoding errors for + streams and for STRING-TO-OCTETS and its inverse. ** minor incompatible change: the utf-8 external format now correctly refuses to encode Lisp characters in the surrogate range (char-codes between #xd800 and #xdfff). diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 01d4d37..aea5140 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1390,7 +1390,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "FLOAT-WAIT" "DYNAMIC-SPACE-FREE-POINTER" "DYNAMIC-USAGE" "EFFECTIVE-FIND-POSITION-TEST" "EFFECTIVE-FIND-POSITION-KEY" "ERROR-NUMBER-OR-LOSE" - "EXTENDED-CHAR-P" + "EXTENDED-CHAR-P" "EXTERNAL-FORMAT-DESIGNATOR" "FDEFINITION-OBJECT" "FDOCUMENTATION" "FILENAME" "FIND-AND-INIT-OR-CHECK-LAYOUT" diff --git a/src/code/deftypes-for-target.lisp b/src/code/deftypes-for-target.lisp index d26547f..aa62b99 100644 --- a/src/code/deftypes-for-target.lisp +++ b/src/code/deftypes-for-target.lisp @@ -167,6 +167,11 @@ ;;; a valid argument to a stream function (sb!xc:deftype stream-designator () '(or stream (member nil t))) +;;; something valid as the :EXTERNAL-FORMAT argument to OPEN, LOAD, +;;; COMPILE-FILE and friends. +(sb!xc:deftype external-format-designator () + '(or keyword (cons keyword))) + ;;; an object suitable for input to standard functions that accept ;;; "environment objects" (of the ANSI glossary) (sb!xc:deftype lexenv-designator () '(or lexenv null)) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 5daab69..534c42c 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -730,7 +730,7 @@ (position #\newline thing :from-end t :start start :end end)))) (if (and (typep thing 'base-string) - (eq (fd-stream-external-format stream) :latin-1)) + (eq (fd-stream-external-format-keyword stream) :latin-1)) (ecase (fd-stream-buffering stream) (:full (buffer-output stream thing start end)) @@ -755,28 +755,47 @@ (:constructor %make-external-format) (:conc-name ef-) (:predicate external-format-p) - (:copier nil)) + (:copier %copy-external-format)) ;; All the names that can refer to this external format. The first ;; one is the canonical name. (names (missing-arg) :type list :read-only t) - (read-n-chars-fun (missing-arg) :type function :read-only t) - (read-char-fun (missing-arg) :type function :read-only t) - (write-n-bytes-fun (missing-arg) :type function :read-only t) - (write-char-none-buffered-fun (missing-arg) :type function :read-only t) - (write-char-line-buffered-fun (missing-arg) :type function :read-only t) - (write-char-full-buffered-fun (missing-arg) :type function :read-only t) + (read-n-chars-fun (missing-arg) :type function) + (read-char-fun (missing-arg) :type function) + (write-n-bytes-fun (missing-arg) :type function) + (write-char-none-buffered-fun (missing-arg) :type function) + (write-char-line-buffered-fun (missing-arg) :type function) + (write-char-full-buffered-fun (missing-arg) :type function) ;; Can be nil for fixed-width formats. - (resync-fun nil :type (or function null) :read-only t) - (bytes-for-char-fun (missing-arg) :type function :read-only t) - (read-c-string-fun (missing-arg) :type function :read-only t) - (write-c-string-fun (missing-arg) :type function :read-only t) - ;; We make these symbols so that a developer working on the octets - ;; code can easily redefine things and use the new function definition - ;; without redefining the external format as well. The slots above - ;; are functions because a developer working with those slots would be + (resync-fun nil :type (or function null)) + (bytes-for-char-fun (missing-arg) :type function) + (read-c-string-fun (missing-arg) :type function) + (write-c-string-fun (missing-arg) :type function) + ;; We indirect through symbols in these functions so that a + ;; developer working on the octets code can easily redefine things + ;; and use the new function definition without redefining the + ;; external format as well. The slots above don't do any + ;; indirection because a developer working with those slots would be ;; redefining the external format anyway. - (octets-to-string-sym (missing-arg) :type symbol :read-only t) - (string-to-octets-sym (missing-arg) :type symbol :read-only t)) + (octets-to-string-fun (missing-arg) :type function) + (string-to-octets-fun (missing-arg) :type function)) + +(defun wrap-external-format-functions (external-format fun) + (let ((result (%copy-external-format external-format))) + (macrolet ((frob (accessor) + `(setf (,accessor result) (funcall fun (,accessor result))))) + (frob ef-read-n-chars-fun) + (frob ef-read-char-fun) + (frob ef-write-n-bytes-fun) + (frob ef-write-char-none-buffered-fun) + (frob ef-write-char-line-buffered-fun) + (frob ef-write-char-full-buffered-fun) + (frob ef-resync-fun) + (frob ef-bytes-for-char-fun) + (frob ef-read-c-string-fun) + (frob ef-write-c-string-fun) + (frob ef-octets-to-string-fun) + (frob ef-string-to-octets-fun)) + result)) (defvar *external-formats* (make-hash-table) #!+sb-doc @@ -784,12 +803,57 @@ external-format names to EXTERNAL-FORMAT structures.") (defun get-external-format (external-format) - (gethash external-format *external-formats*)) + (flet ((keyword-external-format (keyword) + (declare (type keyword keyword)) + (gethash keyword *external-formats*)) + (replacement-handlerify (entry replacement) + (when entry + (wrap-external-format-functions + entry + (lambda (fun) + (and fun + (lambda (&rest rest) + (declare (dynamic-extent rest)) + (handler-bind + ((stream-decoding-error + (lambda (c) + (declare (ignore c)) + (invoke-restart 'input-replacement replacement))) + (stream-encoding-error + (lambda (c) + (declare (ignore c)) + (invoke-restart 'output-replacement replacement))) + (octets-encoding-error + (lambda (c) (use-value replacement c))) + (octet-decoding-error + (lambda (c) (use-value replacement c)))) + (apply fun rest))))))))) + (typecase external-format + (keyword (keyword-external-format external-format)) + ((cons keyword) + (let ((entry (keyword-external-format (car external-format))) + (replacement (getf (cdr external-format) :replacement))) + (if replacement + (replacement-handlerify entry replacement) + entry)))))) (defun get-external-format-or-lose (external-format) (or (get-external-format external-format) (error "Undefined external-format ~A" external-format))) +(defun external-format-keyword (external-format) + (typecase external-format + (keyword external-format) + ((cons keyword) (car external-format)))) + +(defun fd-stream-external-format-keyword (stream) + (external-format-keyword (fd-stream-external-format stream))) + +(defun canonize-external-format (external-format entry) + (typecase external-format + (keyword (first (ef-names entry))) + ((cons keyword) (cons (first (ef-names entry)) (rest external-format))))) + ;;; Find an output routine to use given the type and buffering. Return ;;; as multiple values the routine, the real type transfered, and the ;;; number of bytes per element. @@ -805,7 +869,7 @@ 'character 1 (ef-write-n-bytes-fun entry) - (first (ef-names entry))))))) + (canonize-external-format external-format entry)))))) (dolist (entry *output-routines*) (when (and (subtypep type (first entry)) (eq buffering (second entry)) @@ -1156,7 +1220,7 @@ 'character 1 (ef-read-n-chars-fun entry) - (first (ef-names entry))))))) + (canonize-external-format external-format entry)))))) (dolist (entry *input-routines*) (when (and (subtypep type (first entry)) (or (not (fourth entry)) @@ -1456,8 +1520,12 @@ :bytes-for-char-fun #',size-function :read-c-string-fun #',read-c-string-function :write-c-string-fun #',output-c-string-function - :octets-to-string-sym ',octets-to-string-sym - :string-to-octets-sym ',string-to-octets-sym))) + :octets-to-string-fun (lambda (&rest rest) + (declare (dynamic-extent rest)) + (apply ',octets-to-string-sym rest)) + :string-to-octets-fun (lambda (&rest rest) + (declare (dynamic-extent rest)) + (apply ',string-to-octets-sym rest))))) (dolist (ef ',external-format) (setf (gethash ef *external-formats*) entry)))))) @@ -1723,8 +1791,12 @@ :bytes-for-char-fun #',size-function :read-c-string-fun #',read-c-string-function :write-c-string-fun #',output-c-string-function - :octets-to-string-sym ',octets-to-string-sym - :string-to-octets-sym ',string-to-octets-sym))) + :octets-to-string-fun (lambda (&rest rest) + (declare (dynamic-extent rest)) + (apply ',octets-to-string-sym rest)) + :string-to-octets-fun (lambda (&rest rest) + (declare (dynamic-extent rest)) + (apply ',string-to-octets-sym rest))))) (dolist (ef ',external-format) (setf (gethash ef *external-formats*) entry)))))) diff --git a/src/code/octets.lisp b/src/code/octets.lisp index 0d155f1..d8c0376 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -392,8 +392,7 @@ one-past-the-end" :check-fill-pointer t) (declare (type (simple-array (unsigned-byte 8) (*)) vector)) (let ((ef (maybe-defaulted-external-format external-format))) - (funcall (symbol-function (sb!impl::ef-octets-to-string-sym ef)) - vector start end)))) + (funcall (sb!impl::ef-octets-to-string-fun ef) vector start end)))) (defun string-to-octets (string &key (external-format :default) (start 0) end null-terminate) @@ -404,8 +403,8 @@ one-past-the-end" :check-fill-pointer t) (declare (type simple-string string)) (let ((ef (maybe-defaulted-external-format external-format))) - (funcall (symbol-function (sb!impl::ef-string-to-octets-sym ef)) - string start end (if null-terminate 1 0))))) + (funcall (sb!impl::ef-string-to-octets-fun ef) string start end + (if null-terminate 1 0))))) #!+sb-unicode (defvar +unicode-replacement-character+ (string (code-char #xfffd))) diff --git a/src/code/stream.lisp b/src/code/stream.lisp index a0a0cb4..7ffffc2 100644 --- a/src/code/stream.lisp +++ b/src/code/stream.lisp @@ -135,7 +135,7 @@ ;;;; file position and file length (defun external-format-char-size (external-format) - (let ((ef-entry (find-external-format external-format))) + (let ((ef-entry (get-external-format external-format))) (if (variable-width-external-format-p ef-entry) (bytes-for-char-fun ef-entry) (funcall (bytes-for-char-fun ef-entry) #\x)))) diff --git a/src/code/target-c-call.lisp b/src/code/target-c-call.lisp index 09d9258..7a83d10 100644 --- a/src/code/target-c-call.lisp +++ b/src/code/target-c-call.lisp @@ -81,4 +81,3 @@ (declare (optimize (speed 3) (safety 0))) (let ((external-format (sb!impl::get-external-format-or-lose external-format))) (funcall (sb!impl::ef-read-c-string-fun external-format) sap element-type)))) - diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index 10c8613..fc5daa7 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -1232,7 +1232,7 @@ :rename-and-delete :overwrite :append :supersede nil)) (:if-does-not-exist (member :error :create nil)) - (:external-format keyword)) + (:external-format external-format-designator)) (or stream null)) (defknown rename-file (pathname-designator filename) @@ -1255,7 +1255,7 @@ (:verbose t) (:print t) (:if-does-not-exist t) - (:external-format keyword)) + (:external-format external-format-designator)) t) (defknown directory (pathname-designator &key (:resolve-symlinks t)) @@ -1307,7 +1307,7 @@ (member t))) (:verbose t) (:print t) - (:external-format keyword) + (:external-format external-format-designator) ;; extensions (:trace-file t) diff --git a/version.lisp-expr b/version.lisp-expr index 4b4873e..7a8e631 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.32.18" +"1.0.32.19" -- 1.7.10.4