X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fcode%2Foctets.lisp;h=4a98d8082a196b82f16d4ab4090438a1a54ca0df;hb=0395c15ff8394bfaaed03050c1a7a131f197a732;hp=ce032f6c3a7e9ee2cb6a091dda436b9edb20f1c8;hpb=f2db6743b1fadeea9e72cb583d857851c87efcd4;p=sbcl.git diff --git a/src/code/octets.lisp b/src/code/octets.lisp index ce032f6..4a98d80 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -38,12 +38,6 @@ one-past-the-end" (octets-encoding-error-position c))) (octets-encoding-error-external-format c))))) -(defun read-replacement-character () - (format *query-io* - "Replacement byte, bytes, character, or string (evaluated): ") - (finish-output *query-io*) - (list (eval (read *query-io*)))) - (defun encoding-error (external-format string pos) (restart-case (error 'octets-encoding-error @@ -52,7 +46,10 @@ one-past-the-end" :position pos) (use-value (replacement) :report "Supply a set of bytes to use in place of the invalid one." - :interactive read-replacement-character + :interactive + (lambda () + (read-evaluated-form + "Replacement byte, bytes, character, or string (evaluated): ")) (typecase replacement ((unsigned-byte 8) (make-array 1 :element-type '(unsigned-byte 8) :initial-element replacement)) @@ -74,20 +71,13 @@ one-past-the-end" ;;; character-out-of-range ;;; invalid-utf8-starter-byte ;;; invalid-utf8-continuation-byte -;;; overlong-utf8-sequence ;;; ;;; Of these, the only one truly likely to be of interest to calling ;;; code is end-of-input-in-character (in which case it's likely to ;;; want to make a note of octet-decoding-error-start, supply "" as a ;;; replacement string, and then move that last chunk of bytes to the ;;; beginning of its buffer for the next go round) but they're all -;;; provided on the off chance they're of interest. The next most -;;; likely interesting option is overlong-utf8-sequence -- the -;;; application, if it cares to, can decode this itself (taking care -;;; to ensure that the result isn't out of range of CHAR-CODE-LIMIT) -;;; and return that result. This library doesn't provide support for -;;; that as a conforming UTF-8-using program is supposed to treat it -;;; as an error. +;;; provided on the off chance they're of interest. (define-condition octet-decoding-error (character-decoding-error) ((array :initarg :array :accessor octet-decoding-error-array) @@ -110,11 +100,6 @@ one-past-the-end" (define-condition malformed-ascii (octet-decoding-error) ()) -(defun read-replacement-string () - (format *query-io* "Enter a replacement string designator (evaluated): ") - (finish-output *query-io*) - (list (eval (read *query-io*)))) - (defun decoding-error (array start end external-format reason pos) (restart-case (error reason @@ -125,7 +110,10 @@ one-past-the-end" :pos pos) (use-value (s) :report "Supply a replacement string designator." - :interactive read-replacement-string + :interactive + (lambda () + (read-evaluated-form + "Enter a replacement string designator (evaluated): ")) (string s)))) ;;; Utilities used in both to-string and to-octet conversions @@ -190,9 +178,8 @@ one-past-the-end" finally (return elements))) ;; Find the smallest character code such that the corresponding ;; byte is != to the code. - (lowest-non-equivalent-code (position-if-not #'(lambda (pair) - (apply #'= pair)) - pairs)) + (lowest-non-equivalent-code + (caar (sort (copy-seq exceptions) #'< :key #'car))) ;; Sort them for our lookup table. (sorted-pairs (sort (subseq pairs lowest-non-equivalent-code) #'< :key #'car)) @@ -206,9 +193,9 @@ one-past-the-end" ,(make-array 256 :element-type t #+nil 'char-code :initial-contents (loop for byte below 256 collect - (let ((exception (cadr (assoc byte exceptions)))) + (let ((exception (cdr (assoc byte exceptions)))) (if exception - exception + (car exception) byte))))) (code-to-byte-table ,(make-array (length sorted-lookup-table) @@ -260,7 +247,8 @@ one-past-the-end" :initial-element 0 :element-type '(unsigned-byte 8))) (index 0) - (error-position 0)) + (error-position 0) + (error-replacement)) (tagbody :no-error (loop for pos of-type index from sstart below send @@ -273,30 +261,32 @@ one-past-the-end" ;; KLUDGE: We ran into encoding errors. Bail and do ;; things the slow way (does anybody actually use this ;; functionality besides our own test suite?). - (setf error-position pos) + (setf error-position pos error-replacement byte) (go :error))) (incf index)) finally (return-from string->latin% octets)) :error - ;; We have encoded INDEX octets so far and we ran into an encoding - ;; error at ERROR-POSITION. + ;; We have encoded INDEX octets so far and we ran into an + ;; encoding error at ERROR-POSITION; the user has asked us to + ;; replace the expected output with ERROR-REPLACEMENT. (let ((new-octets (make-array (* index 2) :element-type '(unsigned-byte 8) :adjustable t :fill-pointer index))) (replace new-octets octets) - (loop for pos of-type index from error-position below send - do (let ((thing (funcall get-bytes string pos))) + (flet ((extend (thing) (typecase thing - ((unsigned-byte 8) - (vector-push-extend thing new-octets)) + ((unsigned-byte 8) (vector-push-extend thing new-octets)) ((simple-array (unsigned-byte 8) (*)) (dotimes (i (length thing)) - (vector-push-extend (aref thing i) new-octets))))) - finally (return-from string->latin% - (progn - (unless (zerop null-padding) - (vector-push-extend 0 new-octets)) - (copy-seq new-octets)))))))) + (vector-push-extend (aref thing i) new-octets)))))) + (extend error-replacement) + (loop for pos of-type index from (1+ error-position) below send + do (extend (funcall get-bytes string pos)) + finally (return-from string->latin% + (progn + (unless (zerop null-padding) + (vector-push-extend 0 new-octets)) + (copy-seq new-octets))))))))) ;;;; to-string conversions @@ -396,8 +386,7 @@ one-past-the-end" :check-fill-pointer t) (declare (type (simple-array (unsigned-byte 8) (*)) vector)) (let ((ef (maybe-defaulted-external-format external-format))) - (funcall (symbol-function (sb!impl::ef-octets-to-string-sym ef)) - vector start end)))) + (funcall (sb!impl::ef-octets-to-string-fun ef) vector start end)))) (defun string-to-octets (string &key (external-format :default) (start 0) end null-terminate) @@ -408,8 +397,8 @@ one-past-the-end" :check-fill-pointer t) (declare (type simple-string string)) (let ((ef (maybe-defaulted-external-format external-format))) - (funcall (symbol-function (sb!impl::ef-string-to-octets-sym ef)) - string start end (if null-terminate 1 0))))) + (funcall (sb!impl::ef-string-to-octets-fun ef) string start end + (if null-terminate 1 0))))) #!+sb-unicode (defvar +unicode-replacement-character+ (string (code-char #xfffd)))