X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Foctets.lisp;h=bb6367396cf55672b5390bff5c6b91f044fcb9ae;hb=22de9286aa239843ab7bc2cb772009fba6bcd080;hp=ce032f6c3a7e9ee2cb6a091dda436b9edb20f1c8;hpb=f2db6743b1fadeea9e72cb583d857851c87efcd4;p=sbcl.git diff --git a/src/code/octets.lisp b/src/code/octets.lisp index ce032f6..bb63673 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -74,20 +74,13 @@ one-past-the-end" ;;; character-out-of-range ;;; invalid-utf8-starter-byte ;;; invalid-utf8-continuation-byte -;;; overlong-utf8-sequence ;;; ;;; Of these, the only one truly likely to be of interest to calling ;;; code is end-of-input-in-character (in which case it's likely to ;;; want to make a note of octet-decoding-error-start, supply "" as a ;;; replacement string, and then move that last chunk of bytes to the ;;; beginning of its buffer for the next go round) but they're all -;;; provided on the off chance they're of interest. The next most -;;; likely interesting option is overlong-utf8-sequence -- the -;;; application, if it cares to, can decode this itself (taking care -;;; to ensure that the result isn't out of range of CHAR-CODE-LIMIT) -;;; and return that result. This library doesn't provide support for -;;; that as a conforming UTF-8-using program is supposed to treat it -;;; as an error. +;;; provided on the off chance they're of interest. (define-condition octet-decoding-error (character-decoding-error) ((array :initarg :array :accessor octet-decoding-error-array) @@ -190,9 +183,8 @@ one-past-the-end" finally (return elements))) ;; Find the smallest character code such that the corresponding ;; byte is != to the code. - (lowest-non-equivalent-code (position-if-not #'(lambda (pair) - (apply #'= pair)) - pairs)) + (lowest-non-equivalent-code + (caar (sort (copy-seq exceptions) #'< :key #'car))) ;; Sort them for our lookup table. (sorted-pairs (sort (subseq pairs lowest-non-equivalent-code) #'< :key #'car)) @@ -206,9 +198,9 @@ one-past-the-end" ,(make-array 256 :element-type t #+nil 'char-code :initial-contents (loop for byte below 256 collect - (let ((exception (cadr (assoc byte exceptions)))) + (let ((exception (cdr (assoc byte exceptions)))) (if exception - exception + (car exception) byte))))) (code-to-byte-table ,(make-array (length sorted-lookup-table) @@ -260,7 +252,8 @@ one-past-the-end" :initial-element 0 :element-type '(unsigned-byte 8))) (index 0) - (error-position 0)) + (error-position 0) + (error-replacement)) (tagbody :no-error (loop for pos of-type index from sstart below send @@ -273,30 +266,32 @@ one-past-the-end" ;; KLUDGE: We ran into encoding errors. Bail and do ;; things the slow way (does anybody actually use this ;; functionality besides our own test suite?). - (setf error-position pos) + (setf error-position pos error-replacement byte) (go :error))) (incf index)) finally (return-from string->latin% octets)) :error - ;; We have encoded INDEX octets so far and we ran into an encoding - ;; error at ERROR-POSITION. + ;; We have encoded INDEX octets so far and we ran into an + ;; encoding error at ERROR-POSITION; the user has asked us to + ;; replace the expected output with ERROR-REPLACEMENT. (let ((new-octets (make-array (* index 2) :element-type '(unsigned-byte 8) :adjustable t :fill-pointer index))) (replace new-octets octets) - (loop for pos of-type index from error-position below send - do (let ((thing (funcall get-bytes string pos))) + (flet ((extend (thing) (typecase thing - ((unsigned-byte 8) - (vector-push-extend thing new-octets)) + ((unsigned-byte 8) (vector-push-extend thing new-octets)) ((simple-array (unsigned-byte 8) (*)) (dotimes (i (length thing)) - (vector-push-extend (aref thing i) new-octets))))) - finally (return-from string->latin% - (progn - (unless (zerop null-padding) - (vector-push-extend 0 new-octets)) - (copy-seq new-octets)))))))) + (vector-push-extend (aref thing i) new-octets)))))) + (extend error-replacement) + (loop for pos of-type index from (1+ error-position) below send + do (extend (funcall get-bytes string pos)) + finally (return-from string->latin% + (progn + (unless (zerop null-padding) + (vector-push-extend 0 new-octets)) + (copy-seq new-octets))))))))) ;;;; to-string conversions @@ -396,8 +391,7 @@ one-past-the-end" :check-fill-pointer t) (declare (type (simple-array (unsigned-byte 8) (*)) vector)) (let ((ef (maybe-defaulted-external-format external-format))) - (funcall (symbol-function (sb!impl::ef-octets-to-string-sym ef)) - vector start end)))) + (funcall (sb!impl::ef-octets-to-string-fun ef) vector start end)))) (defun string-to-octets (string &key (external-format :default) (start 0) end null-terminate) @@ -408,8 +402,8 @@ one-past-the-end" :check-fill-pointer t) (declare (type simple-string string)) (let ((ef (maybe-defaulted-external-format external-format))) - (funcall (symbol-function (sb!impl::ef-string-to-octets-sym ef)) - string start end (if null-terminate 1 0))))) + (funcall (sb!impl::ef-string-to-octets-fun ef) string start end + (if null-terminate 1 0))))) #!+sb-unicode (defvar +unicode-replacement-character+ (string (code-char #xfffd)))