(octets-encoding-error-position c)))
(octets-encoding-error-external-format c)))))
-(defun read-replacement-character ()
- (format *query-io*
- "Replacement byte, bytes, character, or string (evaluated): ")
- (finish-output *query-io*)
- (list (eval (read *query-io*))))
-
(defun encoding-error (external-format string pos)
(restart-case
(error 'octets-encoding-error
:position pos)
(use-value (replacement)
:report "Supply a set of bytes to use in place of the invalid one."
- :interactive read-replacement-character
+ :interactive
+ (lambda ()
+ (read-evaluated-form
+ "Replacement byte, bytes, character, or string (evaluated): "))
(typecase replacement
((unsigned-byte 8)
(make-array 1 :element-type '(unsigned-byte 8) :initial-element replacement))
;;; character-out-of-range
;;; invalid-utf8-starter-byte
;;; invalid-utf8-continuation-byte
-;;; overlong-utf8-sequence
;;;
;;; Of these, the only one truly likely to be of interest to calling
;;; code is end-of-input-in-character (in which case it's likely to
;;; want to make a note of octet-decoding-error-start, supply "" as a
;;; replacement string, and then move that last chunk of bytes to the
;;; beginning of its buffer for the next go round) but they're all
-;;; provided on the off chance they're of interest. The next most
-;;; likely interesting option is overlong-utf8-sequence -- the
-;;; application, if it cares to, can decode this itself (taking care
-;;; to ensure that the result isn't out of range of CHAR-CODE-LIMIT)
-;;; and return that result. This library doesn't provide support for
-;;; that as a conforming UTF-8-using program is supposed to treat it
-;;; as an error.
+;;; provided on the off chance they're of interest.
(define-condition octet-decoding-error (character-decoding-error)
((array :initarg :array :accessor octet-decoding-error-array)
(define-condition malformed-ascii (octet-decoding-error) ())
-(defun read-replacement-string ()
- (format *query-io* "Enter a replacement string designator (evaluated): ")
- (finish-output *query-io*)
- (list (eval (read *query-io*))))
-
(defun decoding-error (array start end external-format reason pos)
(restart-case
(error reason
:pos pos)
(use-value (s)
:report "Supply a replacement string designator."
- :interactive read-replacement-string
+ :interactive
+ (lambda ()
+ (read-evaluated-form
+ "Enter a replacement string designator (evaluated): "))
(string s))))
;;; Utilities used in both to-string and to-octet conversions
finally (return elements)))
;; Find the smallest character code such that the corresponding
;; byte is != to the code.
- (lowest-non-equivalent-code (position-if-not #'(lambda (pair)
- (apply #'= pair))
- pairs))
+ (lowest-non-equivalent-code
+ (caar (sort (copy-seq exceptions) #'< :key #'car)))
;; Sort them for our lookup table.
(sorted-pairs (sort (subseq pairs lowest-non-equivalent-code)
#'< :key #'car))
,(make-array 256 :element-type t #+nil 'char-code
:initial-contents (loop for byte below 256
collect
- (let ((exception (cadr (assoc byte exceptions))))
+ (let ((exception (cdr (assoc byte exceptions))))
(if exception
- exception
+ (car exception)
byte)))))
(code-to-byte-table
,(make-array (length sorted-lookup-table)
:initial-element 0
:element-type '(unsigned-byte 8)))
(index 0)
- (error-position 0))
+ (error-position 0)
+ (error-replacement))
(tagbody
:no-error
(loop for pos of-type index from sstart below send
;; KLUDGE: We ran into encoding errors. Bail and do
;; things the slow way (does anybody actually use this
;; functionality besides our own test suite?).
- (setf error-position pos)
+ (setf error-position pos error-replacement byte)
(go :error)))
(incf index))
finally (return-from string->latin% octets))
:error
- ;; We have encoded INDEX octets so far and we ran into an encoding
- ;; error at ERROR-POSITION.
+ ;; We have encoded INDEX octets so far and we ran into an
+ ;; encoding error at ERROR-POSITION; the user has asked us to
+ ;; replace the expected output with ERROR-REPLACEMENT.
(let ((new-octets (make-array (* index 2)
:element-type '(unsigned-byte 8)
:adjustable t :fill-pointer index)))
(replace new-octets octets)
- (loop for pos of-type index from error-position below send
- do (let ((thing (funcall get-bytes string pos)))
+ (flet ((extend (thing)
(typecase thing
- ((unsigned-byte 8)
- (vector-push-extend thing new-octets))
+ ((unsigned-byte 8) (vector-push-extend thing new-octets))
((simple-array (unsigned-byte 8) (*))
(dotimes (i (length thing))
- (vector-push-extend (aref thing i) new-octets)))))
- finally (return-from string->latin%
- (progn
- (unless (zerop null-padding)
- (vector-push-extend 0 new-octets))
- (copy-seq new-octets))))))))
+ (vector-push-extend (aref thing i) new-octets))))))
+ (extend error-replacement)
+ (loop for pos of-type index from (1+ error-position) below send
+ do (extend (funcall get-bytes string pos))
+ finally (return-from string->latin%
+ (progn
+ (unless (zerop null-padding)
+ (vector-push-extend 0 new-octets))
+ (copy-seq new-octets)))))))))
\f
;;;; to-string conversions
:check-fill-pointer t)
(declare (type (simple-array (unsigned-byte 8) (*)) vector))
(let ((ef (maybe-defaulted-external-format external-format)))
- (funcall (symbol-function (sb!impl::ef-octets-to-string-sym ef))
- vector start end))))
+ (funcall (sb!impl::ef-octets-to-string-fun ef) vector start end))))
(defun string-to-octets (string &key (external-format :default)
(start 0) end null-terminate)
:check-fill-pointer t)
(declare (type simple-string string))
(let ((ef (maybe-defaulted-external-format external-format)))
- (funcall (symbol-function (sb!impl::ef-string-to-octets-sym ef))
- string start end (if null-terminate 1 0)))))
+ (funcall (sb!impl::ef-string-to-octets-fun ef) string start end
+ (if null-terminate 1 0)))))
#!+sb-unicode
(defvar +unicode-replacement-character+ (string (code-char #xfffd)))