1.0.32.16: external-format restart enhancements
authorChristophe Rhodes <csr21@cantab.net>
Wed, 11 Nov 2009 13:21:37 +0000 (13:21 +0000)
committerChristophe Rhodes <csr21@cantab.net>
Wed, 11 Nov 2009 13:21:37 +0000 (13:21 +0000)
* OUTPUT-REPLACEMENT restart for fd-stream external-formats, taking a
  string designator argument and attempting to encode that instead of the
  erroneous output;

* fixes for the FORCE-END-OF-FILE fd-stream external-format restart, using
  a somewhat involved call / return protocol for communicating out-of-band
  information between output routines and drivers;

* INPUT-REPLACEMENT restart for fd-stream external-formats, again with
  complicated out-of-band information communication.  This also interacts
  with UNREAD-CHAR;

* fix the ATTEMPT-RESYNC restart (and similar) at or near the end of file,
  ensuring that there is always a valid CATCH tag to be THROWN to;

* fix a double-error case in the USE-VALUE restart for unibyte octet
  conversions;

* bandage fix for mb-util decoding-error USE-VALUE restart -- there's more
  factoring to be done, but this fixes lp #314939

NEWS
src/code/external-formats/mb-util.lisp
src/code/fd-stream.lisp
src/code/octets.lisp
src/code/stream.lisp
tests/external-format.impure.lisp
tests/octets.pure.lisp
version.lisp-expr

diff --git a/NEWS b/NEWS
index 90dfc8a..db51631 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,9 +12,19 @@ changes relative to sbcl-1.0.32:
        Unicode 5.2 standard, giving names and properties to a number of new
        characters, and providing a few extra characters with case
        transformations.
+    ** improvement: restarts for providing replacement input/output on coding
+       errors for fd-stream external formats.
     ** fix a typo preventing conversion of strings into octet vectors
        in the latin-2 encoding.  (reported by Attila Lendvai; launchpad bug
        #471689)
+    ** fix a bug in the octet multibyte handling of decoding errors and the
+       USE-VALUE restart.  (launchpad bug #314939)
+    ** fix the bug underlying the expected failure in the FORCE-END-OF-FILE
+       restart on fd-stream decoding errors.
+    ** fix a bug in the ATTEMPT-RESYNC fd-stream decoding restart when the
+       error is near the end of file
+    ** fix a double-error case in unibyte octet conversions, when the first
+       use of USE-VALUE is ignored.
   * bug fix: uses of slot accessors on specialized method parameters within
     the bodies of SLOT-VALUE-USING-CLASS methods no longer triggers a type
     error while finalizing the class.  This fix may cause classes with slot
index 0cb711d..c76bc42 100644 (file)
                      (declare (type (or null string) invalid))
                      (cond
                        ((null invalid)
-                        (vector-push-extend (,simple-get-mb-char array pos bytes) string))
+                        (let ((thing (,simple-get-mb-char array pos bytes)))
+                          (typecase thing
+                            (character (vector-push-extend thing string))
+                            (string
+                               (dotimes (i (length thing))
+                                 (vector-push-extend (char thing i) string))))))
                        (t
                         (dotimes (i (length invalid))
                           (vector-push-extend (char invalid i) string))))
index 655ee64..5daab69 100644 (file)
   (listen nil :type (member nil t :eof))
 
   ;; the input buffer
-  (unread nil)
+  (instead (make-array 0 :element-type 'character :adjustable t :fill-pointer t) :type (array character (*)))
   (ibuf nil :type (or buffer null))
+  (eof-forced-p nil :type (member t nil))
 
   ;; the output buffer
   (obuf nil :type (or buffer null))
     (force-end-of-file ()
       :report (lambda (stream)
                 (format stream "~@<Force an end of file.~@:>"))
-      t)))
+      (setf (fd-stream-eof-forced-p stream) t))
+    (input-replacement (string)
+      :report (lambda (stream)
+                (format stream "~@<Use string as replacement input, ~
+                               attempt to resync at a character ~
+                               boundary and continue.~@:>"))
+      :interactive (lambda ()
+                     (format *query-io* "~@<Enter a string: ~@:>")
+                     (finish-output *query-io*)
+                     (list (read *query-io*)))
+      (let ((string (reverse (string string)))
+            (instead (fd-stream-instead stream)))
+        (dotimes (i (length string))
+          (vector-push-extend (char string i) instead))
+        (fd-stream-resync stream)
+        (when (> (length string) 0)
+          (setf (fd-stream-listen stream) t)))
+      nil)))
 
 (defun stream-encoding-error-and-handle (stream code)
   (restart-case
     (output-nothing ()
       :report (lambda (stream)
                 (format stream "~@<Skip output of this character.~@:>"))
+      (throw 'output-nothing nil))
+    (output-replacement (string)
+      :report (lambda (stream)
+                (format stream "~@<Output replacement string.~@:>"))
+      :interactive (lambda ()
+                     (format *query-io* "~@<Enter a string: ~@:>")
+                     (finish-output *query-io*)
+                     (list (read *query-io*)))
+      (let ((string (string string)))
+        (fd-sout stream (string string) 0 (length string)))
       (throw 'output-nothing nil))))
 
 (defun external-format-encoding-error (stream code)
     `(let* ((,stream-var ,stream)
             (ibuf (fd-stream-ibuf ,stream-var))
             (size nil))
-       (if (fd-stream-unread ,stream-var)
-           (prog1
-               (fd-stream-unread ,stream-var)
-             (setf (fd-stream-unread ,stream-var) nil)
-             (setf (fd-stream-listen ,stream-var) nil))
-           (let ((,element-var nil)
-                 (decode-break-reason nil))
-             (do ((,retry-var t))
-                 ((not ,retry-var))
-               (unless
-                   (catch 'eof-input-catcher
-                     (setf decode-break-reason
-                           (block decode-break-reason
-                             (input-at-least ,stream-var 1)
-                             (let* ((byte (sap-ref-8 (buffer-sap ibuf)
-                                                     (buffer-head ibuf))))
-                               (declare (ignorable byte))
-                               (setq size ,bytes)
-                               (input-at-least ,stream-var size)
-                               (setq ,element-var (locally ,@read-forms))
-                               (setq ,retry-var nil))
-                             nil))
-                     (when decode-break-reason
-                       (stream-decoding-error-and-handle stream
-                                                         decode-break-reason))
-                     t)
-                 (let ((octet-count (- (buffer-tail ibuf)
-                                       (buffer-head ibuf))))
-                   (when (or (zerop octet-count)
-                             (and (not ,element-var)
-                                  (not decode-break-reason)
-                                  (stream-decoding-error-and-handle
-                                   stream octet-count)))
-                     (setq ,retry-var nil)))))
-             (cond (,element-var
-                    (incf (buffer-head ibuf) size)
-                    ,element-var)
-                   (t
-                    (eof-or-lose ,stream-var ,eof-error ,eof-value))))))))
+       (block use-instead
+         (when (fd-stream-eof-forced-p ,stream-var)
+           (setf (fd-stream-eof-forced-p ,stream-var) nil)
+           (return-from use-instead
+             (eof-or-lose ,stream-var ,eof-error ,eof-value)))
+         (let ((,element-var nil)
+               (decode-break-reason nil))
+           (do ((,retry-var t))
+               ((not ,retry-var))
+             (if (> (length (fd-stream-instead ,stream-var)) 0)
+                 (let* ((instead (fd-stream-instead ,stream-var))
+                        (result (vector-pop instead))
+                        (pointer (fill-pointer instead)))
+                   (when (= pointer 0)
+                     (setf (fd-stream-listen ,stream-var) nil))
+                   (return-from use-instead result))
+                 (unless
+                     (catch 'eof-input-catcher
+                       (setf decode-break-reason
+                             (block decode-break-reason
+                               (input-at-least ,stream-var 1)
+                               (let* ((byte (sap-ref-8 (buffer-sap ibuf)
+                                                       (buffer-head ibuf))))
+                                 (declare (ignorable byte))
+                                 (setq size ,bytes)
+                                 (input-at-least ,stream-var size)
+                                 (setq ,element-var (locally ,@read-forms))
+                                 (setq ,retry-var nil))
+                               nil))
+                       (when decode-break-reason
+                         (when (stream-decoding-error-and-handle
+                                stream decode-break-reason)
+                           (setq ,retry-var nil)
+                           (throw 'eof-input-catcher nil)))
+                       t)
+                   (let ((octet-count (- (buffer-tail ibuf)
+                                         (buffer-head ibuf))))
+                     (when (or (zerop octet-count)
+                               (and (not ,element-var)
+                                    (not decode-break-reason)
+                                    (stream-decoding-error-and-handle
+                                     stream octet-count)))
+                       (setq ,retry-var nil))))))
+           (cond (,element-var
+                  (incf (buffer-head ibuf) size)
+                  ,element-var)
+                 (t
+                  (eof-or-lose ,stream-var ,eof-error ,eof-value))))))))
 
 ;;; a macro to wrap around all input routines to handle EOF-ERROR noise
 (defmacro input-wrapper ((stream bytes eof-error eof-value) &body read-forms)
         (element-var (gensym "ELT")))
     `(let* ((,stream-var ,stream)
             (ibuf (fd-stream-ibuf ,stream-var)))
-       (if (fd-stream-unread ,stream-var)
-           (prog1
-               (fd-stream-unread ,stream-var)
-             (setf (fd-stream-unread ,stream-var) nil)
-             (setf (fd-stream-listen ,stream-var) nil))
+       (if (> (length (fd-stream-instead ,stream-var)) 0)
+           (bug "INSTEAD not empty in INPUT-WRAPPER for ~S" ,stream-var)
            (let ((,element-var
                   (catch 'eof-input-catcher
                     (input-at-least ,stream-var ,bytes)
                                &aux (total-copied 0))
   (declare (type fd-stream stream))
   (declare (type index start requested total-copied))
-  (let ((unread (fd-stream-unread stream)))
-    (when unread
-      ;; AVERs designed to fail when we have more complicated
-      ;; character representations.
-      (aver (typep unread 'base-char))
-      (aver (= (fd-stream-element-size stream) 1))
-      ;; KLUDGE: this is a slightly-unrolled-and-inlined version of
-      ;; %BYTE-BLT
-      (etypecase buffer
-        (system-area-pointer
-         (setf (sap-ref-8 buffer start) (char-code unread)))
-        ((simple-unboxed-array (*))
-         (setf (aref buffer start) unread)))
-      (setf (fd-stream-unread stream) nil)
-      (setf (fd-stream-listen stream) nil)
-      (incf total-copied)))
+  (aver (= (length (fd-stream-instead stream)) 0))
   (do ()
       (nil)
     (let* ((remaining-request (- requested total-copied))
           (do ()
               ((= end start))
             (let ((obuf (fd-stream-obuf stream)))
-              (setf (buffer-tail obuf)
-                    (string-dispatch (simple-base-string
-                                      #!+sb-unicode
-                                      (simple-array character (*))
-                                      string)
-                        string
-                      (let ((sap (buffer-sap obuf))
-                            (len (buffer-length obuf))
-                            ;; FIXME: rename
-                            (tail (buffer-tail obuf)))
-                       (declare (type index tail)
-                                ;; STRING bounds have already been checked.
-                                (optimize (safety 0)))
-                       (loop
-                         (,@(if output-restart
-                                `(catch 'output-nothing)
-                                `(progn))
-                            (do* ()
-                                 ((or (= start end) (< (- len tail) 4)))
-                              (let* ((byte (aref string start))
-                                     (bits (char-code byte)))
-                                ,out-expr
-                                (incf tail ,size)
-                                (incf start)))
-                            ;; Exited from the loop normally
-                            (return tail))
-                         ;; Exited via CATCH. Skip the current character
-                         ;; and try the inner loop again.
-                         (incf start))))))
+              (string-dispatch (simple-base-string
+                                #!+sb-unicode
+                                (simple-array character (*))
+                                string)
+                  string
+                (let ((sap (buffer-sap obuf))
+                      (len (buffer-length obuf))
+                      ;; FIXME: rename
+                      (tail (buffer-tail obuf)))
+                  (declare (type index tail)
+                           ;; STRING bounds have already been checked.
+                           (optimize (safety 0)))
+                  (,@(if output-restart
+                         `(catch 'output-nothing)
+                         `(progn))
+                     (do* ()
+                          ((or (= start end) (< (- len tail) 4)))
+                       (let* ((byte (aref string start))
+                              (bits (char-code byte)))
+                         ,out-expr
+                         (incf tail ,size)
+                         (setf (buffer-tail obuf) tail)
+                         (incf start)))
+                     ;; Exited from the loop normally
+                     (go flush))
+                  ;; Exited via CATCH. Skip the current character.
+                  (incf start))))
+           flush
             (when (< start end)
               (flush-output-buffer stream)))
           (when flush-p
                  (type
                   (simple-array character (#.+ansi-stream-in-buffer-length+))
                   buffer))
-        (let ((unread (fd-stream-unread stream)))
-          (when unread
-            (setf (aref buffer index) unread)
-            (setf (fd-stream-unread stream) nil)
-            (setf (fd-stream-listen stream) nil)
-            (incf index)))
+        (when (fd-stream-eof-forced-p stream)
+          (setf (fd-stream-eof-forced-p stream) nil)
+          (return-from ,in-function 0))
+        (do ((instead (fd-stream-instead stream)))
+            ((= (fill-pointer instead) 0)
+             (setf (fd-stream-listen stream) nil))
+          (setf (aref buffer index) (vector-pop instead))
+          (incf index)
+          (when (= index end)
+            (return-from ,in-function (- index start))))
         (do ()
             (nil)
           (let* ((ibuf (fd-stream-ibuf stream))
           (do ()
               ((= end start))
             (let ((obuf (fd-stream-obuf stream)))
-              (setf (buffer-tail obuf)
-                    (string-dispatch (simple-base-string
-                                      #!+sb-unicode
-                                      (simple-array character (*))
-                                      string)
-                        string
-                      (let ((len (buffer-length obuf))
-                            (sap (buffer-sap obuf))
-                            ;; FIXME: Rename
-                            (tail (buffer-tail obuf)))
-                        (declare (type index tail)
-                                 ;; STRING bounds have already been checked.
-                                 (optimize (safety 0)))
-                        (loop
-                          (,@(if output-restart
-                                 `(catch 'output-nothing)
-                                 `(progn))
-                             (do* ()
-                                  ((or (= start end) (< (- len tail) 4)))
-                               (let* ((byte (aref string start))
-                                      (bits (char-code byte))
-                                      (size ,out-size-expr))
-                                 ,out-expr
-                                 (incf tail size)
-                                 (incf start)))
-                             ;; Exited from the loop normally
-                             (return tail))
-                          ;; Exited via CATCH. Skip the current character
-                          ;; and try the inner loop again.
-                          (incf start))))))
+              (string-dispatch (simple-base-string
+                                #!+sb-unicode (simple-array character (*))
+                                string)
+                  string
+                (let ((len (buffer-length obuf))
+                      (sap (buffer-sap obuf))
+                      ;; FIXME: Rename
+                      (tail (buffer-tail obuf)))
+                  (declare (type index tail)
+                           ;; STRING bounds have already been checked.
+                           (optimize (safety 0)))
+                  (,@(if output-restart
+                         `(catch 'output-nothing)
+                         `(progn))
+                     (do* ()
+                          ((or (= start end) (< (- len tail) 4)))
+                       (let* ((byte (aref string start))
+                              (bits (char-code byte))
+                              (size ,out-size-expr))
+                         ,out-expr
+                         (incf tail size)
+                         (setf (buffer-tail obuf) tail)
+                         (incf start)))
+                     (go flush))
+                  ;; Exited via CATCH: skip the current character.
+                  (incf start))))
+           flush
             (when (< start end)
               (flush-output-buffer stream)))
           (when flush-p
                  (type
                   (simple-array character (#.+ansi-stream-in-buffer-length+))
                   buffer))
-        (let ((unread (fd-stream-unread stream)))
-          (when unread
-            (setf (aref buffer start) unread)
-            (setf (fd-stream-unread stream) nil)
-            (setf (fd-stream-listen stream) nil)
-            (incf total-copied)))
+        (when (fd-stream-eof-forced-p stream)
+          (setf (fd-stream-eof-forced-p stream) nil)
+          (return-from ,in-function 0))
+        (do ((instead (fd-stream-instead stream)))
+            ((= (fill-pointer instead) 0)
+             (setf (fd-stream-listen stream) nil))
+          (setf (aref buffer (+ start total-copied)) (vector-pop instead))
+          (incf total-copied)
+          (when (= requested total-copied)
+            (return-from ,in-function total-copied)))
         (do ()
             (nil)
           (let* ((ibuf (fd-stream-ibuf stream))
                   (if eof-error-p
                       (error 'end-of-file :stream stream)
                       (return-from ,in-function total-copied)))
-                (setf head (buffer-head ibuf))
-                (setf tail (buffer-tail ibuf))))
+                ;; we might have been given stuff to use instead, so
+                ;; we have to return (and trust our caller to know
+                ;; what to do about TOTAL-COPIED being 0).
+                (return-from ,in-function total-copied)))
             (setf (buffer-head ibuf) head)
             ;; Maybe we need to refill the stream buffer.
             (cond ( ;; If there were enough data in the stream buffer, we're done.
           ,in-expr))
       (defun ,resync-function (stream)
         (let ((ibuf (fd-stream-ibuf stream)))
-          (loop
-            (input-at-least stream 2)
-            (incf (buffer-head ibuf))
-            (unless (block decode-break-reason
-                      (let* ((sap (buffer-sap ibuf))
-                             (head (buffer-head ibuf))
-                             (byte (sap-ref-8 sap head))
-                             (size ,in-size-expr))
-                        (declare (ignorable byte))
-                        (input-at-least stream size)
-                        (setf head (buffer-head ibuf))
-                        ,in-expr)
-                     nil)
-             (return)))))
+          (catch 'eof-input-catcher
+            (loop
+               (incf (buffer-head ibuf))
+               (input-at-least stream 1)
+               (unless (block decode-break-reason
+                         (let* ((sap (buffer-sap ibuf))
+                                (head (buffer-head ibuf))
+                                (byte (sap-ref-8 sap head))
+                                (size ,in-size-expr))
+                           (declare (ignorable byte))
+                           (input-at-least stream size)
+                           (setf head (buffer-head ibuf))
+                           ,in-expr)
+                         nil)
+                 (return))))))
       (defun ,read-c-string-function (sap element-type)
         (declare (type system-area-pointer sap))
         (locally
   ;; we're still safe: buffers have finalizers of their own.
   (release-fd-stream-buffers fd-stream))
 
-;;; Flushes the current input buffer and unread chatacter, and returns
-;;; the input buffer, and the amount of of flushed input in bytes.
+;;; Flushes the current input buffer and any supplied replacements,
+;;; and returns the input buffer, and the amount of of flushed input
+;;; in bytes.
 (defun flush-input-buffer (stream)
-  (let ((unread (if (fd-stream-unread stream)
-                    1
-                    0)))
-    (setf (fd-stream-unread stream) nil)
+  (let ((unread (length (fd-stream-instead stream))))
+    (setf (fill-pointer (fd-stream-instead stream)) 0)
     (let ((ibuf (fd-stream-ibuf stream)))
       (if ibuf
           (let ((head (buffer-head ibuf))
                                  (do-listen)))))))
        (do-listen)))
     (:unread
-     ;; If the stream is bivalent, the user might follow an
-     ;; unread-char with a read-byte.  In this case, the bookkeeping
-     ;; is simpler if we adjust the buffer head by the number of code
-     ;; units in the character.
-     ;; FIXME: there has to be a proper way to check for bivalence,
-     ;; right?
-     (if (fd-stream-bivalent-p fd-stream)
-         (decf (buffer-head (fd-stream-ibuf fd-stream))
-               (fd-stream-character-size fd-stream arg1))
-         (setf (fd-stream-unread fd-stream) arg1))
+     (decf (buffer-head (fd-stream-ibuf fd-stream))
+           (fd-stream-character-size fd-stream arg1))
      (setf (fd-stream-listen fd-stream) t))
     (:close
      ;; Drop input buffers
         (let ((ibuf (fd-stream-ibuf stream)))
           (when ibuf
             (decf posn (- (buffer-tail ibuf) (buffer-head ibuf)))))
-        (when (fd-stream-unread stream)
-          (decf posn))
         ;; Divide bytes by element size.
         (truncate posn (fd-stream-element-size stream))))))
 
index ce032f6..d4d5817 100644 (file)
@@ -260,7 +260,8 @@ one-past-the-end"
                             :initial-element 0
                             :element-type '(unsigned-byte 8)))
         (index 0)
-        (error-position 0))
+        (error-position 0)
+        (error-replacement))
     (tagbody
      :no-error
        (loop for pos of-type index from sstart below send
@@ -273,30 +274,32 @@ one-past-the-end"
                   ;; KLUDGE: We ran into encoding errors.  Bail and do
                   ;; things the slow way (does anybody actually use this
                   ;; functionality besides our own test suite?).
-                  (setf error-position pos)
+                  (setf error-position pos error-replacement byte)
                   (go :error)))
                (incf index))
           finally (return-from string->latin% octets))
      :error
-       ;; We have encoded INDEX octets so far and we ran into an encoding
-       ;; error at ERROR-POSITION.
+       ;; We have encoded INDEX octets so far and we ran into an
+       ;; encoding error at ERROR-POSITION; the user has asked us to
+       ;; replace the expected output with ERROR-REPLACEMENT.
        (let ((new-octets (make-array (* index 2)
                                      :element-type '(unsigned-byte 8)
                                      :adjustable t :fill-pointer index)))
          (replace new-octets octets)
-         (loop for pos of-type index from error-position below send
-            do (let ((thing (funcall get-bytes string pos)))
+         (flet ((extend (thing)
                  (typecase thing
-                   ((unsigned-byte 8)
-                    (vector-push-extend thing new-octets))
+                   ((unsigned-byte 8) (vector-push-extend thing new-octets))
                    ((simple-array (unsigned-byte 8) (*))
                     (dotimes (i (length thing))
-                      (vector-push-extend (aref thing i) new-octets)))))
-            finally (return-from string->latin%
-                      (progn
-                        (unless (zerop null-padding)
-                          (vector-push-extend 0 new-octets))
-                        (copy-seq new-octets))))))))
+                      (vector-push-extend (aref thing i) new-octets))))))
+           (extend error-replacement)
+           (loop for pos of-type index from (1+ error-position) below send
+                 do (extend (funcall get-bytes string pos))
+                 finally (return-from string->latin%
+                           (progn
+                             (unless (zerop null-padding)
+                               (vector-push-extend 0 new-octets))
+                             (copy-seq new-octets)))))))))
 \f
 ;;;; to-string conversions
 
index 9003e42..a0a0cb4 100644 (file)
   (declare (type stream stream))
   (declare (type (or index (alien sb!unix:off-t) (member nil :start :end))
                  position))
-  ;; FIXME: It woud be good to comment on the stuff that is done here...
+  ;; FIXME: It would be good to comment on the stuff that is done here...
   ;; FIXME: This doesn't look interrupt safe.
   (cond
     (position
            ;; An empty count does not necessarily mean that we reached
            ;; the EOF, it's also possible that it's e.g. due to a
            ;; invalid octet sequence in a multibyte stream. To handle
-           ;; the resyncing case correctly we need to call the
-           ;; single-character reading function and check whether an
-           ;; EOF was really reached. If not, we can just fill the
-           ;; buffer by one character, and hope that the next refill
-           ;; will not need to resync.
-           (let* ((value (funcall (ansi-stream-in stream) stream nil :eof))
-                  (index (1- +ansi-stream-in-buffer-length+)))
-             (case value
-               ((:eof)
-                ;; Mark buffer as empty.
+           ;; the resyncing case correctly we need to call the reading
+           ;; function and check whether an EOF was really reached. If
+           ;; not, we can just fill the buffer by one character, and
+           ;; hope that the next refill will not need to resync.
+           ;;
+           ;; KLUDGE: we can't use FD-STREAM functions (which are the
+           ;; only ones which will give us decoding errors) here,
+           ;; because this code is generic.  We can't call the N-BIN
+           ;; function, because near the end of a real file that can
+           ;; legitimately bounce us to the IN function.  So we have
+           ;; to call ANSI-STREAM-IN.
+           (let* ((index (1- +ansi-stream-in-buffer-length+))
+                  (value (funcall (ansi-stream-in stream) stream nil :eof)))
+             (cond
+               ((eql value :eof)
+                ;; definitely EOF now
                 (setf (ansi-stream-in-index stream)
                       +ansi-stream-in-buffer-length+)
-                ;; EOF. Redo the read, this time with the real eof parameters.
-                (values t (funcall (ansi-stream-in stream)
-                                   stream eof-error-p eof-value)))
-               (otherwise
+                (values t (eof-or-lose stream eof-error-p eof-value)))
+               ;; we resynced or were given something instead
+               (t
                 (setf (aref ibuf index) value)
                 (values nil (setf (ansi-stream-in-index stream) index))))))
           (t
index 78285f7..0845e6b 100644 (file)
   (write-byte 67 s))
 (with-open-file (s *test-path* :direction :input
                  :external-format :utf-8)
-  (handler-bind
-      ((sb-int:character-decoding-error #'(lambda (decoding-error)
-                                            (declare (ignore decoding-error))
-                                            (invoke-restart
-                                             'sb-int:attempt-resync))))
-    (assert (equal (read-line s nil s) "ABC"))
-    (assert (equal (read-line s nil s) s))))
+  (let ((count 0))
+    (handler-bind
+        ((sb-int:character-decoding-error #'(lambda (decoding-error)
+                                              (declare (ignore decoding-error))
+                                              (when (> (incf count) 1)
+                                                (error "too many errors"))
+                                              (invoke-restart
+                                               'sb-int:attempt-resync))))
+      (assert (equal (read-line s nil s) "ABC"))
+      (assert (equal (read-line s nil s) s)))))
 (with-open-file (s *test-path* :direction :input
                  :external-format :utf-8)
-  (handler-bind
-      ((sb-int:character-decoding-error #'(lambda (decoding-error)
-                                            (declare (ignore decoding-error))
-                                            (invoke-restart
-                                             'sb-int:force-end-of-file))))
-    (assert (equal (read-line s nil s) "AB"))
-    (assert (equal (read-line s nil s) s))))
+  (let ((count 0))
+    (handler-bind
+        ((sb-int:character-decoding-error #'(lambda (decoding-error)
+                                              (declare (ignore decoding-error))
+                                              (when (> (incf count) 1)
+                                                (error "too many errors"))
+                                              (invoke-restart
+                                               'sb-int:force-end-of-file))))
+      (assert (equal (read-line s nil s) "AB"))
+      (setf count 0)
+      (assert (equal (read-line s nil s) s)))))
 
 ;;; And again with more data to account for buffering (this was briefly)
 ;;; broken in early 0.9.6.
 (with-test (:name (:character-decode-large :attempt-resync))
   (with-open-file (s *test-path* :direction :input
                      :external-format :utf-8)
-    (handler-bind
-        ((sb-int:character-decoding-error #'(lambda (decoding-error)
+    (let ((count 0))
+      (handler-bind
+          ((sb-int:character-decoding-error (lambda (decoding-error)
                                               (declare (ignore decoding-error))
+                                              (when (> (incf count) 1)
+                                                (error "too many errors"))
                                               (invoke-restart
                                                'sb-int:attempt-resync)))
-         ;; The failure mode is an infinite loop, add a timeout to detetct it.
-         (sb-ext:timeout (lambda () (error "Timeout"))))
-      (sb-ext:with-timeout 5
-        (dotimes (i 80)
-          (assert (equal (read-line s nil s)
-                         "1234567890123456789012345678901234567890123456789")))))))
+           ;; The failure mode is an infinite loop, add a timeout to
+           ;; detetct it.
+           (sb-ext:timeout (lambda () (error "Timeout"))))
+        (sb-ext:with-timeout 5
+          (dotimes (i 80)
+            (assert (equal (read-line s nil s)
+                           "1234567890123456789012345678901234567890123456789"))))))))
 
-(with-test (:name (:character-decode-large :force-end-of-file)
-            :fails-on :sbcl)
-  (error "We can't reliably test this due to WITH-TIMEOUT race condition")
-  ;; This test will currently fail. But sometimes it will fail in
-  ;; ungracefully due to the WITH-TIMEOUT race mentioned above. This
-  ;; rightfully confuses some people, so we'll skip running the code
-  ;; for now. -- JES, 2006-01-27
-  #+nil
+(with-test (:name (:character-decode-large :force-end-of-file))
   (with-open-file (s *test-path* :direction :input
                      :external-format :utf-8)
-    (handler-bind
-        ((sb-int:character-decoding-error #'(lambda (decoding-error)
+    (let ((count 0))
+      (handler-bind
+          ((sb-int:character-decoding-error (lambda (decoding-error)
                                               (declare (ignore decoding-error))
+                                              (when (> (incf count) 1)
+                                                (error "too many errors"))
                                               (invoke-restart
                                                'sb-int:force-end-of-file)))
-         ;; The failure mode is an infinite loop, add a timeout to detetct it.
-         (sb-ext:timeout (lambda () (error "Timeout"))))
-      (sb-ext:with-timeout 5
-        (dotimes (i 80)
-          (assert (equal (read-line s nil s)
-                         "1234567890123456789012345678901234567890123456789")))
-        (assert (equal (read-line s nil s) s))))))
+           ;; The failure mode is an infinite loop, add a timeout to detetct it.
+           (sb-ext:timeout (lambda () (error "Timeout"))))
+        (sb-ext:with-timeout 5
+          (dotimes (i 40)
+            (assert (equal (read-line s nil s)
+                           "1234567890123456789012345678901234567890123456789")))
+          (setf count 0)
+          (assert (equal (read-line s nil s) s)))))))
 
 ;;; Test character encode restarts.
 (with-open-file (s *test-path* :direction :output
     (str (c-string :external-format :ebcdic-us)))
   (assert (typep (strdup "foo") 'simple-base-string)))
 
+(with-test (:name (:input-replacement :at-end-of-file))
+  (dotimes (i 256)
+    (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8))
+      (write-byte i s))
+    (handler-bind ((sb-int:character-decoding-error
+                    (lambda (c)
+                      (invoke-restart 'sb-impl::input-replacement #\?))))
+      (with-open-file (s *test-path* :external-format :utf-8)
+        (cond
+          ((char= (read-char s) #\?)
+           (assert (or (= i (char-code #\?)) (> i 127))))
+          (t (assert (and (not (= i (char-code #\?))) (< i 128)))))))))
+
 ;;;; success
index 192f0ee..4bf7f18 100644 (file)
                 l9c))))
   (ensure-roundtrip-utf8)
 
-  (let ((non-ascii-bytes (make-array 128
-                                     :element-type '(unsigned-byte 8)
-                                     :initial-contents (loop for i from 128 below 256
-                                                             collect i))))
-    (handler-bind ((sb-int:character-decoding-error
-                    (lambda (c)
-                      (use-value "??" c))))
-      (assert (string= (octets-to-string non-ascii-bytes :external-format :ascii)
-                       (make-string 256 :initial-element #\?)))))
-  (let ((non-ascii-chars (make-array 128
-                                     :element-type 'character
-                                     :initial-contents (loop for i from 128 below 256
-                                                             collect (code-char i)))))
-    (handler-bind ((sb-int:character-encoding-error
-                    (lambda (c)
-                      (use-value "??" c))))
-      (assert (equalp (string-to-octets non-ascii-chars :external-format :ascii)
-                      (make-array 256 :initial-element (char-code #\?))))))
+  (with-test (:name (:ascii :decoding-error use-value))
+    (let ((non-ascii-bytes (make-array 128
+                                       :element-type '(unsigned-byte 8)
+                                       :initial-contents (loop for i from 128 below 256 collect i)))
+        (error-count 0))
+      (handler-bind ((sb-int:character-decoding-error
+                      (lambda (c)
+                        (incf error-count)
+                        (use-value "??" c))))
+        (assert (string= (octets-to-string non-ascii-bytes :external-format :ascii)
+                         (make-string 256 :initial-element #\?)))
+        (assert (= error-count 128)))))
+  (with-test (:name (:ascii :encoding-error use-value))
+    (let ((non-ascii-chars (make-array 128
+                                       :element-type 'character
+                                       :initial-contents (loop for i from 128 below 256 collect (code-char i))))
+          (error-count 0))
+      (handler-bind ((sb-int:character-encoding-error
+                      (lambda (c)
+                        (incf error-count)
+                        (use-value "??" c))))
+        (assert (equalp (string-to-octets non-ascii-chars :external-format :ascii)
+                        (make-array 256 :initial-element (char-code #\?))))
+        (assert (= error-count 128)))))
 
   ;; From Markus Kuhn's UTF-8 test file:
   ;; http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
 #+sb-unicode
 (assert (equalp #(251) (string-to-octets (string (code-char 369))
                                          :external-format :latin-2)))
+
+#+sb-unicode
+(with-test (:name (:euc-jp :decoding-errors))
+  (handler-bind ((sb-int:character-decoding-error
+                  (lambda (c) (use-value #\? c))))
+    (assert (string= "?{?"
+                     (octets-to-string
+                      (coerce #(182 123 253 238) '(vector (unsigned-byte 8)))
+                      :external-format :euc-jp)))))
+
index f0c64af..d484356 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.32.15"
+"1.0.32.16"