0.9.0.38:
[sbcl.git] / src / code / fd-stream.lisp
index ae79940..5c17dd1 100644 (file)
       (pop *available-buffers*)
       (allocate-system-memory bytes-per-buffer)))
 \f
-;;;; the FILE-STREAM structure
+;;;; the FD-STREAM structure
 
-(defstruct (file-stream
+(defstruct (fd-stream
            (:constructor %make-fd-stream)
-           ;; KLUDGE: in an ideal world, maybe we'd rewrite
-           ;; everything to use FILE-STREAM rather than simply
-           ;; providing this hack for compatibility with the old
-           ;; code.  However, CVS doesn't deal terribly well with
-           ;; file renaming, so for now we use this
-           ;; backward-compatibility feature.
            (:conc-name fd-stream-)
            (:predicate fd-stream-p)
            (:include ansi-stream
   (fd -1 :type fixnum)       
   ;; controls when the output buffer is flushed
   (buffering :full :type (member :full :line :none))
+  ;; controls whether the input buffer must be cleared before output
+  ;; (must be done for files, not for sockets, pipes and other data
+  ;; sources where input and output aren't related).  non-NIL means
+  ;; don't clear input buffer.
+  (dual-channel-p nil)
   ;; character position (if known)
   (char-pos nil :type (or index null))
   ;; T if input is waiting on FD. :EOF if we hit EOF.
   ;; timeout specified for this stream, or NIL if none
   (timeout nil :type (or index null))
   ;; pathname of the file this stream is opened to (returned by PATHNAME)
-  (pathname nil :type (or pathname null)))
-(def!method print-object ((fd-stream file-stream) stream)
+  (pathname nil :type (or pathname null))
+  (external-format :default)
+  (output-bytes #'ill-out :type function))
+(def!method print-object ((fd-stream fd-stream) stream)
   (declare (type stream stream))
   (print-unreadable-object (fd-stream stream :type t :identity t)
     (format stream "for ~S" (fd-stream-name fd-stream))))
         :format-arguments
         (list note-format (list pathname) (strerror errno))))
 
+(defun stream-decoding-error (stream octets)
+  (error 'stream-decoding-error
+        :stream stream
+         ;; FIXME: dunno how to get at OCTETS currently, or even if
+         ;; that's the right thing to report.
+         :octets octets))
+(defun stream-encoding-error (stream code)
+  (error 'stream-encoding-error
+        :stream stream
+         :code code))
+
+;;; Returning true goes into end of file handling, false will enter another
+;;; round of input buffer filling followed by re-entering character decode.
+(defun stream-decoding-error-and-handle (stream octet-count)
+  (restart-case
+      (stream-decoding-error stream
+                            (let ((sap (fd-stream-ibuf-sap stream))
+                                  (head (fd-stream-ibuf-head stream)))
+                              (loop for i from 0 below octet-count
+                                    collect (sap-ref-8 sap (+ head i)))))
+    (attempt-resync ()
+      :report (lambda (stream)
+               (format stream
+                       "~@<Attempt to resync the stream at a character ~
+                        character boundary and continue.~@:>"))
+      (fd-stream-resync stream)
+      nil)
+    (force-end-of-file ()
+      :report (lambda (stream)
+               (format stream "~@<Force an end of file.~@:>"))
+      t)))
+
+(defun stream-encoding-error-and-handle (stream code)
+  (restart-case
+      (stream-encoding-error stream code)
+    (output-nothing ()
+      :report (lambda (stream)
+               (format stream "~@<Skip output of this character.~@:>"))
+      (throw 'output-nothing nil))))
+
 ;;; This is called by the server when we can write to the given file
 ;;; descriptor. Attempt to write the data again. If it worked, remove
 ;;; the data from the OUTPUT-LATER list. If it didn't work, something
 ;;; writes. If so, just queue this one. Otherwise, try to write it. If
 ;;; this would block, queue it.
 (defun frob-output (stream base start end reuse-sap)
-  (declare (type file-stream stream)
+  (declare (type fd-stream stream)
           (type (or system-area-pointer (simple-array * (*))) base)
           (type index start end))
   (if (not (null (fd-stream-output-later stream))) ; something buffered.
       (frob-output stream (fd-stream-obuf-sap stream) 0 length t)
       (setf (fd-stream-obuf-tail stream) 0))))
 
+(defmacro output-wrapper/variable-width ((stream size buffering restart)
+                                        &body body)
+  (let ((stream-var (gensym)))
+    `(let ((,stream-var ,stream)
+          (size ,size))
+      ,(unless (eq (car buffering) :none)
+        `(when (< (fd-stream-obuf-length ,stream-var)
+                  (+ (fd-stream-obuf-tail ,stream-var)
+                      size))
+            (flush-output-buffer ,stream-var)))
+      ,(unless (eq (car buffering) :none)
+        `(when (and (not (fd-stream-dual-channel-p ,stream-var))
+                    (> (fd-stream-ibuf-tail ,stream-var)
+                       (fd-stream-ibuf-head ,stream-var)))
+            (file-position ,stream-var (file-position ,stream-var))))
+      ,(if restart
+           `(catch 'output-nothing
+             ,@body
+             (incf (fd-stream-obuf-tail ,stream-var) size))
+           `(progn
+             ,@body
+             (incf (fd-stream-obuf-tail ,stream-var) size)))
+      ,(ecase (car buffering)
+        (:none
+         `(flush-output-buffer ,stream-var))
+        (:line
+         `(when (eq (char-code byte) (char-code #\Newline))
+            (flush-output-buffer ,stream-var)))
+        (:full))
+    (values))))
+
+(defmacro output-wrapper ((stream size buffering restart) &body body)
+  (let ((stream-var (gensym)))
+    `(let ((,stream-var ,stream))
+      ,(unless (eq (car buffering) :none)
+        `(when (< (fd-stream-obuf-length ,stream-var)
+                  (+ (fd-stream-obuf-tail ,stream-var)
+                      ,size))
+            (flush-output-buffer ,stream-var)))
+      ,(unless (eq (car buffering) :none)
+        `(when (and (not (fd-stream-dual-channel-p ,stream-var))
+                    (> (fd-stream-ibuf-tail ,stream-var)
+                       (fd-stream-ibuf-head ,stream-var)))
+            (file-position ,stream-var (file-position ,stream-var))))
+      ,(if restart
+          `(catch 'output-nothing
+             ,@body
+             (incf (fd-stream-obuf-tail ,stream-var) ,size))
+           `(progn
+             ,@body
+             (incf (fd-stream-obuf-tail ,stream-var) ,size)))
+      ,(ecase (car buffering)
+        (:none
+         `(flush-output-buffer ,stream-var))
+        (:line
+         `(when (eq (char-code byte) (char-code #\Newline))
+            (flush-output-buffer ,stream-var)))
+        (:full))
+    (values))))
+
+(defmacro def-output-routines/variable-width
+    ((name-fmt size restart external-format &rest bufferings)
+     &body body)
+  (declare (optimize (speed 1)))
+  (cons 'progn
+       (mapcar
+           (lambda (buffering)
+             (let ((function
+                    (intern (format nil name-fmt (string (car buffering))))))
+               `(progn
+                  (defun ,function (stream byte)
+                    (output-wrapper/variable-width (stream ,size ,buffering ,restart)
+                      ,@body))
+                  (setf *output-routines*
+                        (nconc *output-routines*
+                               ',(mapcar
+                                  (lambda (type)
+                                    (list type
+                                          (car buffering)
+                                          function
+                                          1
+                                          external-format))
+                                  (cdr buffering)))))))
+           bufferings)))
+
 ;;; Define output routines that output numbers SIZE bytes long for the
 ;;; given bufferings. Use BODY to do the actual output.
-(defmacro def-output-routines ((name-fmt size &rest bufferings) &body body)
+(defmacro def-output-routines ((name-fmt size restart &rest bufferings)
+                               &body body)
   (declare (optimize (speed 1)))
   (cons 'progn
        (mapcar
            (lambda (buffering)
              (let ((function
-                    (intern (let ((*print-case* :upcase))
-                              (format nil name-fmt (car buffering))))))
+                    (intern (format nil name-fmt (string (car buffering))))))
                `(progn
                   (defun ,function (stream byte)
-                    ,(unless (eq (car buffering) :none)
-                       `(when (< (fd-stream-obuf-length stream)
-                                 (+ (fd-stream-obuf-tail stream)
-                                    ,size))
-                          (flush-output-buffer stream)))
-                    ,@body
-                    (incf (fd-stream-obuf-tail stream) ,size)
-                    ,(ecase (car buffering)
-                       (:none
-                        `(flush-output-buffer stream))
-                       (:line
-                        `(when (eq (char-code byte) (char-code #\Newline))
-                           (flush-output-buffer stream)))
-                       (:full
-                        ))
-                    (values))
+                    (output-wrapper (stream ,size ,buffering ,restart)
+                      ,@body))
                   (setf *output-routines*
                         (nconc *output-routines*
                                ',(mapcar
                                     (list type
                                           (car buffering)
                                           function
-                                          size))
+                                          size
+                                          nil))
                                   (cdr buffering)))))))
            bufferings)))
 
+;;; FIXME: is this used anywhere any more?
 (def-output-routines ("OUTPUT-CHAR-~A-BUFFERED"
                      1
+                      t
                      (:none character)
                      (:line character)
                      (:full character))
-  (if (and (base-char-p byte) (char= byte #\Newline))
+  (if (char= byte #\Newline)
       (setf (fd-stream-char-pos stream) 0)
       (incf (fd-stream-char-pos stream)))
   (setf (sap-ref-8 (fd-stream-obuf-sap stream) (fd-stream-obuf-tail stream))
 
 (def-output-routines ("OUTPUT-UNSIGNED-BYTE-~A-BUFFERED"
                      1
+                      nil
                      (:none (unsigned-byte 8))
                      (:full (unsigned-byte 8)))
   (setf (sap-ref-8 (fd-stream-obuf-sap stream) (fd-stream-obuf-tail stream))
 
 (def-output-routines ("OUTPUT-SIGNED-BYTE-~A-BUFFERED"
                      1
+                      nil
                      (:none (signed-byte 8))
                      (:full (signed-byte 8)))
   (setf (signed-sap-ref-8 (fd-stream-obuf-sap stream)
 
 (def-output-routines ("OUTPUT-UNSIGNED-SHORT-~A-BUFFERED"
                      2
+                      nil
                      (:none (unsigned-byte 16))
                      (:full (unsigned-byte 16)))
   (setf (sap-ref-16 (fd-stream-obuf-sap stream) (fd-stream-obuf-tail stream))
 
 (def-output-routines ("OUTPUT-SIGNED-SHORT-~A-BUFFERED"
                      2
+                      nil
                      (:none (signed-byte 16))
                      (:full (signed-byte 16)))
   (setf (signed-sap-ref-16 (fd-stream-obuf-sap stream)
 
 (def-output-routines ("OUTPUT-UNSIGNED-LONG-~A-BUFFERED"
                      4
+                      nil
                      (:none (unsigned-byte 32))
                      (:full (unsigned-byte 32)))
   (setf (sap-ref-32 (fd-stream-obuf-sap stream) (fd-stream-obuf-tail stream))
 
 (def-output-routines ("OUTPUT-SIGNED-LONG-~A-BUFFERED"
                      4
+                      nil
                      (:none (signed-byte 32))
                      (:full (signed-byte 32)))
   (setf (signed-sap-ref-32 (fd-stream-obuf-sap stream)
   (let ((start (or start 0))
        (end (or end (length (the (simple-array * (*)) thing)))))
     (declare (type index start end))
+    (when (and (not (fd-stream-dual-channel-p fd-stream))
+              (> (fd-stream-ibuf-tail fd-stream)
+                 (fd-stream-ibuf-head fd-stream)))
+      (file-position fd-stream (file-position fd-stream)))
     (let* ((len (fd-stream-obuf-length fd-stream))
           (tail (fd-stream-obuf-tail fd-stream))
           (space (- len tail))
            ((zerop bytes)) ; easy case
            ((<= bytes space)
             (if (system-area-pointer-p thing)
-                (system-area-copy thing
-                                  (* start sb!vm:n-byte-bits)
-                                  (fd-stream-obuf-sap fd-stream)
-                                  (* tail sb!vm:n-byte-bits)
-                                  (* bytes sb!vm:n-byte-bits))
+                (system-area-ub8-copy thing start
+                                       (fd-stream-obuf-sap fd-stream)
+                                       tail
+                                       bytes)
                 ;; FIXME: There should be some type checking somewhere to
                 ;; verify that THING here is a vector, not just <not a SAP>.
-                (copy-to-system-area thing
-                                     (+ (* start sb!vm:n-byte-bits)
-                                        (* sb!vm:vector-data-offset
-                                           sb!vm:n-word-bits))
-                                     (fd-stream-obuf-sap fd-stream)
-                                     (* tail sb!vm:n-byte-bits)
-                                     (* bytes sb!vm:n-byte-bits)))
+                (copy-ub8-to-system-area thing start
+                                          (fd-stream-obuf-sap fd-stream)
+                                          tail
+                                          bytes))
             (setf (fd-stream-obuf-tail fd-stream) newtail))
            ((<= bytes len)
             (flush-output-buffer fd-stream)
             (if (system-area-pointer-p thing)
-                (system-area-copy thing
-                                  (* start sb!vm:n-byte-bits)
-                                  (fd-stream-obuf-sap fd-stream)
-                                  0
-                                  (* bytes sb!vm:n-byte-bits))
+                (system-area-ub8-copy thing
+                                       start
+                                       (fd-stream-obuf-sap fd-stream)
+                                       0
+                                       bytes)
                 ;; FIXME: There should be some type checking somewhere to
                 ;; verify that THING here is a vector, not just <not a SAP>.
-                (copy-to-system-area thing
-                                     (+ (* start sb!vm:n-byte-bits)
-                                        (* sb!vm:vector-data-offset
-                                           sb!vm:n-word-bits))
-                                     (fd-stream-obuf-sap fd-stream)
-                                     0
-                                     (* bytes sb!vm:n-byte-bits)))
+                (copy-ub8-to-system-area thing
+                                          start
+                                          (fd-stream-obuf-sap fd-stream)
+                                          0
+                                          bytes))
             (setf (fd-stream-obuf-tail fd-stream) bytes))
            (t
             (flush-output-buffer fd-stream)
                                           :from-end t
                                           :start start
                                           :end end))))
-         (ecase (fd-stream-buffering stream)
-           (:full
-            (output-raw-bytes stream thing start end))
-           (:line
-            (output-raw-bytes stream thing start end)
-            (when last-newline
-              (flush-output-buffer stream)))
-           (:none
-            (frob-output stream thing start end nil)))
+         (if (and (typep thing 'base-string)
+                  (eq (fd-stream-external-format stream) :latin-1))
+              (ecase (fd-stream-buffering stream)
+                (:full
+                 (output-raw-bytes stream thing start end))
+                (:line
+                 (output-raw-bytes stream thing start end)
+                 (when last-newline
+                   (flush-output-buffer stream)))
+                (:none
+                 (frob-output stream thing start end nil)))
+             (ecase (fd-stream-buffering stream)
+               (:full (funcall (fd-stream-output-bytes stream)
+                               stream thing nil start end))
+               (:line (funcall (fd-stream-output-bytes stream)
+                               stream thing last-newline start end))
+               (:none (funcall (fd-stream-output-bytes stream)
+                               stream thing t start end))))
          (if last-newline
              (setf (fd-stream-char-pos stream)
                    (- end last-newline 1))
          (:none
           (frob-output stream thing start end nil))))))
 
+(defvar *external-formats* ()
+  #!+sb-doc
+  "List of all available external formats. Each element is a list of the
+  element-type, string input function name, character input function name,
+  and string output function name.")
+
 ;;; Find an output routine to use given the type and buffering. Return
 ;;; as multiple values the routine, the real type transfered, and the
 ;;; number of bytes per element.
-(defun pick-output-routine (type buffering)
+(defun pick-output-routine (type buffering &optional external-format)
+  (when (subtypep type 'character)
+    (dolist (entry *external-formats*)
+      (when (member external-format (first entry))
+       (return-from pick-output-routine
+         (values (symbol-function (nth (ecase buffering
+                                         (:none 4)
+                                         (:line 5)
+                                         (:full 6))
+                                       entry))
+                 'character
+                 1
+                 (symbol-function (fourth entry))
+                 (first (first entry)))))))
   (dolist (entry *output-routines*)
-    (when (and (subtypep type (car entry))
-              (eq buffering (cadr entry)))
-      (return (values (symbol-function (caddr entry))
-                     (car entry)
-                     (cadddr entry))))))
+    (when (and (subtypep type (first entry))
+              (eq buffering (second entry))
+              (or (not (fifth entry))
+                  (eq external-format (fifth entry))))
+      (return-from pick-output-routine
+       (values (symbol-function (third entry))
+               (first entry)
+               (fourth entry)))))
+  ;; KLUDGE: dealing with the buffering here leads to excessive code
+  ;; explosion.
+  ;;
+  ;; KLUDGE: also see comments in PICK-INPUT-ROUTINE
+  (loop for i from 40 by 8 to 1024 ; ARB (KLUDGE)
+       if (subtypep type `(unsigned-byte ,i))
+       do (return-from pick-output-routine
+            (values
+             (ecase buffering
+               (:none
+                (lambda (stream byte)
+                  (output-wrapper (stream (/ i 8) (:none) nil)
+                    (loop for j from 0 below (/ i 8)
+                          do (setf (sap-ref-8 
+                                    (fd-stream-obuf-sap stream)
+                                    (+ j (fd-stream-obuf-tail stream)))
+                                   (ldb (byte 8 (- i 8 (* j 8))) byte))))))
+               (:full
+                (lambda (stream byte)
+                  (output-wrapper (stream (/ i 8) (:full) nil)
+                    (loop for j from 0 below (/ i 8)
+                          do (setf (sap-ref-8 
+                                    (fd-stream-obuf-sap stream)
+                                    (+ j (fd-stream-obuf-tail stream)))
+                                   (ldb (byte 8 (- i 8 (* j 8))) byte)))))))
+             `(unsigned-byte ,i)
+             (/ i 8))))
+  (loop for i from 40 by 8 to 1024 ; ARB (KLUDGE)
+       if (subtypep type `(signed-byte ,i))
+       do (return-from pick-output-routine
+            (values
+             (ecase buffering
+               (:none
+                (lambda (stream byte)
+                  (output-wrapper (stream (/ i 8) (:none) nil)
+                    (loop for j from 0 below (/ i 8)
+                          do (setf (sap-ref-8 
+                                    (fd-stream-obuf-sap stream)
+                                    (+ j (fd-stream-obuf-tail stream)))
+                                   (ldb (byte 8 (- i 8 (* j 8))) byte))))))
+               (:full
+                (lambda (stream byte)
+                  (output-wrapper (stream (/ i 8) (:full) nil)
+                    (loop for j from 0 below (/ i 8)
+                          do (setf (sap-ref-8 
+                                    (fd-stream-obuf-sap stream)
+                                    (+ j (fd-stream-obuf-tail stream)))
+                                   (ldb (byte 8 (- i 8 (* j 8))) byte)))))))
+             `(signed-byte ,i)
+             (/ i 8)))))
 \f
 ;;;; input routines and related noise
 
 ;;; per element.
 (defvar *input-routines* ())
 
-;;; Fill the input buffer, and return the first character. Throw to
-;;; EOF-INPUT-CATCHER if the eof was reached. Drop into SYSTEM:SERVER
-;;; if necessary.
-(defun frob-input (stream)
+;;; Fill the input buffer, and return the number of bytes read. Throw
+;;; to EOF-INPUT-CATCHER if the eof was reached. Drop into
+;;; SYSTEM:SERVER if necessary.
+(defun refill-buffer/fd (stream)
   (let ((fd (fd-stream-fd stream))
        (ibuf-sap (fd-stream-ibuf-sap stream))
        (buflen (fd-stream-ibuf-length stream))
             (setf (fd-stream-ibuf-tail stream) 0))
            (t
             (decf tail head)
-            (system-area-copy ibuf-sap (* head sb!vm:n-byte-bits)
-                              ibuf-sap 0 (* tail sb!vm:n-byte-bits))
+            (system-area-ub8-copy ibuf-sap head
+                                   ibuf-sap 0 tail)
             (setf head 0)
             (setf (fd-stream-ibuf-head stream) 0)
             (setf (fd-stream-ibuf-tail stream) tail))))
                   (unless (sb!sys:wait-until-fd-usable
                            fd :input (fd-stream-timeout stream))
                     (error 'io-timeout :stream stream :direction :read))
-                  (frob-input stream))
+                  (refill-buffer/fd stream))
                 (simple-stream-perror "couldn't read from ~S" stream errno)))
            ((zerop count)
             (setf (fd-stream-listen stream) :eof)
             (/show0 "THROWing EOF-INPUT-CATCHER")
             (throw 'eof-input-catcher nil))
            (t
-            (incf (fd-stream-ibuf-tail stream) count))))))
+            (incf (fd-stream-ibuf-tail stream) count)
+             count)))))
                        
 ;;; Make sure there are at least BYTES number of bytes in the input
-;;; buffer. Keep calling FROB-INPUT until that condition is met.
+;;; buffer. Keep calling REFILL-BUFFER/FD until that condition is met.
 (defmacro input-at-least (stream bytes)
   (let ((stream-var (gensym))
        (bytes-var (gensym)))
                      (fd-stream-ibuf-head ,stream-var))
                   ,bytes-var)
           (return))
-        (frob-input ,stream-var)))))
+        (refill-buffer/fd ,stream-var)))))
+
+(defmacro input-wrapper/variable-width ((stream bytes eof-error eof-value)
+                                       &body read-forms)
+  (let ((stream-var (gensym))
+       (retry-var (gensym))
+       (element-var (gensym)))
+    `(let ((,stream-var ,stream)
+          (size nil))
+       (if (fd-stream-unread ,stream-var)
+          (prog1
+              (fd-stream-unread ,stream-var)
+            (setf (fd-stream-unread ,stream-var) nil)
+            (setf (fd-stream-listen ,stream-var) nil))
+          (let ((,element-var nil)
+                (decode-break-reason nil))
+            (do ((,retry-var t))
+                ((not ,retry-var))
+              (unless
+                  (catch 'eof-input-catcher
+                    (setf decode-break-reason
+                          (block decode-break-reason
+                            (input-at-least ,stream-var 1)
+                            (let* ((byte (sap-ref-8 (fd-stream-ibuf-sap
+                                                     ,stream-var)
+                                                    (fd-stream-ibuf-head
+                                                     ,stream-var))))
+                              (setq size ,bytes)
+                              (input-at-least ,stream-var size)
+                              (setq ,element-var (locally ,@read-forms))
+                              (setq ,retry-var nil))
+                            nil))
+                    (when decode-break-reason
+                      (stream-decoding-error-and-handle stream
+                                                        decode-break-reason))
+                    t)
+                (let ((octet-count (- (fd-stream-ibuf-tail ,stream-var)
+                                     (fd-stream-ibuf-head ,stream-var))))
+                  (when (or (zerop octet-count)
+                            (and (not ,element-var)
+                                 (not decode-break-reason)
+                                 (stream-decoding-error-and-handle
+                                  stream octet-count)))
+                    (setq ,retry-var nil)))))
+            (cond (,element-var
+                   (incf (fd-stream-ibuf-head ,stream-var) size)
+                   ,element-var)
+                  (t
+                   (eof-or-lose ,stream-var ,eof-error ,eof-value))))))))
 
 ;;; a macro to wrap around all input routines to handle EOF-ERROR noise
 (defmacro input-wrapper ((stream bytes eof-error eof-value) &body read-forms)
           (let ((,element-var
                  (catch 'eof-input-catcher
                    (input-at-least ,stream-var ,bytes)
-                   ,@read-forms)))
+                   (locally ,@read-forms))))
             (cond (,element-var
                    (incf (fd-stream-ibuf-head ,stream-var) ,bytes)
                    ,element-var)
                   (t
                    (eof-or-lose ,stream-var ,eof-error ,eof-value))))))))
 
+(defmacro def-input-routine/variable-width (name
+                                           (type external-format size sap head)
+                                           &rest body)
+  `(progn
+     (defun ,name (stream eof-error eof-value)
+       (input-wrapper/variable-width (stream ,size eof-error eof-value)
+        (let ((,sap (fd-stream-ibuf-sap stream))
+              (,head (fd-stream-ibuf-head stream)))
+          ,@body)))
+     (setf *input-routines*
+          (nconc *input-routines*
+                 (list (list ',type ',name 1 ',external-format))))))
+
 (defmacro def-input-routine (name
                             (type size sap head)
                             &rest body)
           ,@body)))
      (setf *input-routines*
           (nconc *input-routines*
-                 (list (list ',type ',name ',size))))))
+                 (list (list ',type ',name ',size nil))))))
 
 ;;; STREAM-IN routine for reading a string char
 (def-input-routine input-character
                   ((signed-byte 32) 4 sap head)
   (signed-sap-ref-32 sap head))
 
+
+
 ;;; Find an input routine to use given the type. Return as multiple
 ;;; values the routine, the real type transfered, and the number of
-;;; bytes per element.
-(defun pick-input-routine (type)
+;;; bytes per element (and for character types string input routine).
+(defun pick-input-routine (type &optional external-format)
+  (when (subtypep type 'character)
+    (dolist (entry *external-formats*)
+      (when (member external-format (first entry))
+       (return-from pick-input-routine
+         (values (symbol-function (third entry))
+                 'character
+                 1
+                 (symbol-function (second entry))
+                 (first (first entry)))))))
   (dolist (entry *input-routines*)
-    (when (subtypep type (car entry))
-      (return (values (symbol-function (cadr entry))
-                     (car entry)
-                     (caddr entry))))))
+    (when (and (subtypep type (first entry))
+              (or (not (fourth entry))
+                  (eq external-format (fourth entry))))
+      (return-from pick-input-routine
+       (values (symbol-function (second entry))
+               (first entry)
+               (third entry)))))
+  ;; FIXME: let's do it the hard way, then (but ignore things like
+  ;; endianness, efficiency, and the necessary coupling between these
+  ;; and the output routines).  -- CSR, 2004-02-09
+  (loop for i from 40 by 8 to 1024 ; ARB (well, KLUDGE really)
+       if (subtypep type `(unsigned-byte ,i))
+       do (return-from pick-input-routine
+            (values
+             (lambda (stream eof-error eof-value)
+               (input-wrapper (stream (/ i 8) eof-error eof-value)
+                 (let ((sap (fd-stream-ibuf-sap stream))
+                       (head (fd-stream-ibuf-head stream)))
+                   (loop for j from 0 below (/ i 8)
+                         with result = 0
+                         do (setf result
+                                  (+ (* 256 result)
+                                     (sap-ref-8 sap (+ head j))))
+                         finally (return result)))))
+             `(unsigned-byte ,i)
+             (/ i 8))))
+  (loop for i from 40 by 8 to 1024 ; ARB (well, KLUDGE really)
+       if (subtypep type `(signed-byte ,i))
+       do (return-from pick-input-routine
+            (values
+             (lambda (stream eof-error eof-value)
+               (input-wrapper (stream (/ i 8) eof-error eof-value)
+                 (let ((sap (fd-stream-ibuf-sap stream))
+                       (head (fd-stream-ibuf-head stream)))
+                   (loop for j from 0 below (/ i 8)
+                         with result = 0
+                         do (setf result
+                                  (+ (* 256 result)
+                                     (sap-ref-8 sap (+ head j))))
+                         finally (return (if (logbitp (1- i) result)
+                                              (dpb result (byte i 0) -1)
+                                              result))))))
+             `(signed-byte ,i)
+             (/ i 8)))))
 
 ;;; Return a string constructed from SAP, START, and END.
 (defun string-from-sap (sap start end)
   (declare (type index start end))
   (let* ((length (- end start))
         (string (make-string length)))
-    (copy-from-system-area sap (* start sb!vm:n-byte-bits)
-                          string (* sb!vm:vector-data-offset
-                                    sb!vm:n-word-bits)
-                          (* length sb!vm:n-byte-bits))
+    (copy-ub8-from-system-area sap start
+                               string 0
+                               length)
     string))
 
 ;;; the N-BIN method for FD-STREAMs
 ;;; Note that this blocks in UNIX-READ. It is generally used where
 ;;; there is a definite amount of reading to be done, so blocking
 ;;; isn't too problematical.
-(defun fd-stream-read-n-bytes (stream buffer start requested eof-error-p)
-  (declare (type file-stream stream))
-  (declare (type index start requested))
-  (do ((total-copied 0))
+(defun fd-stream-read-n-bytes (stream buffer start requested eof-error-p
+                              &aux (total-copied 0))
+  (declare (type fd-stream stream))
+  (declare (type index start requested total-copied))
+  (let ((unread (fd-stream-unread stream)))
+    (when unread
+      ;; AVERs designed to fail when we have more complicated
+      ;; character representations.
+      (aver (typep unread 'base-char))
+      (aver (= (fd-stream-element-size stream) 1))
+      ;; KLUDGE: this is a slightly-unrolled-and-inlined version of
+      ;; %BYTE-BLT
+      (etypecase buffer
+       (system-area-pointer
+        (setf (sap-ref-8 buffer start) (char-code unread)))
+       ((simple-unboxed-array (*))
+        (setf (aref buffer start) unread)))
+      (setf (fd-stream-unread stream) nil)
+      (setf (fd-stream-listen stream) nil)
+      (incf total-copied)))
+  (do ()
       (nil)
-    (declare (type index total-copied))
     (let* ((remaining-request (- requested total-copied))
           (head (fd-stream-ibuf-head stream))
           (tail (fd-stream-ibuf-tail stream))
             (= total-copied requested)
             (return total-copied))
            (;; If EOF, we're done in another way.
-            (zerop (refill-fd-stream-buffer stream))
+             (null (catch 'eof-input-catcher (refill-buffer/fd stream)))
             (if eof-error-p
                 (error 'end-of-file :stream stream)
                 (return total-copied)))
            ;; through into another pass of the loop.
            ))))
 
-;;; Try to refill the stream buffer. Return the number of bytes read.
-;;; (For EOF, the return value will be zero, otherwise positive.)
-(defun refill-fd-stream-buffer (stream)
-  ;; We don't have any logic to preserve leftover bytes in the buffer,
-  ;; so we should only be called when the buffer is empty.
-  (aver (= (fd-stream-ibuf-head stream) (fd-stream-ibuf-tail stream)))
-  (multiple-value-bind (count err)
-      (sb!unix:unix-read (fd-stream-fd stream)
-                        (fd-stream-ibuf-sap stream)
-                        (fd-stream-ibuf-length stream))
-    (declare (type (or index null) count))
-    (when (null count)
-      (simple-stream-perror "couldn't read from ~S" stream err))
-    (setf (fd-stream-listen stream) nil
-         (fd-stream-ibuf-head stream) 0
-         (fd-stream-ibuf-tail stream) count)
-    count))
+(defun fd-stream-resync (stream)
+  (dolist (entry *external-formats*)
+    (when (member (fd-stream-external-format stream) (first entry))
+      (return-from fd-stream-resync
+       (funcall (symbol-function (eighth entry)) stream)))))
+
+;;; FIXME: OAOOM here vrt. *EXTERNAL-FORMAT-FUNCTIONS* in fd-stream.lisp
+(defmacro define-external-format (external-format size output-restart
+                                  out-expr in-expr)
+  (let* ((name (first external-format))
+         (out-function (symbolicate "OUTPUT-BYTES/" name))
+         (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
+         (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name))
+         (in-char-function (symbolicate "INPUT-CHAR/" name)))
+    `(progn
+      (defun ,out-function (stream string flush-p start end)
+       (let ((start (or start 0))
+             (end (or end (length string))))
+         (declare (type index start end))
+         (when (and (not (fd-stream-dual-channel-p stream))
+                    (> (fd-stream-ibuf-tail stream)
+                       (fd-stream-ibuf-head stream)))
+           (file-position stream (file-position stream)))
+         (when (< end start)
+           (error ":END before :START!"))
+         (do ()
+             ((= end start))
+           (setf (fd-stream-obuf-tail stream)
+                 (do* ((len (fd-stream-obuf-length stream))
+                       (sap (fd-stream-obuf-sap stream))
+                       (tail (fd-stream-obuf-tail stream)))
+                      ((or (= start end) (< (- len tail) 4)) tail)
+                    ,(if output-restart
+                        `(catch 'output-nothing
+                           (let* ((byte (aref string start))
+                                  (bits (char-code byte)))
+                             ,out-expr
+                             (incf tail ,size)))
+                         `(let* ((byte (aref string start))
+                                  (bits (char-code byte)))
+                             ,out-expr
+                             (incf tail ,size)))
+                   (incf start)))
+           (when (< start end)
+             (flush-output-buffer stream)))
+         (when flush-p
+           (flush-output-buffer stream))))
+      (def-output-routines (,format
+                           ,size
+                            ,output-restart
+                           (:none character)
+                           (:line character)
+                           (:full character))
+         (if (char= byte #\Newline)
+             (setf (fd-stream-char-pos stream) 0)
+             (incf (fd-stream-char-pos stream)))
+       (let ((bits (char-code byte))
+             (sap (fd-stream-obuf-sap stream))
+             (tail (fd-stream-obuf-tail stream)))
+         ,out-expr))
+      (defun ,in-function (stream buffer start requested eof-error-p
+                          &aux (total-copied 0))
+       (declare (type fd-stream stream))
+       (declare (type index start requested total-copied))
+       (let ((unread (fd-stream-unread stream)))
+         (when unread
+           (setf (aref buffer start) unread)
+           (setf (fd-stream-unread stream) nil)
+           (setf (fd-stream-listen stream) nil)
+           (incf total-copied)))
+       (do ()
+           (nil)
+         (let* ((head (fd-stream-ibuf-head stream))
+                (tail (fd-stream-ibuf-tail stream))
+                (sap (fd-stream-ibuf-sap stream)))
+           (declare (type index head tail))
+           ;; Copy data from stream buffer into user's buffer.
+           (do ()
+               ((or (= tail head) (= requested total-copied)))
+             (let* ((byte (sap-ref-8 sap head)))
+               (when (> ,size (- tail head))
+                 (return))
+               (setf (aref buffer (+ start total-copied)) ,in-expr)
+               (incf total-copied)
+               (incf head ,size)))
+           (setf (fd-stream-ibuf-head stream) head)
+           ;; Maybe we need to refill the stream buffer.
+           (cond ( ;; If there were enough data in the stream buffer, we're done.
+                  (= total-copied requested)
+                  (return total-copied))
+                 ( ;; If EOF, we're done in another way.
+                   (null (catch 'eof-input-catcher (refill-buffer/fd stream)))
+                  (if eof-error-p
+                      (error 'end-of-file :stream stream)
+                      (return total-copied)))
+                 ;; Otherwise we refilled the stream buffer, so fall
+                 ;; through into another pass of the loop.
+                 ))))
+      (def-input-routine ,in-char-function (character ,size sap head)
+       (let ((byte (sap-ref-8 sap head)))
+         ,in-expr))
+      (setf *external-formats*
+       (cons '(,external-format ,in-function ,in-char-function ,out-function
+              ,@(mapcar #'(lambda (buffering)
+                            (intern (format nil format (string buffering))))
+                        '(:none :line :full)))
+       *external-formats*)))))
+
+(defmacro define-external-format/variable-width
+    (external-format output-restart out-size-expr
+     out-expr in-size-expr in-expr)
+  (let* ((name (first external-format))
+        (out-function (symbolicate "OUTPUT-BYTES/" name))
+        (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
+        (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name))
+        (in-char-function (symbolicate "INPUT-CHAR/" name))
+        (resync-function (symbolicate "RESYNC/" name)))
+    `(progn
+      (defun ,out-function (fd-stream string flush-p start end)
+       (let ((start (or start 0))
+             (end (or end (length string))))
+         (declare (type index start end))
+         (when (and (not (fd-stream-dual-channel-p fd-stream))
+                    (> (fd-stream-ibuf-tail fd-stream)
+                       (fd-stream-ibuf-head fd-stream)))
+           (file-position fd-stream (file-position fd-stream)))
+         (when (< end start)
+           (error ":END before :START!"))
+         (do ()
+             ((= end start))
+           (setf (fd-stream-obuf-tail fd-stream)
+                 (do* ((len (fd-stream-obuf-length fd-stream))
+                       (sap (fd-stream-obuf-sap fd-stream))
+                       (tail (fd-stream-obuf-tail fd-stream)))
+                      ((or (= start end) (< (- len tail) 4)) tail)
+                   ,(if output-restart
+                        `(catch 'output-nothing
+                           (let* ((byte (aref string start))
+                                  (bits (char-code byte))
+                                  (size ,out-size-expr))
+                             ,out-expr
+                             (incf tail size)
+                             (incf start)))
+                        `(let* ((byte (aref string start))
+                                (bits (char-code byte))
+                                (size ,out-size-expr))
+                           ,out-expr
+                           (incf tail size)))
+                   (incf start)))
+           (when (< start end)
+             (flush-output-buffer fd-stream)))
+         (when flush-p
+           (flush-output-buffer fd-stream))))
+      (def-output-routines/variable-width (,format
+                                          ,out-size-expr
+                                           ,output-restart
+                                          ,external-format
+                                          (:none character)
+                                          (:line character)
+                                          (:full character))
+         (if (char= byte #\Newline)
+             (setf (fd-stream-char-pos stream) 0)
+             (incf (fd-stream-char-pos stream)))
+       (let ((bits (char-code byte))
+             (sap (fd-stream-obuf-sap stream))
+             (tail (fd-stream-obuf-tail stream)))
+         ,out-expr))
+      (defun ,in-function (stream buffer start requested eof-error-p
+                          &aux (total-copied 0))
+       (declare (type fd-stream stream))
+       (declare (type index start requested total-copied))
+       (let ((unread (fd-stream-unread stream)))
+         (when unread
+           (setf (aref buffer start) unread)
+           (setf (fd-stream-unread stream) nil)
+           (setf (fd-stream-listen stream) nil)
+           (incf total-copied)))
+       (do ()
+           (nil)
+         (let* ((head (fd-stream-ibuf-head stream))
+                (tail (fd-stream-ibuf-tail stream))
+                (sap (fd-stream-ibuf-sap stream))
+                (head-start head)
+                (decode-break-reason nil))
+           (declare (type index head tail))
+           ;; Copy data from stream buffer into user's buffer.
+           (do ((size nil nil))
+               ((or (= tail head) (= requested total-copied)))
+             (setf decode-break-reason
+                   (block decode-break-reason
+                     (let ((byte (sap-ref-8 sap head)))
+                       (setq size ,in-size-expr)
+                       (when (> size (- tail head))
+                         (return))
+                       (setf (aref buffer (+ start total-copied)) ,in-expr)
+                       (incf total-copied)
+                       (incf head size))
+                     nil))
+             (setf (fd-stream-ibuf-head stream) head)
+             (when (and decode-break-reason
+                        (= head head-start))
+               (when (stream-decoding-error-and-handle
+                      stream decode-break-reason)
+                 (if eof-error-p
+                     (error 'end-of-file :stream stream)
+                     (return-from ,in-function total-copied)))
+               (setf head (fd-stream-ibuf-head stream))
+               (setf tail (fd-stream-ibuf-tail stream)))
+             (when (plusp total-copied)
+               (return-from ,in-function total-copied)))
+           (setf (fd-stream-ibuf-head stream) head)
+           ;; Maybe we need to refill the stream buffer.
+           (cond ( ;; If there were enough data in the stream buffer, we're done.
+                  (= total-copied requested)
+                  (return total-copied))
+                 ( ;; If EOF, we're done in another way.
+                  (or (eq decode-break-reason 'eof)
+                       (null (catch 'eof-input-catcher 
+                               (refill-buffer/fd stream))))
+                  (if eof-error-p
+                      (error 'end-of-file :stream stream)
+                      (return total-copied)))
+                 ;; Otherwise we refilled the stream buffer, so fall
+                 ;; through into another pass of the loop.
+                 ))))
+      (def-input-routine/variable-width ,in-char-function (character
+                                                          ,external-format
+                                                          ,in-size-expr
+                                                          sap head)
+       (let ((byte (sap-ref-8 sap head)))
+         ,in-expr))
+      (defun ,resync-function (stream)
+        (loop (input-at-least stream 1)
+              (incf (fd-stream-ibuf-head stream))
+              (unless (block decode-break-reason
+                       (let* ((sap (fd-stream-ibuf-sap stream))
+                              (head (fd-stream-ibuf-head stream))
+                              (byte (sap-ref-8 sap head))
+                              (size ,in-size-expr))
+                         ,in-expr)
+                       nil)
+                (return))))
+      (setf *external-formats*
+       (cons '(,external-format ,in-function ,in-char-function ,out-function
+              ,@(mapcar #'(lambda (buffering)
+                            (intern (format nil format (string buffering))))
+                        '(:none :line :full))
+              ,resync-function)
+       *external-formats*)))))
+
+(define-external-format (:latin-1 :latin1 :iso-8859-1)
+    1 t
+  (if (>= bits 256)
+      (stream-encoding-error-and-handle stream bits)
+      (setf (sap-ref-8 sap tail) bits))
+  (code-char byte))
+
+(define-external-format (:ascii :us-ascii :ansi_x3.4-1968 
+                         :iso-646 :iso-646-us :|646|)
+    1 t
+  (if (>= bits 128)
+      (stream-encoding-error-and-handle stream bits)
+      (setf (sap-ref-8 sap tail) bits))
+  (code-char byte))
+
+(let* ((table (let ((s (make-string 256)))
+               (map-into s #'code-char
+                         '(#x00 #x01 #x02 #x03 #x9c #x09 #x86 #x7f #x97 #x8d #x8e #x0b #x0c #x0d #x0e #x0f
+                           #x10 #x11 #x12 #x13 #x9d #x85 #x08 #x87 #x18 #x19 #x92 #x8f #x1c #x1d #x1e #x1f
+                           #x80 #x81 #x82 #x83 #x84 #x0a #x17 #x1b #x88 #x89 #x8a #x8b #x8c #x05 #x06 #x07
+                           #x90 #x91 #x16 #x93 #x94 #x95 #x96 #x04 #x98 #x99 #x9a #x9b #x14 #x15 #x9e #x1a
+                           #x20 #xa0 #xe2 #xe4 #xe0 #xe1 #xe3 #xe5 #xe7 #xf1 #xa2 #x2e #x3c #x28 #x2b #x7c
+                           #x26 #xe9 #xea #xeb #xe8 #xed #xee #xef #xec #xdf #x21 #x24 #x2a #x29 #x3b #xac
+                           #x2d #x2f #xc2 #xc4 #xc0 #xc1 #xc3 #xc5 #xc7 #xd1 #xa6 #x2c #x25 #x5f #x3e #x3f
+                           #xf8 #xc9 #xca #xcb #xc8 #xcd #xce #xcf #xcc #x60 #x3a #x23 #x40 #x27 #x3d #x22
+                           #xd8 #x61 #x62 #x63 #x64 #x65 #x66 #x67 #x68 #x69 #xab #xbb #xf0 #xfd #xfe #xb1
+                           #xb0 #x6a #x6b #x6c #x6d #x6e #x6f #x70 #x71 #x72 #xaa #xba #xe6 #xb8 #xc6 #xa4
+                           #xb5 #x7e #x73 #x74 #x75 #x76 #x77 #x78 #x79 #x7a #xa1 #xbf #xd0 #xdd #xde #xae
+                           #x5e #xa3 #xa5 #xb7 #xa9 #xa7 #xb6 #xbc #xbd #xbe #x5b #x5d #xaf #xa8 #xb4 #xd7
+                           #x7b #x41 #x42 #x43 #x44 #x45 #x46 #x47 #x48 #x49 #xad #xf4 #xf6 #xf2 #xf3 #xf5
+                           #x7d #x4a #x4b #x4c #x4d #x4e #x4f #x50 #x51 #x52 #xb9 #xfb #xfc #xf9 #xfa #xff
+                           #x5c #xf7 #x53 #x54 #x55 #x56 #x57 #x58 #x59 #x5a #xb2 #xd4 #xd6 #xd2 #xd3 #xd5
+                           #x30 #x31 #x32 #x33 #x34 #x35 #x36 #x37 #x38 #x39 #xb3 #xdb #xdc #xd9 #xda #x9f))
+               s))
+       (reverse-table (let ((rt (make-array 256 :element-type '(unsigned-byte 8) :initial-element 0)))
+                         (loop for char across table for i from 0
+                              do (aver (= 0 (aref rt (char-code char))))
+                              do (setf (aref rt (char-code char)) i))
+                         rt)))
+  (define-external-format (:ebcdic-us :ibm-037 :ibm037)
+      1 t
+    (if (>= bits 256)
+       (stream-encoding-error-and-handle stream bits)
+       (setf (sap-ref-8 sap tail) (aref reverse-table bits)))
+    (aref table byte)))
+    
+
+#!+sb-unicode
+(let ((latin-9-table (let ((table (make-string 256)))
+                       (do ((i 0 (1+ i)))
+                           ((= i 256))
+                         (setf (aref table i) (code-char i)))
+                       (setf (aref table #xa4) (code-char #x20ac))
+                       (setf (aref table #xa6) (code-char #x0160))
+                       (setf (aref table #xa8) (code-char #x0161))
+                       (setf (aref table #xb4) (code-char #x017d))
+                       (setf (aref table #xb8) (code-char #x017e))
+                       (setf (aref table #xbc) (code-char #x0152))
+                       (setf (aref table #xbd) (code-char #x0153))
+                       (setf (aref table #xbe) (code-char #x0178))
+                       table))
+      (latin-9-reverse-1 (make-array 16
+                                     :element-type '(unsigned-byte 21)
+                                     :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0)))
+      (latin-9-reverse-2 (make-array 16
+                                     :element-type '(unsigned-byte 8)
+                                     :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0))))
+  (define-external-format (:latin-9 :latin9 :iso-8859-15)
+      1 t
+    (setf (sap-ref-8 sap tail)
+          (if (< bits 256)
+              (if (= bits (char-code (aref latin-9-table bits)))
+                  bits
+                  (stream-encoding-error-and-handle stream byte))
+              (if (= (aref latin-9-reverse-1 (logand bits 15)) bits)
+                  (aref latin-9-reverse-2 (logand bits 15))
+                  (stream-encoding-error-and-handle stream byte))))
+    (aref latin-9-table byte)))
+
+(define-external-format/variable-width (:utf-8 :utf8) nil
+  (let ((bits (char-code byte)))
+    (cond ((< bits #x80) 1)
+         ((< bits #x800) 2)
+         ((< bits #x10000) 3)
+         (t 4)))
+  (ecase size
+    (1 (setf (sap-ref-8 sap tail) bits))
+    (2 (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits))
+            (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 0) bits))))
+    (3 (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits))
+            (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 6) bits))
+            (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits))))
+    (4 (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits))
+            (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 12) bits))
+            (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits))
+            (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits)))))
+  (cond ((< byte #x80) 1)
+       ((< byte #xc2) (return-from decode-break-reason 1))
+       ((< byte #xe0) 2)
+       ((< byte #xf0) 3)
+       (t 4))
+  (code-char (ecase size
+              (1 byte)
+              (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
+                   (unless (<= #x80 byte2 #xbf)
+                     (return-from decode-break-reason 2))
+                   (dpb byte (byte 5 6) byte2)))
+              (3 (let ((byte2 (sap-ref-8 sap (1+ head)))
+                       (byte3 (sap-ref-8 sap (+ 2 head))))
+                   (unless (and (<= #x80 byte2 #xbf)
+                                (<= #x80 byte3 #xbf))
+                     (return-from decode-break-reason 3))
+                   (dpb byte (byte 4 12) (dpb byte2 (byte 6 6) byte3))))
+              (4 (let ((byte2 (sap-ref-8 sap (1+ head)))
+                       (byte3 (sap-ref-8 sap (+ 2 head)))
+                       (byte4 (sap-ref-8 sap (+ 3 head))))
+                   (unless (and (<= #x80 byte2 #xbf)
+                                (<= #x80 byte3 #xbf)
+                                (<= #x80 byte4 #xbf))
+                     (return-from decode-break-reason 4))
+                   (dpb byte (byte 3 18)
+                        (dpb byte2 (byte 6 12)
+                             (dpb byte3 (byte 6 6) byte4))))))))
 \f
 ;;;; utility functions (misc routines, etc)
 
        (input-type nil)
        (output-type nil)
        (input-size nil)
-       (output-size nil))
+       (output-size nil)
+       (character-stream-p (subtypep type 'character)))
 
     (when (fd-stream-obuf-sap fd-stream)
       (push (fd-stream-obuf-sap fd-stream) *available-buffers*)
       (push (fd-stream-ibuf-sap fd-stream) *available-buffers*)
       (setf (fd-stream-ibuf-sap fd-stream) nil))
 
+    (when (and character-stream-p
+              (eq (fd-stream-external-format fd-stream) :default))
+      (/show0 "/getting default external format")
+      (setf (fd-stream-external-format fd-stream)
+            (default-external-format))
+      (/show0 "cold-printing defaulted external-format:")
+      #!+sb-show
+      (cold-print (fd-stream-external-format fd-stream))
+      (/show0 "matching to known aliases")
+      (dolist (entry *external-formats*
+                    (restart-case
+                         (error "Invalid external-format ~A" 
+                                (fd-stream-external-format fd-stream))
+                     (use-default ()
+                        :report "Set external format to LATIN-1"
+                        (setf (fd-stream-external-format fd-stream) :latin-1))))
+        (/show0 "cold printing known aliases:")
+        #!+sb-show
+        (dolist (alias (first entry)) (cold-print alias))
+        (/show0 "done cold-printing known aliases")
+       (when (member (fd-stream-external-format fd-stream) (first entry))
+          (/show0 "matched")
+         (return)))
+      (/show0 "/default external format ok"))
+    
     (when input-p
-      (multiple-value-bind (routine type size)
-         (pick-input-routine target-type)
+      (multiple-value-bind (routine type size read-n-characters
+                                    normalized-external-format)
+         (pick-input-routine target-type
+                              (fd-stream-external-format fd-stream))
+        (when normalized-external-format
+          (setf (fd-stream-external-format fd-stream)
+                normalized-external-format))
        (unless routine
          (error "could not find any input routine for ~S" target-type))
        (setf (fd-stream-ibuf-sap fd-stream) (next-available-buffer))
        (setf (fd-stream-ibuf-length fd-stream) bytes-per-buffer)
        (setf (fd-stream-ibuf-tail fd-stream) 0)
-       (if (subtypep type 'character)
+       (if character-stream-p
            (setf (fd-stream-in fd-stream) routine
                  (fd-stream-bin fd-stream) #'ill-bin)
            (setf (fd-stream-in fd-stream) #'ill-in
                  (fd-stream-bin fd-stream) routine))
        (when (eql size 1)
-         (setf (fd-stream-n-bin fd-stream) #'fd-stream-read-n-bytes)
-         (when buffer-p
-           (setf (ansi-stream-in-buffer fd-stream)
-                 (make-array +ansi-stream-in-buffer-length+
-                             :element-type '(unsigned-byte 8)))))
+         (setf (fd-stream-n-bin fd-stream)
+                (if character-stream-p
+                    read-n-characters
+                    #'fd-stream-read-n-bytes))
+         (when (and buffer-p
+                    ;; We only create this buffer for streams of type
+                    ;; (unsigned-byte 8).  Because there's no buffer, the
+                    ;; other element-types will dispatch to the appropriate
+                    ;; input (output) routine in fast-read-byte.
+                    (or character-stream-p
+                        (equal target-type '(unsigned-byte 8)))
+                    (not output-p) ; temporary disable on :io streams
+                    #+(or)
+                    (or (eq type 'unsigned-byte)
+                        (eq type :default)))
+            (if character-stream-p
+                (setf (ansi-stream-cin-buffer fd-stream)
+                      (make-array +ansi-stream-in-buffer-length+
+                                  :element-type 'character))
+                (setf (ansi-stream-in-buffer fd-stream)
+                      (make-array +ansi-stream-in-buffer-length+
+                                  :element-type '(unsigned-byte 8))))))
        (setf input-size size)
        (setf input-type type)))
 
     (when output-p
-      (multiple-value-bind (routine type size)
-         (pick-output-routine target-type (fd-stream-buffering fd-stream))
+      (multiple-value-bind (routine type size output-bytes
+                                   normalized-external-format)
+         (pick-output-routine target-type
+                              (fd-stream-buffering fd-stream)
+                              (fd-stream-external-format fd-stream))
+       (when normalized-external-format
+         (setf (fd-stream-external-format fd-stream)
+               normalized-external-format))
        (unless routine
          (error "could not find any output routine for ~S buffered ~S"
                 (fd-stream-buffering fd-stream)
        (setf (fd-stream-obuf-sap fd-stream) (next-available-buffer))
        (setf (fd-stream-obuf-length fd-stream) bytes-per-buffer)
        (setf (fd-stream-obuf-tail fd-stream) 0)
-       (if (subtypep type 'character)
+       (when character-stream-p
+         (setf (fd-stream-output-bytes fd-stream) output-bytes))
+       (if character-stream-p
          (setf (fd-stream-out fd-stream) routine
                (fd-stream-bout fd-stream) #'ill-bout)
          (setf (fd-stream-out fd-stream)
                (or (if (eql size 1)
-                     (pick-output-routine 'base-char
-                                          (fd-stream-buffering fd-stream)))
+                         (pick-output-routine
+                          'base-char (fd-stream-buffering fd-stream)))
                    #'ill-out)
                (fd-stream-bout fd-stream) routine))
        (setf (fd-stream-sout fd-stream)
      (setf (fd-stream-unread fd-stream) arg1)
      (setf (fd-stream-listen fd-stream) t))
     (:close
-     (cond (arg1
-           ;; We got us an abort on our hands.
+     (cond (arg1 ; We got us an abort on our hands.
            (when (fd-stream-handler fd-stream)
-                 (sb!sys:remove-fd-handler (fd-stream-handler fd-stream))
-                 (setf (fd-stream-handler fd-stream) nil))
+             (sb!sys:remove-fd-handler (fd-stream-handler fd-stream))
+             (setf (fd-stream-handler fd-stream) nil))
+           ;; We can't do anything unless we know what file were
+           ;; dealing with, and we don't want to do anything
+           ;; strange unless we were writing to the file.
            (when (and (fd-stream-file fd-stream)
                       (fd-stream-obuf-sap fd-stream))
-             ;; We can't do anything unless we know what file were
-             ;; dealing with, and we don't want to do anything
-             ;; strange unless we were writing to the file.
              (if (fd-stream-original fd-stream)
-                 ;; We have a handle on the original, just revert.
-                 (multiple-value-bind (okay err)
-                     (sb!unix:unix-rename (fd-stream-original fd-stream)
-                                          (fd-stream-file fd-stream))
-                   (unless okay
-                     (simple-stream-perror
-                      "couldn't restore ~S to its original contents"
-                      fd-stream
-                      err)))
-                 ;; We can't restore the original, so nuke that puppy.
+                 ;; If the original is EQ to file we are appending
+                 ;; and can just close the file without renaming.
+                 (unless (eq (fd-stream-original fd-stream)
+                             (fd-stream-file fd-stream))
+                   ;; We have a handle on the original, just revert.
+                   (multiple-value-bind (okay err)
+                       (sb!unix:unix-rename (fd-stream-original fd-stream)
+                                            (fd-stream-file fd-stream))
+                     (unless okay
+                       (simple-stream-perror
+                        "couldn't restore ~S to its original contents"
+                        fd-stream
+                        err))))
+                 ;; We can't restore the original, and aren't
+                 ;; appending, so nuke that puppy.
+                 ;;
+                 ;; FIXME: This is currently the fate of superseded
+                 ;; files, and according to the CLOSE spec this is
+                 ;; wrong. However, there seems to be no clean way to
+                 ;; do that that doesn't involve either copying the
+                 ;; data (bad if the :abort resulted from a full
+                 ;; disk), or renaming the old file temporarily
+                 ;; (probably bad because stream opening becomes more
+                 ;; racy).
                  (multiple-value-bind (okay err)
                      (sb!unix:unix-unlink (fd-stream-file fd-stream))
                    (unless okay
                                                 0
                                                 0))))
          (cond ((eql count 1)
-                (frob-input fd-stream)
+                (refill-buffer/fd fd-stream)
                 (setf (fd-stream-ibuf-head fd-stream) 0)
                 (setf (fd-stream-ibuf-tail fd-stream) 0))
                (t
        (sb!sys:serve-all-events)))
     (:element-type
      (fd-stream-element-type fd-stream))
+    (:external-format
+     (fd-stream-external-format fd-stream))
     (:interactive-p
-      ;; FIXME: sb!unix:unix-isatty is undefined.
      (= 1 (the (member 0 1)
             (sb!unix:unix-isatty (fd-stream-fd fd-stream)))))
     (:line-length
        ;; appropriate value for the EXPECTED-TYPE slot..
        (error 'simple-type-error
               :datum fd-stream
-              :expected-type 'file-stream
+              :expected-type 'fd-stream
               :format-control "~S is not a stream associated with a file."
               :format-arguments (list fd-stream)))
      (multiple-value-bind (okay dev ino mode nlink uid gid rdev size
        (if (zerop mode)
           nil
           (truncate size (fd-stream-element-size fd-stream)))))
+    ;; FIXME: I doubt this is correct in the presence of Unicode,
+    ;; since fd-stream FILE-POSITION is measured in bytes. 
+    (:file-string-length
+     (etypecase arg1
+       (character 1)
+       (string (length arg1))))
     (:file-position
      (fd-stream-file-position fd-stream arg1))))
 
 (defun fd-stream-file-position (stream &optional newpos)
-  (declare (type file-stream stream)
-          (type (or index (member nil :start :end)) newpos))
+  (declare (type fd-stream stream)
+          (type (or (alien sb!unix:off-t) (member nil :start :end)) newpos))
   (if (null newpos)
       (sb!sys:without-interrupts
        ;; First, find the position of the UNIX file descriptor in the file.
        (multiple-value-bind (posn errno)
            (sb!unix:unix-lseek (fd-stream-fd stream) 0 sb!unix:l_incr)
-         (declare (type (or index null) posn))
-         (cond ((fixnump posn)
+         (declare (type (or (alien sb!unix:off-t) null) posn))
+         (cond ((integerp posn)
                 ;; Adjust for buffered output: If there is any output
                 ;; buffered, the *real* file position will be larger
                 ;; than reported by lseek() because lseek() obviously
                 ;; cannot take into account output we have not sent
                 ;; yet.
                 (dolist (later (fd-stream-output-later stream))
-                  (incf posn (- (the index (caddr later))
-                                (the index (cadr later)))))
+                  (incf posn (- (caddr later)
+                                (cadr later))))
                 (incf posn (fd-stream-obuf-tail stream))
                 ;; Adjust for unread input: If there is any input
                 ;; read from UNIX but not supplied to the user of the
                                         stream
                                         errno))))))
       (let ((offset 0) origin)
-       (declare (type index offset))
+       (declare (type (alien sb!unix:off-t) offset))
        ;; Make sure we don't have any output pending, because if we
        ;; move the file pointer before writing this stuff, it will be
        ;; written in the wrong location.
               (setf offset 0 origin sb!unix:l_set))
              ((eq newpos :end)
               (setf offset 0 origin sb!unix:l_xtnd))
-             ((typep newpos 'index)
+             ((typep newpos '(alien sb!unix:off-t))
               (setf offset (* newpos (fd-stream-element-size stream))
                     origin sb!unix:l_set))
              (t
               (error "invalid position given to FILE-POSITION: ~S" newpos)))
        (multiple-value-bind (posn errno)
            (sb!unix:unix-lseek (fd-stream-fd stream) offset origin)
-         (cond ((typep posn 'fixnum)
+         (cond ((typep posn '(alien sb!unix:off-t))
                 t)
                ((eq errno sb!unix:espipe)
                 nil)
                       (output nil output-p)
                       (element-type 'base-char)
                       (buffering :full)
+                      (external-format :default)
                       timeout
                       file
                       original
                       delete-original
                       pathname
                       input-buffer-p
+                      dual-channel-p
                       (name (if file
                                 (format nil "file ~S" file)
                                 (format nil "descriptor ~W" fd)))
                                 :delete-original delete-original
                                 :pathname pathname
                                 :buffering buffering
+                                :dual-channel-p dual-channel-p
+                                :external-format external-format
                                 :timeout timeout)))
     (set-fd-stream-routines stream element-type input output input-buffer-p)
     (when (and auto-close (fboundp 'finalize))
    :DIRECTION - one of :INPUT, :OUTPUT, :IO, or :PROBE
    :ELEMENT-TYPE - the type of object to read or write, default BASE-CHAR
    :IF-EXISTS - one of :ERROR, :NEW-VERSION, :RENAME, :RENAME-AND-DELETE,
-                      :OVERWRITE, :APPEND, :SUPERSEDE or NIL
-   :IF-DOES-NOT-EXIST - one of :ERROR, :CREATE or nil
+                       :OVERWRITE, :APPEND, :SUPERSEDE or NIL
+   :IF-DOES-NOT-EXIST - one of :ERROR, :CREATE or NIL
   See the manual for details."
 
-  (unless (eq external-format :default)
-    (error "Any external format other than :DEFAULT isn't recognized."))
-
-  ;; First, make sure that DIRECTION is valid.
-  (ensure-one-of direction
-                '(:input :output :io :probe)
-                :direction)
-
   ;; Calculate useful stuff.
   (multiple-value-bind (input output mask)
       (case direction
           (namestring
            (cond ((unix-namestring pathname input))
                  ((and input (eq if-does-not-exist :create))
+                  (unix-namestring pathname nil))
+                 ((and (eq direction :io) (not if-does-not-exist-given))
                   (unix-namestring pathname nil)))))
       ;; Process if-exists argument if we are doing any output.
       (cond (output
                                     :append :supersede nil)
                            :if-exists)
             (case if-exists
-              ((:error nil)
+              ((:new-version :error nil)
                (setf mask (logior mask sb!unix:o_excl)))
               ((:rename :rename-and-delete)
                (setf mask (logior mask sb!unix:o_creat)))
-              ((:new-version :supersede)
+              ((:supersede)
                (setf mask (logior mask sb!unix:o_trunc)))
               (:append
                (setf mask (logior mask sb!unix:o_append)))))
       (if (eq if-does-not-exist :create)
        (setf mask (logior mask sb!unix:o_creat)))
 
-      (let ((original (if (member if-exists
-                                 '(:rename :rename-and-delete))
-                         (pick-backup-name namestring)))
+      (let ((original (case if-exists
+                       ((:rename :rename-and-delete)
+                        (pick-backup-name namestring))
+                       ((:append :overwrite)
+                        ;; KLUDGE: Provent CLOSE from deleting
+                        ;; appending streams when called with :ABORT T
+                        namestring)))
            (delete-original (eq if-exists :rename-and-delete))
            (mode #o666))
-       (when original
+       (when (and original (not (eq original namestring)))
          ;; We are doing a :RENAME or :RENAME-AND-DELETE. Determine
          ;; whether the file already exists, make sure the original
          ;; file is not a directory, and keep the mode.
                      (logior (logandc2 mask sb!unix:o_creat)
                              sb!unix:o_trunc)))
              (setf if-exists :supersede))))
-       
+
        ;; Now we can try the actual Unix open(2).
        (multiple-value-bind (fd errno)
            (if namestring
                                      :input input
                                      :output output
                                      :element-type element-type
+                                     :external-format external-format
                                      :file namestring
                                      :original original
                                      :delete-original delete-original
                                      :pathname pathname
+                                     :dual-channel-p nil
                                      :input-buffer-p t
                                      :auto-close t))
                     (:probe
                      (open-error "~@<The path ~2I~_~S ~I~_does not exist.~:>"
                                  pathname))
                     (t nil)))
-                 ((and (eql errno sb!unix:eexist) if-exists)
+                 ((and (eql errno sb!unix:eexist) (null if-exists))
                   nil)
                  (t
                   (vanilla-open-error)))))))))
        (make-fd-stream 1 :name "standard output" :output t :buffering :line))
   (setf *stderr*
        (make-fd-stream 2 :name "standard error" :output t :buffering :line))
-  (let ((tty (sb!unix:unix-open "/dev/tty" sb!unix:o_rdwr #o666)))
+  (let* ((ttyname #.(coerce "/dev/tty" 'simple-base-string))
+        (tty (sb!unix:unix-open ttyname sb!unix:o_rdwr #o666)))
     (if tty
        (setf *tty*
              (make-fd-stream tty
 ;;;
 ;;; FIXME: misleading name, screwy interface
 (defun file-name (stream &optional new-name)
-  (when (typep stream 'file-stream)
+  (when (typep stream 'fd-stream)
       (cond (new-name
             (setf (fd-stream-pathname stream) new-name)
             (setf (fd-stream-file stream)
             t)
            (t
             (fd-stream-pathname stream)))))
-\f
-;;;; international character support (which is trivial for our simple
-;;;; character sets)
-
-;;;; (Those who do Lisp only in English might not remember that ANSI
-;;;; requires these functions to be exported from package
-;;;; COMMON-LISP.)
-
-(defun file-string-length (stream object)
-  (declare (type (or string character) object) (type file-stream stream))
-  #!+sb-doc
-  "Return the delta in STREAM's FILE-POSITION that would be caused by writing
-   OBJECT to STREAM. Non-trivial only in implementations that support
-   international character sets."
-  (declare (ignore stream))
-  (etypecase object
-    (character 1)
-    (string (length object))))
-
-(defun stream-external-format (stream)
-  (declare (type file-stream stream) (ignore stream))
-  #!+sb-doc
-  "Return :DEFAULT."
-  :default)