(external-format :default)
;; fixed width, or function to call with a character
(char-size 1 :type (or fixnum function))
- (output-bytes #'ill-out :type function))
+ (output-bytes #'ill-out :type function)
+ ;; a boolean indicating whether the stream is bivalent. For
+ ;; internal use only.
+ (bivalent-p nil :type boolean))
(def!method print-object ((fd-stream fd-stream) stream)
(declare (type stream stream))
(print-unreadable-object (fd-stream stream :type t :identity t)
(defun stream-decoding-error (stream octets)
(error 'stream-decoding-error
+ :external-format (stream-external-format stream)
:stream stream
;; FIXME: dunno how to get at OCTETS currently, or even if
;; that's the right thing to report.
:octets octets))
(defun stream-encoding-error (stream code)
(error 'stream-encoding-error
+ :external-format (stream-external-format stream)
:stream stream
:code code))
(attempt-resync ()
:report (lambda (stream)
(format stream
- "~@<Attempt to resync the stream at a character ~
+ "~@<Attempt to resync the stream at a ~
character boundary and continue.~@:>"))
(fd-stream-resync stream)
nil)
(setf (fd-stream-char-pos stream) (- end last-newline 1))
(incf (fd-stream-char-pos stream) (- end start))))))
-(defvar *external-formats* ()
+(defstruct (external-format
+ (:constructor %make-external-format)
+ (:conc-name ef-)
+ (:predicate external-format-p)
+ (:copier nil))
+ ;; All the names that can refer to this external format. The first
+ ;; one is the canonical name.
+ (names (missing-arg) :type list :read-only t)
+ (read-n-chars-fun (missing-arg) :type function :read-only t)
+ (read-char-fun (missing-arg) :type function :read-only t)
+ (write-n-bytes-fun (missing-arg) :type function :read-only t)
+ (write-char-none-buffered-fun (missing-arg) :type function :read-only t)
+ (write-char-line-buffered-fun (missing-arg) :type function :read-only t)
+ (write-char-full-buffered-fun (missing-arg) :type function :read-only t)
+ ;; Can be nil for fixed-width formats.
+ (resync-fun nil :type (or function null) :read-only t)
+ (bytes-for-char-fun (missing-arg) :type function :read-only t)
+ (read-c-string-fun (missing-arg) :type function :read-only t)
+ (write-c-string-fun (missing-arg) :type function :read-only t)
+ ;; We make these symbols so that a developer working on the octets
+ ;; code can easily redefine things and use the new function definition
+ ;; without redefining the external format as well. The slots above
+ ;; are functions because a developer working with those slots would be
+ ;; redefining the external format anyway.
+ (octets-to-string-sym (missing-arg) :type symbol :read-only t)
+ (string-to-octets-sym (missing-arg) :type symbol :read-only t))
+
+(defvar *external-formats* (make-hash-table)
#!+sb-doc
- "List of all available external formats. Each element is a list of the
- element-type, string input function name, character input function name,
- and string output function name.")
+ "Hashtable of all available external formats. The table maps from
+ external-format names to EXTERNAL-FORMAT structures.")
(defun get-external-format (external-format)
- (dolist (entry *external-formats*)
- (when (member external-format (first entry))
- (return entry))))
+ (gethash external-format *external-formats*))
-(defun get-external-format-function (external-format index)
- (let ((entry (get-external-format external-format)))
- (when entry (nth index entry))))
+(defun get-external-format-or-lose (external-format)
+ (or (get-external-format external-format)
+ (error "Undefined external-format ~A" external-format)))
;;; Find an output routine to use given the type and buffering. Return
;;; as multiple values the routine, the real type transfered, and the
(let ((entry (get-external-format external-format)))
(when entry
(return-from pick-output-routine
- (values (symbol-function (nth (ecase buffering
- (:none 4)
- (:line 5)
- (:full 6))
- entry))
+ (values (ecase buffering
+ (:none (ef-write-char-none-buffered-fun entry))
+ (:line (ef-write-char-line-buffered-fun entry))
+ (:full (ef-write-char-full-buffered-fun entry)))
'character
1
- (symbol-function (fourth entry))
- (first (first entry)))))))
+ (ef-write-n-bytes-fun entry)
+ (first (ef-names entry)))))))
(dolist (entry *output-routines*)
(when (and (subtypep type (first entry))
(eq buffering (second entry))
;;; correct on win32. However, none of the places that use it require
;;; further assurance than "may" versus "will definitely not".
(defun sysread-may-block-p (stream)
- #+win32
+ #!+win32
;; This answers T at EOF on win32, I think.
(not (sb!win32:fd-listen (fd-stream-fd stream)))
- #-win32
+ #!-win32
(sb!unix:with-restarted-syscall (count errno)
(sb!alien:with-alien ((read-fds (sb!alien:struct sb!unix:fd-set)))
(sb!unix:fd-zero read-fds)
;;; then fill the input buffer, and return the number of bytes read. Throws
;;; to EOF-INPUT-CATCHER if the eof was reached.
(defun refill-input-buffer (stream)
- (let ((fd (fd-stream-fd stream))
- (errno 0)
- (count 0))
- (declare (dynamic-extent fd errno count))
+ (dx-let ((fd (fd-stream-fd stream))
+ (errno 0)
+ (count 0))
(tagbody
;; Check for blocking input before touching the stream, as if
;; we happen to wait we are liable to be interrupted, and the
;;; bytes per element (and for character types string input routine).
(defun pick-input-routine (type &optional external-format)
(when (subtypep type 'character)
- (dolist (entry *external-formats*)
- (when (member external-format (first entry))
+ (let ((entry (get-external-format external-format)))
+ (when entry
(return-from pick-input-routine
- (values (symbol-function (third entry))
+ (values (ef-read-char-fun entry)
'character
1
- (symbol-function (second entry))
- (first (first entry)))))))
+ (ef-read-n-chars-fun entry)
+ (first (ef-names entry)))))))
(dolist (entry *input-routines*)
(when (and (subtypep type (first entry))
(or (not (fourth entry))
))))
(defun fd-stream-resync (stream)
- (dolist (entry *external-formats*)
- (when (member (fd-stream-external-format stream) (first entry))
- (return-from fd-stream-resync
- (funcall (symbol-function (eighth entry)) stream)))))
+ (let ((entry (get-external-format (fd-stream-external-format stream))))
+ (when entry
+ (funcall (ef-resync-fun entry) stream))))
(defun get-fd-stream-character-sizer (stream)
- (dolist (entry *external-formats*)
- (when (member (fd-stream-external-format stream) (first entry))
- (return-from get-fd-stream-character-sizer (ninth entry)))))
+ (let ((entry (get-external-format (fd-stream-external-format stream))))
+ (when entry
+ (ef-bytes-for-char-fun entry))))
(defun fd-stream-character-size (stream char)
(let ((sizer (get-fd-stream-character-sizer stream)))
(defun find-external-format (external-format)
(when external-format
- (find external-format *external-formats* :test #'member :key #'car)))
+ (get-external-format external-format)))
(defun variable-width-external-format-p (ef-entry)
- (when (eighth ef-entry) t))
+ (and ef-entry (not (null (ef-resync-fun ef-entry)))))
(defun bytes-for-char-fun (ef-entry)
- (if ef-entry (symbol-function (ninth ef-entry)) (constantly 1)))
+ (if ef-entry (ef-bytes-for-char-fun ef-entry) (constantly 1)))
-;;; FIXME: OAOOM here vrt. *EXTERNAL-FORMAT-FUNCTIONS* in fd-stream.lisp
(defmacro define-external-format (external-format size output-restart
- out-expr in-expr)
+ out-expr in-expr
+ octets-to-string-sym
+ string-to-octets-sym)
(let* ((name (first external-format))
(out-function (symbolicate "OUTPUT-BYTES/" name))
(format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
(declare (ignorable bits byte))
,out-expr)))
,n-buffer)))
- (setf *external-formats*
- (cons '(,external-format ,in-function ,in-char-function ,out-function
- ,@(mapcar #'(lambda (buffering)
- (intern (format nil format (string buffering))))
- '(:none :line :full))
- nil ; no resync-function
- ,size-function ,read-c-string-function ,output-c-string-function)
- *external-formats*)))))
+ (let ((entry (%make-external-format
+ :names ',external-format
+ :read-n-chars-fun #',in-function
+ :read-char-fun #',in-char-function
+ :write-n-bytes-fun #',out-function
+ ,@(mapcan #'(lambda (buffering)
+ (list (intern (format nil "WRITE-CHAR-~A-BUFFERED-FUN" buffering) :keyword)
+ `#',(intern (format nil format (string buffering)))))
+ '(:none :line :full))
+ :resync-fun nil
+ :bytes-for-char-fun #',size-function
+ :read-c-string-fun #',read-c-string-function
+ :write-c-string-fun #',output-c-string-function
+ :octets-to-string-sym ',octets-to-string-sym
+ :string-to-octets-sym ',string-to-octets-sym)))
+ (dolist (ef ',external-format)
+ (setf (gethash ef *external-formats*) entry))))))
(defmacro define-external-format/variable-width
(external-format output-restart out-size-expr
- out-expr in-size-expr in-expr)
+ out-expr in-size-expr in-expr
+ octets-to-string-sym string-to-octets-sym)
(let* ((name (first external-format))
(out-function (symbolicate "OUTPUT-BYTES/" name))
(format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
,out-expr)))
,n-buffer)))
- (setf *external-formats*
- (cons '(,external-format ,in-function ,in-char-function ,out-function
- ,@(mapcar #'(lambda (buffering)
- (intern (format nil format (string buffering))))
- '(:none :line :full))
- ,resync-function
- ,size-function ,read-c-string-function ,output-c-string-function)
- *external-formats*)))))
-
-;;; Multiple names for the :ISO{,-}8859-* families are needed because on
-;;; FreeBSD (and maybe other BSD systems), nl_langinfo("LATIN-1") will
-;;; return "ISO8859-1" instead of "ISO-8859-1".
-(define-external-format (:latin-1 :latin1 :iso-8859-1 :iso8859-1)
- 1 t
- (if (>= bits 256)
- (external-format-encoding-error stream bits)
- (setf (sap-ref-8 sap tail) bits))
- (code-char byte))
-
-(define-external-format (:ascii :us-ascii :ansi_x3.4-1968
- :iso-646 :iso-646-us :|646|)
- 1 t
- (if (>= bits 128)
- (external-format-encoding-error stream bits)
- (setf (sap-ref-8 sap tail) bits))
- (code-char byte))
-
-(let* ((table (let ((s (make-string 256)))
- (map-into s #'code-char
- '(#x00 #x01 #x02 #x03 #x9c #x09 #x86 #x7f #x97 #x8d #x8e #x0b #x0c #x0d #x0e #x0f
- #x10 #x11 #x12 #x13 #x9d #x85 #x08 #x87 #x18 #x19 #x92 #x8f #x1c #x1d #x1e #x1f
- #x80 #x81 #x82 #x83 #x84 #x0a #x17 #x1b #x88 #x89 #x8a #x8b #x8c #x05 #x06 #x07
- #x90 #x91 #x16 #x93 #x94 #x95 #x96 #x04 #x98 #x99 #x9a #x9b #x14 #x15 #x9e #x1a
- #x20 #xa0 #xe2 #xe4 #xe0 #xe1 #xe3 #xe5 #xe7 #xf1 #xa2 #x2e #x3c #x28 #x2b #x7c
- #x26 #xe9 #xea #xeb #xe8 #xed #xee #xef #xec #xdf #x21 #x24 #x2a #x29 #x3b #xac
- #x2d #x2f #xc2 #xc4 #xc0 #xc1 #xc3 #xc5 #xc7 #xd1 #xa6 #x2c #x25 #x5f #x3e #x3f
- #xf8 #xc9 #xca #xcb #xc8 #xcd #xce #xcf #xcc #x60 #x3a #x23 #x40 #x27 #x3d #x22
- #xd8 #x61 #x62 #x63 #x64 #x65 #x66 #x67 #x68 #x69 #xab #xbb #xf0 #xfd #xfe #xb1
- #xb0 #x6a #x6b #x6c #x6d #x6e #x6f #x70 #x71 #x72 #xaa #xba #xe6 #xb8 #xc6 #xa4
- #xb5 #x7e #x73 #x74 #x75 #x76 #x77 #x78 #x79 #x7a #xa1 #xbf #xd0 #xdd #xde #xae
- #x5e #xa3 #xa5 #xb7 #xa9 #xa7 #xb6 #xbc #xbd #xbe #x5b #x5d #xaf #xa8 #xb4 #xd7
- #x7b #x41 #x42 #x43 #x44 #x45 #x46 #x47 #x48 #x49 #xad #xf4 #xf6 #xf2 #xf3 #xf5
- #x7d #x4a #x4b #x4c #x4d #x4e #x4f #x50 #x51 #x52 #xb9 #xfb #xfc #xf9 #xfa #xff
- #x5c #xf7 #x53 #x54 #x55 #x56 #x57 #x58 #x59 #x5a #xb2 #xd4 #xd6 #xd2 #xd3 #xd5
- #x30 #x31 #x32 #x33 #x34 #x35 #x36 #x37 #x38 #x39 #xb3 #xdb #xdc #xd9 #xda #x9f))
- s))
- (reverse-table (let ((rt (make-array 256 :element-type '(unsigned-byte 8) :initial-element 0)))
- (loop for char across table for i from 0
- do (aver (= 0 (aref rt (char-code char))))
- do (setf (aref rt (char-code char)) i))
- rt)))
- (define-external-format (:ebcdic-us :ibm-037 :ibm037)
- 1 t
- (if (>= bits 256)
- (external-format-encoding-error stream bits)
- (setf (sap-ref-8 sap tail) (aref reverse-table bits)))
- (aref table byte)))
-
-
-#!+sb-unicode
-(let ((latin-9-table (let ((table (make-string 256)))
- (do ((i 0 (1+ i)))
- ((= i 256))
- (setf (aref table i) (code-char i)))
- (setf (aref table #xa4) (code-char #x20ac))
- (setf (aref table #xa6) (code-char #x0160))
- (setf (aref table #xa8) (code-char #x0161))
- (setf (aref table #xb4) (code-char #x017d))
- (setf (aref table #xb8) (code-char #x017e))
- (setf (aref table #xbc) (code-char #x0152))
- (setf (aref table #xbd) (code-char #x0153))
- (setf (aref table #xbe) (code-char #x0178))
- table))
- (latin-9-reverse-1 (make-array 16
- :element-type '(unsigned-byte 21)
- :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0)))
- (latin-9-reverse-2 (make-array 16
- :element-type '(unsigned-byte 8)
- :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0))))
- (define-external-format (:latin-9 :latin9 :iso-8859-15 :iso8859-15)
- 1 t
- (setf (sap-ref-8 sap tail)
- (if (< bits 256)
- (if (= bits (char-code (aref latin-9-table bits)))
- bits
- (external-format-encoding-error stream byte))
- (if (= (aref latin-9-reverse-1 (logand bits 15)) bits)
- (aref latin-9-reverse-2 (logand bits 15))
- (external-format-encoding-error stream byte))))
- (aref latin-9-table byte)))
-
-(define-external-format/variable-width (:utf-8 :utf8) nil
- (let ((bits (char-code byte)))
- (cond ((< bits #x80) 1)
- ((< bits #x800) 2)
- ((< bits #x10000) 3)
- (t 4)))
- (ecase size
- (1 (setf (sap-ref-8 sap tail) bits))
- (2 (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits))
- (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 0) bits))))
- (3 (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits))
- (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 6) bits))
- (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits))))
- (4 (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits))
- (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 12) bits))
- (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits))
- (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits)))))
- (cond ((< byte #x80) 1)
- ((< byte #xc2) (return-from decode-break-reason 1))
- ((< byte #xe0) 2)
- ((< byte #xf0) 3)
- (t 4))
- (code-char (ecase size
- (1 byte)
- (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
- (unless (<= #x80 byte2 #xbf)
- (return-from decode-break-reason 2))
- (dpb byte (byte 5 6) byte2)))
- (3 (let ((byte2 (sap-ref-8 sap (1+ head)))
- (byte3 (sap-ref-8 sap (+ 2 head))))
- (unless (and (<= #x80 byte2 #xbf)
- (<= #x80 byte3 #xbf))
- (return-from decode-break-reason 3))
- (dpb byte (byte 4 12) (dpb byte2 (byte 6 6) byte3))))
- (4 (let ((byte2 (sap-ref-8 sap (1+ head)))
- (byte3 (sap-ref-8 sap (+ 2 head)))
- (byte4 (sap-ref-8 sap (+ 3 head))))
- (unless (and (<= #x80 byte2 #xbf)
- (<= #x80 byte3 #xbf)
- (<= #x80 byte4 #xbf))
- (return-from decode-break-reason 4))
- (dpb byte (byte 3 18)
- (dpb byte2 (byte 6 12)
- (dpb byte3 (byte 6 6) byte4))))))))
+ (let ((entry (%make-external-format
+ :names ',external-format
+ :read-n-chars-fun #',in-function
+ :read-char-fun #',in-char-function
+ :write-n-bytes-fun #',out-function
+ ,@(mapcan #'(lambda (buffering)
+ (list (intern (format nil "WRITE-CHAR-~A-BUFFERED-FUN" buffering) :keyword)
+ `#',(intern (format nil format (string buffering)))))
+ '(:none :line :full))
+ :resync-fun #',resync-function
+ :bytes-for-char-fun #',size-function
+ :read-c-string-fun #',read-c-string-function
+ :write-c-string-fun #',output-c-string-function
+ :octets-to-string-sym ',octets-to-string-sym
+ :string-to-octets-sym ',string-to-octets-sym)))
+ (dolist (ef ',external-format)
+ (setf (gethash ef *external-formats*) entry))))))
\f
;;;; utility functions (misc routines, etc)
(do-listen)))))))
(do-listen)))
(:unread
- (setf (fd-stream-unread fd-stream) arg1)
+ ;; If the stream is bivalent, the user might follow an
+ ;; unread-char with a read-byte. In this case, the bookkeeping
+ ;; is simpler if we adjust the buffer head by the number of code
+ ;; units in the character.
+ ;; FIXME: there has to be a proper way to check for bivalence,
+ ;; right?
+ (if (fd-stream-bivalent-p fd-stream)
+ (decf (buffer-head (fd-stream-ibuf fd-stream))
+ (fd-stream-character-size fd-stream arg1))
+ (setf (fd-stream-unread fd-stream) arg1))
(setf (fd-stream-listen fd-stream) t))
(:close
- (cond (arg1 ; We got us an abort on our hands.
+ ;; Drop input buffers
+ (setf (ansi-stream-in-index fd-stream) +ansi-stream-in-buffer-length+
+ (ansi-stream-cin-buffer fd-stream) nil
+ (ansi-stream-in-buffer fd-stream) nil)
+ (cond (arg1
+ ;; We got us an abort on our hands.
(let ((outputp (fd-stream-obuf fd-stream))
(file (fd-stream-file fd-stream))
(orig (fd-stream-original fd-stream)))
:buffering buffering
:dual-channel-p dual-channel-p
:external-format external-format
+ :bivalent-p (eq element-type :default)
:char-size (external-format-char-size external-format)
:timeout
(if timeout
(:io (values t t sb!unix:o_rdwr))
(:probe (values t nil sb!unix:o_rdonly)))
(declare (type index mask))
- (let* ((pathname (merge-pathnames filename))
- (namestring
- (cond ((unix-namestring pathname input))
- ((and input (eq if-does-not-exist :create))
- (unix-namestring pathname nil))
- ((and (eq direction :io) (not if-does-not-exist-given))
- (unix-namestring pathname nil)))))
+ (let* (;; PATHNAME is the pathname we associate with the stream.
+ (pathname (merge-pathnames filename))
+ (physical (physicalize-pathname pathname))
+ (truename (probe-file physical))
+ ;; NAMESTRING is the native namestring we open the file with.
+ (namestring (cond (truename
+ (native-namestring truename :as-file t))
+ ((or (not input)
+ (and input (eq if-does-not-exist :create))
+ (and (eq direction :io) (not if-does-not-exist-given)))
+ (native-namestring physical :as-file t)))))
;; Process if-exists argument if we are doing any output.
(cond (output
(unless if-exists-given
(when (and output (= (logand orig-mode #o170000)
#o40000))
(error 'simple-file-error
- :pathname namestring
+ :pathname pathname
:format-control
"can't open ~S for output: is a directory"
:format-arguments (list namestring)))
(cond (new-name
(setf (fd-stream-pathname stream) new-name)
(setf (fd-stream-file stream)
- (unix-namestring new-name nil))
+ (native-namestring (physicalize-pathname new-name)
+ :as-file t))
t)
(t
(fd-stream-pathname stream)))))