X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ffd-stream.lisp;h=655ee641013c41a287a176ba89e87f489142954a;hb=95591ed483dbb8c0846c129953acac1554f28809;hp=1428363c0541ced5448e35dc1e16dc25677f5885;hpb=496071a75429677a2c064e4995c379d3ba6ec458;p=sbcl.git diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 1428363..655ee64 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -187,7 +187,10 @@ (external-format :default) ;; fixed width, or function to call with a character (char-size 1 :type (or fixnum function)) - (output-bytes #'ill-out :type function)) + (output-bytes #'ill-out :type function) + ;; a boolean indicating whether the stream is bivalent. For + ;; internal use only. + (bivalent-p nil :type boolean)) (def!method print-object ((fd-stream fd-stream) stream) (declare (type stream stream)) (print-unreadable-object (fd-stream stream :type t :identity t) @@ -418,12 +421,14 @@ (defun stream-decoding-error (stream octets) (error 'stream-decoding-error + :external-format (stream-external-format stream) :stream stream ;; FIXME: dunno how to get at OCTETS currently, or even if ;; that's the right thing to report. :octets octets)) (defun stream-encoding-error (stream code) (error 'stream-encoding-error + :external-format (stream-external-format stream) :stream stream :code code)) @@ -450,7 +455,7 @@ (attempt-resync () :report (lambda (stream) (format stream - "~@")) (fd-stream-resync stream) nil) @@ -718,20 +723,44 @@ (setf (fd-stream-char-pos stream) (- end last-newline 1)) (incf (fd-stream-char-pos stream) (- end start)))))) -(defvar *external-formats* () +(defstruct (external-format + (:constructor %make-external-format) + (:conc-name ef-) + (:predicate external-format-p) + (:copier nil)) + ;; All the names that can refer to this external format. The first + ;; one is the canonical name. + (names (missing-arg) :type list :read-only t) + (read-n-chars-fun (missing-arg) :type function :read-only t) + (read-char-fun (missing-arg) :type function :read-only t) + (write-n-bytes-fun (missing-arg) :type function :read-only t) + (write-char-none-buffered-fun (missing-arg) :type function :read-only t) + (write-char-line-buffered-fun (missing-arg) :type function :read-only t) + (write-char-full-buffered-fun (missing-arg) :type function :read-only t) + ;; Can be nil for fixed-width formats. + (resync-fun nil :type (or function null) :read-only t) + (bytes-for-char-fun (missing-arg) :type function :read-only t) + (read-c-string-fun (missing-arg) :type function :read-only t) + (write-c-string-fun (missing-arg) :type function :read-only t) + ;; We make these symbols so that a developer working on the octets + ;; code can easily redefine things and use the new function definition + ;; without redefining the external format as well. The slots above + ;; are functions because a developer working with those slots would be + ;; redefining the external format anyway. + (octets-to-string-sym (missing-arg) :type symbol :read-only t) + (string-to-octets-sym (missing-arg) :type symbol :read-only t)) + +(defvar *external-formats* (make-hash-table) #!+sb-doc - "List of all available external formats. Each element is a list of the - element-type, string input function name, character input function name, - and string output function name.") + "Hashtable of all available external formats. The table maps from + external-format names to EXTERNAL-FORMAT structures.") (defun get-external-format (external-format) - (dolist (entry *external-formats*) - (when (member external-format (first entry)) - (return entry)))) + (gethash external-format *external-formats*)) -(defun get-external-format-function (external-format index) - (let ((entry (get-external-format external-format))) - (when entry (nth index entry)))) +(defun get-external-format-or-lose (external-format) + (or (get-external-format external-format) + (error "Undefined external-format ~A" external-format))) ;;; Find an output routine to use given the type and buffering. Return ;;; as multiple values the routine, the real type transfered, and the @@ -741,15 +770,14 @@ (let ((entry (get-external-format external-format))) (when entry (return-from pick-output-routine - (values (symbol-function (nth (ecase buffering - (:none 4) - (:line 5) - (:full 6)) - entry)) + (values (ecase buffering + (:none (ef-write-char-none-buffered-fun entry)) + (:line (ef-write-char-line-buffered-fun entry)) + (:full (ef-write-char-full-buffered-fun entry))) 'character 1 - (symbol-function (fourth entry)) - (first (first entry))))))) + (ef-write-n-bytes-fun entry) + (first (ef-names entry))))))) (dolist (entry *output-routines*) (when (and (subtypep type (first entry)) (eq buffering (second entry)) @@ -821,10 +849,10 @@ ;;; correct on win32. However, none of the places that use it require ;;; further assurance than "may" versus "will definitely not". (defun sysread-may-block-p (stream) - #+win32 + #!+win32 ;; This answers T at EOF on win32, I think. (not (sb!win32:fd-listen (fd-stream-fd stream))) - #-win32 + #!-win32 (sb!unix:with-restarted-syscall (count errno) (sb!alien:with-alien ((read-fds (sb!alien:struct sb!unix:fd-set))) (sb!unix:fd-zero read-fds) @@ -844,11 +872,9 @@ ;;; then fill the input buffer, and return the number of bytes read. Throws ;;; to EOF-INPUT-CATCHER if the eof was reached. (defun refill-input-buffer (stream) - (let ((fd (fd-stream-fd stream)) - (errno 0) - (count 0)) - (declare (optimize sb!c::stack-allocate-value-cells) - (dynamic-extent fd errno count)) + (dx-let ((fd (fd-stream-fd stream)) + (errno 0) + (count 0)) (tagbody ;; Check for blocking input before touching the stream, as if ;; we happen to wait we are liable to be interrupted, and the @@ -1089,14 +1115,14 @@ ;;; bytes per element (and for character types string input routine). (defun pick-input-routine (type &optional external-format) (when (subtypep type 'character) - (dolist (entry *external-formats*) - (when (member external-format (first entry)) + (let ((entry (get-external-format external-format))) + (when entry (return-from pick-input-routine - (values (symbol-function (third entry)) + (values (ef-read-char-fun entry) 'character 1 - (symbol-function (second entry)) - (first (first entry))))))) + (ef-read-n-chars-fun entry) + (first (ef-names entry))))))) (dolist (entry *input-routines*) (when (and (subtypep type (first entry)) (or (not (fourth entry)) @@ -1199,15 +1225,14 @@ )))) (defun fd-stream-resync (stream) - (dolist (entry *external-formats*) - (when (member (fd-stream-external-format stream) (first entry)) - (return-from fd-stream-resync - (funcall (symbol-function (eighth entry)) stream))))) + (let ((entry (get-external-format (fd-stream-external-format stream)))) + (when entry + (funcall (ef-resync-fun entry) stream)))) (defun get-fd-stream-character-sizer (stream) - (dolist (entry *external-formats*) - (when (member (fd-stream-external-format stream) (first entry)) - (return-from get-fd-stream-character-sizer (ninth entry))))) + (let ((entry (get-external-format (fd-stream-external-format stream)))) + (when entry + (ef-bytes-for-char-fun entry)))) (defun fd-stream-character-size (stream char) (let ((sizer (get-fd-stream-character-sizer stream))) @@ -1220,17 +1245,18 @@ (defun find-external-format (external-format) (when external-format - (find external-format *external-formats* :test #'member :key #'car))) + (get-external-format external-format))) (defun variable-width-external-format-p (ef-entry) - (when (eighth ef-entry) t)) + (and ef-entry (not (null (ef-resync-fun ef-entry))))) (defun bytes-for-char-fun (ef-entry) - (if ef-entry (symbol-function (ninth ef-entry)) (constantly 1))) + (if ef-entry (ef-bytes-for-char-fun ef-entry) (constantly 1))) -;;; FIXME: OAOOM here vrt. *EXTERNAL-FORMAT-FUNCTIONS* in fd-stream.lisp (defmacro define-external-format (external-format size output-restart - out-expr in-expr) + out-expr in-expr + octets-to-string-sym + string-to-octets-sym) (let* ((name (first external-format)) (out-function (symbolicate "OUTPUT-BYTES/" name)) (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name))) @@ -1395,18 +1421,28 @@ (declare (ignorable bits byte)) ,out-expr))) ,n-buffer))) - (setf *external-formats* - (cons '(,external-format ,in-function ,in-char-function ,out-function - ,@(mapcar #'(lambda (buffering) - (intern (format nil format (string buffering)))) - '(:none :line :full)) - nil ; no resync-function - ,size-function ,read-c-string-function ,output-c-string-function) - *external-formats*))))) + (let ((entry (%make-external-format + :names ',external-format + :read-n-chars-fun #',in-function + :read-char-fun #',in-char-function + :write-n-bytes-fun #',out-function + ,@(mapcan #'(lambda (buffering) + (list (intern (format nil "WRITE-CHAR-~A-BUFFERED-FUN" buffering) :keyword) + `#',(intern (format nil format (string buffering))))) + '(:none :line :full)) + :resync-fun nil + :bytes-for-char-fun #',size-function + :read-c-string-fun #',read-c-string-function + :write-c-string-fun #',output-c-string-function + :octets-to-string-sym ',octets-to-string-sym + :string-to-octets-sym ',string-to-octets-sym))) + (dolist (ef ',external-format) + (setf (gethash ef *external-formats*) entry)))))) (defmacro define-external-format/variable-width (external-format output-restart out-size-expr - out-expr in-size-expr in-expr) + out-expr in-size-expr in-expr + octets-to-string-sym string-to-octets-sym) (let* ((name (first external-format)) (out-function (symbolicate "OUTPUT-BYTES/" name)) (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name))) @@ -1427,7 +1463,7 @@ (declare (type index start end)) (synchronize-stream-output stream) (unless (<= 0 start end (length string)) - (sequence-bounding-indices-bad string start end)) + (sequence-bounding-indices-bad-error string start end)) (do () ((= end start)) (let ((obuf (fd-stream-obuf stream))) @@ -1648,141 +1684,23 @@ ,out-expr))) ,n-buffer))) - (setf *external-formats* - (cons '(,external-format ,in-function ,in-char-function ,out-function - ,@(mapcar #'(lambda (buffering) - (intern (format nil format (string buffering)))) - '(:none :line :full)) - ,resync-function - ,size-function ,read-c-string-function ,output-c-string-function) - *external-formats*))))) - -;;; Multiple names for the :ISO{,-}8859-* families are needed because on -;;; FreeBSD (and maybe other BSD systems), nl_langinfo("LATIN-1") will -;;; return "ISO8859-1" instead of "ISO-8859-1". -(define-external-format (:latin-1 :latin1 :iso-8859-1 :iso8859-1) - 1 t - (if (>= bits 256) - (external-format-encoding-error stream bits) - (setf (sap-ref-8 sap tail) bits)) - (code-char byte)) - -(define-external-format (:ascii :us-ascii :ansi_x3.4-1968 - :iso-646 :iso-646-us :|646|) - 1 t - (if (>= bits 128) - (external-format-encoding-error stream bits) - (setf (sap-ref-8 sap tail) bits)) - (code-char byte)) - -(let* ((table (let ((s (make-string 256))) - (map-into s #'code-char - '(#x00 #x01 #x02 #x03 #x9c #x09 #x86 #x7f #x97 #x8d #x8e #x0b #x0c #x0d #x0e #x0f - #x10 #x11 #x12 #x13 #x9d #x85 #x08 #x87 #x18 #x19 #x92 #x8f #x1c #x1d #x1e #x1f - #x80 #x81 #x82 #x83 #x84 #x0a #x17 #x1b #x88 #x89 #x8a #x8b #x8c #x05 #x06 #x07 - #x90 #x91 #x16 #x93 #x94 #x95 #x96 #x04 #x98 #x99 #x9a #x9b #x14 #x15 #x9e #x1a - #x20 #xa0 #xe2 #xe4 #xe0 #xe1 #xe3 #xe5 #xe7 #xf1 #xa2 #x2e #x3c #x28 #x2b #x7c - #x26 #xe9 #xea #xeb #xe8 #xed #xee #xef #xec #xdf #x21 #x24 #x2a #x29 #x3b #xac - #x2d #x2f #xc2 #xc4 #xc0 #xc1 #xc3 #xc5 #xc7 #xd1 #xa6 #x2c #x25 #x5f #x3e #x3f - #xf8 #xc9 #xca #xcb #xc8 #xcd #xce #xcf #xcc #x60 #x3a #x23 #x40 #x27 #x3d #x22 - #xd8 #x61 #x62 #x63 #x64 #x65 #x66 #x67 #x68 #x69 #xab #xbb #xf0 #xfd #xfe #xb1 - #xb0 #x6a #x6b #x6c #x6d #x6e #x6f #x70 #x71 #x72 #xaa #xba #xe6 #xb8 #xc6 #xa4 - #xb5 #x7e #x73 #x74 #x75 #x76 #x77 #x78 #x79 #x7a #xa1 #xbf #xd0 #xdd #xde #xae - #x5e #xa3 #xa5 #xb7 #xa9 #xa7 #xb6 #xbc #xbd #xbe #x5b #x5d #xaf #xa8 #xb4 #xd7 - #x7b #x41 #x42 #x43 #x44 #x45 #x46 #x47 #x48 #x49 #xad #xf4 #xf6 #xf2 #xf3 #xf5 - #x7d #x4a #x4b #x4c #x4d #x4e #x4f #x50 #x51 #x52 #xb9 #xfb #xfc #xf9 #xfa #xff - #x5c #xf7 #x53 #x54 #x55 #x56 #x57 #x58 #x59 #x5a #xb2 #xd4 #xd6 #xd2 #xd3 #xd5 - #x30 #x31 #x32 #x33 #x34 #x35 #x36 #x37 #x38 #x39 #xb3 #xdb #xdc #xd9 #xda #x9f)) - s)) - (reverse-table (let ((rt (make-array 256 :element-type '(unsigned-byte 8) :initial-element 0))) - (loop for char across table for i from 0 - do (aver (= 0 (aref rt (char-code char)))) - do (setf (aref rt (char-code char)) i)) - rt))) - (define-external-format (:ebcdic-us :ibm-037 :ibm037) - 1 t - (if (>= bits 256) - (external-format-encoding-error stream bits) - (setf (sap-ref-8 sap tail) (aref reverse-table bits))) - (aref table byte))) - - -#!+sb-unicode -(let ((latin-9-table (let ((table (make-string 256))) - (do ((i 0 (1+ i))) - ((= i 256)) - (setf (aref table i) (code-char i))) - (setf (aref table #xa4) (code-char #x20ac)) - (setf (aref table #xa6) (code-char #x0160)) - (setf (aref table #xa8) (code-char #x0161)) - (setf (aref table #xb4) (code-char #x017d)) - (setf (aref table #xb8) (code-char #x017e)) - (setf (aref table #xbc) (code-char #x0152)) - (setf (aref table #xbd) (code-char #x0153)) - (setf (aref table #xbe) (code-char #x0178)) - table)) - (latin-9-reverse-1 (make-array 16 - :element-type '(unsigned-byte 21) - :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0))) - (latin-9-reverse-2 (make-array 16 - :element-type '(unsigned-byte 8) - :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0)))) - (define-external-format (:latin-9 :latin9 :iso-8859-15 :iso8859-15) - 1 t - (setf (sap-ref-8 sap tail) - (if (< bits 256) - (if (= bits (char-code (aref latin-9-table bits))) - bits - (external-format-encoding-error stream byte)) - (if (= (aref latin-9-reverse-1 (logand bits 15)) bits) - (aref latin-9-reverse-2 (logand bits 15)) - (external-format-encoding-error stream byte)))) - (aref latin-9-table byte))) - -(define-external-format/variable-width (:utf-8 :utf8) nil - (let ((bits (char-code byte))) - (cond ((< bits #x80) 1) - ((< bits #x800) 2) - ((< bits #x10000) 3) - (t 4))) - (ecase size - (1 (setf (sap-ref-8 sap tail) bits)) - (2 (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits)) - (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 0) bits)))) - (3 (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits)) - (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 6) bits)) - (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits)))) - (4 (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits)) - (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 12) bits)) - (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits)) - (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits))))) - (cond ((< byte #x80) 1) - ((< byte #xc2) (return-from decode-break-reason 1)) - ((< byte #xe0) 2) - ((< byte #xf0) 3) - (t 4)) - (code-char (ecase size - (1 byte) - (2 (let ((byte2 (sap-ref-8 sap (1+ head)))) - (unless (<= #x80 byte2 #xbf) - (return-from decode-break-reason 2)) - (dpb byte (byte 5 6) byte2))) - (3 (let ((byte2 (sap-ref-8 sap (1+ head))) - (byte3 (sap-ref-8 sap (+ 2 head)))) - (unless (and (<= #x80 byte2 #xbf) - (<= #x80 byte3 #xbf)) - (return-from decode-break-reason 3)) - (dpb byte (byte 4 12) (dpb byte2 (byte 6 6) byte3)))) - (4 (let ((byte2 (sap-ref-8 sap (1+ head))) - (byte3 (sap-ref-8 sap (+ 2 head))) - (byte4 (sap-ref-8 sap (+ 3 head)))) - (unless (and (<= #x80 byte2 #xbf) - (<= #x80 byte3 #xbf) - (<= #x80 byte4 #xbf)) - (return-from decode-break-reason 4)) - (dpb byte (byte 3 18) - (dpb byte2 (byte 6 12) - (dpb byte3 (byte 6 6) byte4)))))))) + (let ((entry (%make-external-format + :names ',external-format + :read-n-chars-fun #',in-function + :read-char-fun #',in-char-function + :write-n-bytes-fun #',out-function + ,@(mapcan #'(lambda (buffering) + (list (intern (format nil "WRITE-CHAR-~A-BUFFERED-FUN" buffering) :keyword) + `#',(intern (format nil format (string buffering))))) + '(:none :line :full)) + :resync-fun #',resync-function + :bytes-for-char-fun #',size-function + :read-c-string-fun #',read-c-string-function + :write-c-string-fun #',output-c-string-function + :octets-to-string-sym ',octets-to-string-sym + :string-to-octets-sym ',string-to-octets-sym))) + (dolist (ef ',external-format) + (setf (gethash ef *external-formats*) entry)))))) ;;;; utility functions (misc routines, etc) @@ -2041,10 +1959,24 @@ (do-listen))))))) (do-listen))) (:unread - (setf (fd-stream-unread fd-stream) arg1) + ;; If the stream is bivalent, the user might follow an + ;; unread-char with a read-byte. In this case, the bookkeeping + ;; is simpler if we adjust the buffer head by the number of code + ;; units in the character. + ;; FIXME: there has to be a proper way to check for bivalence, + ;; right? + (if (fd-stream-bivalent-p fd-stream) + (decf (buffer-head (fd-stream-ibuf fd-stream)) + (fd-stream-character-size fd-stream arg1)) + (setf (fd-stream-unread fd-stream) arg1)) (setf (fd-stream-listen fd-stream) t)) (:close - (cond (arg1 ; We got us an abort on our hands. + ;; Drop input buffers + (setf (ansi-stream-in-index fd-stream) +ansi-stream-in-buffer-length+ + (ansi-stream-cin-buffer fd-stream) nil + (ansi-stream-in-buffer fd-stream) nil) + (cond (arg1 + ;; We got us an abort on our hands. (let ((outputp (fd-stream-obuf fd-stream)) (file (fd-stream-file fd-stream)) (orig (fd-stream-original fd-stream))) @@ -2310,6 +2242,7 @@ :buffering buffering :dual-channel-p dual-channel-p :external-format external-format + :bivalent-p (eq element-type :default) :char-size (external-format-char-size external-format) :timeout (if timeout @@ -2387,13 +2320,17 @@ (:io (values t t sb!unix:o_rdwr)) (:probe (values t nil sb!unix:o_rdonly))) (declare (type index mask)) - (let* ((pathname (merge-pathnames filename)) - (namestring - (cond ((unix-namestring pathname input)) - ((and input (eq if-does-not-exist :create)) - (unix-namestring pathname nil)) - ((and (eq direction :io) (not if-does-not-exist-given)) - (unix-namestring pathname nil))))) + (let* (;; PATHNAME is the pathname we associate with the stream. + (pathname (merge-pathnames filename)) + (physical (physicalize-pathname pathname)) + (truename (probe-file physical)) + ;; NAMESTRING is the native namestring we open the file with. + (namestring (cond (truename + (native-namestring truename :as-file t)) + ((or (not input) + (and input (eq if-does-not-exist :create)) + (and (eq direction :io) (not if-does-not-exist-given))) + (native-namestring physical :as-file t))))) ;; Process if-exists argument if we are doing any output. (cond (output (unless if-exists-given @@ -2458,7 +2395,7 @@ (when (and output (= (logand orig-mode #o170000) #o40000)) (error 'simple-file-error - :pathname namestring + :pathname pathname :format-control "can't open ~S for output: is a directory" :format-arguments (list namestring))) @@ -2611,7 +2548,8 @@ (cond (new-name (setf (fd-stream-pathname stream) new-name) (setf (fd-stream-file stream) - (unix-namestring new-name nil)) + (native-namestring (physicalize-pathname new-name) + :as-file t)) t) (t (fd-stream-pathname stream)))))