X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ffd-stream.lisp;h=ed1140d0bef573a5cc675f0cb0b7b75d7b00ac9a;hb=fd324a9d981355d8bc10d2bd469cb54c4c9108fd;hp=0ef85b1ff67a2d209316d788fa15ed764552d603;hpb=bb8e5ebfbc575236a3061f0a2d38ad616d246a87;p=sbcl.git diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 0ef85b1..ed1140d 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -77,8 +77,9 @@ ;; sources where input and output aren't related). non-NIL means ;; don't clear input buffer. (dual-channel-p nil) - ;; character position (if known) - (char-pos nil :type (or index null)) + ;; character position if known -- this may run into bignums, but + ;; we probably should flip it into null then for efficiency's sake... + (char-pos nil :type (or unsigned-byte null)) ;; T if input is waiting on FD. :EOF if we hit EOF. (listen nil :type (member nil t :eof)) @@ -97,8 +98,8 @@ ;; output flushed, but not written due to non-blocking io? (output-later nil) (handler nil) - ;; timeout specified for this stream, or NIL if none - (timeout nil :type (or index null)) + ;; timeout specified for this stream as seconds or NIL if none + (timeout nil :type (or single-float null)) ;; pathname of the file this stream is opened to (returned by PATHNAME) (pathname nil :type (or pathname null)) (external-format :default) @@ -140,6 +141,16 @@ :stream stream :code code)) +(defun c-string-encoding-error (external-format code) + (error 'c-string-encoding-error + :external-format external-format + :code code)) + +(defun c-string-decoding-error (external-format octets) + (error 'c-string-decoding-error + :external-format external-format + :octets octets)) + ;;; Returning true goes into end of file handling, false will enter another ;;; round of input buffer filling followed by re-entering character decode. (defun stream-decoding-error-and-handle (stream octet-count) @@ -169,6 +180,16 @@ (format stream "~@")) (throw 'output-nothing nil)))) +(defun external-format-encoding-error (stream code) + (if (streamp stream) + (stream-encoding-error-and-handle stream code) + (c-string-encoding-error stream code))) + +(defun external-format-decoding-error (stream octet-count) + (if (streamp stream) + (stream-decoding-error stream octet-count) + (c-string-decoding-error stream octet-count))) + ;;; This is called by the server when we can write to the given file ;;; descriptor. Attempt to write the data again. If it worked, remove ;;; the data from the OUTPUT-LATER list. If it didn't work, something @@ -187,6 +208,9 @@ start length) (cond ((not count) + #!+win32 + (simple-stream-perror "couldn't write to ~S" stream errno) + #!-win32 (if (= errno sb!unix:ewouldblock) (error "Write would have blocked, but SERVER told us to go.") (simple-stream-perror "couldn't write to ~S" stream errno))) @@ -200,7 +224,7 @@ end) (fd-stream-output-later stream)))))) (unless (fd-stream-output-later stream) - (sb!sys:remove-fd-handler (fd-stream-handler stream)) + (remove-fd-handler (fd-stream-handler stream)) (setf (fd-stream-handler stream) nil))) ;;; Arange to output the string when we can write on the file descriptor. @@ -209,7 +233,7 @@ (setf (fd-stream-output-later stream) (list (list base start end reuse-sap))) (setf (fd-stream-handler stream) - (sb!sys:add-fd-handler (fd-stream-fd stream) + (add-fd-handler (fd-stream-fd stream) :output (lambda (fd) (declare (ignore fd)) @@ -230,19 +254,19 @@ (type (or system-area-pointer (simple-array * (*))) base) (type index start end)) (if (not (null (fd-stream-output-later stream))) ; something buffered. - (progn - (output-later stream base start end reuse-sap) - ;; ### check to see whether any of this noise can be output - ) + (output-later stream base start end reuse-sap) + ;; ### check to see whether any of this noise can be output (let ((length (- end start))) (multiple-value-bind (count errno) (sb!unix:unix-write (fd-stream-fd stream) base start length) (cond ((not count) + #!+win32 + (simple-stream-perror "Couldn't write to ~S" stream errno) + #!-win32 (if (= errno sb!unix:ewouldblock) (output-later stream base start end reuse-sap) - (simple-stream-perror "couldn't write to ~S" - stream - errno))) + (simple-stream-perror "Couldn't write to ~S" + stream errno))) ((not (eql count length)) (output-later stream base (the index (+ start count)) end reuse-sap))))))) @@ -254,6 +278,10 @@ (frob-output stream (fd-stream-obuf-sap stream) 0 length t) (setf (fd-stream-obuf-tail stream) 0)))) +(defun fd-stream-output-finished-p (stream) + (and (zerop (fd-stream-obuf-tail stream)) + (not (fd-stream-output-later stream)))) + (defmacro output-wrapper/variable-width ((stream size buffering restart) &body body) (let ((stream-var (gensym))) @@ -325,6 +353,7 @@ (intern (format nil name-fmt (string (car buffering)))))) `(progn (defun ,function (stream byte) + (declare (ignorable byte)) (output-wrapper/variable-width (stream ,size ,buffering ,restart) ,@body)) (setf *output-routines* @@ -429,6 +458,24 @@ (fd-stream-obuf-tail stream)) byte)) +#+#.(cl:if (cl:= sb!vm:n-word-bits 64) '(and) '(or)) +(progn + (def-output-routines ("OUTPUT-UNSIGNED-LONG-LONG-~A-BUFFERED" + 8 + nil + (:none (unsigned-byte 64)) + (:full (unsigned-byte 64))) + (setf (sap-ref-64 (fd-stream-obuf-sap stream) (fd-stream-obuf-tail stream)) + byte)) + (def-output-routines ("OUTPUT-SIGNED-LONG-LONG-~A-BUFFERED" + 8 + nil + (:none (signed-byte 64)) + (:full (signed-byte 64))) + (setf (signed-sap-ref-64 (fd-stream-obuf-sap stream) + (fd-stream-obuf-tail stream)) + byte))) + ;;; Do the actual output. If there is space to buffer the string, ;;; buffer it. If the string would normally fit in the buffer, but ;;; doesn't because of other stuff in the buffer, flush the old noise @@ -502,15 +549,20 @@ (end (or end (length (the vector thing))))) (declare (fixnum start end)) (if (stringp thing) - (let ((last-newline (and (find #\newline (the simple-string thing) - :start start :end end) - ;; FIXME why do we need both calls? - ;; Is find faster forwards than - ;; position is backwards? - (position #\newline (the simple-string thing) - :from-end t - :start start - :end end)))) + (let ((last-newline + (string-dispatch (simple-base-string + #!+sb-unicode + (simple-array character) + string) + thing + (and (find #\newline thing :start start :end end) + ;; FIXME why do we need both calls? + ;; Is find faster forwards than + ;; position is backwards? + (position #\newline thing + :from-end t + :start start + :end end))))) (if (and (typep thing 'base-string) (eq (fd-stream-external-format stream) :latin-1)) (ecase (fd-stream-buffering stream) @@ -546,13 +598,22 @@ element-type, string input function name, character input function name, and string output function name.") +(defun get-external-format (external-format) + (dolist (entry *external-formats*) + (when (member external-format (first entry)) + (return entry)))) + +(defun get-external-format-function (external-format index) + (let ((entry (get-external-format external-format))) + (when entry (nth index entry)))) + ;;; Find an output routine to use given the type and buffering. Return ;;; as multiple values the routine, the real type transfered, and the ;;; number of bytes per element. (defun pick-output-routine (type buffering &optional external-format) (when (subtypep type 'character) - (dolist (entry *external-formats*) - (when (member external-format (first entry)) + (let ((entry (get-external-format external-format))) + (when entry (return-from pick-output-routine (values (symbol-function (nth (ecase buffering (:none 4) @@ -630,72 +691,107 @@ ;;; per element. (defvar *input-routines* ()) -;;; Fill the input buffer, and return the number of bytes read. Throw -;;; to EOF-INPUT-CATCHER if the eof was reached. Drop into -;;; SYSTEM:SERVER if necessary. +;;; Return whether a primitive partial read operation on STREAM's FD +;;; would (probably) block. Signal a `simple-stream-error' if the +;;; system call implementing this operation fails. +;;; +;;; It is "may" instead of "would" because "would" is not quite +;;; correct on win32. However, none of the places that use it require +;;; further assurance than "may" versus "will definitely not". +(defun sysread-may-block-p (stream) + #+win32 + ;; This answers T at EOF on win32, I think. + (not (sb!win32:fd-listen (fd-stream-fd stream))) + #-win32 + (sb!unix:with-restarted-syscall (count errno) + (sb!alien:with-alien ((read-fds (sb!alien:struct sb!unix:fd-set))) + (sb!unix:fd-zero read-fds) + (sb!unix:fd-set (fd-stream-fd stream) read-fds) + (sb!unix:unix-fast-select (1+ (fd-stream-fd stream)) + (sb!alien:addr read-fds) + nil nil 0 0)) + (case count + ((1) nil) + ((0) t) + (otherwise + (simple-stream-perror "couldn't check whether ~S is readable" + stream + errno))))) + +;;; If the read would block wait (using SERVE-EVENT) till input is available, +;;; then fill the input buffer, and return the number of bytes read. Throws +;;; to EOF-INPUT-CATCHER if the eof was reached. (defun refill-buffer/fd (stream) (let ((fd (fd-stream-fd stream)) - (ibuf-sap (fd-stream-ibuf-sap stream)) - (buflen (fd-stream-ibuf-length stream)) - (head (fd-stream-ibuf-head stream)) - (tail (fd-stream-ibuf-tail stream))) - (declare (type index head tail)) - (unless (zerop head) - (cond ((eql head tail) - (setf head 0) - (setf tail 0) - (setf (fd-stream-ibuf-head stream) 0) - (setf (fd-stream-ibuf-tail stream) 0)) - (t - (decf tail head) - (system-area-ub8-copy ibuf-sap head - ibuf-sap 0 tail) - (setf head 0) - (setf (fd-stream-ibuf-head stream) 0) - (setf (fd-stream-ibuf-tail stream) tail)))) - (setf (fd-stream-listen stream) nil) - (sb!unix:with-restarted-syscall (count errno) - ;; FIXME: Judging from compiler warnings, this WITH-ALIEN form expands - ;; into something which uses the not-yet-defined type - ;; (SB!ALIEN-INTERNALS:ALIEN (* (SB!ALIEN:STRUCT SB!UNIX:FD-SET))). - ;; This is probably inefficient and unsafe and generally bad, so - ;; try to find some way to make that type known before - ;; this is compiled. - (sb!alien:with-alien ((read-fds (sb!alien:struct sb!unix:fd-set))) - (sb!unix:fd-zero read-fds) - (sb!unix:fd-set fd read-fds) - (sb!unix:unix-fast-select (1+ fd) - (sb!alien:addr read-fds) - nil nil 0 0)) - (case count - (1) - (0 - (unless (sb!sys:wait-until-fd-usable - fd :input (fd-stream-timeout stream)) - (error 'io-timeout :stream stream :direction :read))) - (t - (simple-stream-perror "couldn't check whether ~S is readable" - stream - errno)))) - (multiple-value-bind (count errno) - (sb!unix:unix-read fd - (sb!sys:int-sap (+ (sb!sys:sap-int ibuf-sap) tail)) - (- buflen tail)) - (cond ((null count) - (if (eql errno sb!unix:ewouldblock) - (progn - (unless (sb!sys:wait-until-fd-usable - fd :input (fd-stream-timeout stream)) - (error 'io-timeout :stream stream :direction :read)) - (refill-buffer/fd stream)) - (simple-stream-perror "couldn't read from ~S" stream errno))) - ((zerop count) - (setf (fd-stream-listen stream) :eof) - (/show0 "THROWing EOF-INPUT-CATCHER") - (throw 'eof-input-catcher nil)) - (t - (incf (fd-stream-ibuf-tail stream) count) - count))))) + (errno 0) + (count 0)) + (tagbody + ;; Check for blocking input before touching the stream, as if + ;; we happen to wait we are liable to be interrupted, and the + ;; interrupt handler may use the same stream. + (if (sysread-may-block-p stream) + (go :wait-for-input) + (go :main)) + ;; These (:CLOSED-FLAME and :READ-ERROR) tags are here so what + ;; we can signal errors outside the WITHOUT-INTERRUPTS. + :closed-flame + (closed-flame stream) + :read-error + (simple-stream-perror "couldn't read from ~S" stream errno) + :wait-for-input + ;; This tag is here so we can unwind outside the WITHOUT-INTERRUPTS + ;; to wait for input if read tells us EWOULDBLOCK. + (unless (wait-until-fd-usable fd :input (fd-stream-timeout stream)) + (signal-timeout 'io-timeout :stream stream :direction :read + :seconds (fd-stream-timeout stream))) + :main + ;; Since the read should not block, we'll disable the + ;; interrupts here, so that we don't accidentally unwind and + ;; leave the stream in an inconsistent state. + (without-interrupts + (let ((ibuf-sap (fd-stream-ibuf-sap stream)) + (buflen (fd-stream-ibuf-length stream)) + (head (fd-stream-ibuf-head stream)) + (tail (fd-stream-ibuf-tail stream))) + (declare (type index head tail)) + ;; Check the SAP: if it is null, then someone has closed + ;; the stream from underneath us. This is not ment to fix + ;; multithreaded races, but to deal with interrupt handlers + ;; closing the stream. + (unless ibuf-sap + (go :closed-flame)) + (unless (zerop head) + (cond ((eql head tail) + (setf head 0 + tail 0 + (fd-stream-ibuf-head stream) 0 + (fd-stream-ibuf-tail stream) 0)) + (t + (decf tail head) + (system-area-ub8-copy ibuf-sap head + ibuf-sap 0 tail) + (setf head 0 + (fd-stream-ibuf-head stream) 0 + (fd-stream-ibuf-tail stream) tail)))) + (setf (fd-stream-listen stream) nil) + (setf (values count errno) + (sb!unix:unix-read fd (int-sap (+ (sap-int ibuf-sap) tail)) + (- buflen tail))) + (cond ((null count) + #!+win32 + (go :read-error) + #!-win32 + (if (eql errno sb!unix:ewouldblock) + (go :wait-for-input) + (go :read-error))) + ((zerop count) + (setf (fd-stream-listen stream) :eof) + (/show0 "THROWing EOF-INPUT-CATCHER") + (throw 'eof-input-catcher nil)) + (t + ;; Success! + (incf (fd-stream-ibuf-tail stream) count)))))) + count)) ;;; Make sure there are at least BYTES number of bytes in the input ;;; buffer. Keep calling REFILL-BUFFER/FD until that condition is met. @@ -736,6 +832,7 @@ ,stream-var) (fd-stream-ibuf-head ,stream-var)))) + (declare (ignorable byte)) (setq size ,bytes) (input-at-least ,stream-var size) (setq ,element-var (locally ,@read-forms)) @@ -840,7 +937,14 @@ ((signed-byte 32) 4 sap head) (signed-sap-ref-32 sap head)) - +#+#.(cl:if (cl:= sb!vm:n-word-bits 64) '(and) '(or)) +(progn + (def-input-routine input-unsigned-64bit-byte + ((unsigned-byte 64) 8 sap head) + (sap-ref-64 sap head)) + (def-input-routine input-signed-64bit-byte + ((signed-byte 64) 8 sap head) + (signed-sap-ref-64 sap head))) ;;; Find an input routine to use given the type. Return as multiple ;;; values the routine, the real type transfered, and the number of @@ -971,6 +1075,30 @@ (return-from fd-stream-resync (funcall (symbol-function (eighth entry)) stream))))) +(defun get-fd-stream-character-sizer (stream) + (dolist (entry *external-formats*) + (when (member (fd-stream-external-format stream) (first entry)) + (return-from get-fd-stream-character-sizer (ninth entry))))) + +(defun fd-stream-character-size (stream char) + (let ((sizer (get-fd-stream-character-sizer stream))) + (when sizer (funcall sizer char)))) + +(defun fd-stream-string-size (stream string) + (let ((sizer (get-fd-stream-character-sizer stream))) + (when sizer + (loop for char across string summing (funcall sizer char))))) + +(defun find-external-format (external-format) + (when external-format + (find external-format *external-formats* :test #'member :key #'car))) + +(defun variable-width-external-format-p (ef-entry) + (when (eighth ef-entry) t)) + +(defun bytes-for-char-fun (ef-entry) + (if ef-entry (symbol-function (ninth ef-entry)) (constantly 1))) + ;;; FIXME: OAOOM here vrt. *EXTERNAL-FORMAT-FUNCTIONS* in fd-stream.lisp (defmacro define-external-format (external-format size output-restart out-expr in-expr) @@ -978,8 +1106,15 @@ (out-function (symbolicate "OUTPUT-BYTES/" name)) (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name))) (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name)) - (in-char-function (symbolicate "INPUT-CHAR/" name))) + (in-char-function (symbolicate "INPUT-CHAR/" name)) + (size-function (symbolicate "BYTES-FOR-CHAR/" name)) + (read-c-string-function (symbolicate "READ-FROM-C-STRING/" name)) + (output-c-string-function (symbolicate "OUTPUT-TO-C-STRING/" name)) + (n-buffer (gensym "BUFFER"))) `(progn + (defun ,size-function (byte) + (declare (ignore byte)) + ,size) (defun ,out-function (stream string flush-p start end) (let ((start (or start 0)) (end (or end (length string)))) @@ -988,26 +1123,38 @@ (> (fd-stream-ibuf-tail stream) (fd-stream-ibuf-head stream))) (file-position stream (file-position stream))) - (when (< end start) - (error ":END before :START!")) + (unless (<= 0 start end (length string)) + (signal-bounding-indices-bad-error string start end)) (do () ((= end start)) (setf (fd-stream-obuf-tail stream) - (do* ((len (fd-stream-obuf-length stream)) - (sap (fd-stream-obuf-sap stream)) - (tail (fd-stream-obuf-tail stream))) - ((or (= start end) (< (- len tail) 4)) tail) - ,(if output-restart - `(catch 'output-nothing - (let* ((byte (aref string start)) - (bits (char-code byte))) - ,out-expr - (incf tail ,size))) - `(let* ((byte (aref string start)) - (bits (char-code byte))) - ,out-expr - (incf tail ,size))) - (incf start))) + (string-dispatch (simple-base-string + #!+sb-unicode + (simple-array character) + string) + string + (let ((len (fd-stream-obuf-length stream)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + (declare (type index tail) + ;; STRING bounds have already been checked. + (optimize (safety 0))) + (loop + (,@(if output-restart + `(catch 'output-nothing) + `(progn)) + (do* () + ((or (= start end) (< (- len tail) 4))) + (let* ((byte (aref string start)) + (bits (char-code byte))) + ,out-expr + (incf tail ,size) + (incf start))) + ;; Exited from the loop normally + (return tail)) + ;; Exited via CATCH. Skip the current character + ;; and try the inner loop again. + (incf start))))) (when (< start end) (flush-output-buffer stream))) (when flush-p @@ -1026,52 +1173,109 @@ (tail (fd-stream-obuf-tail stream))) ,out-expr)) (defun ,in-function (stream buffer start requested eof-error-p - &aux (total-copied 0)) - (declare (type fd-stream stream)) - (declare (type index start requested total-copied)) - (declare (type (simple-array character (#.+ansi-stream-in-buffer-length+)) buffer)) + &aux (index start) (end (+ start requested))) + (declare (type fd-stream stream) + (type index start requested index end) + (type + (simple-array character (#.+ansi-stream-in-buffer-length+)) + buffer)) (let ((unread (fd-stream-unread stream))) (when unread - (setf (aref buffer start) unread) + (setf (aref buffer index) unread) (setf (fd-stream-unread stream) nil) (setf (fd-stream-listen stream) nil) - (incf total-copied))) + (incf index))) (do () (nil) (let* ((head (fd-stream-ibuf-head stream)) (tail (fd-stream-ibuf-tail stream)) (sap (fd-stream-ibuf-sap stream))) - (declare (type index head tail)) + (declare (type index head tail) + (type system-area-pointer sap)) ;; Copy data from stream buffer into user's buffer. - (do () - ((or (= tail head) (= requested total-copied))) + (dotimes (i (min (truncate (- tail head) ,size) + (- end index))) + (declare (optimize speed)) (let* ((byte (sap-ref-8 sap head))) - (when (> ,size (- tail head)) - (return)) - (setf (aref buffer (+ start total-copied)) ,in-expr) - (incf total-copied) + (setf (aref buffer index) ,in-expr) + (incf index) (incf head ,size))) (setf (fd-stream-ibuf-head stream) head) ;; Maybe we need to refill the stream buffer. - (cond ( ;; If there were enough data in the stream buffer, we're done. - (= total-copied requested) - (return total-copied)) + (cond ( ;; If there was enough data in the stream buffer, we're done. + (= index end) + (return (- index start))) ( ;; If EOF, we're done in another way. (null (catch 'eof-input-catcher (refill-buffer/fd stream))) (if eof-error-p (error 'end-of-file :stream stream) - (return total-copied))) + (return (- index start)))) ;; Otherwise we refilled the stream buffer, so fall ;; through into another pass of the loop. )))) (def-input-routine ,in-char-function (character ,size sap head) (let ((byte (sap-ref-8 sap head))) ,in-expr)) + (defun ,read-c-string-function (sap element-type) + (declare (type system-area-pointer sap) + (type (member character base-char) element-type)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((stream ,name) + (length + (loop for head of-type index upfrom 0 by ,size + for count of-type index upto (1- array-dimension-limit) + for byte = (sap-ref-8 sap head) + for char of-type character = ,in-expr + until (zerop (char-code char)) + finally (return count))) + ;; Inline the common cases + (string (make-string length :element-type element-type))) + (declare (ignorable stream) + (type index length) + (type simple-string string)) + (/show0 before-copy-loop) + (loop for head of-type index upfrom 0 by ,size + for index of-type index below length + for byte = (sap-ref-8 sap head) + for char of-type character = ,in-expr + do (setf (aref string index) char)) + string))) ;; last loop rewrite to dotimes? + (defun ,output-c-string-function (string) + (declare (type simple-string string)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((length (length string)) + (,n-buffer (make-array (* (1+ length) ,size) + :element-type '(unsigned-byte 8))) + ;; This SAP-taking may seem unsafe without pinning, + ;; but since the variable name is a gensym OUT-EXPR + ;; cannot close over it even if it tried, so the buffer + ;; will always be either in a register or on stack. + ;; FIXME: But ...this is true on x86oids only! + (sap (vector-sap ,n-buffer)) + (tail 0) + (stream ,name)) + (declare (type index length tail) + (type system-area-pointer sap)) + (dotimes (i length) + (let* ((byte (aref string i)) + (bits (char-code byte))) + (declare (ignorable byte bits)) + ,out-expr) + (incf tail ,size)) + (let* ((bits 0) + (byte (code-char bits))) + (declare (ignorable bits byte)) + ,out-expr) + ,n-buffer))) (setf *external-formats* (cons '(,external-format ,in-function ,in-char-function ,out-function ,@(mapcar #'(lambda (buffering) (intern (format nil format (string buffering)))) - '(:none :line :full))) + '(:none :line :full)) + nil ; no resync-function + ,size-function ,read-c-string-function ,output-c-string-function) *external-formats*))))) (defmacro define-external-format/variable-width @@ -1082,8 +1286,15 @@ (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name))) (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name)) (in-char-function (symbolicate "INPUT-CHAR/" name)) - (resync-function (symbolicate "RESYNC/" name))) + (resync-function (symbolicate "RESYNC/" name)) + (size-function (symbolicate "BYTES-FOR-CHAR/" name)) + (read-c-string-function (symbolicate "READ-FROM-C-STRING/" name)) + (output-c-string-function (symbolicate "OUTPUT-TO-C-STRING/" name)) + (n-buffer (gensym "BUFFER"))) `(progn + (defun ,size-function (byte) + (declare (ignorable byte)) + ,out-size-expr) (defun ,out-function (stream string flush-p start end) (let ((start (or start 0)) (end (or end (length string)))) @@ -1092,28 +1303,39 @@ (> (fd-stream-ibuf-tail stream) (fd-stream-ibuf-head stream))) (file-position stream (file-position stream))) - (when (< end start) - (error ":END before :START!")) + (unless (<= 0 start end (length string)) + (signal-bounding-indices-bad-error string start end)) (do () ((= end start)) (setf (fd-stream-obuf-tail stream) - (do* ((len (fd-stream-obuf-length stream)) - (sap (fd-stream-obuf-sap stream)) - (tail (fd-stream-obuf-tail stream))) - ((or (= start end) (< (- len tail) 4)) tail) - ,(if output-restart - `(catch 'output-nothing - (let* ((byte (aref string start)) - (bits (char-code byte)) - (size ,out-size-expr)) - ,out-expr - (incf tail size))) - `(let* ((byte (aref string start)) - (bits (char-code byte)) - (size ,out-size-expr)) - ,out-expr - (incf tail size))) - (incf start))) + (string-dispatch (simple-base-string + #!+sb-unicode + (simple-array character) + string) + string + (let ((len (fd-stream-obuf-length stream)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + (declare (type index tail) + ;; STRING bounds have already been checked. + (optimize (safety 0))) + (loop + (,@(if output-restart + `(catch 'output-nothing) + `(progn)) + (do* () + ((or (= start end) (< (- len tail) 4))) + (let* ((byte (aref string start)) + (bits (char-code byte)) + (size ,out-size-expr)) + ,out-expr + (incf tail size) + (incf start))) + ;; Exited from the loop normally + (return tail)) + ;; Exited via CATCH. Skip the current character + ;; and try the inner loop again. + (incf start))))) (when (< start end) (flush-output-buffer stream))) (when flush-p @@ -1134,9 +1356,11 @@ ,out-expr)) (defun ,in-function (stream buffer start requested eof-error-p &aux (total-copied 0)) - (declare (type fd-stream stream)) - (declare (type index start requested total-copied)) - (declare (type (simple-array character (#.+ansi-stream-in-buffer-length+)) buffer)) + (declare (type fd-stream stream) + (type index start requested total-copied) + (type + (simple-array character (#.+ansi-stream-in-buffer-length+)) + buffer)) (let ((unread (fd-stream-unread stream))) (when unread (setf (aref buffer start) unread) @@ -1148,7 +1372,6 @@ (let* ((head (fd-stream-ibuf-head stream)) (tail (fd-stream-ibuf-tail stream)) (sap (fd-stream-ibuf-sap stream)) - (head-start head) (decode-break-reason nil)) (declare (type index head tail)) ;; Copy data from stream buffer into user's buffer. @@ -1157,6 +1380,7 @@ (setf decode-break-reason (block decode-break-reason (let ((byte (sap-ref-8 sap head))) + (declare (ignorable byte)) (setq size ,in-size-expr) (when (> size (- tail head)) (return)) @@ -1165,17 +1389,22 @@ (incf head size)) nil)) (setf (fd-stream-ibuf-head stream) head) - (when (and decode-break-reason - (= head head-start)) + (when decode-break-reason + ;; If we've already read some characters on when the invalid + ;; code sequence is detected, we return immediately. The + ;; handling of the error is deferred until the next call + ;; (where this check will be false). This allows establishing + ;; high-level handlers for decode errors (for example + ;; automatically resyncing in Lisp comments). + (when (plusp total-copied) + (return-from ,in-function total-copied)) (when (stream-decoding-error-and-handle stream decode-break-reason) (if eof-error-p (error 'end-of-file :stream stream) (return-from ,in-function total-copied))) (setf head (fd-stream-ibuf-head stream)) - (setf tail (fd-stream-ibuf-tail stream))) - (when (plusp total-copied) - (return-from ,in-function total-copied))) + (setf tail (fd-stream-ibuf-tail stream)))) (setf (fd-stream-ibuf-head stream) head) ;; Maybe we need to refill the stream buffer. (cond ( ;; If there were enough data in the stream buffer, we're done. @@ -1196,24 +1425,113 @@ ,in-size-expr sap head) (let ((byte (sap-ref-8 sap head))) + (declare (ignorable byte)) ,in-expr)) (defun ,resync-function (stream) - (loop (input-at-least stream 1) + (loop (input-at-least stream 2) (incf (fd-stream-ibuf-head stream)) (unless (block decode-break-reason (let* ((sap (fd-stream-ibuf-sap stream)) (head (fd-stream-ibuf-head stream)) (byte (sap-ref-8 sap head)) (size ,in-size-expr)) - ,in-expr) + (declare (ignorable byte)) + (input-at-least stream size) + (let ((sap (fd-stream-ibuf-sap stream)) + (head (fd-stream-ibuf-head stream))) + ,in-expr)) nil) (return)))) + (defun ,read-c-string-function (sap element-type) + (declare (type system-area-pointer sap)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((stream ,name) + (size 0) (head 0) (byte 0) (char nil) + (decode-break-reason nil) + (length (dotimes (count (1- ARRAY-DIMENSION-LIMIT) count) + (setf decode-break-reason + (block decode-break-reason + (setf byte (sap-ref-8 sap head) + size ,in-size-expr + char ,in-expr) + (incf head size) + nil)) + (when decode-break-reason + (c-string-decoding-error ,name decode-break-reason)) + (when (zerop (char-code char)) + (return count)))) + (string (make-string length :element-type element-type))) + (declare (ignorable stream) + (type index head length) ;; size + (type (unsigned-byte 8) byte) + (type (or null character) char) + (type string string)) + (setf head 0) + (dotimes (index length string) + (setf decode-break-reason + (block decode-break-reason + (setf byte (sap-ref-8 sap head) + size ,in-size-expr + char ,in-expr) + (incf head size) + nil)) + (when decode-break-reason + (c-string-decoding-error ,name decode-break-reason)) + (setf (aref string index) char))))) + + (defun ,output-c-string-function (string) + (declare (type simple-string string)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((length (length string)) + (char-length (make-array (1+ length) :element-type 'index)) + (buffer-length + (+ (loop for i of-type index below length + for byte of-type character = (aref string i) + for bits = (char-code byte) + sum (setf (aref char-length i) + (the index ,out-size-expr))) + (let* ((byte (code-char 0)) + (bits (char-code byte))) + (declare (ignorable byte bits)) + (setf (aref char-length length) + (the index ,out-size-expr))))) + (tail 0) + (,n-buffer (make-array buffer-length + :element-type '(unsigned-byte 8))) + ;; This SAP-taking may seem unsafe without pinning, + ;; but since the variable name is a gensym OUT-EXPR + ;; cannot close over it even if it tried, so the buffer + ;; will always be either in a register or on stack. + ;; FIXME: But ...this is true on x86oids only! + (sap (vector-sap ,n-buffer)) + stream) + (declare (type index length buffer-length tail) + (type system-area-pointer sap) + (type null stream) + (ignorable stream)) + (loop for i of-type index below length + for byte of-type character = (aref string i) + for bits = (char-code byte) + for size of-type index = (aref char-length i) + do (prog1 + ,out-expr + (incf tail size))) + (let* ((bits 0) + (byte (code-char bits)) + (size (aref char-length length))) + (declare (ignorable bits byte size)) + ,out-expr) + ,n-buffer))) + (setf *external-formats* (cons '(,external-format ,in-function ,in-char-function ,out-function ,@(mapcar #'(lambda (buffering) (intern (format nil format (string buffering)))) '(:none :line :full)) - ,resync-function) + ,resync-function + ,size-function ,read-c-string-function ,output-c-string-function) *external-formats*))))) ;;; Multiple names for the :ISO{,-}8859-* families are needed because on @@ -1222,7 +1540,7 @@ (define-external-format (:latin-1 :latin1 :iso-8859-1 :iso8859-1) 1 t (if (>= bits 256) - (stream-encoding-error-and-handle stream bits) + (external-format-encoding-error stream bits) (setf (sap-ref-8 sap tail) bits)) (code-char byte)) @@ -1230,7 +1548,7 @@ :iso-646 :iso-646-us :|646|) 1 t (if (>= bits 128) - (stream-encoding-error-and-handle stream bits) + (external-format-encoding-error stream bits) (setf (sap-ref-8 sap tail) bits)) (code-char byte)) @@ -1261,7 +1579,7 @@ (define-external-format (:ebcdic-us :ibm-037 :ibm037) 1 t (if (>= bits 256) - (stream-encoding-error-and-handle stream bits) + (external-format-encoding-error stream bits) (setf (sap-ref-8 sap tail) (aref reverse-table bits))) (aref table byte))) @@ -1292,10 +1610,10 @@ (if (< bits 256) (if (= bits (char-code (aref latin-9-table bits))) bits - (stream-encoding-error-and-handle stream byte)) + (external-format-encoding-error stream byte)) (if (= (aref latin-9-reverse-1 (logand bits 15)) bits) (aref latin-9-reverse-2 (logand bits 15)) - (stream-encoding-error-and-handle stream byte)))) + (external-format-encoding-error stream byte)))) (aref latin-9-table byte))) (define-external-format/variable-width (:utf-8 :utf8) nil @@ -1512,26 +1830,43 @@ (declare (ignore arg2)) (case operation (:listen - (or (not (eql (fd-stream-ibuf-head fd-stream) - (fd-stream-ibuf-tail fd-stream))) - (fd-stream-listen fd-stream) - (setf (fd-stream-listen fd-stream) - (eql (sb!unix:with-restarted-syscall () - (sb!alien:with-alien ((read-fds (sb!alien:struct - sb!unix:fd-set))) - (sb!unix:fd-zero read-fds) - (sb!unix:fd-set (fd-stream-fd fd-stream) read-fds) - (sb!unix:unix-fast-select (1+ (fd-stream-fd fd-stream)) - (sb!alien:addr read-fds) - nil nil 0 0))) - 1)))) + (labels ((do-listen () + (or (not (eql (fd-stream-ibuf-head fd-stream) + (fd-stream-ibuf-tail fd-stream))) + (fd-stream-listen fd-stream) + #!+win32 + (sb!win32:fd-listen (fd-stream-fd fd-stream)) + #!-win32 + ;; If the read can block, LISTEN will certainly return NIL. + (if (sysread-may-block-p fd-stream) + nil + ;; Otherwise select(2) and CL:LISTEN have slightly + ;; different semantics. The former returns that an FD + ;; is readable when a read operation wouldn't block. + ;; That includes EOF. However, LISTEN must return NIL + ;; at EOF. + (progn (catch 'eof-input-catcher + ;; r-b/f too calls select, but it shouldn't + ;; block as long as read can return once w/o + ;; blocking + (refill-buffer/fd fd-stream)) + ;; At this point either IBUF-HEAD != IBUF-TAIL + ;; and FD-STREAM-LISTEN is NIL, in which case + ;; we should return T, or IBUF-HEAD == + ;; IBUF-TAIL and FD-STREAM-LISTEN is :EOF, in + ;; which case we should return :EOF for this + ;; call and all future LISTEN call on this stream. + ;; Call ourselves again to determine which case + ;; applies. + (do-listen)))))) + (do-listen))) (:unread (setf (fd-stream-unread fd-stream) arg1) (setf (fd-stream-listen fd-stream) t)) (:close - (cond (arg1 ; We got us an abort on our hands. + (cond (arg1 ; We got us an abort on our hands. (when (fd-stream-handler fd-stream) - (sb!sys:remove-fd-handler (fd-stream-handler fd-stream)) + (remove-fd-handler (fd-stream-handler fd-stream)) (setf (fd-stream-handler fd-stream) nil)) ;; We can't do anything unless we know what file were ;; dealing with, and we don't want to do anything @@ -1604,29 +1939,22 @@ (setf (fd-stream-unread fd-stream) nil) (setf (fd-stream-ibuf-head fd-stream) 0) (setf (fd-stream-ibuf-tail fd-stream) 0) + #!+win32 + (progn + (sb!win32:fd-clear-input (fd-stream-fd fd-stream)) + (setf (fd-stream-listen fd-stream) nil)) + #!-win32 (catch 'eof-input-catcher - (loop - (let ((count (sb!unix:with-restarted-syscall () - (sb!alien:with-alien ((read-fds (sb!alien:struct - sb!unix:fd-set))) - (sb!unix:fd-zero read-fds) - (sb!unix:fd-set (fd-stream-fd fd-stream) read-fds) - (sb!unix:unix-fast-select (1+ (fd-stream-fd fd-stream)) - (sb!alien:addr read-fds) - nil nil 0 0))))) - (cond ((eql count 1) - (refill-buffer/fd fd-stream) - (setf (fd-stream-ibuf-head fd-stream) 0) - (setf (fd-stream-ibuf-tail fd-stream) 0)) - (t - (return t))))))) + (loop until (sysread-may-block-p fd-stream) + do + (refill-buffer/fd fd-stream) + (setf (fd-stream-ibuf-head fd-stream) 0) + (setf (fd-stream-ibuf-tail fd-stream) 0)) + t)) (:force-output (flush-output-buffer fd-stream)) (:finish-output - (flush-output-buffer fd-stream) - (do () - ((null (fd-stream-output-later fd-stream))) - (sb!sys:serve-all-events))) + (finish-fd-stream-output fd-stream)) (:element-type (fd-stream-element-type fd-stream)) (:external-format @@ -1650,7 +1978,7 @@ :format-control "~S is not a stream associated with a file." :format-arguments (list fd-stream))) (multiple-value-bind (okay dev ino mode nlink uid gid rdev size - atime mtime ctime blksize blocks) + atime mtime ctime blksize blocks) (sb!unix:unix-fstat (fd-stream-fd fd-stream)) (declare (ignore ino nlink uid gid rdev atime mtime ctime blksize blocks)) @@ -1659,88 +1987,114 @@ (if (zerop mode) nil (truncate size (fd-stream-element-size fd-stream))))) - ;; FIXME: I doubt this is correct in the presence of Unicode, - ;; since fd-stream FILE-POSITION is measured in bytes. (:file-string-length (etypecase arg1 - (character 1) - (string (length arg1)))) + (character (fd-stream-character-size fd-stream arg1)) + (string (fd-stream-string-size fd-stream arg1)))) (:file-position - (fd-stream-file-position fd-stream arg1)))) + (if arg1 + (fd-stream-set-file-position fd-stream arg1) + (fd-stream-get-file-position fd-stream))))) + +;; FIXME: Think about this. +;; +;; (defun finish-fd-stream-output (fd-stream) +;; (let ((timeout (fd-stream-timeout fd-stream))) +;; (loop while (fd-stream-output-later fd-stream) +;; ;; FIXME: SIGINT while waiting for a timeout will +;; ;; cause a timeout here. +;; do (when (and (not (serve-event timeout)) timeout) +;; (signal-timeout 'io-timeout +;; :stream fd-stream +;; :direction :write +;; :seconds timeout))))) + +(defun finish-fd-stream-output (stream) + (flush-output-buffer stream) + (do () + ((null (fd-stream-output-later stream))) + (serve-all-events))) + +(defun fd-stream-get-file-position (stream) + (declare (fd-stream stream)) + (without-interrupts + (let ((posn (sb!unix:unix-lseek (fd-stream-fd stream) 0 sb!unix:l_incr))) + (declare (type (or (alien sb!unix:off-t) null) posn)) + ;; We used to return NIL for errno==ESPIPE, and signal an error + ;; in other failure cases. However, CLHS says to return NIL if + ;; the position cannot be determined -- so that's what we do. + (when (integerp posn) + ;; Adjust for buffered output: If there is any output + ;; buffered, the *real* file position will be larger + ;; than reported by lseek() because lseek() obviously + ;; cannot take into account output we have not sent + ;; yet. + (dolist (later (fd-stream-output-later stream)) + (incf posn (- (caddr later) (cadr later)))) + (incf posn (fd-stream-obuf-tail stream)) + ;; Adjust for unread input: If there is any input + ;; read from UNIX but not supplied to the user of the + ;; stream, the *real* file position will smaller than + ;; reported, because we want to look like the unread + ;; stuff is still available. + (decf posn (- (fd-stream-ibuf-tail stream) + (fd-stream-ibuf-head stream))) + (when (fd-stream-unread stream) + (decf posn)) + ;; Divide bytes by element size. + (truncate posn (fd-stream-element-size stream)))))) + +(defun fd-stream-set-file-position (stream position-spec) + (declare (fd-stream stream)) + (check-type position-spec + (or (alien sb!unix:off-t) (member nil :start :end)) + "valid file position designator") + (tagbody + :again + ;; Make sure we don't have any output pending, because if we + ;; move the file pointer before writing this stuff, it will be + ;; written in the wrong location. + (finish-fd-stream-output stream) + ;; Disable interrupts so that interrupt handlers doing output + ;; won't screw us. + (without-interrupts + (unless (fd-stream-output-finished-p stream) + ;; We got interrupted and more output came our way during + ;; the interrupt. Wrapping the FINISH-FD-STREAM-OUTPUT in + ;; WITHOUT-INTERRUPTS gets nasty as it can signal errors, + ;; so we prefer to do things like this... + (go :again)) + ;; Clear out any pending input to force the next read to go to + ;; the disk. + (setf (fd-stream-unread stream) nil + (fd-stream-ibuf-head stream) 0 + (fd-stream-ibuf-tail stream) 0) + ;; Trash cached value for listen, so that we check next time. + (setf (fd-stream-listen stream) nil) + ;; Now move it. + (multiple-value-bind (offset origin) + (case position-spec + (:start + (values 0 sb!unix:l_set)) + (:end + (values 0 sb!unix:l_xtnd)) + (t + (values (* position-spec (fd-stream-element-size stream)) + sb!unix:l_set))) + (declare (type (alien sb!unix:off-t) offset)) + (let ((posn (sb!unix:unix-lseek (fd-stream-fd stream) + offset origin))) + ;; CLHS says to return true if the file-position was set + ;; succesfully, and NIL otherwise. We are to signal an error + ;; only if the given position was out of bounds, and that is + ;; dealt with above. In times past we used to return NIL for + ;; errno==ESPIPE, and signal an error in other cases. + ;; + ;; FIXME: We are still liable to signal an error if flushing + ;; output fails. + (return-from fd-stream-set-file-position + (typep posn '(alien sb!unix:off-t)))))))) -(defun fd-stream-file-position (stream &optional newpos) - (declare (type fd-stream stream) - (type (or (alien sb!unix:off-t) (member nil :start :end)) newpos)) - (if (null newpos) - (sb!sys:without-interrupts - ;; First, find the position of the UNIX file descriptor in the file. - (multiple-value-bind (posn errno) - (sb!unix:unix-lseek (fd-stream-fd stream) 0 sb!unix:l_incr) - (declare (type (or (alien sb!unix:off-t) null) posn)) - (cond ((integerp posn) - ;; Adjust for buffered output: If there is any output - ;; buffered, the *real* file position will be larger - ;; than reported by lseek() because lseek() obviously - ;; cannot take into account output we have not sent - ;; yet. - (dolist (later (fd-stream-output-later stream)) - (incf posn (- (caddr later) - (cadr later)))) - (incf posn (fd-stream-obuf-tail stream)) - ;; Adjust for unread input: If there is any input - ;; read from UNIX but not supplied to the user of the - ;; stream, the *real* file position will smaller than - ;; reported, because we want to look like the unread - ;; stuff is still available. - (decf posn (- (fd-stream-ibuf-tail stream) - (fd-stream-ibuf-head stream))) - (when (fd-stream-unread stream) - (decf posn)) - ;; Divide bytes by element size. - (truncate posn (fd-stream-element-size stream))) - ((eq errno sb!unix:espipe) - nil) - (t - (sb!sys:with-interrupts - (simple-stream-perror "failure in Unix lseek() on ~S" - stream - errno)))))) - (let ((offset 0) origin) - (declare (type (alien sb!unix:off-t) offset)) - ;; Make sure we don't have any output pending, because if we - ;; move the file pointer before writing this stuff, it will be - ;; written in the wrong location. - (flush-output-buffer stream) - (do () - ((null (fd-stream-output-later stream))) - (sb!sys:serve-all-events)) - ;; Clear out any pending input to force the next read to go to - ;; the disk. - (setf (fd-stream-unread stream) nil) - (setf (fd-stream-ibuf-head stream) 0) - (setf (fd-stream-ibuf-tail stream) 0) - ;; Trash cached value for listen, so that we check next time. - (setf (fd-stream-listen stream) nil) - ;; Now move it. - (cond ((eq newpos :start) - (setf offset 0 origin sb!unix:l_set)) - ((eq newpos :end) - (setf offset 0 origin sb!unix:l_xtnd)) - ((typep newpos '(alien sb!unix:off-t)) - (setf offset (* newpos (fd-stream-element-size stream)) - origin sb!unix:l_set)) - (t - (error "invalid position given to FILE-POSITION: ~S" newpos))) - (multiple-value-bind (posn errno) - (sb!unix:unix-lseek (fd-stream-fd stream) offset origin) - (cond ((typep posn '(alien sb!unix:off-t)) - t) - ((eq errno sb!unix:espipe) - nil) - (t - (simple-stream-perror "error in Unix lseek() on ~S" - stream - errno))))))) ;;;; creation routines (MAKE-FD-STREAM and OPEN) @@ -1779,7 +2133,7 @@ (format nil "file ~A" file) (format nil "descriptor ~W" fd))) auto-close) - (declare (type index fd) (type (or index null) timeout) + (declare (type index fd) (type (or real null) timeout) (type (member :none :line :full) buffering)) (cond ((not (or input-p output-p)) (setf input t)) @@ -1794,7 +2148,10 @@ :buffering buffering :dual-channel-p dual-channel-p :external-format external-format - :timeout timeout))) + :timeout + (if timeout + (coerce timeout 'single-float) + nil)))) (set-fd-stream-routines stream element-type external-format input output input-buffer-p) (when (and auto-close (fboundp 'finalize)) @@ -1809,8 +2166,8 @@ ;;; Pick a name to use for the backup file for the :IF-EXISTS ;;; :RENAME-AND-DELETE and :RENAME options. (defun pick-backup-name (name) - (declare (type simple-base-string name)) - (concatenate 'simple-base-string name ".bak")) + (declare (type simple-string name)) + (concatenate 'simple-string name ".bak")) ;;; Ensure that the given arg is one of the given list of valid ;;; things. Allow the user to fix any problems. @@ -2042,11 +2399,14 @@ (setf *available-buffers* nil) (with-output-to-string (*error-output*) (setf *stdin* - (make-fd-stream 0 :name "standard input" :input t :buffering :line)) + (make-fd-stream 0 :name "standard input" :input t :buffering :line + #!+win32 :external-format #!+win32 (sb!win32::console-input-codepage))) (setf *stdout* - (make-fd-stream 1 :name "standard output" :output t :buffering :line)) + (make-fd-stream 1 :name "standard output" :output t :buffering :line + #!+win32 :external-format #!+win32 (sb!win32::console-output-codepage))) (setf *stderr* - (make-fd-stream 2 :name "standard error" :output t :buffering :line)) + (make-fd-stream 2 :name "standard error" :output t :buffering :line + #!+win32 :external-format #!+win32 (sb!win32::console-output-codepage))) (let* ((ttyname #.(coerce "/dev/tty" 'simple-base-string)) (tty (sb!unix:unix-open ttyname sb!unix:o_rdwr #o666))) (if tty