0.9.16.6: better circularity detection in fasl dumper
[sbcl.git] / src / code / fd-stream.lisp
index 3d6cb0f..ef0c2c4 100644 (file)
                             start
                             length)
       (cond ((not count)
-             (if (= errno sb!unix:ewouldblock)
+             (if #!-win32 (= errno sb!unix:ewouldblock) #!+win32 t #!-win32
                  (error "Write would have blocked, but SERVER told us to go.")
                  (simple-stream-perror "couldn't write to ~S" stream errno)))
             ((eql count length) ; Hot damn, it worked.
         (multiple-value-bind (count errno)
             (sb!unix:unix-write (fd-stream-fd stream) base start length)
           (cond ((not count)
-                 (if (= errno sb!unix:ewouldblock)
+                 (if #!-win32 (= errno sb!unix:ewouldblock) #!+win32 t #!-win32
                      (output-later stream base start end reuse-sap)
                      (simple-stream-perror "couldn't write to ~S"
                                            stream
                      (intern (format nil name-fmt (string (car buffering))))))
                 `(progn
                    (defun ,function (stream byte)
+                     (declare (ignorable byte))
                      (output-wrapper/variable-width (stream ,size ,buffering ,restart)
                        ,@body))
                    (setf *output-routines*
     (declare (fixnum start end))
     (if (stringp thing)
         (let ((last-newline
-               (flet ((do-it (string)
-                        (and (find #\newline string :start start :end end)
-                             ;; FIXME why do we need both calls?
-                             ;; Is find faster forwards than
-                             ;; position is backwards?
-                             (position #\newline string
-                                       :from-end t
-                                       :start start
-                                       :end end))))
-                 (declare (inline do-it))
-                 ;; Specialize the common cases
-                 (etypecase thing
-                   (simple-base-string
-                    (do-it (the simple-base-string thing)))
-                   #!+sb-unicode
-                   ((simple-array character)
-                    (do-it (the (simple-array character) thing)))
-                   (string
-                    (do-it thing))))))
+               (string-dispatch (simple-base-string
+                                 #!+sb-unicode
+                                 (simple-array character)
+                                 string)
+                   thing
+                 (and (find #\newline thing :start start :end end)
+                      ;; FIXME why do we need both calls?
+                      ;; Is find faster forwards than
+                      ;; position is backwards?
+                      (position #\newline thing
+                                :from-end t
+                                :start start
+                                :end end)))))
           (if (and (typep thing 'base-string)
                    (eq (fd-stream-external-format stream) :latin-1))
               (ecase (fd-stream-buffering stream)
 ;;; per element.
 (defvar *input-routines* ())
 
+;;; Return whether a primitive partial read operation on STREAM's FD
+;;; would (probably) block.  Signal a `simple-stream-error' if the
+;;; system call implementing this operation fails.
+;;;
+;;; It is "may" instead of "would" because "would" is not quite
+;;; correct on win32.  However, none of the places that use it require
+;;; further assurance than "may" versus "will definitely not".
+(defun sysread-may-block-p (stream)
+  #+win32
+  ;; This answers T at EOF on win32, I think.
+  (not (sb!win32:fd-listen (fd-stream-fd stream)))
+  #-win32
+  (sb!unix:with-restarted-syscall (count errno)
+    (sb!alien:with-alien ((read-fds (sb!alien:struct sb!unix:fd-set)))
+      (sb!unix:fd-zero read-fds)
+      (sb!unix:fd-set (fd-stream-fd stream) read-fds)
+      (sb!unix:unix-fast-select (1+ (fd-stream-fd stream))
+                                (sb!alien:addr read-fds)
+                                nil nil 0 0))
+    (case count
+      ((1) nil)
+      ((0) t)
+      (otherwise
+       (simple-stream-perror "couldn't check whether ~S is readable"
+                             stream
+                             errno)))))
+
 ;;; Fill the input buffer, and return the number of bytes read. Throw
 ;;; to EOF-INPUT-CATCHER if the eof was reached. Drop into
 ;;; SYSTEM:SERVER if necessary.
              (setf (fd-stream-ibuf-head stream) 0)
              (setf (fd-stream-ibuf-tail stream) tail))))
     (setf (fd-stream-listen stream) nil)
-    (sb!unix:with-restarted-syscall (count errno)
-      ;; FIXME: Judging from compiler warnings, this WITH-ALIEN form expands
-      ;; into something which uses the not-yet-defined type
-      ;;   (SB!ALIEN-INTERNALS:ALIEN (* (SB!ALIEN:STRUCT SB!UNIX:FD-SET))).
-      ;; This is probably inefficient and unsafe and generally bad, so
-      ;; try to find some way to make that type known before
-      ;; this is compiled.
-      (sb!alien:with-alien ((read-fds (sb!alien:struct sb!unix:fd-set)))
-        (sb!unix:fd-zero read-fds)
-        (sb!unix:fd-set fd read-fds)
-        (sb!unix:unix-fast-select (1+ fd)
-                                  (sb!alien:addr read-fds)
-                                  nil nil 0 0))
-      (case count
-        (1)
-        (0
-         (unless (sb!sys:wait-until-fd-usable
-                  fd :input (fd-stream-timeout stream))
-           (error 'io-timeout :stream stream :direction :read)))
-        (t
-         (simple-stream-perror "couldn't check whether ~S is readable"
-                               stream
-                               errno))))
+    ;;This isn't quite the same on win32.  Then again, neither was
+    ;;(not (sb!win32:fd-listen fd)), as was originally here.  See
+    ;;comment in `sysread-may-block-p'.
+    (when (sysread-may-block-p stream)
+      (unless (sb!sys:wait-until-fd-usable
+               fd :input (fd-stream-timeout stream))
+        (error 'io-timeout :stream stream :direction :read)))
     (multiple-value-bind (count errno)
         (sb!unix:unix-read fd
                            (sb!sys:int-sap (+ (sb!sys:sap-int ibuf-sap) tail))
                            (- buflen tail))
       (cond ((null count)
-             (if (eql errno sb!unix:ewouldblock)
+             (if #!-win32 (eql errno sb!unix:ewouldblock) #!+win32 t #!-win32
                  (progn
                    (unless (sb!sys:wait-until-fd-usable
                             fd :input (fd-stream-timeout stream))
                                                       ,stream-var)
                                                      (fd-stream-ibuf-head
                                                       ,stream-var))))
+                               (declare (ignorable byte))
                                (setq size ,bytes)
                                (input-at-least ,stream-var size)
                                (setq ,element-var (locally ,@read-forms))
       (return-from fd-stream-resync
         (funcall (symbol-function (eighth entry)) stream)))))
 
+(defun get-fd-stream-character-sizer (stream)
+  (dolist (entry *external-formats*)
+    (when (member (fd-stream-external-format stream) (first entry))
+      (return-from get-fd-stream-character-sizer (ninth entry)))))
+
+(defun fd-stream-character-size (stream char)
+  (let ((sizer (get-fd-stream-character-sizer stream)))
+    (when sizer (funcall sizer char))))
+
+(defun fd-stream-string-size (stream string)
+  (let ((sizer (get-fd-stream-character-sizer stream)))
+    (when sizer
+      (loop for char across string summing (funcall sizer char)))))
+
+(defun find-external-format (external-format)
+  (when external-format
+    (find external-format *external-formats* :test #'member :key #'car)))
+
+(defun variable-width-external-format-p (ef-entry)
+  (when (eighth ef-entry) t))
+
+(defun bytes-for-char-fun (ef-entry)
+  (if ef-entry (symbol-function (ninth ef-entry)) (constantly 1)))
+
 ;;; FIXME: OAOOM here vrt. *EXTERNAL-FORMAT-FUNCTIONS* in fd-stream.lisp
 (defmacro define-external-format (external-format size output-restart
                                   out-expr in-expr)
          (out-function (symbolicate "OUTPUT-BYTES/" name))
          (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
          (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name))
-         (in-char-function (symbolicate "INPUT-CHAR/" name)))
+         (in-char-function (symbolicate "INPUT-CHAR/" name))
+         (size-function (symbolicate "BYTES-FOR-CHAR/" name)))
     `(progn
+      (defun ,size-function (byte)
+        (declare (ignore byte))
+        ,size)
       (defun ,out-function (stream string flush-p start end)
         (let ((start (or start 0))
               (end (or end (length string))))
                      (> (fd-stream-ibuf-tail stream)
                         (fd-stream-ibuf-head stream)))
             (file-position stream (file-position stream)))
-          (when (< end start)
-            (error ":END before :START!"))
+          (unless (<= 0 start end (length string))
+            (signal-bounding-indices-bad-error string start end))
           (do ()
               ((= end start))
             (setf (fd-stream-obuf-tail stream)
-                  (flet ((do-it (string)
-                           (do* ((len (fd-stream-obuf-length stream))
-                                 (sap (fd-stream-obuf-sap stream))
-                                 (tail (fd-stream-obuf-tail stream)))
-                                ((or (= start end) (< (- len tail) 4)) tail)
-                             ,(if output-restart
-                                  `(catch 'output-nothing
-                                     (let* ((byte (aref string start))
-                                            (bits (char-code byte)))
-                                       ,out-expr
-                                       (incf tail ,size)))
-                                  `(let* ((byte (aref string start))
-                                          (bits (char-code byte)))
-                                     ,out-expr
-                                     (incf tail ,size)))
-                             (incf start))))
-                    (declare (inline do-it))
-                    ;; Specialized versions for the common cases of
-                    ;; SIMPLE-BASE-STRING and (SIMPLE-ARRAY CHARACTER)
-                    ;; to avoid doing a generic AREF.
-                    (etypecase string
-                      (simple-base-string
-                       (do-it (the simple-base-string string)))
-                      #!+sb-unicode
-                      ((simple-array character)
-                       ;; For some reason the type information from the
-                       ;; etypecase doesn't propagate through here without
-                       ;; an explicit THE.
-                       (do-it (the (simple-array character) string)))
-                      (string
-                       (do-it string)))))
+                  (string-dispatch (simple-base-string
+                                    #!+sb-unicode
+                                    (simple-array character)
+                                    string)
+                      string
+                    (let ((len (fd-stream-obuf-length stream))
+                          (sap (fd-stream-obuf-sap stream))
+                          (tail (fd-stream-obuf-tail stream)))
+                      (declare (type index tail)
+                               ;; STRING bounds have already been checked.
+                               (optimize (safety 0)))
+                      (loop
+                         (,@(if output-restart
+                                `(catch 'output-nothing)
+                                `(progn))
+                            (do* ()
+                                 ((or (= start end) (< (- len tail) 4)))
+                              (let* ((byte (aref string start))
+                                     (bits (char-code byte)))
+                                ,out-expr
+                                (incf tail ,size)
+                                (incf start)))
+                            ;; Exited from the loop normally
+                            (return tail))
+                         ;; Exited via CATCH. Skip the current character
+                         ;; and try the inner loop again.
+                         (incf start)))))
             (when (< start end)
               (flush-output-buffer stream)))
           (when flush-p
        (cons '(,external-format ,in-function ,in-char-function ,out-function
                ,@(mapcar #'(lambda (buffering)
                              (intern (format nil format (string buffering))))
-                         '(:none :line :full)))
+                         '(:none :line :full))
+               nil ; no resync-function
+               ,size-function)
         *external-formats*)))))
 
 (defmacro define-external-format/variable-width
          (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
          (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name))
          (in-char-function (symbolicate "INPUT-CHAR/" name))
-         (resync-function (symbolicate "RESYNC/" name)))
+         (resync-function (symbolicate "RESYNC/" name))
+         (size-function (symbolicate "BYTES-FOR-CHAR/" name)))
     `(progn
+      (defun ,size-function (byte)
+        (declare (ignorable byte))
+        ,out-size-expr)
       (defun ,out-function (stream string flush-p start end)
         (let ((start (or start 0))
               (end (or end (length string))))
                      (> (fd-stream-ibuf-tail stream)
                         (fd-stream-ibuf-head stream)))
             (file-position stream (file-position stream)))
-          (when (< end start)
-            (error ":END before :START!"))
+          (unless (<= 0 start end (length string))
+            (signal-bounding-indices-bad-error string start end))
           (do ()
               ((= end start))
             (setf (fd-stream-obuf-tail stream)
-                  (flet ((do-it (string)
-                           (do* ((len (fd-stream-obuf-length stream))
-                                 (sap (fd-stream-obuf-sap stream))
-                                 (tail (fd-stream-obuf-tail stream)))
-                                ((or (= start end) (< (- len tail) 4)) tail)
-                             ,(if output-restart
-                                  `(catch 'output-nothing
-                                     (let* ((byte (aref string start))
-                                            (bits (char-code byte))
-                                            (size ,out-size-expr))
-                                       ,out-expr
-                                       (incf tail size)))
-                                  `(let* ((byte (aref string start))
-                                          (bits (char-code byte))
-                                          (size ,out-size-expr))
-                                     ,out-expr
-                                     (incf tail size)))
-                             (incf start))))
-                    (declare (inline do-it))
-                    ;; Specialized versions for the common cases of
-                    ;; SIMPLE-BASE-STRING and (SIMPLE-ARRAY CHARACTER)
-                    ;; to avoid doing a generic AREF.
-                    (etypecase string
-                      (simple-base-string
-                       (do-it (the simple-base-string string)))
-                      #!+sb-unicode
-                      ((simple-array character)
-                       ;; For some reason the type information from the
-                       ;; etypecase doesn't propagate through here without
-                       ;; an explicit THE.
-                       (do-it (the (simple-array character) string)))
-                      (string
-                       (do-it string)))))
+                  (string-dispatch (simple-base-string
+                                    #!+sb-unicode
+                                    (simple-array character)
+                                    string)
+                      string
+                    (let ((len (fd-stream-obuf-length stream))
+                          (sap (fd-stream-obuf-sap stream))
+                          (tail (fd-stream-obuf-tail stream)))
+                      (declare (type index tail)
+                               ;; STRING bounds have already been checked.
+                               (optimize (safety 0)))
+                      (loop
+                         (,@(if output-restart
+                                `(catch 'output-nothing)
+                                `(progn))
+                            (do* ()
+                                 ((or (= start end) (< (- len tail) 4)))
+                              (let* ((byte (aref string start))
+                                     (bits (char-code byte))
+                                     (size ,out-size-expr))
+                                ,out-expr
+                                (incf tail size)
+                                (incf start)))
+                            ;; Exited from the loop normally
+                            (return tail))
+                         ;; Exited via CATCH. Skip the current character
+                         ;; and try the inner loop again.
+                         (incf start)))))
             (when (< start end)
               (flush-output-buffer stream)))
           (when flush-p
           (let* ((head (fd-stream-ibuf-head stream))
                  (tail (fd-stream-ibuf-tail stream))
                  (sap (fd-stream-ibuf-sap stream))
-                 (head-start head)
                  (decode-break-reason nil))
             (declare (type index head tail))
             ;; Copy data from stream buffer into user's buffer.
               (setf decode-break-reason
                     (block decode-break-reason
                       (let ((byte (sap-ref-8 sap head)))
+                        (declare (ignorable byte))
                         (setq size ,in-size-expr)
                         (when (> size (- tail head))
                           (return))
                         (incf head size))
                       nil))
               (setf (fd-stream-ibuf-head stream) head)
-              (when (and decode-break-reason
-                         (= head head-start))
+              (when decode-break-reason
+                ;; If we've already read some characters on when the invalid
+                ;; code sequence is detected, we return immediately. The
+                ;; handling of the error is deferred until the next call
+                ;; (where this check will be false). This allows establishing
+                ;; high-level handlers for decode errors (for example
+                ;; automatically resyncing in Lisp comments).
+                (when (plusp total-copied)
+                  (return-from ,in-function total-copied))
                 (when (stream-decoding-error-and-handle
                        stream decode-break-reason)
                   (if eof-error-p
                                                            ,in-size-expr
                                                            sap head)
         (let ((byte (sap-ref-8 sap head)))
+          (declare (ignorable byte))
           ,in-expr))
       (defun ,resync-function (stream)
-        (loop (input-at-least stream 1)
+        (loop (input-at-least stream 2)
               (incf (fd-stream-ibuf-head stream))
               (unless (block decode-break-reason
                         (let* ((sap (fd-stream-ibuf-sap stream))
                                (head (fd-stream-ibuf-head stream))
                                (byte (sap-ref-8 sap head))
                                (size ,in-size-expr))
-                          ,in-expr)
+                          (declare (ignorable byte))
+                          (input-at-least stream size)
+                          (let ((sap (fd-stream-ibuf-sap stream))
+                                (head (fd-stream-ibuf-head stream)))
+                            ,in-expr))
                         nil)
                 (return))))
       (setf *external-formats*
                ,@(mapcar #'(lambda (buffering)
                              (intern (format nil format (string buffering))))
                          '(:none :line :full))
-               ,resync-function)
+               ,resync-function
+               ,size-function)
         *external-formats*)))))
 
 ;;; Multiple names for the :ISO{,-}8859-* families are needed because on
   (declare (ignore arg2))
   (case operation
     (:listen
-     (or (not (eql (fd-stream-ibuf-head fd-stream)
-                   (fd-stream-ibuf-tail fd-stream)))
-         (fd-stream-listen fd-stream)
-         (setf (fd-stream-listen fd-stream)
-               (eql (sb!unix:with-restarted-syscall ()
-                      (sb!alien:with-alien ((read-fds (sb!alien:struct
-                                                       sb!unix:fd-set)))
-                        (sb!unix:fd-zero read-fds)
-                        (sb!unix:fd-set (fd-stream-fd fd-stream) read-fds)
-                        (sb!unix:unix-fast-select (1+ (fd-stream-fd fd-stream))
-                                                  (sb!alien:addr read-fds)
-                                                  nil nil 0 0)))
-                    1))))
+     (labels ((do-listen ()
+                (or (not (eql (fd-stream-ibuf-head fd-stream)
+                              (fd-stream-ibuf-tail fd-stream)))
+                    (fd-stream-listen fd-stream)
+                    #!+win32
+                    (sb!win32:fd-listen (fd-stream-fd fd-stream))
+                    #!-win32
+                    ;; If the read can block, LISTEN will certainly return NIL.
+                    (if (sysread-may-block-p fd-stream)
+                        nil
+                        ;; Otherwise select(2) and CL:LISTEN have slightly
+                        ;; different semantics.  The former returns that an FD
+                        ;; is readable when a read operation wouldn't block.
+                        ;; That includes EOF.  However, LISTEN must return NIL
+                        ;; at EOF.
+                        (progn (catch 'eof-input-catcher
+                                 ;; r-b/f too calls select, but it shouldn't
+                                 ;; block as long as read can return once w/o
+                                 ;; blocking
+                                 (refill-buffer/fd fd-stream))
+                               ;; At this point either IBUF-HEAD != IBUF-TAIL
+                               ;; and FD-STREAM-LISTEN is NIL, in which case
+                               ;; we should return T, or IBUF-HEAD ==
+                               ;; IBUF-TAIL and FD-STREAM-LISTEN is :EOF, in
+                               ;; which case we should return :EOF for this
+                               ;; call and all future LISTEN call on this stream.
+                               ;; Call ourselves again to determine which case
+                               ;; applies.
+                               (do-listen))))))
+       (do-listen)))
     (:unread
      (setf (fd-stream-unread fd-stream) arg1)
      (setf (fd-stream-listen fd-stream) t))
     (:close
-     (cond (arg1 ; We got us an abort on our hands.
+     (cond (arg1                    ; We got us an abort on our hands.
             (when (fd-stream-handler fd-stream)
               (sb!sys:remove-fd-handler (fd-stream-handler fd-stream))
               (setf (fd-stream-handler fd-stream) nil))
      (setf (fd-stream-unread fd-stream) nil)
      (setf (fd-stream-ibuf-head fd-stream) 0)
      (setf (fd-stream-ibuf-tail fd-stream) 0)
+     #!+win32
+     (progn
+       (sb!win32:fd-clear-input (fd-stream-fd fd-stream))
+       (setf (fd-stream-listen fd-stream) nil))
+     #!-win32
      (catch 'eof-input-catcher
-       (loop
-        (let ((count (sb!unix:with-restarted-syscall ()
-                       (sb!alien:with-alien ((read-fds (sb!alien:struct
-                                                        sb!unix:fd-set)))
-                         (sb!unix:fd-zero read-fds)
-                         (sb!unix:fd-set (fd-stream-fd fd-stream) read-fds)
-                         (sb!unix:unix-fast-select (1+ (fd-stream-fd fd-stream))
-                                                   (sb!alien:addr read-fds)
-                                                   nil nil 0 0)))))
-          (cond ((eql count 1)
-                 (refill-buffer/fd fd-stream)
-                 (setf (fd-stream-ibuf-head fd-stream) 0)
-                 (setf (fd-stream-ibuf-tail fd-stream) 0))
-                (t
-                 (return t)))))))
+       (loop until (sysread-may-block-p fd-stream)
+             do
+             (refill-buffer/fd fd-stream)
+             (setf (fd-stream-ibuf-head fd-stream) 0)
+             (setf (fd-stream-ibuf-tail fd-stream) 0))
+       t))
     (:force-output
      (flush-output-buffer fd-stream))
     (:finish-output
               :format-control "~S is not a stream associated with a file."
               :format-arguments (list fd-stream)))
      (multiple-value-bind (okay dev ino mode nlink uid gid rdev size
-                           atime mtime ctime blksize blocks)
+                                atime mtime ctime blksize blocks)
          (sb!unix:unix-fstat (fd-stream-fd fd-stream))
        (declare (ignore ino nlink uid gid rdev
                         atime mtime ctime blksize blocks))
        (if (zerop mode)
            nil
            (truncate size (fd-stream-element-size fd-stream)))))
-    ;; FIXME: I doubt this is correct in the presence of Unicode,
-    ;; since fd-stream FILE-POSITION is measured in bytes.
     (:file-string-length
      (etypecase arg1
-       (character 1)
-       (string (length arg1))))
+       (character (fd-stream-character-size fd-stream arg1))
+       (string (fd-stream-string-size fd-stream arg1))))
     (:file-position
      (fd-stream-file-position fd-stream arg1))))
 
   (setf *available-buffers* nil)
   (with-output-to-string (*error-output*)
     (setf *stdin*
-          (make-fd-stream 0 :name "standard input" :input t :buffering :line))
+          (make-fd-stream 0 :name "standard input" :input t :buffering :line
+                            #!+win32 :external-format #!+win32 (sb!win32::console-input-codepage)))
     (setf *stdout*
-          (make-fd-stream 1 :name "standard output" :output t :buffering :line))
+          (make-fd-stream 1 :name "standard output" :output t :buffering :line
+                            #!+win32 :external-format #!+win32 (sb!win32::console-output-codepage)))
     (setf *stderr*
-          (make-fd-stream 2 :name "standard error" :output t :buffering :line))
+          (make-fd-stream 2 :name "standard error" :output t :buffering :line
+                            #!+win32 :external-format #!+win32 (sb!win32::console-output-codepage)))
     (let* ((ttyname #.(coerce "/dev/tty" 'simple-base-string))
            (tty (sb!unix:unix-open ttyname sb!unix:o_rdwr #o666)))
       (if tty