0.9.8.28:
[sbcl.git] / src / code / fd-stream.lisp
index 945790e..0e43da7 100644 (file)
                             start
                             length)
       (cond ((not count)
-             (if (= errno sb!unix:ewouldblock)
+             (if #!-win32 (= errno sb!unix:ewouldblock) #!+win32 t #!-win32
                  (error "Write would have blocked, but SERVER told us to go.")
                  (simple-stream-perror "couldn't write to ~S" stream errno)))
             ((eql count length) ; Hot damn, it worked.
         (multiple-value-bind (count errno)
             (sb!unix:unix-write (fd-stream-fd stream) base start length)
           (cond ((not count)
-                 (if (= errno sb!unix:ewouldblock)
+                 (if #!-win32 (= errno sb!unix:ewouldblock) #!+win32 t #!-win32
                      (output-later stream base start end reuse-sap)
                      (simple-stream-perror "couldn't write to ~S"
                                            stream
         (end (or end (length (the vector thing)))))
     (declare (fixnum start end))
     (if (stringp thing)
-        (let ((last-newline (and (find #\newline (the simple-string thing)
-                                       :start start :end end)
-                                 ;; FIXME why do we need both calls?
-                                 ;; Is find faster forwards than
-                                 ;; position is backwards?
-                                 (position #\newline (the simple-string thing)
-                                           :from-end t
-                                           :start start
-                                           :end end))))
+        (let ((last-newline
+               (string-dispatch (simple-base-string
+                                 #!+sb-unicode
+                                 (simple-array character)
+                                 string)
+                   thing
+                 (and (find #\newline thing :start start :end end)
+                      ;; FIXME why do we need both calls?
+                      ;; Is find faster forwards than
+                      ;; position is backwards?
+                      (position #\newline thing
+                                :from-end t
+                                :start start
+                                :end end)))))
           (if (and (typep thing 'base-string)
                    (eq (fd-stream-external-format stream) :latin-1))
               (ecase (fd-stream-buffering stream)
                            (sb!sys:int-sap (+ (sb!sys:sap-int ibuf-sap) tail))
                            (- buflen tail))
       (cond ((null count)
-             (if (eql errno sb!unix:ewouldblock)
+             (if #!-win32 (eql errno sb!unix:ewouldblock) #!+win32 t #!-win32
                  (progn
                    (unless (sb!sys:wait-until-fd-usable
                             fd :input (fd-stream-timeout stream))
       (return-from fd-stream-resync
         (funcall (symbol-function (eighth entry)) stream)))))
 
+(defun get-fd-stream-character-sizer (stream)
+  (dolist (entry *external-formats*)
+    (when (member (fd-stream-external-format stream) (first entry))
+      (return-from get-fd-stream-character-sizer (ninth entry)))))
+
+(defun fd-stream-character-size (stream char)
+  (let ((sizer (get-fd-stream-character-sizer stream)))
+    (when sizer (funcall sizer char))))
+
+(defun fd-stream-string-size (stream string)
+  (let ((sizer (get-fd-stream-character-sizer stream)))
+    (when sizer
+      (loop for char across string summing (funcall sizer char)))))
+
 ;;; FIXME: OAOOM here vrt. *EXTERNAL-FORMAT-FUNCTIONS* in fd-stream.lisp
 (defmacro define-external-format (external-format size output-restart
                                   out-expr in-expr)
          (out-function (symbolicate "OUTPUT-BYTES/" name))
          (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
          (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name))
-         (in-char-function (symbolicate "INPUT-CHAR/" name)))
+         (in-char-function (symbolicate "INPUT-CHAR/" name))
+         (size-function (symbolicate "BYTES-FOR-CHAR/" name)))
     `(progn
+      (defun ,size-function (byte)
+        (declare (ignore byte))
+        ,size)
       (defun ,out-function (stream string flush-p start end)
         (let ((start (or start 0))
               (end (or end (length string))))
                      (> (fd-stream-ibuf-tail stream)
                         (fd-stream-ibuf-head stream)))
             (file-position stream (file-position stream)))
-          (when (< end start)
-            (error ":END before :START!"))
+          (unless (<= 0 start end (length string))
+            (signal-bounding-indices-bad-error string start end))
           (do ()
               ((= end start))
             (setf (fd-stream-obuf-tail stream)
-                  (flet ((do-it (string)
-                           (do* ((len (fd-stream-obuf-length stream))
-                                 (sap (fd-stream-obuf-sap stream))
-                                 (tail (fd-stream-obuf-tail stream)))
-                                ((or (= start end) (< (- len tail) 4)) tail)
-                             ,(if output-restart
-                                  `(catch 'output-nothing
-                                     (let* ((byte (aref string start))
-                                            (bits (char-code byte)))
-                                       ,out-expr
-                                       (incf tail ,size)))
-                                  `(let* ((byte (aref string start))
-                                          (bits (char-code byte)))
-                                     ,out-expr
-                                     (incf tail ,size)))
-                             (incf start))))
-                    (declare (inline do-it))
-                    ;; Specialized versions for the common cases of
-                    ;; SIMPLE-BASE-STRING and (SIMPLE-ARRAY CHARACTER)
-                    ;; to avoid doing a generic AREF.
-                    (etypecase string
-                      (simple-base-string
-                       (do-it (the simple-base-string string)))
-                      #!+sb-unicode
-                      ((simple-array character)
-                       ;; For some reason the type information from the
-                       ;; etypecase doesn't propagate through here without
-                       ;; an explicit THE.
-                       (do-it (the (simple-array character) string)))
-                      (string
-                       (do-it string)))))
+                  (string-dispatch (simple-base-string
+                                    #!+sb-unicode
+                                    (simple-array character)
+                                    string)
+                      string
+                    (let ((len (fd-stream-obuf-length stream))
+                          (sap (fd-stream-obuf-sap stream))
+                          (tail (fd-stream-obuf-tail stream)))
+                      (declare (type index tail)
+                               ;; STRING bounds have already been checked.
+                               (optimize (safety 0)))
+                      (loop
+                         (,@(if output-restart
+                                `(catch 'output-nothing)
+                                `(progn))
+                            (do* ()
+                                 ((or (= start end) (< (- len tail) 4)))
+                              (let* ((byte (aref string start))
+                                     (bits (char-code byte)))
+                                ,out-expr
+                                (incf tail ,size)
+                                (incf start)))
+                            ;; Exited from the loop normally
+                            (return tail))
+                         ;; Exited via CATCH. Skip the current character
+                         ;; and try the inner loop again.
+                         (incf start)))))
             (when (< start end)
               (flush-output-buffer stream)))
           (when flush-p
        (cons '(,external-format ,in-function ,in-char-function ,out-function
                ,@(mapcar #'(lambda (buffering)
                              (intern (format nil format (string buffering))))
-                         '(:none :line :full)))
+                         '(:none :line :full))
+               nil ; no resync-function
+               ,size-function)
         *external-formats*)))))
 
 (defmacro define-external-format/variable-width
          (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name)))
          (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name))
          (in-char-function (symbolicate "INPUT-CHAR/" name))
-         (resync-function (symbolicate "RESYNC/" name)))
+         (resync-function (symbolicate "RESYNC/" name))
+         (size-function (symbolicate "BYTES-FOR-CHAR/" name)))
     `(progn
+      (defun ,size-function (byte)
+        ,out-size-expr)
       (defun ,out-function (stream string flush-p start end)
         (let ((start (or start 0))
               (end (or end (length string))))
                      (> (fd-stream-ibuf-tail stream)
                         (fd-stream-ibuf-head stream)))
             (file-position stream (file-position stream)))
-          (when (< end start)
-            (error ":END before :START!"))
+          (unless (<= 0 start end (length string))
+            (signal-bounding-indices-bad-error string start end))
           (do ()
               ((= end start))
             (setf (fd-stream-obuf-tail stream)
-                  (flet ((do-it (string)
-                           (do* ((len (fd-stream-obuf-length stream))
-                                 (sap (fd-stream-obuf-sap stream))
-                                 (tail (fd-stream-obuf-tail stream)))
-                                ((or (= start end) (< (- len tail) 4)) tail)
-                             ,(if output-restart
-                                  `(catch 'output-nothing
-                                     (let* ((byte (aref string start))
-                                            (bits (char-code byte))
-                                            (size ,out-size-expr))
-                                       ,out-expr
-                                       (incf tail size)))
-                                  `(let* ((byte (aref string start))
-                                          (bits (char-code byte))
-                                          (size ,out-size-expr))
-                                     ,out-expr
-                                     (incf tail size)))
-                             (incf start))))
-                    (declare (inline do-it))
-                    ;; Specialized versions for the common cases of
-                    ;; SIMPLE-BASE-STRING and (SIMPLE-ARRAY CHARACTER)
-                    ;; to avoid doing a generic AREF.
-                    (etypecase string
-                      (simple-base-string
-                       (do-it (the simple-base-string string)))
-                      #!+sb-unicode
-                      ((simple-array character)
-                       ;; For some reason the type information from the
-                       ;; etypecase doesn't propagate through here without
-                       ;; an explicit THE.
-                       (do-it (the (simple-array character) string)))
-                      (string
-                       (do-it string)))))
+                  (string-dispatch (simple-base-string
+                                    #!+sb-unicode
+                                    (simple-array character)
+                                    string)
+                      string
+                    (let ((len (fd-stream-obuf-length stream))
+                          (sap (fd-stream-obuf-sap stream))
+                          (tail (fd-stream-obuf-tail stream)))
+                      (declare (type index tail)
+                               ;; STRING bounds have already been checked.
+                               (optimize (safety 0)))
+                      (loop
+                         (,@(if output-restart
+                                `(catch 'output-nothing)
+                                `(progn))
+                            (do* ()
+                                 ((or (= start end) (< (- len tail) 4)))
+                              (let* ((byte (aref string start))
+                                     (bits (char-code byte))
+                                     (size ,out-size-expr))
+                                ,out-expr
+                                (incf tail size)
+                                (incf start)))
+                            ;; Exited from the loop normally
+                            (return tail))
+                         ;; Exited via CATCH. Skip the current character
+                         ;; and try the inner loop again.
+                         (incf start)))))
             (when (< start end)
               (flush-output-buffer stream)))
           (when flush-p
           (let* ((head (fd-stream-ibuf-head stream))
                  (tail (fd-stream-ibuf-tail stream))
                  (sap (fd-stream-ibuf-sap stream))
-                 (head-start head)
                  (decode-break-reason nil))
             (declare (type index head tail))
             ;; Copy data from stream buffer into user's buffer.
                         (incf head size))
                       nil))
               (setf (fd-stream-ibuf-head stream) head)
-              (when (and decode-break-reason
-                         (= head head-start))
+              (when decode-break-reason
+                ;; If we've already read some characters on when the invalid
+                ;; code sequence is detected, we return immediately. The
+                ;; handling of the error is deferred until the next call
+                ;; (where this check will be false). This allows establishing
+                ;; high-level handlers for decode errors (for example
+                ;; automatically resyncing in Lisp comments).
+                (when (plusp total-copied)
+                  (return-from ,in-function total-copied))
                 (when (stream-decoding-error-and-handle
                        stream decode-break-reason)
                   (if eof-error-p
                ,@(mapcar #'(lambda (buffering)
                              (intern (format nil format (string buffering))))
                          '(:none :line :full))
-               ,resync-function)
+               ,resync-function
+               ,size-function)
         *external-formats*)))))
 
 ;;; Multiple names for the :ISO{,-}8859-* families are needed because on
        (if (zerop mode)
            nil
            (truncate size (fd-stream-element-size fd-stream)))))
-    ;; FIXME: I doubt this is correct in the presence of Unicode,
-    ;; since fd-stream FILE-POSITION is measured in bytes.
     (:file-string-length
      (etypecase arg1
-       (character 1)
-       (string (length arg1))))
+       (character (fd-stream-character-size fd-stream arg1))
+       (string (fd-stream-string-size fd-stream arg1))))
     (:file-position
      (fd-stream-file-position fd-stream arg1))))
 
 ;;; Rename NAMESTRING to ORIGINAL. First, check whether we have write
 ;;; access, since we don't want to trash unwritable files even if we
 ;;; technically can. We return true if we succeed in renaming.
+#!-win32
 (defun rename-the-old-one (namestring original)
   (unless (sb!unix:unix-access namestring sb!unix:w_ok)
     (error "~@<The file ~2I~_~S ~I~_is not writable.~:>" namestring))