1.0.33.15: preparation for UTF external formats
authorChristophe Rhodes <csr21@cantab.net>
Wed, 16 Dec 2009 21:54:05 +0000 (21:54 +0000)
committerChristophe Rhodes <csr21@cantab.net>
Wed, 16 Dec 2009 21:54:05 +0000 (21:54 +0000)
On input, there are potentially two non-constant sizes of data that need
to be read; for variable-width formats, there are both the number of bytes
for a particular character, and the number of bytes that need to be read in
order to know how many bytes need to be read for a character (previously
hardwired to 1).  Separate out these two sizes in preparation for UTF-16 and
UTF-32 external formats.

src/code/external-formats/enc-basic.lisp
src/code/external-formats/mb-util.lisp
src/code/fd-stream.lisp
version.lisp-expr

index 819879f..0deb08a 100644 (file)
              (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 12) bits))
              (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits))
              (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits)))))
-  (cond ((< byte #x80) 1)
-        ((< byte #xc2) (return-from decode-break-reason 1))
-        ((< byte #xe0) 2)
-        ((< byte #xf0) 3)
-        (t 4))
+  (1 (cond ((< byte #x80) 1)
+           ((< byte #xc2) (return-from decode-break-reason 1))
+           ((< byte #xe0) 2)
+           ((< byte #xf0) 3)
+           (t 4)))
   (code-char (ecase size
                (1 byte)
                (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
index f0ff16d..9a82d45 100644 (file)
                  (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb)
                           (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb)
                           (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb))))))
-         (,mb-len byte)
+         (1 (,mb-len byte))
          (let* ((mb (ecase size
                       (1 byte)
                       (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
index 79911fc..d2acaee 100644 (file)
                      (catch 'eof-input-catcher
                        (setf decode-break-reason
                              (block decode-break-reason
-                               (input-at-least ,stream-var 1)
-                               (let* ((byte (sap-ref-8 (buffer-sap ibuf)
-                                                       (buffer-head ibuf))))
+                               (input-at-least ,stream-var ,(if (consp bytes) (car bytes) `(setq size ,bytes)))
+                               (let* ((byte (sap-ref-8 (buffer-sap ibuf) (buffer-head ibuf))))
                                  (declare (ignorable byte))
-                                 (setq size ,bytes)
-                                 (input-at-least ,stream-var size)
+                                 ,@(when (consp bytes)
+                                     `((let ((sap (buffer-sap ibuf))
+                                             (head (buffer-head ibuf)))
+                                         (declare (ignorable sap head))
+                                         (setq size ,(cadr bytes))
+                                         (input-at-least ,stream-var size))))
                                  (setq ,element-var (locally ,@read-forms))
                                  (setq ,retry-var nil))
                                nil))
                 ((or (= tail head) (= requested total-copied)))
               (setf decode-break-reason
                     (block decode-break-reason
+                      ,@(when (consp in-size-expr)
+                          `((when (> ,(car in-size-expr) (- tail head))
+                              (return))))
                       (let ((byte (sap-ref-8 sap head)))
                         (declare (ignorable byte))
-                        (setq size ,in-size-expr)
+                        (setq size ,(if (consp in-size-expr) (cadr in-size-expr) in-size-expr))
                         (when (> size (- tail head))
                           (return))
                         (setf (aref buffer (+ start total-copied)) ,in-expr)
           (declare (ignorable byte))
           ,in-expr))
       (defun ,resync-function (stream)
-        (let ((ibuf (fd-stream-ibuf stream)))
+        (let ((ibuf (fd-stream-ibuf stream))
+              size)
           (catch 'eof-input-catcher
             (loop
                (incf (buffer-head ibuf))
-               (input-at-least stream 1)
+               (input-at-least stream ,(if (consp in-size-expr) (car in-size-expr) `(setq size ,in-size-expr)))
                (unless (block decode-break-reason
                          (let* ((sap (buffer-sap ibuf))
                                 (head (buffer-head ibuf))
-                                (byte (sap-ref-8 sap head))
-                                (size ,in-size-expr))
+                                (byte (sap-ref-8 sap head)))
                            (declare (ignorable byte))
-                           (input-at-least stream size)
+                           ,@(when (consp in-size-expr)
+                               `((setq size ,(cadr in-size-expr))
+                                 (input-at-least stream size)))
                            (setf head (buffer-head ibuf))
                            ,in-expr)
                          nil)
                            (setf decode-break-reason
                                  (block decode-break-reason
                                    (setf byte (sap-ref-8 sap head)
-                                         size ,in-size-expr
+                                         size ,(if (consp in-size-expr) (cadr in-size-expr) in-size-expr)
                                          char ,in-expr)
                                    (incf head size)
                                    nil))
               (setf decode-break-reason
                     (block decode-break-reason
                       (setf byte (sap-ref-8 sap head)
-                            size ,in-size-expr
+                            size ,(if (consp in-size-expr) (cadr in-size-expr) in-size-expr)
                             char ,in-expr)
                       (incf head size)
                       nil))
index 2abde29..868cb2d 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.33.14"
+"1.0.33.15"