From 388f043377acc98ef40d4e1445c09fb2c3b168e6 Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Wed, 16 Dec 2009 21:54:05 +0000 Subject: [PATCH] 1.0.33.15: preparation for UTF external formats On input, there are potentially two non-constant sizes of data that need to be read; for variable-width formats, there are both the number of bytes for a particular character, and the number of bytes that need to be read in order to know how many bytes need to be read for a character (previously hardwired to 1). Separate out these two sizes in preparation for UTF-16 and UTF-32 external formats. --- src/code/external-formats/enc-basic.lisp | 10 ++++----- src/code/external-formats/mb-util.lisp | 2 +- src/code/fd-stream.lisp | 34 ++++++++++++++++++------------ version.lisp-expr | 2 +- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/code/external-formats/enc-basic.lisp b/src/code/external-formats/enc-basic.lisp index 819879f..0deb08a 100644 --- a/src/code/external-formats/enc-basic.lisp +++ b/src/code/external-formats/enc-basic.lisp @@ -402,11 +402,11 @@ (sap-ref-8 sap (+ 1 tail)) (logior #x80 (ldb (byte 6 12) bits)) (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits)) (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits))))) - (cond ((< byte #x80) 1) - ((< byte #xc2) (return-from decode-break-reason 1)) - ((< byte #xe0) 2) - ((< byte #xf0) 3) - (t 4)) + (1 (cond ((< byte #x80) 1) + ((< byte #xc2) (return-from decode-break-reason 1)) + ((< byte #xe0) 2) + ((< byte #xf0) 3) + (t 4))) (code-char (ecase size (1 byte) (2 (let ((byte2 (sap-ref-8 sap (1+ head)))) diff --git a/src/code/external-formats/mb-util.lisp b/src/code/external-formats/mb-util.lisp index f0ff16d..9a82d45 100644 --- a/src/code/external-formats/mb-util.lisp +++ b/src/code/external-formats/mb-util.lisp @@ -265,7 +265,7 @@ (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb) (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb) (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb)))))) - (,mb-len byte) + (1 (,mb-len byte)) (let* ((mb (ecase size (1 byte) (2 (let ((byte2 (sap-ref-8 sap (1+ head)))) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 79911fc..d2acaee 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -1087,12 +1087,15 @@ (catch 'eof-input-catcher (setf decode-break-reason (block decode-break-reason - (input-at-least ,stream-var 1) - (let* ((byte (sap-ref-8 (buffer-sap ibuf) - (buffer-head ibuf)))) + (input-at-least ,stream-var ,(if (consp bytes) (car bytes) `(setq size ,bytes))) + (let* ((byte (sap-ref-8 (buffer-sap ibuf) (buffer-head ibuf)))) (declare (ignorable byte)) - (setq size ,bytes) - (input-at-least ,stream-var size) + ,@(when (consp bytes) + `((let ((sap (buffer-sap ibuf)) + (head (buffer-head ibuf))) + (declare (ignorable sap head)) + (setq size ,(cadr bytes)) + (input-at-least ,stream-var size)))) (setq ,element-var (locally ,@read-forms)) (setq ,retry-var nil)) nil)) @@ -1501,9 +1504,12 @@ ((or (= tail head) (= requested total-copied))) (setf decode-break-reason (block decode-break-reason + ,@(when (consp in-size-expr) + `((when (> ,(car in-size-expr) (- tail head)) + (return)))) (let ((byte (sap-ref-8 sap head))) (declare (ignorable byte)) - (setq size ,in-size-expr) + (setq size ,(if (consp in-size-expr) (cadr in-size-expr) in-size-expr)) (when (> size (- tail head)) (return)) (setf (aref buffer (+ start total-copied)) ,in-expr) @@ -1552,18 +1558,20 @@ (declare (ignorable byte)) ,in-expr)) (defun ,resync-function (stream) - (let ((ibuf (fd-stream-ibuf stream))) + (let ((ibuf (fd-stream-ibuf stream)) + size) (catch 'eof-input-catcher (loop (incf (buffer-head ibuf)) - (input-at-least stream 1) + (input-at-least stream ,(if (consp in-size-expr) (car in-size-expr) `(setq size ,in-size-expr))) (unless (block decode-break-reason (let* ((sap (buffer-sap ibuf)) (head (buffer-head ibuf)) - (byte (sap-ref-8 sap head)) - (size ,in-size-expr)) + (byte (sap-ref-8 sap head))) (declare (ignorable byte)) - (input-at-least stream size) + ,@(when (consp in-size-expr) + `((setq size ,(cadr in-size-expr)) + (input-at-least stream size))) (setf head (buffer-head ibuf)) ,in-expr) nil) @@ -1579,7 +1587,7 @@ (setf decode-break-reason (block decode-break-reason (setf byte (sap-ref-8 sap head) - size ,in-size-expr + size ,(if (consp in-size-expr) (cadr in-size-expr) in-size-expr) char ,in-expr) (incf head size) nil)) @@ -1598,7 +1606,7 @@ (setf decode-break-reason (block decode-break-reason (setf byte (sap-ref-8 sap head) - size ,in-size-expr + size ,(if (consp in-size-expr) (cadr in-size-expr) in-size-expr) char ,in-expr) (incf head size) nil)) diff --git a/version.lisp-expr b/version.lisp-expr index 2abde29..868cb2d 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.33.14" +"1.0.33.15" -- 1.7.10.4