From: Rudi Schlatte Date: Thu, 6 Jul 2006 06:14:17 +0000 (+0000) Subject: 0.9.14.8: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=01e9e8c568777d6480699e6cb3947f38c3bed350;p=sbcl.git 0.9.14.8: Added support for the ucs-2 external format (thanks to Ivan Boldyrev) --- diff --git a/NEWS b/NEWS index d6f5be5..eed5f25 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,6 @@ ;;;; -*- coding: utf-8; -*- changes in sbcl-0.9.15 relative to sbcl-0.9.14: + * added support for the ucs-2 external format (contributed by Ivan Boldyrev) * minor incompatible change: pretty printing of objects of type (cons symbol) is, in the default pprint-dispatch-table, now sensitive to whether the symbol satisfies FBOUNDP. (thanks to diff --git a/build-order.lisp-expr b/build-order.lisp-expr index 7e49e54..144b335 100644 --- a/build-order.lisp-expr +++ b/build-order.lisp-expr @@ -680,6 +680,8 @@ ("src/code/external-formats/enc-win" :not-host) #!+sb-unicode ("src/code/external-formats/eucjp" :not-host) + #!+sb-unicode + ("src/code/external-formats/ucs-2" :not-host) ;; The code here can't be compiled until CONDITION and ;; DEFINE-CONDITION are defined and SB!DEBUG:*STACK-TOP-HINT* is diff --git a/src/code/external-formats/ucs-2.lisp b/src/code/external-formats/ucs-2.lisp new file mode 100644 index 0000000..4375114 --- /dev/null +++ b/src/code/external-formats/ucs-2.lisp @@ -0,0 +1,218 @@ +(in-package #:sb!impl) + +;;; TODO Macro for generating different variants: +;;; :ucs-2le (little endian) sap-ref-16le +;;; :ucs-2be (big endian) sap-ref-16be +;;; :ucs-2 (native) sap-ref-16 + +;;; Utilities + +(declaim (inline sap-ref-16le (setf sap-ref-16le) + sap-ref-16be (setf sap-ref-16be))) + +;;; Define feature LITTLE-ENDIAN-AND-MISALIGNED-READ? +(defun sap-ref-16le (sap offset) + #!+(or x86 x86-64) + (sap-ref-16 sap offset) + #!-(or x86 x86-64) + (dpb (sap-ref-8 sap (1+ offset)) (byte 8 8) + (sap-ref-8 sap offset))) + +(defun (setf sap-ref-16le) (value sap offset) + #!+(or x86 x86-64) + (setf (sap-ref-16 sap offset) value) + #!-(or x86 x86-64) + (setf (sap-ref-8 sap offset) (logand #xFF value) + (sap-ref-8 sap (1+ offset)) (ldb (byte 8 8) value))) + +(defun sap-ref-16be (sap offset) + (dpb (sap-ref-8 sap offset) (byte 8 8) + (sap-ref-8 sap (1+ offset)))) + +(defun (setf sap-ref-16be) (value sap offset) + (setf (sap-ref-8 sap (1+ offset)) (logand #xFF value) + (sap-ref-8 sap offset) (ldb (byte 8 8) value))) + +;;; +;;; Define external format: fd-stream +;;; +(define-external-format/variable-width (:ucs-2le :ucs2le) nil + 2 + (if (< bits #x10000) + (setf (sap-ref-16le sap tail) bits) + (stream-encoding-error-and-handle stream bits)) + 2 + (code-char (sap-ref-16le sap head))) + +(define-external-format/variable-width (:ucs-2be :ucs2be) nil + 2 + (if (< bits #x10000) + (setf (sap-ref-16be sap tail) bits) + (stream-encoding-error-and-handle stream bits)) + 2 + (code-char (sap-ref-16be sap head))) + + +;;; +;;; octets +;;; + +;;; Conversion to UCS-2{LE,BE} +(declaim (inline char->ucs-2le)) +(defun char->ucs-2le (char dest string pos) + (declare (optimize speed (safety 0)) + (type (array (unsigned-byte 8) (*)) dest)) + (let ((code (char-code char))) + (if (< code #x10000) + (flet ((add-byte (b) + (declare (type (unsigned-byte 8) b)) + (vector-push b dest))) + (declare (inline add-byte)) + (add-byte (ldb (byte 8 0) code)) + (add-byte (ldb (byte 8 8) code))) + ; signal error + (encoding-error :ucs-2le string pos)))) + +(declaim (inline char->ucs-2be)) +(defun char->ucs-2be (char dest string pos) + (declare (optimize speed (safety 0)) + (type (array (unsigned-byte 8) (*)) dest)) + (let ((code (char-code char))) + (if (< code #x10000) + (flet ((add-byte (b) + (declare (type (unsigned-byte 8) b)) + (vector-push b dest))) + (declare (inline add-byte)) + (add-byte (ldb (byte 8 8) code)) + (add-byte (ldb (byte 8 0) code))) + ; signal error + (encoding-error :ucs-16be string pos)))) + +(defun string->ucs-2le (string sstart send additional-space) + (declare (optimize speed (safety 0)) + (type simple-string string) + (type array-range sstart send additional-space)) + (let ((array (make-array (* 2 (+ additional-space (- send sstart))) + :element-type '(unsigned-byte 8) + :fill-pointer 0))) + (loop for i from sstart below send + do (char->ucs-2le (char string i) array string i)) + (dotimes (i additional-space) + (vector-push 0 array) + (vector-push 0 array)) + (coerce array '(simple-array (unsigned-byte 8) (*))))) + +(defun string->ucs-2be (string sstart send additional-space) + (declare (optimize speed (safety 0)) + (type simple-string string) + (type array-range sstart send additional-space)) + (let ((array (make-array (* 2 (+ additional-space (- send sstart))) + :element-type '(unsigned-byte 8) + :fill-pointer 0))) + (loop for i from sstart below send + do (char->ucs-2be (char string i) array string i)) + (dotimes (i additional-space) + (vector-push 0 array) + (vector-push 0 array)) + (coerce array '(simple-array (unsigned-byte 8) (*))))) + +;; Conversion from UCS-2{LE,BE} +(defmacro define-bytes-per-ucs2-character (accessor type) + (declare (ignore type)) + (let ((name-le (make-od-name 'bytes-per-ucs-2le-character accessor)) + (name-be (make-od-name 'bytes-per-ucs-2be-character accessor))) + `(progn + (defun ,name-le (array pos end) + (declare (ignore array pos end)) + (values 2 nil)) + (defun ,name-be (array pos end) + (declare (ignore array pos end)) + (values 2 nil))))) +(instantiate-octets-definition define-bytes-per-ucs2-character) + +(defmacro define-simple-get-ucs2-character (accessor type) + (let ((name-le (make-od-name 'simple-get-ucs-2le-char accessor)) + (name-be (make-od-name 'simple-get-ucs-2be-char accessor))) + `(progn + (defun ,name-le (array pos bytes) + (declare (optimize speed (safety 0)) + (type ,type array) + (type array-range pos) + (type (integer 1 4) bytes) + (ignore bytes)) + ;; Optimization for SYSTEM-AREA-POINTER: use SAP-REF-16LE that + ;; reads two bytes at once on some architectures. + ,(if (and (eq accessor 'sap-ref-8) + (eq type 'system-area-pointer)) + '(code-char (sap-ref-16le array pos)) + `(flet ((cref (x) + (,accessor array (the array-range (+ pos x))))) + (declare (inline cref)) + (code-char (dpb (cref 1) (byte 8 8) + (cref 0)))))) + (defun ,name-be (array pos bytes) + (declare (optimize speed (safety 0)) + (type ,type array) + (type array-range pos) + (type (integer 1 4) bytes) + (ignore bytes)) + ;; Use SAP-REF-16BE even if it is not optimized + ,(if (and (eq accessor 'sap-ref-8) + (eq type 'system-area-pointer)) + '(code-char (sap-ref-16be array pos)) + `(flet ((cref (x) + (,accessor array (the array-range (+ pos x))))) + (declare (inline cref)) + (code-char (dpb (cref 0) (byte 8 8) + (cref 1))))))))) + +(instantiate-octets-definition define-simple-get-ucs2-character) + +(defmacro define-ucs-2->string (accessor type) + (let ((name-le (make-od-name 'ucs-2le->string accessor)) + (name-be (make-od-name 'ucs-2be->string accessor))) + `(progn + (defun ,name-le (array astart aend) + (declare (optimize speed (safety 0)) + (type ,type array) + (type array-range astart aend)) + (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character))) + (loop with pos = astart + while (< pos aend) + do (multiple-value-bind (bytes invalid) + (,(make-od-name 'bytes-per-ucs-2le-character accessor) array pos aend) + (declare (type (or null string) invalid)) + (assert (null invalid)) + (vector-push-extend + (,(make-od-name 'simple-get-ucs-2le-char accessor) + array pos bytes) + string) + (incf pos bytes))) + string)) + (defun ,name-be (array astart aend) + (declare (optimize speed (safety 0)) + (type ,type array) + (type array-range astart aend)) + (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character))) + (loop with pos = astart + while (< pos aend) + do (multiple-value-bind (bytes invalid) + (,(make-od-name 'bytes-per-ucs-2be-character accessor) array pos aend) + (declare (type (or null string) invalid)) + (assert (null invalid)) + (vector-push-extend + (,(make-od-name 'simple-get-ucs-2be-char accessor) + array pos bytes) + string) + (incf pos bytes))) + string))))) + +(instantiate-octets-definition define-ucs-2->string) + +(pushnew '((:ucs-2le :ucs2le) + ucs-2le->string-aref string->ucs-2le) + *external-format-functions*) + +(pushnew '((:ucs-2be :ucs2be) + ucs-2be->string-aref string->ucs-2be) + *external-format-functions*) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index faab631..ef0c2c4 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -325,6 +325,7 @@ (intern (format nil name-fmt (string (car buffering)))))) `(progn (defun ,function (stream byte) + (declare (ignorable byte)) (output-wrapper/variable-width (stream ,size ,buffering ,restart) ,@body)) (setf *output-routines* @@ -752,6 +753,7 @@ ,stream-var) (fd-stream-ibuf-head ,stream-var)))) + (declare (ignorable byte)) (setq size ,bytes) (input-at-least ,stream-var size) (setq ,element-var (locally ,@read-forms)) @@ -1144,6 +1146,7 @@ (size-function (symbolicate "BYTES-FOR-CHAR/" name))) `(progn (defun ,size-function (byte) + (declare (ignorable byte)) ,out-size-expr) (defun ,out-function (stream string flush-p start end) (let ((start (or start 0)) @@ -1228,6 +1231,7 @@ (setf decode-break-reason (block decode-break-reason (let ((byte (sap-ref-8 sap head))) + (declare (ignorable byte)) (setq size ,in-size-expr) (when (> size (- tail head)) (return)) @@ -1272,6 +1276,7 @@ ,in-size-expr sap head) (let ((byte (sap-ref-8 sap head))) + (declare (ignorable byte)) ,in-expr)) (defun ,resync-function (stream) (loop (input-at-least stream 2) @@ -1281,6 +1286,7 @@ (head (fd-stream-ibuf-head stream)) (byte (sap-ref-8 sap head)) (size ,in-size-expr)) + (declare (ignorable byte)) (input-at-least stream size) (let ((sap (fd-stream-ibuf-sap stream)) (head (fd-stream-ibuf-head stream))) diff --git a/version.lisp-expr b/version.lisp-expr index ef51ec4..9664cb3 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.14.7" +"0.9.14.8"