X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fcode%2Fexternal-formats%2Fmb-util.lisp;h=5569a9fcac02fb3585296547644475f9d431e083;hb=0c3bbfaa2286626a2d915c8810f690aefc702661;hp=251b8d74de2c8e9582e3ddb96d534c1518dc888e;hpb=7effaab5d43dd5423938b00854848e01eb3a67c8;p=sbcl.git diff --git a/src/code/external-formats/mb-util.lisp b/src/code/external-formats/mb-util.lisp index 251b8d7..5569a9f 100644 --- a/src/code/external-formats/mb-util.lisp +++ b/src/code/external-formats/mb-util.lisp @@ -1,15 +1,12 @@ (in-package "SB!IMPL") -(defun make-multibyte-mapper (list) +(defmacro define-multibyte-mapper (name list) (let ((list (sort (copy-list list) #'< :key #'car)) (hi (loop for x in list maximize (max (car x) (cadr x))))) - (make-array (list (length list) 2) - :element-type (list 'integer 0 hi) - :initial-contents list))) - -(defmacro define-multibyte-mapper (name list) - `(defparameter ,name - (make-multibyte-mapper ,list))) + `(defparameter ,name + (make-array '(,(length list) 2) + :element-type '(integer 0 ,hi) + :initial-contents ',list)))) (defun get-multibyte-mapper (table code) (declare (optimize speed (safety 0)) @@ -81,7 +78,7 @@ ;; two-byte sequence `"initial (length 3)" ;; "non-continuation"' -- `#xef #x32') ;; signal only part of that sequence as - ;; erronous. + ;; erroneous. (loop for i from 1 below (min len remaining-bytes) always (valid-secondary-p i)) (enough-bytes-left-p len)))) @@ -153,7 +150,12 @@ (declare (type (or null string) invalid)) (cond ((null invalid) - (vector-push-extend (,simple-get-mb-char array pos bytes) string)) + (let ((thing (,simple-get-mb-char array pos bytes))) + (typecase thing + (character (vector-push-extend thing string)) + (string + (dotimes (i (length thing)) + (vector-push-extend (char thing i) string)))))) (t (dotimes (i (length invalid)) (vector-push-extend (char invalid i) string)))) @@ -183,38 +185,6 @@ (define-mb->string (make-od-name-list 'define format '>string))) `(progn - ;; for fd-stream.lisp - (define-external-format/variable-width ,aliases t - (mb-char-len (or (,ucs-to-mb (char-code byte)) -1)) - (let ((mb (,ucs-to-mb bits))) - (if (null mb) - (external-format-encoding-error stream byte) - (ecase size - (1 (setf (sap-ref-8 sap tail) mb)) - (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb) - (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb))) - (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb) - (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb) - (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb)))))) - (,mb-len byte) - (let* ((mb (ecase size - (1 byte) - (2 (let ((byte2 (sap-ref-8 sap (1+ head)))) - (unless (,mb-continuation-byte-p byte2) - (return-from decode-break-reason 2)) - (dpb byte (byte 8 8) byte2))) - (3 (let ((byte2 (sap-ref-8 sap (1+ head))) - (byte3 (sap-ref-8 sap (+ 2 head)))) - (unless (,mb-continuation-byte-p byte2) - (return-from decode-break-reason 2)) - (unless (,mb-continuation-byte-p byte3) - (return-from decode-break-reason 3)) - (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3)))))) - (ucs (,mb-to-ucs mb))) - (if (null ucs) - (return-from decode-break-reason 1) - (code-char ucs)))) - ;; for octets.lisp (define-condition ,(make-od-name 'malformed format) (octet-decoding-error) ()) @@ -276,7 +246,44 @@ (instantiate-octets-definition ,define-mb->string) - (add-external-format-funs ',aliases - '(,(make-od-name format '>string-aref) - ,string->mb)) - ))) + ;; for fd-stream.lisp + (define-external-format/variable-width ,aliases t + ;; KLUDGE: it so happens that at present (2009-10-22) none of + ;; the external formats defined with + ;; define-multibyte-encoding can encode the unicode + ;; replacement character, so we hardcode the preferred + ;; replacement here. + #\? + (block size + (mb-char-len (or (,ucs-to-mb (char-code byte)) + (return-from size 0)))) + (let ((mb (,ucs-to-mb bits))) + (if (null mb) + (external-format-encoding-error stream byte) + (ecase size + (1 (setf (sap-ref-8 sap tail) mb)) + (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb) + (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb))) + (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb) + (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb) + (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb)))))) + (1 (,mb-len byte)) + (let* ((mb (ecase size + (1 byte) + (2 (let ((byte2 (sap-ref-8 sap (1+ head)))) + (unless (,mb-continuation-byte-p byte2) + (return-from decode-break-reason 2)) + (dpb byte (byte 8 8) byte2))) + (3 (let ((byte2 (sap-ref-8 sap (1+ head))) + (byte3 (sap-ref-8 sap (+ 2 head)))) + (unless (,mb-continuation-byte-p byte2) + (return-from decode-break-reason 2)) + (unless (,mb-continuation-byte-p byte3) + (return-from decode-break-reason 3)) + (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3)))))) + (ucs (,mb-to-ucs mb))) + (if (null ucs) + (return-from decode-break-reason 1) + (code-char ucs))) + ,(make-od-name format '>string-aref) + ,string->mb))))