(in-package "SB!IMPL")
-(defun make-multibyte-mapper (list)
+(defmacro define-multibyte-mapper (name list)
(let ((list (sort (copy-list list) #'< :key #'car))
(hi (loop for x in list maximize (max (car x) (cadr x)))))
- (make-array (list (length list) 2)
- :element-type (list 'integer 0 hi)
- :initial-contents list)))
-
-(defmacro define-multibyte-mapper (name list)
- `(defparameter ,name
- (make-multibyte-mapper ,list)))
+ `(defparameter ,name
+ (make-array '(,(length list) 2)
+ :element-type '(integer 0 ,hi)
+ :initial-contents ',list))))
(defun get-multibyte-mapper (table code)
(declare (optimize speed (safety 0))
;; two-byte sequence `"initial (length 3)"
;; "non-continuation"' -- `#xef #x32')
;; signal only part of that sequence as
- ;; erronous.
+ ;; erroneous.
(loop for i from 1 below (min len remaining-bytes)
always (valid-secondary-p i))
(enough-bytes-left-p len))))
(declare (type (or null string) invalid))
(cond
((null invalid)
- (vector-push-extend (,simple-get-mb-char array pos bytes) string))
+ (let ((thing (,simple-get-mb-char array pos bytes)))
+ (typecase thing
+ (character (vector-push-extend thing string))
+ (string
+ (dotimes (i (length thing))
+ (vector-push-extend (char thing i) string))))))
(t
(dotimes (i (length invalid))
(vector-push-extend (char invalid i) string))))
(define-mb->string
(make-od-name-list 'define format '>string)))
`(progn
- ;; for fd-stream.lisp
- (define-external-format/variable-width ,aliases t
- (mb-char-len (or (,ucs-to-mb (char-code byte)) -1))
- (let ((mb (,ucs-to-mb bits)))
- (if (null mb)
- (external-format-encoding-error stream byte)
- (ecase size
- (1 (setf (sap-ref-8 sap tail) mb))
- (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb)
- (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb)))
- (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb)
- (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb)
- (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb))))))
- (,mb-len byte)
- (let* ((mb (ecase size
- (1 byte)
- (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
- (unless (,mb-continuation-byte-p byte2)
- (return-from decode-break-reason 2))
- (dpb byte (byte 8 8) byte2)))
- (3 (let ((byte2 (sap-ref-8 sap (1+ head)))
- (byte3 (sap-ref-8 sap (+ 2 head))))
- (unless (,mb-continuation-byte-p byte2)
- (return-from decode-break-reason 2))
- (unless (,mb-continuation-byte-p byte3)
- (return-from decode-break-reason 3))
- (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3))))))
- (ucs (,mb-to-ucs mb)))
- (if (null ucs)
- (return-from decode-break-reason 1)
- (code-char ucs))))
-
;; for octets.lisp
(define-condition ,(make-od-name 'malformed format)
(octet-decoding-error) ())
(instantiate-octets-definition ,define-mb->string)
- (push '(,aliases
- ,(make-od-name format '>string-aref) ,string->mb)
- *external-format-functions*)
- )))
+ ;; for fd-stream.lisp
+ (define-external-format/variable-width ,aliases t
+ ;; KLUDGE: it so happens that at present (2009-10-22) none of
+ ;; the external formats defined with
+ ;; define-multibyte-encoding can encode the unicode
+ ;; replacement character, so we hardcode the preferred
+ ;; replacement here.
+ #\?
+ (block size
+ (mb-char-len (or (,ucs-to-mb (char-code byte))
+ (return-from size 0))))
+ (let ((mb (,ucs-to-mb bits)))
+ (if (null mb)
+ (external-format-encoding-error stream byte)
+ (ecase size
+ (1 (setf (sap-ref-8 sap tail) mb))
+ (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb)
+ (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb)))
+ (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb)
+ (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb)
+ (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb))))))
+ (1 (,mb-len byte))
+ (let* ((mb (ecase size
+ (1 byte)
+ (2 (let ((byte2 (sap-ref-8 sap (1+ head))))
+ (unless (,mb-continuation-byte-p byte2)
+ (return-from decode-break-reason 2))
+ (dpb byte (byte 8 8) byte2)))
+ (3 (let ((byte2 (sap-ref-8 sap (1+ head)))
+ (byte3 (sap-ref-8 sap (+ 2 head))))
+ (unless (,mb-continuation-byte-p byte2)
+ (return-from decode-break-reason 2))
+ (unless (,mb-continuation-byte-p byte3)
+ (return-from decode-break-reason 3))
+ (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3))))))
+ (ucs (,mb-to-ucs mb)))
+ (if (null ucs)
+ (return-from decode-break-reason 1)
+ (code-char ucs)))
+ ,(make-od-name format '>string-aref)
+ ,string->mb))))