--- /dev/null
+;;;; Unicode Transformation Format (UTF) encodings
+;;;;
+;;;; In our interpretation, these are distinct from UCS encodings: the
+;;;; UCS encodings are a direct encoding of the code point, in 16- and
+;;;; 32-bit variants; by contrast, the UTF encodings handle Unicode
+;;;; surrogate code points specially.
+
+;;;; This software is part of the SBCL system. See the README file for
+;;;; more information.
+;;;;
+;;;; This software is derived from the CMU CL system, which was
+;;;; written at Carnegie Mellon University and released into the
+;;;; public domain. The software is in the public domain and is
+;;;; provided with absolutely no warranty. See the COPYING and CREDITS
+;;;; files for more information.
+
+(in-package "SB!IMPL")
+
+\f
+(declaim (inline utf-noncharacter-code-p))
+(defun utf-noncharacter-code-p (code)
+ (or (<= #xd800 code #xdfff)
+ (<= #xfdd0 code #xfdef)
+ (= (logand code #xfffe) #xfffe)))
+
+;;; Conversion to UTF-16{LE,BE}
+(declaim (inline char->utf-16le))
+(defun char->utf-16le (char dest string pos)
+ (declare (optimize speed (safety 0))
+ (type (array (unsigned-byte 8) (*)) dest))
+ (let ((code (char-code char)))
+ (if (utf-noncharacter-code-p code)
+ (let ((replacement (encoding-error :utf-16le string pos)))
+ (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
+ (dotimes (i (length replacement))
+ (vector-push-extend (aref replacement i) dest)))
+ (flet ((add-byte (b)
+ (declare (type (unsigned-byte 8) b))
+ (vector-push-extend b dest)))
+ (declare (inline add-byte))
+ (cond
+ ((< code #x10000)
+ (add-byte (ldb (byte 8 0) code))
+ (add-byte (ldb (byte 8 8) code)))
+ (t
+ (let* ((codeoid (- code #x10000))
+ (high (dpb (ldb (byte 10 10) codeoid) (byte 10 0) #xd800))
+ (low (dpb (ldb (byte 10 0) codeoid) (byte 10 0) #xdc00)))
+ (add-byte (ldb (byte 8 0) high))
+ (add-byte (ldb (byte 8 8) high))
+ (add-byte (ldb (byte 8 0) low))
+ (add-byte (ldb (byte 8 8) low)))))))))
+
+(declaim (inline char->utf-16be))
+(defun char->utf-16be (char dest string pos)
+ (declare (optimize speed (safety 0))
+ (type (array (unsigned-byte 8) (*)) dest))
+ (let ((code (char-code char)))
+ (if (utf-noncharacter-code-p code)
+ (let ((replacement (encoding-error :utf-16be string pos)))
+ (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
+ (dotimes (i (length replacement))
+ (vector-push-extend (aref replacement i) dest)))
+ (flet ((add-byte (b)
+ (declare (type (unsigned-byte 8) b))
+ (vector-push-extend b dest)))
+ (declare (inline add-byte))
+ (cond
+ ((< code #x10000)
+ (add-byte (ldb (byte 8 8) code))
+ (add-byte (ldb (byte 8 0) code)))
+ (t
+ (let* ((codeoid (- code #x10000))
+ (high (dpb (ldb (byte 10 10) codeoid) (byte 10 0) #xd800))
+ (low (dpb (ldb (byte 10 0) codeoid) (byte 10 0) #xdc00)))
+ (add-byte (ldb (byte 8 8) high))
+ (add-byte (ldb (byte 8 0) high))
+ (add-byte (ldb (byte 8 8) low))
+ (add-byte (ldb (byte 8 0) low)))))))))
+
+(defun string->utf-16le (string sstart send additional-space)
+ (declare (optimize speed (safety 0))
+ (type simple-string string)
+ (type array-range sstart send additional-space))
+ (let ((array (make-array (* 2 (+ additional-space (- send sstart)))
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0 :adjustable t)))
+ (loop for i from sstart below send
+ do (char->utf-16le (char string i) array string i))
+ (dotimes (i (* 2 additional-space))
+ (vector-push-extend 0 array))
+ (coerce array '(simple-array (unsigned-byte 8) (*)))))
+
+(defun string->utf-16be (string sstart send additional-space)
+ (declare (optimize speed (safety 0))
+ (type simple-string string)
+ (type array-range sstart send additional-space))
+ (let ((array (make-array (* 2 (+ additional-space (- send sstart)))
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0 :adjustable t)))
+ (loop for i from sstart below send
+ do (char->utf-16be (char string i) array string i))
+ (dotimes (i (* 2 additional-space))
+ (vector-push-extend 0 array))
+ (coerce array '(simple-array (unsigned-byte 8) (*)))))
+
+;; Conversion from UTF-16{LE,BE}
+(defmacro define-bytes-per-utf16-character (accessor type)
+ (declare (ignore type))
+ (let ((name-le (make-od-name 'bytes-per-utf-16le-character accessor))
+ (name-be (make-od-name 'bytes-per-utf-16be-character accessor)))
+ `(progn
+ (defun ,name-le (array pos end)
+ (let ((remaining (- end pos)))
+ (when (< remaining 2)
+ (return-from ,name-le (values remaining (decoding-error array pos end :utf-16le 'octet-decoding-error pos))))
+ (let ((low (dpb (,accessor array (+ pos 1)) (byte 8 8) (,accessor array pos))))
+ (if (<= #xd800 low #xdbff)
+ (if (< remaining 4)
+ (values remaining (decoding-error array pos end :utf-16le 'octet-decoding-error pos))
+ (let ((high (dpb (,accessor array (+ pos 3)) (byte 8 8) (,accessor array (+ pos 2)))))
+ (if (<= #xdc00 high #xdfff)
+ (let ((code (dpb (ldb (byte 10 0) low) (byte 10 10) (ldb (byte 10 0) high))))
+ (if (= (logand code #xfffe) #xfffe)
+ (values 4 (decoding-error array pos (+ pos 4) :utf-16le 'octet-decoding-error pos))
+ (values 4 nil)))
+ (values 2 (decoding-error array pos (+ pos 2) :utf-16le 'octet-decoding-error pos)))))
+ (if (or (<= #xdc00 low #xdfff)
+ (<= #xfdd0 low #xfdef)
+ (= (logand low #xfffe) #xfffe))
+ (values 2 (decoding-error array pos (+ pos 2) :utf-16le 'octet-decoding-error pos))
+ (values 2 nil))))))
+ (defun ,name-be (array pos end)
+ (let ((remaining (- end pos)))
+ (when (< remaining 2)
+ (return-from ,name-be (values remaining (decoding-error array pos end :utf-16le 'octet-decoding-error pos))))
+ (let ((low (dpb (,accessor array pos) (byte 8 8) (,accessor array (+ pos 1)))))
+ (if (<= #xd800 low #xdbff)
+ (if (< remaining 4)
+ (values remaining (decoding-error array pos end :utf-16le 'octet-decoding-error pos))
+ (let ((high (dpb (,accessor array (+ pos 2)) (byte 8 8) (,accessor array (+ pos 3)))))
+ (if (<= #xdc00 high #xdfff)
+ (let ((code (dpb (ldb (byte 10 0) low) (byte 10 10) (ldb (byte 10 0) high))))
+ (if (= (logand code #xfffe) #xfffe)
+ (values 4 (decoding-error array pos (+ pos 4) :utf-16le 'octet-decoding-error pos))
+ (values 4 nil)))
+ (values 2 (decoding-error array pos (+ pos 2) :utf-16le 'octet-decoding-error pos)))))
+ (if (or (<= #xdc00 low #xdfff)
+ (<= #xfdd0 low #xfdef)
+ (= (logand low #xfffe) #xfffe))
+ (values 2 (decoding-error array pos (+ pos 2) :utf-16le 'octet-decoding-error pos))
+ (values 2 nil)))))))))
+(instantiate-octets-definition define-bytes-per-utf16-character)
+
+(defmacro define-simple-get-utf16-character (accessor type)
+ (let ((name-le (make-od-name 'simple-get-utf-16le-char accessor))
+ (name-be (make-od-name 'simple-get-utf-16be-char accessor)))
+ `(progn
+ (defun ,name-le (array pos bytes)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range pos)
+ (type (integer 1 4) bytes)
+ (ignore bytes))
+ ;; Optimization for SYSTEM-AREA-POINTER: use SAP-REF-16LE that
+ ;; reads two bytes at once on some architectures.
+ (let ((code ,(if (and (eq accessor 'sap-ref-8)
+ (eq type 'system-area-pointer))
+ '(sap-ref-16le array pos)
+ `(flet ((cref (x)
+ (,accessor array (the array-range (+ pos x)))))
+ (declare (inline cref))
+ (dpb (cref 1) (byte 8 8) (cref 0))))))
+ (if (<= #xd800 code #xdbff)
+ (let ((next ,(if (and (eq accessor 'sap-ref-8)
+ (eq type 'system-area-pointer))
+ '(sap-ref-16le array (+ pos 2))
+ `(flet ((cref (x)
+ (,accessor array (the array-range (+ pos x)))))
+ (declare (inline cref))
+ (dpb (cref 3) (byte 8 8) (cref 2))))))
+ (code-char (+ #x10000 (dpb (ldb (byte 10 0) code) (byte 10 10) (ldb (byte 10 0) next)))))
+ (code-char code))))
+ (defun ,name-be (array pos bytes)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range pos)
+ (type (integer 1 4) bytes)
+ (ignore bytes))
+ ;; Use SAP-REF-16BE even if it is not optimized
+ (let ((code ,(if (and (eq accessor 'sap-ref-8)
+ (eq type 'system-area-pointer))
+ '(sap-ref-16be array pos)
+ `(flet ((cref (x)
+ (,accessor array (the array-range (+ pos x)))))
+ (declare (inline cref))
+ (dpb (cref 0) (byte 8 8) (cref 1))))))
+ (if (<= #xd800 code #xdbff)
+ (let ((next ,(if (and (eq accessor 'sap-ref-8)
+ (eq type 'system-area-pointer))
+ '(sap-ref-16be array (+ pos 2))
+ `(flet ((cref (x)
+ (,accessor array (the array-range (+ pos x)))))
+ (declare (inline cref))
+ (dpb (cref 2) (byte 8 8) (cref 3))))))
+ (code-char (+ #x10000 (dpb (ldb (byte 10 0) code) (byte 10 10) (ldb (byte 10 0) next)))))
+ (code-char code)))))))
+
+(instantiate-octets-definition define-simple-get-utf16-character)
+
+(defmacro define-utf-16->string (accessor type)
+ (let ((name-le (make-od-name 'utf-16le->string accessor))
+ (name-be (make-od-name 'utf-16be->string accessor)))
+ `(progn
+ (defun ,name-le (array astart aend)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range astart aend))
+ (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
+ (loop with pos = astart
+ while (< pos aend)
+ do (multiple-value-bind (bytes invalid)
+ (,(make-od-name 'bytes-per-utf-16le-character accessor) array pos aend)
+ (declare (type (or null string) invalid))
+ (cond
+ ((null invalid)
+ (vector-push-extend
+ (,(make-od-name 'simple-get-utf-16le-char accessor)
+ array pos bytes)
+ string))
+ (t (dotimes (i (length invalid))
+ (vector-push-extend (char invalid i) string))))
+ (incf pos bytes)))
+ string))
+ (defun ,name-be (array astart aend)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range astart aend))
+ (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
+ (loop with pos = astart
+ while (< pos aend)
+ do (multiple-value-bind (bytes invalid)
+ (,(make-od-name 'bytes-per-utf-16be-character accessor) array pos aend)
+ (declare (type (or null string) invalid))
+ (cond
+ ((null invalid)
+ (vector-push-extend
+ (,(make-od-name 'simple-get-utf-16be-char accessor)
+ array pos bytes)
+ string))
+ (t (dotimes (i (length invalid))
+ (vector-push-extend (char invalid i) string))))
+ (incf pos bytes)))
+ string)))))
+
+(instantiate-octets-definition define-utf-16->string)
+
+(define-external-format/variable-width (:utf-16le :utf16le) t
+ (code-char #xfffd)
+ (let ((bits (char-code byte)))
+ (if (< bits #x10000) 2 4))
+ (cond
+ ((< bits #x10000)
+ (if (utf-noncharacter-code-p bits)
+ (external-format-encoding-error stream bits)
+ (setf (sap-ref-16le sap tail) bits)))
+ (t (if (= (logand bits #xfffe) #xfffe)
+ (external-format-encoding-error stream bits)
+ (let* ((new-bits (- bits #x10000))
+ (high (ldb (byte 10 10) new-bits))
+ (low (ldb (byte 10 0) new-bits)))
+ (setf (sap-ref-16le sap tail) (dpb high (byte 10 0) #xd800))
+ (setf (sap-ref-16le sap (+ tail 2)) (dpb low (byte 10 0) #xdc00))))))
+ (2 (if (<= #xd800 (sap-ref-16le sap head) #xdbff) 4 2))
+ (let ((bits (sap-ref-16le sap head)))
+ (cond
+ ((or (<= #xdc00 bits #xdfff)
+ (<= #xfdd0 bits #xfdef)
+ (= (logand bits #xfffe) #xfffe))
+ (return-from decode-break-reason 2))
+ ((<= #xd800 bits #xdbff)
+ (let ((next (sap-ref-16le sap (+ head 2))))
+ (unless (<= #xdc00 next #xdfff)
+ (return-from decode-break-reason 2))
+ (let ((code (dpb (ldb (byte 10 0) bits) (byte 10 10) (ldb (byte 10 0) next))))
+ (if (= (logand code #xfffe) #xfffe)
+ (return-from decode-break-reason 4)
+ (code-char (+ #x10000 code))))))
+ (t (code-char bits))))
+ utf-16le->string-aref
+ string->utf-16le)
+
+(define-external-format/variable-width (:utf-16be :utf16be) t
+ (code-char #xfffd)
+ (let ((bits (char-code byte)))
+ (if (< bits #x10000) 2 4))
+ (cond
+ ((< bits #x10000)
+ (if (utf-noncharacter-code-p bits)
+ (external-format-encoding-error stream bits)
+ (setf (sap-ref-16be sap tail) bits)))
+ (t (if (= (logand bits #xfffe) #xfffe)
+ (external-format-encoding-error stream bits)
+ (let* ((new-bits (- bits #x10000))
+ (high (ldb (byte 10 10) new-bits))
+ (low (ldb (byte 10 0) new-bits)))
+ (setf (sap-ref-16be sap tail) (dpb high (byte 10 0) #xd800))
+ (setf (sap-ref-16be sap (+ tail 2)) (dpb low (byte 10 0) #xdc00))))))
+ (2 (if (<= #xd800 (sap-ref-16be sap head) #xdbff) 4 2))
+ (let ((bits (sap-ref-16be sap head)))
+ (cond
+ ((or (<= #xdc00 bits #xdfff)
+ (<= #xfdd0 bits #xfdef)
+ (= (logand bits #xfffe) #xfffe))
+ (return-from decode-break-reason 2))
+ ((<= #xd800 bits #xdbff)
+ (let ((next (sap-ref-16be sap (+ head 2))))
+ (unless (<= #xdc00 next #xdfff)
+ (return-from decode-break-reason 2))
+ (let ((code (dpb (ldb (byte 10 0) bits) (byte 10 10) (ldb (byte 10 0) next))))
+ (if (= (logand code #xfffe) #xfffe)
+ (return-from decode-break-reason 4)
+ (code-char (+ #x10000 code))))))
+ (t (code-char bits))))
+ utf-16be->string-aref
+ string->utf-16be)
+\f
+(declaim (inline char->utf-32le))
+(defun char->utf-32le (char dest string pos)
+ (declare (optimize speed (safety 0))
+ (type (array (unsigned-byte 8) (*)) dest))
+ (let ((code (char-code char)))
+ (if (utf-noncharacter-code-p code)
+ (let ((replacement (encoding-error :utf-32le string pos)))
+ (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
+ (dotimes (i (length replacement))
+ (vector-push-extend (aref replacement i) dest)))
+ (flet ((add-byte (b)
+ (declare (type (unsigned-byte 8) b))
+ (vector-push-extend b dest)))
+ (declare (inline add-byte))
+ (add-byte (ldb (byte 8 0) code))
+ (add-byte (ldb (byte 8 8) code))
+ (add-byte (ldb (byte 8 16) code))
+ (add-byte (ldb (byte 8 24) code))))))
+
+(declaim (inline char->utf-32be))
+(defun char->utf-32be (char dest string pos)
+ (declare (optimize speed (safety 0))
+ (type (array (unsigned-byte 8) (*)) dest))
+ (let ((code (char-code char)))
+ (if (utf-noncharacter-code-p code)
+ (let ((replacement (encoding-error :utf-32be string pos)))
+ (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
+ (dotimes (i (length replacement))
+ (vector-push-extend (aref replacement i) dest)))
+ (flet ((add-byte (b)
+ (declare (type (unsigned-byte 8) b))
+ (vector-push-extend b dest)))
+ (declare (inline add-byte))
+ (add-byte (ldb (byte 8 24) code))
+ (add-byte (ldb (byte 8 16) code))
+ (add-byte (ldb (byte 8 8) code))
+ (add-byte (ldb (byte 8 0) code))))))
+
+(defun string->utf-32le (string sstart send additional-space)
+ (declare (optimize speed (safety 0))
+ (type simple-string string)
+ (type array-range sstart send additional-space))
+ (let ((array (make-array (* 4 (+ additional-space (- send sstart)))
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0 :adjustable t)))
+ (loop for i from sstart below send
+ do (char->utf-32le (char string i) array string i))
+ (dotimes (i (* 4 additional-space))
+ (vector-push-extend 0 array))
+ (coerce array '(simple-array (unsigned-byte 8) (*)))))
+
+(defun string->utf-32be (string sstart send additional-space)
+ (declare (optimize speed (safety 0))
+ (type simple-string string)
+ (type array-range sstart send additional-space))
+ (let ((array (make-array (* 4 (+ additional-space (- send sstart)))
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0 :adjustable t)))
+ (loop for i from sstart below send
+ do (char->utf-32be (char string i) array string i))
+ (dotimes (i (* 4 additional-space))
+ (vector-push-extend 0 array))
+ (coerce array '(simple-array (unsigned-byte 8) (*)))))
+
+;; Conversion from UTF-32{LE,BE}
+(defmacro define-bytes-per-utf32-character (accessor type)
+ (declare (ignore type))
+ (let ((name-le (make-od-name 'bytes-per-utf-32le-character accessor))
+ (name-be (make-od-name 'bytes-per-utf-32be-character accessor)))
+ `(progn
+ (defun ,name-le (array pos end)
+ (declare (ignore array pos end))
+ (values 4 nil))
+ (defun ,name-be (array pos end)
+ (declare (ignore array pos end))
+ (values 4 nil)))))
+(instantiate-octets-definition define-bytes-per-utf32-character)
+
+(defmacro define-simple-get-utf32-character (accessor type)
+ (let ((name-le (make-od-name 'simple-get-utf-32le-char accessor))
+ (name-be (make-od-name 'simple-get-utf-32be-char accessor)))
+ `(progn
+ (defun ,name-le (array pos bytes)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range pos)
+ (type (integer 1 4) bytes))
+ ;; Optimization for SYSTEM-AREA-POINTER: use SAP-REF-32LE that
+ ;; reads four bytes at once on some architectures.
+ (let ((code ,(if (and (eq accessor 'sap-ref-8)
+ (eq type 'system-area-pointer))
+ '(sap-ref-32le array pos)
+ `(flet ((cref (x)
+ (,accessor array (the array-range (+ pos x)))))
+ (declare (inline cref))
+ (dpb (cref 3) (byte 8 24)
+ (dpb (cref 2) (byte 8 16)
+ (dpb (cref 1) (byte 8 8) (cref 0))))))))
+ (if (and (< code char-code-limit)
+ (not (utf-noncharacter-code-p code)))
+ (code-char code)
+ (decoding-error array pos (+ pos bytes) :utf-32le
+ 'octet-decoding-error pos))))
+ (defun ,name-be (array pos bytes)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range pos)
+ (type (integer 1 4) bytes))
+ ;; Use SAP-REF-32BE even if it is not optimized
+ (let ((code ,(if (and (eq accessor 'sap-ref-8)
+ (eq type 'system-area-pointer))
+ '(sap-ref-32be array pos)
+ `(flet ((cref (x)
+ (,accessor array (the array-range (+ pos x)))))
+ (declare (inline cref))
+ (dpb (cref 0) (byte 8 24)
+ (dpb (cref 1) (byte 8 16)
+ (dpb (cref 2) (byte 8 8) (cref 3))))))))
+ (if (and (< code char-code-limit)
+ (not (utf-noncharacter-code-p code)))
+ (code-char code)
+ (decoding-error array pos (+ pos bytes) :utf-32be
+ 'octet-decoding-error pos)))))))
+
+(instantiate-octets-definition define-simple-get-utf32-character)
+
+(defmacro define-utf-32->string (accessor type)
+ (let ((name-le (make-od-name 'utf-32le->string accessor))
+ (name-be (make-od-name 'utf-32be->string accessor)))
+ `(progn
+ (defun ,name-le (array astart aend)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range astart aend))
+ (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
+ (loop with pos = astart
+ while (< pos aend)
+ do (multiple-value-bind (bytes invalid)
+ (,(make-od-name 'bytes-per-utf-32le-character accessor) array pos aend)
+ (declare (type (or null string) invalid))
+ (aver (null invalid))
+ (let ((thing (,(make-od-name 'simple-get-utf-32le-char accessor) array pos bytes)))
+ (typecase thing
+ (character (vector-push-extend thing string))
+ (string (dotimes (i (length thing))
+ (vector-push-extend (char thing i) string)))))
+ (incf pos bytes)))
+ string))
+ (defun ,name-be (array astart aend)
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range astart aend))
+ (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
+ (loop with pos = astart
+ while (< pos aend)
+ do (multiple-value-bind (bytes invalid)
+ (,(make-od-name 'bytes-per-utf-32be-character accessor) array pos aend)
+ (declare (type (or null string) invalid))
+ (aver (null invalid))
+ (let ((thing (,(make-od-name 'simple-get-utf-32be-char accessor) array pos bytes)))
+ (typecase thing
+ (character (vector-push-extend thing string))
+ (string (dotimes (i (length thing))
+ (vector-push-extend (char thing i) string)))))
+ (incf pos bytes)))
+ string)))))
+
+(instantiate-octets-definition define-utf-32->string)
+
+(define-external-format/variable-width (:utf-32le :utf32le) t
+ (code-char #xfffd)
+ 4
+ (if (utf-noncharacter-code-p bits)
+ (external-format-encoding-error stream bits)
+ (setf (sap-ref-32le sap tail) bits))
+ 4
+ (let ((code (sap-ref-32le sap head)))
+ (if (and (< code char-code-limit)
+ (not (utf-noncharacter-code-p code)))
+ (code-char code)
+ (return-from decode-break-reason 4)))
+ utf-32le->string-aref
+ string->utf-32le)
+
+(define-external-format/variable-width (:utf-32be :utf32be) t
+ (code-char #xfffd)
+ 4
+ (if (utf-noncharacter-code-p bits)
+ (external-format-encoding-error stream bits)
+ (setf (sap-ref-32be sap tail) bits))
+ 4
+ (let ((code (sap-ref-32be sap head)))
+ (if (and (< code char-code-limit)
+ (not (utf-noncharacter-code-p code)))
+ (code-char code)
+ (return-from decode-break-reason 4)))
+ utf-32be->string-aref
+ string->utf-32be)