until (zerop (sap-ref-8 sap offset))
finally (return offset))))
(let ((result (make-string length :element-type 'base-char)))
- (sb!kernel:copy-from-system-area sap 0
- result (* sb!vm:vector-data-offset
- sb!vm:n-word-bits)
- (* length sb!vm:n-byte-bits))
+ (sb!kernel:copy-ub8-from-system-area sap 0 result 0 length)
result))))
(defun %naturalize-utf8-string (sap)
(declare (type system-area-pointer sap))
(locally
(declare (optimize (speed 3) (safety 0)))
- (let ((length (do* ((offset 0)
- (byte (sap-ref-8 sap offset) (sap-ref-8 sap offset))
- (index 0 (1+ index)))
- ((zerop byte) index)
- (declare (type fixnum offset index))
- (cond
- ;; FIXME: Here, and below, we don't defend
- ;; against malformed utf-8 with any degree of
- ;; rigour.
- ((< byte #x80) (incf offset))
- ((< byte #xe0) (incf offset 2))
- ((< byte #xf0) (incf offset 3))
- (t (incf offset 4))))))
- (let ((result (make-string length :element-type 'character)))
- (do* ((offset 0)
- (byte (sap-ref-8 sap offset) (sap-ref-8 sap offset))
- (index 0 (1+ index)))
- ((>= index length) result)
- (declare (type fixnum offset index))
- (setf (char result index)
- (cond
- ((< byte #x80)
- (prog1 (code-char byte) (incf offset)))
- ((< byte #xe0)
- (prog1 (code-char (dpb byte (byte 5 6)
- (sap-ref-8 sap (1+ offset))))
- (incf offset 2)))
- ((< byte #xf0)
- (prog1 (code-char
- (dpb byte (byte 4 12)
- (dpb (sap-ref-8 sap (1+ offset)) (byte 6 6)
- (sap-ref-8 sap (+ 2 offset)))))
- (incf offset 3)))
- (t
- (prog1
- (code-char
- (dpb byte (byte 3 18)
- (dpb (sap-ref-8 sap (1+ offset)) (byte 6 12)
- (dpb (sap-ref-8 sap (+ 2 offset)) (byte 6 6)
- (sap-ref-8 sap (+ 3 offset))))))
- (incf offset 4))))))))))
+ (let ((byte-length (do* ((offset 0 (1+ offset)) ; scan one byte at a time starting at SAP offset 0
+ (byte #1=(sap-ref-8 sap offset) #1#)) ; #1=/#1# reuse one read form for init and step; DO* steps OFFSET first
+ ((zerop byte) offset)))) ; stop at the first zero byte; result is the count of bytes before it
+ (handler-bind ((sb!impl::octet-decoding-error #'sb!impl::use-unicode-replacement-char)) ; NOTE(review): presumably substitutes a replacement char for malformed octets - confirm
+ (sb!impl::utf8->string-sap-ref-8 sap 0 byte-length))))) ; hand decoding of bytes [0, byte-length) to the shared helper
(defun %deport-utf8-string (string)
(declare (type simple-string string))
- (locally
- (declare (optimize (speed 3) (safety 0)))
- (let ((length (1+ (do* ((offset 0)
- (length (length string))
- (index 0 (1+ index)))
- ((= index length) offset)
- (declare (type fixnum offset))
- (let ((bits (char-code (char string index))))
- (cond
- ((< bits #x80) (incf offset 1))
- ((< bits #x800) (incf offset 2))
- ((< bits #x10000) (incf offset 3))
- (t (incf offset 4))))))))
- (let ((vector (make-array length :element-type '(unsigned-byte 8)
- :initial-element 0)))
- (do* ((offset 0)
- (length (length string))
- (index 0 (1+ index)))
- ((= index length) vector)
- (declare (type fixnum offset))
- (let ((bits (char-code (char string index))))
- (cond
- ((< bits #x80)
- (setf (aref vector offset) bits)
- (incf offset))
- ((< bits #x800)
- (setf (aref vector offset) (logior #xc0 (ldb (byte 5 6) bits)))
- (setf (aref vector (1+ offset))
- (logior #x80 (ldb (byte 6 0) bits)))
- (incf offset 2))
- ((< bits #x10000)
- (setf (aref vector offset) (logior #xe0 (ldb (byte 4 12) bits)))
- (setf (aref vector (1+ offset))
- (logior #x80 (ldb (byte 6 6) bits)))
- (setf (aref vector (+ offset 2))
- (logior #x80 (ldb (byte 6 0) bits)))
- (incf offset 3))
- (t
- (setf (aref vector offset) (logior #xf0 (ldb (byte 3 18) bits)))
- (setf (aref vector (1+ offset))
- (logior #x80 (ldb (byte 6 12) bits)))
- (setf (aref vector (+ offset 2))
- (logior #x80 (ldb (byte 6 6) bits)))
- (setf (aref vector (+ offset 3))
- (logior #x80 (ldb (byte 6 0) bits)))
- (incf offset 4)))))))))
+ (sb!impl::string->utf8 string 0 (length string) 1)) ; encode the whole string via the shared helper; NOTE(review): trailing 1 presumably requests one zero terminator byte, as the removed (1+ ...) sizing did - confirm