X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Foctets.lisp;h=32344cc886708ef6c0f289ed476d3c91aff51aa9;hb=8eee0d3a30bf39d9f201acff28c92059fe6c3e4e;hp=13320f9a10c28e3782bc4e356aef92b0fe37c80b;hpb=7effaab5d43dd5423938b00854848e01eb3a67c8;p=sbcl.git diff --git a/src/code/octets.lisp b/src/code/octets.lisp index 13320f9..32344cc 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -395,38 +395,61 @@ one-past-the-end" (t (bug "can't happen")))) (defun string->utf8 (string sstart send null-padding) - (declare (optimize speed (safety 0)) + (declare (optimize (speed 3) (safety 0)) (type simple-string string) (type (integer 0 1) null-padding) (type array-range sstart send)) - (let* ((utf8-length (loop for i of-type index from sstart below send - sum (char-len-as-utf8 (char-code (char string i))))) - (array (make-array (+ null-padding utf8-length) - :initial-element 0 - :element-type '(unsigned-byte 8))) - (index 0)) - (declare (type index index)) - (flet ((add-byte (b) - (setf (aref array index) b) - (incf index))) - (loop for i of-type index from sstart below send - do (let ((code (char-code (char string i)))) - (case (char-len-as-utf8 code) - (1 - (add-byte code)) - (2 - (add-byte (logior #b11000000 (ldb (byte 5 6) code))) - (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) - (3 - (add-byte (logior #b11100000 (ldb (byte 4 12) code))) - (add-byte (logior #b10000000 (ldb (byte 6 6) code))) - (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) - (4 - (add-byte (logior #b11110000 (ldb (byte 3 18) code))) - (add-byte (logior #b10000000 (ldb (byte 6 12) code))) - (add-byte (logior #b10000000 (ldb (byte 6 6) code))) - (add-byte (logior #b10000000 (ldb (byte 6 0) code)))))) - finally (return array))))) + (macrolet ((ascii-bash () + '(let ((array (make-array (+ null-padding (- send sstart)) + :element-type '(unsigned-byte 8)))) + (loop for i from 0 + and j from sstart below send + do (setf (aref array i) (char-code (char string j)))) + array))) + (etypecase string + ((simple-array character (*)) + (let ((utf8-length 0)) + ;; Since it has to fit in a vector, it must be a fixnum! + (declare (type (and unsigned-byte fixnum) utf8-length)) + (loop for i of-type index from sstart below send + do (incf utf8-length (char-len-as-utf8 (char-code (char string i))))) + (if (= utf8-length (- send sstart)) + (ascii-bash) + (let ((array (make-array (+ null-padding utf8-length) + :element-type '(unsigned-byte 8))) + (index 0)) + (declare (type index index)) + (flet ((add-byte (b) + (setf (aref array index) b) + (incf index))) + (declare (inline add-byte)) + (loop for i of-type index from sstart below send + do (let ((code (char-code (char string i)))) + (case (char-len-as-utf8 code) + (1 + (add-byte code)) + (2 + (add-byte (logior #b11000000 (ldb (byte 5 6) code))) + (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) + (3 + (add-byte (logior #b11100000 (ldb (byte 4 12) code))) + (add-byte (logior #b10000000 (ldb (byte 6 6) code))) + (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) + (4 + (add-byte (logior #b11110000 (ldb (byte 3 18) code))) + (add-byte (logior #b10000000 (ldb (byte 6 12) code))) + (add-byte (logior #b10000000 (ldb (byte 6 6) code))) + (add-byte (logior #b10000000 (ldb (byte 6 0) code)))))) + finally (return array))))))) + #!+sb-unicode + ((simple-array base-char (*)) + ;; On unicode builds BASE-STRINGs are limited to ASCII range, so we can take + ;; a fast path -- and get benefit of the element type information. On non-unicode + ;; build BASE-CHAR == CHARACTER. + (ascii-bash)) + ((simple-array nil (*)) + ;; Just get the error... + (aref string sstart))))) ;;;; to-string conversions @@ -581,7 +604,7 @@ one-past-the-end" ;; two-byte sequence `"initial (length 3)" ;; "non-continuation"' -- `#xef #x32') ;; signal only part of that sequence as - ;; erronous. + ;; erroneous. (loop for i from 1 below (min len remaining-bytes) always (valid-secondary-p i)) (enough-bytes-left-p len))) @@ -795,7 +818,8 @@ one-past-the-end" (declare (type (vector (unsigned-byte 8)) vector)) (with-array-data ((vector vector) (start start) - (end (%check-vector-sequence-bounds vector start end))) + (end end) + :check-fill-pointer t) (declare (type (simple-array (unsigned-byte 8) (*)) vector)) (funcall (symbol-function (first (external-formats-funs external-format))) vector start end))) @@ -805,7 +829,8 @@ one-past-the-end" (declare (type string string)) (with-array-data ((string string) (start start) - (end (%check-vector-sequence-bounds string start end))) + (end end) + :check-fill-pointer t) (declare (type simple-string string)) (funcall (symbol-function (second (external-formats-funs external-format))) string start end (if null-terminate 1 0))))