X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Foctets.lisp;h=4f725a7300aef85cde047353e3e663ce1d43aab5;hb=9af8ab0a80bbd4d579ed4a12d2a2819a7490901a;hp=13320f9a10c28e3782bc4e356aef92b0fe37c80b;hpb=7effaab5d43dd5423938b00854848e01eb3a67c8;p=sbcl.git diff --git a/src/code/octets.lisp b/src/code/octets.lisp index 13320f9..4f725a7 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -135,26 +135,6 @@ one-past-the-end" (,definer aref (simple-array (unsigned-byte 8) (*))) (,definer sap-ref-8 system-area-pointer))) -;;; maps into TO-SEQ from elements of FROM-SEQ via MAPPER. MAPPER -;;; returns two values: the number of elments stored in TO-SEQ, and -;;; the number used up from FROM-SEQ. MAPPER is responsible for -;;; getting out if either sequence runs out of room. -(declaim (inline varimap)) -(defun varimap (to-seq to-start to-end from-start from-end mapper) - (declare (optimize speed (safety 0)) - (type array-range to-start to-end from-start from-end) - (type function mapper)) - (loop with from-size of-type array-range = 0 - and to-size of-type array-range = 0 - for to-pos of-type array-range = to-start then (+ to-pos to-size) - for from-pos of-type array-range = from-start then (+ from-pos from-size) - while (and (< to-pos to-end) - (< from-pos from-end)) - do (multiple-value-bind (ts fs) (funcall mapper to-pos from-pos) - (setf to-size ts - from-size fs)) - finally (return (values to-seq to-pos from-pos)))) - ;;; FIXME: find out why the comment about SYMBOLICATE below is true ;;; and fix it, or else replace with SYMBOLICATE. ;;; @@ -395,38 +375,61 @@ one-past-the-end" (t (bug "can't happen")))) (defun string->utf8 (string sstart send null-padding) - (declare (optimize speed (safety 0)) + (declare (optimize (speed 3) (safety 0)) (type simple-string string) (type (integer 0 1) null-padding) (type array-range sstart send)) - (let* ((utf8-length (loop for i of-type index from sstart below send - sum (char-len-as-utf8 (char-code (char string i))))) - (array (make-array (+ null-padding utf8-length) - :initial-element 0 - :element-type '(unsigned-byte 8))) - (index 0)) - (declare (type index index)) - (flet ((add-byte (b) - (setf (aref array index) b) - (incf index))) - (loop for i of-type index from sstart below send - do (let ((code (char-code (char string i)))) - (case (char-len-as-utf8 code) - (1 - (add-byte code)) - (2 - (add-byte (logior #b11000000 (ldb (byte 5 6) code))) - (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) - (3 - (add-byte (logior #b11100000 (ldb (byte 4 12) code))) - (add-byte (logior #b10000000 (ldb (byte 6 6) code))) - (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) - (4 - (add-byte (logior #b11110000 (ldb (byte 3 18) code))) - (add-byte (logior #b10000000 (ldb (byte 6 12) code))) - (add-byte (logior #b10000000 (ldb (byte 6 6) code))) - (add-byte (logior #b10000000 (ldb (byte 6 0) code)))))) - finally (return array))))) + (macrolet ((ascii-bash () + '(let ((array (make-array (+ null-padding (- send sstart)) + :element-type '(unsigned-byte 8)))) + (loop for i from 0 + and j from sstart below send + do (setf (aref array i) (char-code (char string j)))) + array))) + (etypecase string + ((simple-array character (*)) + (let ((utf8-length 0)) + ;; Since it has to fit in a vector, it must be a fixnum! + (declare (type (and unsigned-byte fixnum) utf8-length)) + (loop for i of-type index from sstart below send + do (incf utf8-length (char-len-as-utf8 (char-code (char string i))))) + (if (= utf8-length (- send sstart)) + (ascii-bash) + (let ((array (make-array (+ null-padding utf8-length) + :element-type '(unsigned-byte 8))) + (index 0)) + (declare (type index index)) + (flet ((add-byte (b) + (setf (aref array index) b) + (incf index))) + (declare (inline add-byte)) + (loop for i of-type index from sstart below send + do (let ((code (char-code (char string i)))) + (case (char-len-as-utf8 code) + (1 + (add-byte code)) + (2 + (add-byte (logior #b11000000 (ldb (byte 5 6) code))) + (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) + (3 + (add-byte (logior #b11100000 (ldb (byte 4 12) code))) + (add-byte (logior #b10000000 (ldb (byte 6 6) code))) + (add-byte (logior #b10000000 (ldb (byte 6 0) code)))) + (4 + (add-byte (logior #b11110000 (ldb (byte 3 18) code))) + (add-byte (logior #b10000000 (ldb (byte 6 12) code))) + (add-byte (logior #b10000000 (ldb (byte 6 6) code))) + (add-byte (logior #b10000000 (ldb (byte 6 0) code)))))) + finally (return array))))))) + #!+sb-unicode + ((simple-array base-char (*)) + ;; On unicode builds BASE-STRINGs are limited to ASCII range, so we can take + ;; a fast path -- and get benefit of the element type information. On non-unicode + ;; build BASE-CHAR == CHARACTER. + (ascii-bash)) + ((simple-array nil (*)) + ;; Just get the error... + (aref string sstart))))) ;;;; to-string conversions @@ -466,11 +469,11 @@ one-past-the-end" (type ,type array) (type array-range sstart send astart aend) (function mapper)) - (varimap string sstart send - astart aend - (lambda (spos apos) - (setf (char string spos) (code-char (funcall mapper (,accessor array apos)))) - (values 1 1))))))) + (loop for spos from sstart below send + for apos from astart below aend + do (setf (char string spos) + (code-char (funcall mapper (,accessor array apos)))) + finally (return (values string spos apos))))))) (instantiate-octets-definition define-latin->string*) (defmacro define-latin1->string* (accessor type) @@ -494,7 +497,7 @@ one-past-the-end" (defmacro define-latin->string (accessor type) (let ((name (make-od-name 'latin->string accessor))) `(progn - (declaim (inline latin->string)) + (declaim (inline ,name)) (defun ,name (array astart aend mapper) (declare (optimize speed (safety 0)) (type ,type array) @@ -581,7 +584,7 @@ one-past-the-end" ;; two-byte sequence `"initial (length 3)" ;; "non-continuation"' -- `#xef #x32') ;; signal only part of that sequence as - ;; erronous. + ;; erroneous. (loop for i from 1 below (min len remaining-bytes) always (valid-secondary-p i)) (enough-bytes-left-p len))) @@ -795,7 +798,8 @@ one-past-the-end" (declare (type (vector (unsigned-byte 8)) vector)) (with-array-data ((vector vector) (start start) - (end (%check-vector-sequence-bounds vector start end))) + (end end) + :check-fill-pointer t) (declare (type (simple-array (unsigned-byte 8) (*)) vector)) (funcall (symbol-function (first (external-formats-funs external-format))) vector start end))) @@ -805,7 +809,8 @@ one-past-the-end" (declare (type string string)) (with-array-data ((string string) (start start) - (end (%check-vector-sequence-bounds string start end))) + (end end) + :check-fill-pointer t) (declare (type simple-string string)) (funcall (symbol-function (second (external-formats-funs external-format))) string start end (if null-terminate 1 0))))