(,definer aref (simple-array (unsigned-byte 8) (*)))
(,definer sap-ref-8 system-area-pointer)))
-;;; maps into TO-SEQ from elements of FROM-SEQ via MAPPER. MAPPER
-;;; returns two values: the number of elments stored in TO-SEQ, and
-;;; the number used up from FROM-SEQ. MAPPER is responsible for
-;;; getting out if either sequence runs out of room.
-(declaim (inline varimap))
-(defun varimap (to-seq to-start to-end from-start from-end mapper)
- (declare (optimize speed (safety 0))
- (type array-range to-start to-end from-start from-end)
- (type function mapper))
- (loop with from-size of-type array-range = 0
- and to-size of-type array-range = 0
- for to-pos of-type array-range = to-start then (+ to-pos to-size)
- for from-pos of-type array-range = from-start then (+ from-pos from-size)
- while (and (< to-pos to-end)
- (< from-pos from-end))
- do (multiple-value-bind (ts fs) (funcall mapper to-pos from-pos)
- (setf to-size ts
- from-size fs))
- finally (return (values to-seq to-pos from-pos))))
-
;;; FIXME: find out why the comment about SYMBOLICATE below is true
;;; and fix it, or else replace with SYMBOLICATE.
;;;
(t (bug "can't happen"))))
(defun string->utf8 (string sstart send null-padding)
- (declare (optimize speed (safety 0))
+ (declare (optimize (speed 3) (safety 0))
(type simple-string string)
(type (integer 0 1) null-padding)
(type array-range sstart send))
- (let* ((utf8-length (loop for i of-type index from sstart below send
- sum (char-len-as-utf8 (char-code (char string i)))))
- (array (make-array (+ null-padding utf8-length)
- :initial-element 0
- :element-type '(unsigned-byte 8)))
- (index 0))
- (declare (type index index))
- (flet ((add-byte (b)
- (setf (aref array index) b)
- (incf index)))
- (loop for i of-type index from sstart below send
- do (let ((code (char-code (char string i))))
- (case (char-len-as-utf8 code)
- (1
- (add-byte code))
- (2
- (add-byte (logior #b11000000 (ldb (byte 5 6) code)))
- (add-byte (logior #b10000000 (ldb (byte 6 0) code))))
- (3
- (add-byte (logior #b11100000 (ldb (byte 4 12) code)))
- (add-byte (logior #b10000000 (ldb (byte 6 6) code)))
- (add-byte (logior #b10000000 (ldb (byte 6 0) code))))
- (4
- (add-byte (logior #b11110000 (ldb (byte 3 18) code)))
- (add-byte (logior #b10000000 (ldb (byte 6 12) code)))
- (add-byte (logior #b10000000 (ldb (byte 6 6) code)))
- (add-byte (logior #b10000000 (ldb (byte 6 0) code))))))
- finally (return array)))))
+ (macrolet ((ascii-bash ()
+ '(let ((array (make-array (+ null-padding (- send sstart))
+ :element-type '(unsigned-byte 8))))
+ (loop for i from 0
+ and j from sstart below send
+ do (setf (aref array i) (char-code (char string j))))
+ array)))
+ (etypecase string
+ ((simple-array character (*))
+ (let ((utf8-length 0))
+ ;; Since it has to fit in a vector, it must be a fixnum!
+ (declare (type (and unsigned-byte fixnum) utf8-length))
+ (loop for i of-type index from sstart below send
+ do (incf utf8-length (char-len-as-utf8 (char-code (char string i)))))
+ (if (= utf8-length (- send sstart))
+ (ascii-bash)
+ (let ((array (make-array (+ null-padding utf8-length)
+ :element-type '(unsigned-byte 8)))
+ (index 0))
+ (declare (type index index))
+ (flet ((add-byte (b)
+ (setf (aref array index) b)
+ (incf index)))
+ (declare (inline add-byte))
+ (loop for i of-type index from sstart below send
+ do (let ((code (char-code (char string i))))
+ (case (char-len-as-utf8 code)
+ (1
+ (add-byte code))
+ (2
+ (add-byte (logior #b11000000 (ldb (byte 5 6) code)))
+ (add-byte (logior #b10000000 (ldb (byte 6 0) code))))
+ (3
+ (add-byte (logior #b11100000 (ldb (byte 4 12) code)))
+ (add-byte (logior #b10000000 (ldb (byte 6 6) code)))
+ (add-byte (logior #b10000000 (ldb (byte 6 0) code))))
+ (4
+ (add-byte (logior #b11110000 (ldb (byte 3 18) code)))
+ (add-byte (logior #b10000000 (ldb (byte 6 12) code)))
+ (add-byte (logior #b10000000 (ldb (byte 6 6) code)))
+ (add-byte (logior #b10000000 (ldb (byte 6 0) code))))))
+ finally (return array)))))))
+ #!+sb-unicode
+ ((simple-array base-char (*))
+ ;; On unicode builds BASE-STRINGs are limited to ASCII range, so we can take
+ ;; a fast path -- and get benefit of the element type information. On non-unicode
+ ;; build BASE-CHAR == CHARACTER.
+ (ascii-bash))
+ ((simple-array nil (*))
+ ;; Just get the error...
+ (aref string sstart)))))
\f
;;;; to-string conversions
(type ,type array)
(type array-range sstart send astart aend)
(function mapper))
- (varimap string sstart send
- astart aend
- (lambda (spos apos)
- (setf (char string spos) (code-char (funcall mapper (,accessor array apos))))
- (values 1 1)))))))
+ (loop for spos from sstart below send
+ for apos from astart below aend
+ do (setf (char string spos)
+ (code-char (funcall mapper (,accessor array apos))))
+ finally (return (values string spos apos)))))))
(instantiate-octets-definition define-latin->string*)
(defmacro define-latin1->string* (accessor type)
(defmacro define-latin->string (accessor type)
(let ((name (make-od-name 'latin->string accessor)))
`(progn
- (declaim (inline latin->string))
+ (declaim (inline ,name))
(defun ,name (array astart aend mapper)
(declare (optimize speed (safety 0))
(type ,type array)
;; two-byte sequence `"initial (length 3)"
;; "non-continuation"' -- `#xef #x32')
;; signal only part of that sequence as
- ;; erronous.
+ ;; erroneous.
(loop for i from 1 below (min len remaining-bytes)
always (valid-secondary-p i))
(enough-bytes-left-p len)))
(declare (type (vector (unsigned-byte 8)) vector))
(with-array-data ((vector vector)
(start start)
- (end (%check-vector-sequence-bounds vector start end)))
+ (end end)
+ :check-fill-pointer t)
(declare (type (simple-array (unsigned-byte 8) (*)) vector))
(funcall (symbol-function (first (external-formats-funs external-format)))
vector start end)))
(declare (type string string))
(with-array-data ((string string)
(start start)
- (end (%check-vector-sequence-bounds string start end)))
+ (end end)
+ :check-fill-pointer t)
(declare (type simple-string string))
(funcall (symbol-function (second (external-formats-funs external-format)))
string start end (if null-terminate 1 0))))