- (declare (optimize speed (safety 0))
- (type ,type array)
- (type array-range pos end))
- ;; returns the number of bytes consumed and nil if it's a
- ;; valid character or the number of bytes consumed and a
- ;; replacement string if it's not.
- (let ((initial-byte (,accessor array pos))
- (reject-reason nil)
- (reject-position pos)
- (remaining-bytes (- end pos)))
- (declare (type array-range reject-position remaining-bytes))
- (labels ((valid-eucjp-starter-byte-p (b)
- (declare (type (unsigned-byte 8) b))
- (let ((ok (cond ((< b #x80) 1)
- ((or (= b #x8E) (<= #xA1 b #xFE)) 2)
- ((= b #x8F) 3))))
- (unless ok
- (setf reject-reason 'invalid-eucjp-starter-byte))
- ok))
- (enough-bytes-left-p (x)
- (let ((ok (> end (+ pos (1- x)))))
- (unless ok
- (setf reject-reason 'end-of-input-in-character))
- ok))
- (valid-secondary-p (x)
- (let* ((idx (the array-range (+ pos x)))
- (b (,accessor array idx))
- (ok (<= #xA1 b #xFE)))
- (unless ok
- (setf reject-reason 'invalid-eucjp-continuation-byte)
- (setf reject-position idx))
- ok))
- (preliminary-ok-for-length (maybe-len len)
- (and (eql maybe-len len)
- ;; Has to be done in this order so that
- ;; certain broken sequences (e.g., the
- ;; two-byte sequence `"initial (length 3)"
- ;; "non-continuation"' -- `#xef #x32')
- ;; signal only part of that sequence as
- ;; erronous.
- (loop for i from 1 below (min len remaining-bytes)
- always (valid-secondary-p i))
- (enough-bytes-left-p len))))
- (declare (inline valid-eucjp-starter-byte-p
- enough-bytes-left-p
- valid-secondary-p
- preliminary-ok-for-length))
- (let ((maybe-len (valid-eucjp-starter-byte-p initial-byte)))
- (cond ((eql maybe-len 1)
- (values 1 nil))
- ((preliminary-ok-for-length maybe-len 2)
- (values 2 nil))
- ((preliminary-ok-for-length maybe-len 3)
- (values 3 nil))
- (t
- (let* ((bad-end (ecase reject-reason
- (invalid-eucjp-starter-byte
- (1+ pos))
- (end-of-input-in-character
- end)
- (invalid-eucjp-continuation-byte
- reject-position)))
- (bad-len (- bad-end pos)))
- (declare (type array-range bad-end bad-len))
- (let ((replacement (decoding-error array pos bad-end :euc-jp reject-reason reject-position)))
- (values bad-len replacement))))))))))))
+ (declare (optimize speed (safety 0))
+ (type ,type array)
+ (type array-range pos end))
+ ;; returns the number of bytes consumed and nil if it's a
+ ;; valid character or the number of bytes consumed and a
+ ;; replacement string if it's not.
+ (let ((initial-byte (,accessor array pos))
+ (reject-reason nil)
+ (reject-position pos)
+ (remaining-bytes (- end pos)))
+ (declare (type array-range reject-position remaining-bytes))
+ (labels ((valid-eucjp-starter-byte-p (b)
+ (declare (type (unsigned-byte 8) b))
+ (let ((ok (cond ((< b #x80) 1)
+ ((or (= b #x8E) (<= #xA1 b #xFE)) 2)
+ ((= b #x8F) 3))))
+ (unless ok
+ (setf reject-reason 'invalid-eucjp-starter-byte))
+ ok))
+ (enough-bytes-left-p (x)
+ (let ((ok (> end (+ pos (1- x)))))
+ (unless ok
+ (setf reject-reason 'end-of-input-in-character))
+ ok))
+ (valid-secondary-p (x)
+ (let* ((idx (the array-range (+ pos x)))
+ (b (,accessor array idx))
+ (ok (<= #xA1 b #xFE)))
+ (unless ok
+ (setf reject-reason 'invalid-eucjp-continuation-byte)
+ (setf reject-position idx))
+ ok))
+ (preliminary-ok-for-length (maybe-len len)
+ (and (eql maybe-len len)
+ ;; Has to be done in this order so that
+ ;; certain broken sequences (e.g., the
+ ;; two-byte sequence `"initial (length 3)"
+ ;; "non-continuation"' -- `#xef #x32')
+ ;; signal only part of that sequence as
+ ;; erronous.
+ (loop for i from 1 below (min len remaining-bytes)
+ always (valid-secondary-p i))
+ (enough-bytes-left-p len))))
+ (declare (inline valid-eucjp-starter-byte-p
+ enough-bytes-left-p
+ valid-secondary-p
+ preliminary-ok-for-length))
+ (let ((maybe-len (valid-eucjp-starter-byte-p initial-byte)))
+ (cond ((eql maybe-len 1)
+ (values 1 nil))
+ ((preliminary-ok-for-length maybe-len 2)
+ (values 2 nil))
+ ((preliminary-ok-for-length maybe-len 3)
+ (values 3 nil))
+ (t
+ (let* ((bad-end (ecase reject-reason
+ (invalid-eucjp-starter-byte
+ (1+ pos))
+ (end-of-input-in-character
+ end)
+ (invalid-eucjp-continuation-byte
+ reject-position)))
+ (bad-len (- bad-end pos)))
+ (declare (type array-range bad-end bad-len))
+ (let ((replacement (decoding-error array pos bad-end :euc-jp reject-reason reject-position)))
+ (values bad-len replacement))))))))))))