(defparameter *reserved-path-characters*
(reserved-char-vector
(append *excluded-characters*
- '(#\;
+ '(#\; #\%
;;;;The rfc says this should be here, but it doesn't make sense.
;; #\=
#\/ #\?))))
(defun decode-escaped-encoding (string escape
&optional (reserved-chars
*reserved-characters*))
- ;; Return a string with the real characters.
+ ;;Return a string with the real characters.
(when (null escape) (return-from decode-escaped-encoding string))
- (do* ((i 0 (1+ i))
- (max (length string))
- (new-string (copy-seq string))
- (new-i 0 (1+ new-i))
- ch ch2 chc chc2)
- ((= i max)
- (shrink-vector new-string new-i))
- (if* (char= #\% (setq ch (char string i)))
- then (when (> (+ i 3) max)
- (.parse-error
- "Unsyntactic escaped encoding in ~s." string))
- (setq ch (char string (incf i)))
- (setq ch2 (char string (incf i)))
- (when (not (and (setq chc (digit-char-p ch 16))
- (setq chc2 (digit-char-p ch2 16))))
- (.parse-error
- "Non-hexidecimal digits after %: %c%c." ch ch2))
- (let ((ci (+ (* 16 chc) chc2)))
- (if* (or (null reserved-chars)
- (> ci 127) ; bug11527
- (= 0 (sbit reserved-chars ci)))
- then ;; ok as is
- (setf (char new-string new-i)
- (code-char ci))
- else (setf (char new-string new-i) #\%)
- (setf (char new-string (incf new-i)) ch)
- (setf (char new-string (incf new-i)) ch2)))
- else (setf (char new-string new-i) ch))))
+ (let ((curpos 0)
+ (maxpos (length string))
+ (strs nil))
+ (flet ((next-ansii-substring ()
+ (let ((pos (or (position #\% string :start curpos)
+ maxpos)))
+ (when (and (= curpos 0)
+ (= pos maxpos))
+ (return-from decode-escaped-encoding string))
+ (when (< curpos pos)
+ (push (subseq string
+ curpos
+ pos)
+ strs)
+ (setf curpos pos))))
+ (next-encoded-substring ()
+ (let ((pos (or (loop for i from curpos below maxpos by 3
+ unless (char= (aref string i)
+ #\%)
+ return i)
+ maxpos)))
+ (when (< curpos pos)
+ (let ((octets (handler-case (make-array (/ (- pos curpos)
+ 3)
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0)
+ (error ()
+ (.parse-error "Unsyntactic escaped encoding in ~s." string)))))
+ (loop for i from curpos below pos by 3
+ do (vector-push (handler-case
+ (parse-integer string
+ :start (1+ i)
+ :end (+ i 3)
+ :radix 16)
+ (error ()
+ (.parse-error "Non-hexidecimal digits after %: ~c~c."
+ (aref string (1+ i))
+ (aref string (+ 2 i)))))
+ octets))
+
+ (let* ((decoded-string (babel:octets-to-string octets
+ :encoding :utf-8))
+ (rpos (if reserved-chars
+ (position-if #'(lambda (ch)
+ (not (or (> (char-code ch) 127)
+ (= (sbit reserved-chars (char-code ch)) 0))))
+ decoded-string))))
+ (push (if rpos
+ (with-output-to-string (out)
+ (loop for ch across decoded-string
+ with i = curpos
+ do (let ((code (char-code ch)))
+ (cond
+ ((or (null reserved-chars)
+ (> code 127)
+ (= (sbit reserved-chars code) 0))
+ (write-char ch out)
+ (incf i
+ (* (cond
+ ((< code #x80) 1)
+ ((< code #x800) 2)
+ ((< code #x10000) 3)
+ ((< code #x200000) 4)
+ ((< code #x4000000) 5)
+ (t 6))
+ 3)))
+ (t (write-string (subseq string i (+ i 3)) out)
+ (incf i 3))))))
+ decoded-string)
+ strs))))
+ (setf curpos pos))))
+ (loop
+ while (< curpos maxpos)
+ do (next-ansii-substring)
+ while (< curpos maxpos)
+ do (next-encoded-substring)))
+ (if (cdr strs)
+ (apply #'concatenate
+ 'string
+ (nreverse strs))
+ (or (car strs)
+ ""))))
+
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Printing
then (format stream "~a" (uri-string urn))
else (uri-string urn)))
-(defparameter *escaped-encoding*
- (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
-
(defun encode-escaped-encoding (string reserved-chars escape)
(when (null escape) (return-from encode-escaped-encoding string))
- ;; Make a string as big as it possibly needs to be (3 times the original
- ;; size), and truncate it at the end.
- (do* ((max (length string))
- (new-max (* 3 max)) ;; worst case new size
- (new-string (make-string new-max))
- (i 0 (1+ i))
- (new-i -1)
- c ci)
- ((= i max)
- (shrink-vector new-string (incf new-i)))
- (setq ci (char-int (setq c (char string i))))
- (if* (or (null reserved-chars)
- (> ci 127)
- (= 0 (sbit reserved-chars ci)))
- then ;; ok as is
- (incf new-i)
- (setf (char new-string new-i) c)
- else ;; need to escape it
- (multiple-value-bind (q r) (truncate ci 16)
- (setf (char new-string (incf new-i)) #\%)
- (setf (char new-string (incf new-i)) (elt *escaped-encoding* q))
- (setf (char new-string (incf new-i))
- (elt *escaped-encoding* r))))))
+ (with-output-to-string (out)
+ (loop for ch across string
+ do (let ((code (char-code ch)))
+ (if (and (< code 128)
+ (or (null reserved-chars)
+ (= 0 (sbit reserved-chars code))))
+ (write-char ch out)
+ (loop for octet across (babel:string-to-octets (string ch) :encoding :utf-8)
+ do (format out "%~(~2,'0x~)" octet)))))))
(defmethod print-object ((uri uri) stream)
(if* *print-escape*