;; $Id$
(defpackage #:puri
- (:use #:cl)
+ (:use #:cl #:idna)
#-allegro (:nicknames #:net.uri)
(:export
#:uri ; the type and a function
#:merge-uris
#:enough-uri
#:uri-parsed-path
+ #:uri-parsed-host
#:render-uri
#:make-uri-space ; interning...
:initarg :parsed-path
:initform nil
:accessor .uri-parsed-path)
+ (parsed-host
+ :initarg :parsed-host
+ :initform nil
+ :accessor .uri-parsed-host)
(hashcode
;; cached sxhash, so we don't have to compute it more than once.
:initarg :hashcode :initform nil :accessor uri-hashcode)))
+(defmethod initialize-instance :after ((uri puri:uri) &key &allow-other-keys)
+ (let ((parsed-path (puri:uri-parsed-path uri))
+ (parsed-host (puri:uri-parsed-host uri)))
+ (when parsed-path
+ (setf (puri:uri-parsed-path uri)
+ parsed-path))
+ (when parsed-host
+ (setf (puri:uri-parsed-host uri)
+ parsed-host))))
+
(defclass urn (uri)
((nid :initarg :nid :initform nil :accessor urn-nid)
(nss :initarg :nss :initform nil :accessor urn-nss)))
(prog1 (call-next-method)
(setf (uri-string self) nil)
,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
+ ,@(when (eq name 'uri-host) `((setf (.uri-parsed-host self) nil)))
(setf (uri-hashcode self) nil))))
)
(clear-caching-on-slot-change uri-scheme)
-(clear-caching-on-slot-change uri-host)
(clear-caching-on-slot-change uri-port)
(clear-caching-on-slot-change uri-path)
(clear-caching-on-slot-change uri-query)
`(make-instance ',(class-name (class-of self))
:scheme ,(uri-scheme self)
:host ,(uri-host self)
+ :parsed-host ',(.uri-parsed-host self)
:port ,(uri-port self)
:path ',(uri-path self)
:query ,(uri-query self)
&key place
(scheme (when uri (uri-scheme uri)))
(host (when uri (uri-host uri)))
+ (parsed-host
+ (when uri (copy-seq (.uri-parsed-host uri))))
(port (when uri (uri-port uri)))
(path (when uri (uri-path uri)))
(parsed-path
(if* place
then (setf (uri-scheme place) scheme)
(setf (uri-host place) host)
+ (setf (.uri-parsed-host place) parsed-host)
(setf (uri-port place) port)
(setf (uri-path place) path)
(setf (.uri-parsed-path place) parsed-path)
then ;; allow the compiler to optimize the call to make-instance:
(make-instance 'uri
:scheme scheme :host host :port port :path path
- :parsed-path parsed-path
+ :parsed-path parsed-path :parsed-host parsed-host
:query query :fragment fragment :plist plist
:escaped escaped :string nil :hashcode nil)
else (make-instance class
:scheme scheme :host host :port port :path path
- :parsed-path parsed-path
+ :parsed-path parsed-path :parsed-host parsed-host
:query query :fragment fragment :plist plist
:escaped escaped :string nil :hashcode nil)))
(setf (.uri-parsed-path uri) path-list)
path-list)
+(defmethod uri-parsed-host ((uri uri))
+ (when (uri-host uri)
+ (when (null (.uri-parsed-host uri))
+ (setf (.uri-parsed-host uri)
+ (to-unicode (uri-host uri))))
+ (.uri-parsed-host uri)))
+
+(defmethod (setf uri-parsed-host) (host (uri uri))
+ (setf (uri-host uri) (to-ascii host))
+ (setf (.uri-parsed-host uri) (to-unicode host)))
+
(defun uri-authority (uri)
(when (uri-host uri)
(let ((*print-pretty* nil))
;; Parsing
(defparameter *excluded-characters*
- '(;; `delims' (except #\%, because it's handled specially):
+ (append
+ ;; exclude control characters
+ (loop for i from 0 to #x1f
+ collect (code-char i))
+ '(;; `delims' (except #\%, because it's handled specially):
#\< #\> #\" #\space #\#
+ #\Rubout ;; (code-char #x7f)
;; `unwise':
#\{ #\} #\| #\\ #\^ #\[ #\] #\`))
+ "Excluded charcters from RFC2369 (http://www.ietf.org/rfc/rfc2396.txt 2.4.3)")
(defun reserved-char-vector (chars &key except)
- (do* ((a (make-array 127 :element-type 'bit :initial-element 0))
+ (do* ((a (make-array 128 :element-type 'bit :initial-element 0))
(chars chars (cdr chars))
(c (car chars) (car chars)))
((null chars) a)
(defparameter *reserved-path-characters*
(reserved-char-vector
(append *excluded-characters*
- '(#\;
+ '(#\; #\%
;;;;The rfc says this should be here, but it doesn't make sense.
;; #\=
#\/ #\?))))
(when (and scheme (eq :urn scheme))
(return-from parse-uri
- (make-instance 'urn :scheme scheme :nid host :nss path)))
+ (make-instance 'urn :scheme scheme :nid host :nss (to-ascii path))))
- (when host (setq host (decode-escaped-encoding host escape)))
+ (when host (setq host (to-ascii (decode-escaped-encoding host escape))))
(when port
(setq port (read-from-string port))
(when (not (numberp port)) (error "port is not a number: ~s." port))
(defun decode-escaped-encoding (string escape
&optional (reserved-chars
*reserved-characters*))
- ;; Return a string with the real characters.
+ ;;Return a string with the real characters.
(when (null escape) (return-from decode-escaped-encoding string))
- (do* ((i 0 (1+ i))
- (max (length string))
- (new-string (copy-seq string))
- (new-i 0 (1+ new-i))
- ch ch2 chc chc2)
- ((= i max)
- (shrink-vector new-string new-i))
- (if* (char= #\% (setq ch (char string i)))
- then (when (> (+ i 3) max)
- (.parse-error
- "Unsyntactic escaped encoding in ~s." string))
- (setq ch (char string (incf i)))
- (setq ch2 (char string (incf i)))
- (when (not (and (setq chc (digit-char-p ch 16))
- (setq chc2 (digit-char-p ch2 16))))
- (.parse-error
- "Non-hexidecimal digits after %: %c%c." ch ch2))
- (let ((ci (+ (* 16 chc) chc2)))
- (if* (or (null reserved-chars)
- (> ci 127) ; bug11527
- (= 0 (sbit reserved-chars ci)))
- then ;; ok as is
- (setf (char new-string new-i)
- (code-char ci))
- else (setf (char new-string new-i) #\%)
- (setf (char new-string (incf new-i)) ch)
- (setf (char new-string (incf new-i)) ch2)))
- else (setf (char new-string new-i) ch))))
+ (let ((curpos 0)
+ (maxpos (length string))
+ (strs nil))
+ (flet ((next-ansii-substring ()
+ (let ((pos (or (position #\% string :start curpos)
+ maxpos)))
+ (when (and (= curpos 0)
+ (= pos maxpos))
+ (return-from decode-escaped-encoding string))
+ (when (< curpos pos)
+ (push (subseq string
+ curpos
+ pos)
+ strs)
+ (setf curpos pos))))
+ (next-encoded-substring ()
+ (let ((pos (or (loop for i from curpos below maxpos by 3
+ unless (char= (aref string i)
+ #\%)
+ return i)
+ maxpos)))
+ (when (< curpos pos)
+ (let ((octets (handler-case (make-array (/ (- pos curpos)
+ 3)
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0)
+ (error ()
+ (.parse-error "Unsyntactic escaped encoding in ~s." string)))))
+ (loop for i from curpos below pos by 3
+ do (vector-push (handler-case
+ (parse-integer string
+ :start (1+ i)
+ :end (+ i 3)
+ :radix 16)
+ (error ()
+ (.parse-error "Non-hexidecimal digits after %: ~c~c."
+ (aref string (1+ i))
+ (aref string (+ 2 i)))))
+ octets))
+
+ (let* ((decoded-string (babel:octets-to-string octets
+ :encoding :utf-8))
+ (rpos (if reserved-chars
+ (position-if #'(lambda (ch)
+ (not (or (> (char-code ch) 127)
+ (= (sbit reserved-chars (char-code ch)) 0))))
+ decoded-string))))
+ (push (if rpos
+ (with-output-to-string (out)
+ (loop for ch across decoded-string
+ with i = curpos
+ do (let ((code (char-code ch)))
+ (cond
+ ((or (null reserved-chars)
+ (> code 127)
+ (= (sbit reserved-chars code) 0))
+ (write-char ch out)
+ (incf i
+ (* (cond
+ ((< code #x80) 1)
+ ((< code #x800) 2)
+ ((< code #x10000) 3)
+ ((< code #x200000) 4)
+ ((< code #x4000000) 5)
+ (t 6))
+ 3)))
+ (t (write-string (subseq string i (+ i 3)) out)
+ (incf i 3))))))
+ decoded-string)
+ strs))))
+ (setf curpos pos))))
+ (loop
+ while (< curpos maxpos)
+ do (next-ansii-substring)
+ while (< curpos maxpos)
+ do (next-encoded-substring)))
+ (if (cdr strs)
+ (apply #'concatenate
+ 'string
+ (nreverse strs))
+ (or (car strs)
+ ""))))
+
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Printing
then (format stream "~a" (uri-string urn))
else (uri-string urn)))
-(defparameter *escaped-encoding*
- (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
-
(defun encode-escaped-encoding (string reserved-chars escape)
(when (null escape) (return-from encode-escaped-encoding string))
- ;; Make a string as big as it possibly needs to be (3 times the original
- ;; size), and truncate it at the end.
- (do* ((max (length string))
- (new-max (* 3 max)) ;; worst case new size
- (new-string (make-string new-max))
- (i 0 (1+ i))
- (new-i -1)
- c ci)
- ((= i max)
- (shrink-vector new-string (incf new-i)))
- (setq ci (char-int (setq c (char string i))))
- (if* (or (null reserved-chars)
- (> ci 127)
- (= 0 (sbit reserved-chars ci)))
- then ;; ok as is
- (incf new-i)
- (setf (char new-string new-i) c)
- else ;; need to escape it
- (multiple-value-bind (q r) (truncate ci 16)
- (setf (char new-string (incf new-i)) #\%)
- (setf (char new-string (incf new-i)) (elt *escaped-encoding* q))
- (setf (char new-string (incf new-i))
- (elt *escaped-encoding* r))))))
+ (with-output-to-string (out)
+ (loop for ch across string
+ do (let ((code (char-code ch)))
+ (if (and (< code 128)
+ (or (null reserved-chars)
+ (= 0 (sbit reserved-chars code))))
+ (write-char ch out)
+ (loop for octet across (babel:string-to-octets (string ch) :encoding :utf-8)
+ do (format out "%~(~2,'0x~)" octet)))))))
(defmethod print-object ((uri uri) stream)
(if* *print-escape*