1 ;; -*- mode: common-lisp; package: puri -*-
2 ;; Support for URIs in Allegro.
3 ;; For general URI information see RFC2396.
5 ;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
6 ;; copyright (c) 2003 Kevin Rosenberg (porting changes)
8 ;; The software, data and information contained herein are proprietary
9 ;; to, and comprise valuable trade secrets of, Franz, Inc. They are
10 ;; given in confidence by Franz, Inc. pursuant to a written license
11 ;; agreement, and may be stored and used only in accordance with the terms
14 ;; Restricted Rights Legend
15 ;; ------------------------
16 ;; Use, duplication, and disclosure of the software, data and information
17 ;; contained herein by any agency, department or entity of the U.S.
18 ;; Government are subject to restrictions of Restricted Rights for
19 ;; Commercial Software developed at private expense as specified in
20 ;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
22 ;; Original version from ACL 6.1:
23 ;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
25 ;; $Id: src.lisp,v 1.3 2003/07/18 21:00:54 kevin Exp $
30 #:uri ; the type and a function
34 #:uri-scheme ; and slots
40 #:uri-authority ; pseudo-slot accessor
43 #:urn-nid ; pseudo-slot accessor
44 #:urn-nss ; pseudo-slot accessor
53 #:make-uri-space ; interning...
62 (eval-when (compile) (declaim (optimize (speed 3))))
65 (eval-when (:compile-toplevel :load-toplevel :execute)
66 (defvar if*-keyword-list '("then" "thenret" "else" "elseif")))
69 (define-condition parse-error (error) ())
71 (defun .parse-error (fmt &rest args)
72 #+allegro (apply #'excl::.parse-error fmt args)
74 (make-condition 'parse-error :format-control fmt
75 :format-arguments args)))
77 (defun internal-reader-error (stream fmt &rest args)
79 (apply #'excl::internal-reader-error stream fmt args)
81 (apply #'format stream
82 "#u takes a string or list argument: ~s" args))
84 #-allegro (defvar *current-case-mode* :case-insensitive-upper)
86 (defun position-char (char string start max)
87 (declare (optimize (speed 3) (safety 0) (space 0))
88 (fixnum start max) (simple-string string))
89 (do* ((i start (1+ i)))
92 (when (char= char (schar string i)) (return i))))
95 (defun delimited-string-to-list (string &optional (separator #\space))
96 (excl:delimited-string-to-list string))
98 (defun delimited-string-to-list (string &optional (separator #\space)
100 (declare (optimize (speed 3) (safety 0) (space 0)
101 (compilation-speed 0))
103 (type character separator))
104 (do* ((len (length string))
107 (end (position-char separator string pos len)
108 (position-char separator string pos len)))
111 (push (subseq string pos) output)
112 (when (or (not skip-terminal) (zerop len))
115 (declare (type fixnum pos len)
116 (type (or null fixnum) end))
117 (push (subseq string pos end) output)
118 (setq pos (1+ end))))
120 (defmacro if* (&rest args)
121 (do ((xx (reverse args) (cdr xx))
128 (cond ((eq state :compl)
130 (t (error "if*: illegal form ~s" args))))
131 (cond ((and (symbolp (car xx))
132 (member (symbol-name (car xx))
134 :test #'string-equal))
135 (setq lookat (symbol-name (car xx)))))
137 (cond ((eq state :init)
138 (cond (lookat (cond ((string-equal lookat "thenret")
142 "if*: bad keyword ~a" lookat))))
145 (push (car xx) col))))
148 (cond ((string-equal lookat "else")
151 "if*: multiples elses")))
154 (push `(t ,@col) totalcol))
155 ((string-equal lookat "then")
157 (t (error "if*: bad keyword ~s"
159 (t (push (car xx) col))))
163 "if*: keyword ~s at the wrong place " (car xx)))
164 (t (setq state :compl)
165 (push `(,(car xx) ,@col) totalcol))))
167 (cond ((not (string-equal lookat "elseif"))
168 (error "if*: missing elseif clause ")))
169 (setq state :init)))))
175 (scheme :initarg :scheme :initform nil :accessor uri-scheme)
176 (host :initarg :host :initform nil :accessor uri-host)
177 (port :initarg :port :initform nil :accessor uri-port)
178 (path :initarg :path :initform nil :accessor uri-path)
179 (query :initarg :query :initform nil :accessor uri-query)
180 (fragment :initarg :fragment :initform nil :accessor uri-fragment)
181 (plist :initarg :plist :initform nil :accessor uri-plist)
185 ;; used to prevent unnessary work, looking for chars to escape and
187 :initarg :escaped :initform nil :accessor uri-escaped)
189 ;; the cached printable representation of the URI. It *might* be
190 ;; different than the original string, though, because the user might
191 ;; have escaped non-reserved chars--they won't be escaped when the URI
193 :initarg :string :initform nil :accessor uri-string)
195 ;; the cached parsed representation of the URI path.
196 :initarg :parsed-path
198 :accessor .uri-parsed-path)
200 ;; cached sxhash, so we don't have to compute it more than once.
201 :initarg :hashcode :initform nil :accessor uri-hashcode)))
204 ((nid :initarg :nid :initform nil :accessor urn-nid)
205 (nss :initarg :nss :initform nil :accessor urn-nss)))
207 (eval-when (compile eval)
208 (defmacro clear-caching-on-slot-change (name)
209 `(defmethod (setf ,name) :around (new-value (self uri))
210 (declare (ignore new-value))
211 (prog1 (call-next-method)
212 (setf (uri-string self) nil)
213 ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
214 (setf (uri-hashcode self) nil))))
217 (clear-caching-on-slot-change uri-scheme)
218 (clear-caching-on-slot-change uri-host)
219 (clear-caching-on-slot-change uri-port)
220 (clear-caching-on-slot-change uri-path)
221 (clear-caching-on-slot-change uri-query)
222 (clear-caching-on-slot-change uri-fragment)
225 (defmethod make-load-form ((self uri) &optional env)
226 (declare (ignore env))
227 `(make-instance ',(class-name (class-of self))
228 :scheme ,(uri-scheme self)
229 :host ,(uri-host self)
230 :port ,(uri-port self)
231 :path ',(uri-path self)
232 :query ,(uri-query self)
233 :fragment ,(uri-fragment self)
234 :plist ',(uri-plist self)
235 :string ,(uri-string self)
236 :parsed-path ',(.uri-parsed-path self)))
238 (defmethod uri-p ((thing uri)) t)
239 (defmethod uri-p ((thing t)) nil)
243 (scheme (when uri (uri-scheme uri)))
244 (host (when uri (uri-host uri)))
245 (port (when uri (uri-port uri)))
246 (path (when uri (uri-path uri)))
248 (when uri (copy-list (.uri-parsed-path uri))))
249 (query (when uri (uri-query uri)))
250 (fragment (when uri (uri-fragment uri)))
251 (plist (when uri (copy-list (uri-plist uri))))
252 (class (when uri (class-of uri)))
253 &aux (escaped (when uri (uri-escaped uri))))
255 then (setf (uri-scheme place) scheme)
256 (setf (uri-host place) host)
257 (setf (uri-port place) port)
258 (setf (uri-path place) path)
259 (setf (.uri-parsed-path place) parsed-path)
260 (setf (uri-query place) query)
261 (setf (uri-fragment place) fragment)
262 (setf (uri-plist place) plist)
263 (setf (uri-escaped place) escaped)
264 (setf (uri-string place) nil)
265 (setf (uri-hashcode place) nil)
267 elseif (eq 'uri class)
268 then ;; allow the compiler to optimize the call to make-instance:
270 :scheme scheme :host host :port port :path path
271 :parsed-path parsed-path
272 :query query :fragment fragment :plist plist
273 :escaped escaped :string nil :hashcode nil)
274 else (make-instance class
275 :scheme scheme :host host :port port :path path
276 :parsed-path parsed-path
277 :query query :fragment fragment :plist plist
278 :escaped escaped :string nil :hashcode nil)))
280 (defmethod uri-parsed-path ((uri uri))
282 (when (null (.uri-parsed-path uri))
283 (setf (.uri-parsed-path uri)
284 (parse-path (uri-path uri) (uri-escaped uri))))
285 (.uri-parsed-path uri)))
287 (defmethod (setf uri-parsed-path) (path-list (uri uri))
288 (assert (and (consp path-list)
289 (or (member (car path-list) '(:absolute :relative)
291 (setf (uri-path uri) (render-parsed-path path-list t))
292 (setf (.uri-parsed-path uri) path-list)
295 (defun uri-authority (uri)
297 (let ((*print-pretty* nil))
298 (format nil "~a~@[:~a~]" (uri-host uri) (uri-port uri)))))
301 (if* (equalp "urn" (uri-scheme uri))
303 else (error "URI is not a URN: ~s." uri)))
306 (if* (equalp "urn" (uri-scheme uri))
308 else (error "URI is not a URN: ~s." uri)))
310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
313 (defparameter *excluded-characters*
314 '(;; `delims' (except #\%, because it's handled specially):
315 #\< #\> #\" #\space #\#
317 #\{ #\} #\| #\\ #\^ #\[ #\] #\`))
319 (defun reserved-char-vector (chars &key except)
320 (do* ((a (make-array 127 :element-type 'bit :initial-element 0))
321 (chars chars (cdr chars))
322 (c (car chars) (car chars)))
324 (if* (and except (member c except :test #'char=))
326 else (setf (sbit a (char-int c)) 1))))
328 (defparameter *reserved-characters*
329 (reserved-char-vector
330 (append *excluded-characters*
331 '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
332 (defparameter *reserved-authority-characters*
333 (reserved-char-vector
334 (append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
335 (defparameter *reserved-path-characters*
336 (reserved-char-vector
337 (append *excluded-characters*
339 ;;;;The rfc says this should be here, but it doesn't make sense.
342 (defparameter *reserved-path-characters2*
343 ;; These are the same characters that are in
344 ;; *reserved-path-characters*, minus #\/. Why? Because the parsed
345 ;; representation of the path can contain the %2f converted into a /.
346 ;; That's the whole point of having the parsed representation, so that
347 ;; lisp programs can deal with the path element data in the most
349 (reserved-char-vector
350 (append *excluded-characters*
352 ;;;;The rfc says this should be here, but it doesn't make sense.
355 (defparameter *reserved-fragment-characters*
356 (reserved-char-vector (remove #\# *excluded-characters*)))
358 (eval-when (compile eval)
359 (defun gen-char-range-list (start end)
361 (endcode (1+ (char-int end)))
362 (chcode (char-int start)
366 ;; - has to be first, otherwise it signifies a range!
368 then (setq res (nreverse res))
371 else (nreverse res)))
372 (if* (= #.(char-int #\-) chcode)
374 else (push (code-char chcode) res))))
377 (defparameter *valid-nid-characters*
378 (reserved-char-vector
379 '#.(nconc (gen-char-range-list #\a #\z)
380 (gen-char-range-list #\A #\Z)
381 (gen-char-range-list #\0 #\9)
383 (defparameter *reserved-nss-characters*
384 (reserved-char-vector
385 (append *excluded-characters* '(#\& #\~ #\/ #\?))))
387 (defparameter *illegal-characters*
388 (reserved-char-vector (remove #\# *excluded-characters*)))
389 (defparameter *strict-illegal-query-characters*
390 (reserved-char-vector (append '(#\?) (remove #\# *excluded-characters*))))
391 (defparameter *illegal-query-characters*
392 (reserved-char-vector
393 *excluded-characters* :except '(#\^ #\| #\#)))
396 (defun parse-uri (thing &key (class 'uri) &aux escape)
397 (when (uri-p thing) (return-from parse-uri thing))
399 (setq escape (escape-p thing))
400 (multiple-value-bind (scheme host port path query fragment)
401 (parse-uri-string thing)
405 (case *current-case-mode*
406 ((:case-insensitive-upper :case-sensitive-upper)
408 ((:case-insensitive-lower :case-sensitive-lower)
410 (decode-escaped-encoding scheme escape))
411 (find-package :keyword))))
413 (when (and scheme (eq :urn scheme))
414 (return-from parse-uri
415 (make-instance 'urn :scheme scheme :nid host :nss path)))
417 (when host (setq host (decode-escaped-encoding host escape)))
419 (setq port (read-from-string port))
420 (when (not (numberp port)) (error "port is not a number: ~s." port))
421 (when (not (plusp port))
422 (error "port is not a positive integer: ~d." port))
423 (when (eql port (case scheme
429 (when (or (string= "" path)
430 (and ;; we canonicalize away a reference to just /:
432 (member scheme '(:http :https :ftp) :test #'eq)
437 (decode-escaped-encoding path escape *reserved-path-characters*)))
438 (when query (setq query (decode-escaped-encoding query escape)))
441 (decode-escaped-encoding fragment escape
442 *reserved-fragment-characters*)))
444 then ;; allow the compiler to optimize the make-instance call:
453 else ;; do it the slow way:
463 (defmethod uri ((thing uri))
466 (defmethod uri ((thing string))
469 (defmethod uri ((thing t))
470 (error "Cannot coerce ~s to a uri." thing))
472 (defvar *strict-parse* t)
474 (defun parse-uri-string (string &aux (illegal-chars *illegal-characters*))
475 (declare (optimize (speed 3)))
476 ;; Speed is important, so use a specialized state machine instead of
477 ;; regular expressions for parsing the URI string. The regexp we are
486 (end (length string))
491 (path-components '())
494 ;; namespace identifier, for urn parsing only:
496 (declare (fixnum state start end))
497 (flet ((read-token (kind &optional legal-chars)
501 else (let ((sindex start)
504 (declare (fixnum sindex))
507 (when (>= start end) (return nil))
508 (setq c (schar string start))
509 (let ((ci (char-int c)))
511 then (if* (and (eq :colon kind) (eq c #\:))
513 elseif (= 0 (sbit legal-chars ci))
516 URI ~s contains illegal character ~s at position ~d."
518 elseif (and (< ci 128)
520 (= 1 (sbit illegal-chars ci)))
521 then (.parse-error "~
522 URI ~s contains illegal character ~s at position ~d."
526 (#\? (return :question))
527 (#\# (return :hash))))
528 (:query (case c (#\# (return :hash))))
531 (#\: (return :colon))
532 (#\? (return :question))
534 (#\/ (return :slash)))))
536 (if* (> start sindex)
537 then ;; we found some chars
538 ;; before we stopped the parse
539 (setq tokval (subseq string sindex start))
541 else ;; immediately stopped at a special char
544 (failure (&optional why)
545 (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
548 (.parse-error "impossible state: ~d [~s]" state string)))
551 (0 ;; starting to parse
552 (ecase (read-token t)
554 (:question (setq state 7))
555 (:hash (setq state 8))
556 (:slash (setq state 3))
557 (:string (setq state 1))
558 (:end (setq state 9))))
559 (1 ;; seen <token><special char>
560 (let ((token tokval))
561 (ecase (read-token t)
562 (:colon (setq scheme token)
563 (if* (equalp "urn" scheme)
565 else (setq state 2)))
566 (:question (push token path-components)
568 (:hash (push token path-components)
570 (:slash (push token path-components)
571 (push "/" path-components)
574 (:end (push token path-components)
577 (ecase (read-token t)
579 (:question (setq state 7))
580 (:hash (setq state 8))
581 (:slash (setq state 3))
582 (:string (setq state 10))
583 (:end (setq state 9))))
584 (10 ;; seen <scheme>:<token>
585 (let ((token tokval))
586 (ecase (read-token t)
588 (:question (push token path-components)
590 (:hash (push token path-components)
592 (:slash (push token path-components)
595 (:end (push token path-components)
597 (3 ;; seen / or <scheme>:/
598 (ecase (read-token t)
600 (:question (push "/" path-components)
602 (:hash (push "/" path-components)
604 (:slash (setq state 4))
605 (:string (push "/" path-components)
606 (push tokval path-components)
608 (:end (push "/" path-components)
610 (4 ;; seen [<scheme>:]//
611 (ecase (read-token t)
613 (:question (failure))
616 (:string (setq host tokval)
619 (11 ;; seen [<scheme>:]//<host>
620 (ecase (read-token t)
621 (:colon (setq state 5))
622 (:question (setq state 7))
623 (:hash (setq state 8))
624 (:slash (push "/" path-components)
626 (:string (impossible))
627 (:end (setq state 9))))
628 (5 ;; seen [<scheme>:]//<host>:
629 (ecase (read-token t)
631 (:question (failure))
633 (:slash (push "/" path-components)
635 (:string (setq port tokval)
638 (12 ;; seen [<scheme>:]//<host>:[<port>]
639 (ecase (read-token t)
641 (:question (setq state 7))
642 (:hash (setq state 8))
643 (:slash (push "/" path-components)
645 (:string (impossible))
646 (:end (setq state 9))))
648 (ecase (read-token :path)
649 (:question (setq state 7))
650 (:hash (setq state 8))
651 (:string (push tokval path-components)
653 (:end (setq state 9))))
655 (ecase (read-token :path)
656 (:question (setq state 7))
657 (:hash (setq state 8))
658 (:string (impossible))
659 (:end (setq state 9))))
663 then *strict-illegal-query-characters*
664 else *illegal-query-characters*))
665 (ecase (prog1 (read-token :query)
666 (setq illegal-chars *illegal-characters*))
667 (:hash (setq state 8))
668 (:string (setq query tokval)
670 (:end (setq state 9))))
672 (ecase (read-token :query)
673 (:hash (setq state 8))
674 (:string (impossible))
675 (:end (setq state 9))))
677 (ecase (read-token :rest)
678 (:string (setq fragment tokval)
680 (:end (setq state 9))))
685 (apply #'concatenate 'simple-string (nreverse path-components))
688 (15 ;; seen urn:, read nid now
689 (case (read-token :colon *valid-nid-characters*)
690 (:string (setq nid tokval)
692 (t (failure "missing namespace identifier"))))
693 (16 ;; seen urn:<nid>
695 (:colon (setq state 17))
696 (t (failure "missing namespace specific string"))))
697 (17 ;; seen urn:<nid>:, rest is nss
698 (return (values scheme
702 (setq illegal-chars *reserved-nss-characters*)
706 "internal error in parse engine, wrong state: ~s." state)))))))
708 (defun escape-p (string)
709 (declare (optimize (speed 3)))
711 (max (the fixnum (length string))))
713 (declare (fixnum i max))
714 (when (char= #\% (schar string i))
717 (defun parse-path (path-string escape)
718 (do* ((xpath-list (delimited-string-to-list path-string #\/))
721 (if* (string= "" (car xpath-list))
722 then (setf (car xpath-list) :absolute)
723 else (push :relative xpath-list))
725 (pl (cdr path-list) (cdr pl))
727 ((null pl) path-list)
728 (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
729 then ;; there is a param
730 ;;; (setf (car pl) segments)
732 (mapcar #'(lambda (s)
733 (decode-escaped-encoding
734 s escape *reserved-path-characters2*))
737 ;;; (setf (car pl) (car segments))
739 (decode-escaped-encoding
740 (car segments) escape *reserved-path-characters2*)))))
742 (defun decode-escaped-encoding (string escape
743 &optional (reserved-chars
744 *reserved-characters*))
745 ;; Return a string with the real characters.
746 (when (null escape) (return-from decode-escaped-encoding string))
748 (max (length string))
749 (new-string (copy-seq string))
754 (excl::.primcall 'sys::shrink-svector new-string new-i)
756 (sb-kernel:shrink-vector new-string new-i)
758 (subseq new-string 0 new-i)
760 (if* (char= #\% (setq ch (schar string i)))
761 then (when (> (+ i 3) max)
763 "Unsyntactic escaped encoding in ~s." string))
764 (setq ch (schar string (incf i)))
765 (setq ch2 (schar string (incf i)))
766 (when (not (and (setq chc (digit-char-p ch 16))
767 (setq chc2 (digit-char-p ch2 16))))
769 "Non-hexidecimal digits after %: %c%c." ch ch2))
770 (let ((ci (+ (* 16 chc) chc2)))
771 (if* (or (null reserved-chars)
772 (= 0 (sbit reserved-chars ci)))
774 (setf (schar new-string new-i)
776 else (setf (schar new-string new-i) #\%)
777 (setf (schar new-string (incf new-i)) ch)
778 (setf (schar new-string (incf new-i)) ch2)))
779 else (setf (schar new-string new-i) ch))))
781 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
784 (defun render-uri (uri stream
785 &aux (escape (uri-escaped uri))
786 (*print-pretty* nil))
787 (when (null (uri-string uri))
788 (setf (uri-string uri)
789 (let ((scheme (uri-scheme uri))
790 (host (uri-host uri))
791 (port (uri-port uri))
792 (path (uri-path uri))
793 (query (uri-query uri))
794 (fragment (uri-fragment uri)))
795 (concatenate 'simple-string
797 (encode-escaped-encoding
798 (string-downcase ;; for upper case lisps
799 (symbol-name scheme))
800 *reserved-characters* escape))
804 (encode-escaped-encoding
805 host *reserved-authority-characters* escape))
808 ;;;; too slow until ACL 6.0:
809 ;;; (format nil "~d" port)
810 ;;; (princ-to-string port)
811 #-allegro (princ-to-string port)
813 (with-output-to-string (s)
814 (excl::maybe-print-fast s port))
817 (encode-escaped-encoding path
819 ;;*reserved-path-characters*
822 (when query (encode-escaped-encoding query nil escape))
824 (when fragment (encode-escaped-encoding fragment nil escape))))))
826 then (format stream "~a" (uri-string uri))
827 else (uri-string uri)))
829 (defun render-parsed-path (path-list escape)
831 (first (car path-list))
832 (pl (cdr path-list) (cdr pl))
833 (pe (car pl) (car pl)))
835 (when res (apply #'concatenate 'simple-string (nreverse res))))
836 (when (or (null first)
837 (prog1 (eq :absolute first)
842 (encode-escaped-encoding pe *reserved-path-characters* escape)
844 else ;; contains params
845 (push (encode-escaped-encoding
846 (car pe) *reserved-path-characters* escape)
848 (dolist (item (cdr pe))
850 (push (encode-escaped-encoding
851 item *reserved-path-characters* escape)
854 (defun render-urn (urn stream
855 &aux (*print-pretty* nil))
856 (when (null (uri-string urn))
857 (setf (uri-string urn)
858 (let ((nid (urn-nid urn))
860 (concatenate 'simple-string "urn:" nid ":" nss))))
862 then (format stream "~a" (uri-string urn))
863 else (uri-string urn)))
865 (defparameter *escaped-encoding*
866 (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
868 (defun encode-escaped-encoding (string reserved-chars escape)
869 (when (null escape) (return-from encode-escaped-encoding string))
870 ;; Make a string as big as it possibly needs to be (3 times the original
871 ;; size), and truncate it at the end.
872 (do* ((max (length string))
873 (new-max (* 3 max)) ;; worst case new size
874 (new-string (make-string new-max))
880 (excl::.primcall 'sys::shrink-svector new-string (incf new-i))
882 (sb-kernel:shrink-vector new-string (incf new-i))
884 (subseq new-string 0 (incf new-i))
886 (setq ci (char-int (setq c (schar string i))))
887 (if* (or (null reserved-chars)
889 (= 0 (sbit reserved-chars ci)))
892 (setf (schar new-string new-i) c)
893 else ;; need to escape it
894 (multiple-value-bind (q r) (truncate ci 16)
895 (setf (schar new-string (incf new-i)) #\%)
896 (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
897 (setf (schar new-string (incf new-i))
898 (elt *escaped-encoding* r))))))
900 (defmethod print-object ((uri uri) stream)
902 then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
903 else (render-uri uri stream)))
905 (defmethod print-object ((urn urn) stream)
907 then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
908 else (render-urn urn stream)))
910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
911 ;; merging and unmerging
913 (defmethod merge-uris ((uri string) (base string) &optional place)
914 (merge-uris (parse-uri uri) (parse-uri base) place))
916 (defmethod merge-uris ((uri uri) (base string) &optional place)
917 (merge-uris uri (parse-uri base) place))
919 (defmethod merge-uris ((uri string) (base uri) &optional place)
920 (merge-uris (parse-uri uri) base place))
922 (defmethod merge-uris ((uri uri) (base uri) &optional place)
923 ;; The following is from
924 ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
925 ;; and is algorithm we use to merge URIs.
927 ;; For more information, see section 5.2 of the RFC.
931 (when (and (null (uri-parsed-path uri))
932 (null (uri-scheme uri))
933 (null (uri-host uri))
934 (null (uri-port uri))
935 (null (uri-query uri)))
936 (return-from merge-uris
937 (let ((new (copy-uri base :place place)))
938 (when (uri-query uri)
939 (setf (uri-query new) (uri-query uri)))
940 (when (uri-fragment uri)
941 (setf (uri-fragment new) (uri-fragment uri)))
944 (setq uri (copy-uri uri :place place))
947 (when (uri-scheme uri)
948 (return-from merge-uris uri))
949 (setf (uri-scheme uri) (uri-scheme base))
952 (when (uri-host uri) (go :done))
953 (setf (uri-host uri) (uri-host base))
954 (setf (uri-port uri) (uri-port base))
957 (let ((p (uri-parsed-path uri)))
958 (when (and p (eq :absolute (car p)))
959 (when (equal '(:absolute "") p)
960 ;; Canonicalize the way parsing does:
961 (setf (uri-path uri) nil))
966 (or (uri-parsed-path base)
967 ;; needed because we canonicalize away a path of just `/':
969 (path (uri-parsed-path uri))
971 (when (not (eq :absolute (car base-path)))
972 (error "Cannot merge ~a and ~a, since latter is not absolute."
977 (append (butlast base-path)
978 (if* path then (cdr path) else '(""))))
981 (let ((last (last new-path-list)))
982 (if* (atom (car last))
983 then (when (string= "." (car last))
984 (setf (car last) ""))
985 else (when (string= "." (caar last))
986 (setf (caar last) ""))))
988 (delete "." new-path-list :test #'(lambda (a b)
994 (let ((npl (cdr new-path-list))
997 (string= ".." (let ((l (car (last npl))))
1004 :test #'(lambda (a b)
1009 (when (null index) (return))
1011 ;; The RFC says, in 6g, "that the implementation may handle
1012 ;; this error by retaining these components in the resolved
1013 ;; path, by removing them from the resolved path, or by
1014 ;; avoiding traversal of the reference." The examples in C.2
1015 ;; imply that we should do the first thing (retain them), so
1016 ;; that's what we'll do.
1019 then (setq npl (cddr npl))
1021 (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
1022 (setf (cdr tmp) (cdddr tmp))))
1023 (setf (cdr new-path-list) npl)
1024 (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
1027 ;; don't complain if new-path-list starts with `..'. See comment
1028 ;; above about this step.
1031 (when (or (equal '(:absolute "") new-path-list)
1032 (equal '(:absolute) new-path-list))
1033 (setq new-path-list nil))
1034 (setf (uri-path uri)
1035 (render-parsed-path new-path-list
1036 ;; don't know, so have to assume:
1041 (return-from merge-uris uri)))
1043 (defmethod enough-uri ((uri string) (base string) &optional place)
1044 (enough-uri (parse-uri uri) (parse-uri base) place))
1046 (defmethod enough-uri ((uri uri) (base string) &optional place)
1047 (enough-uri uri (parse-uri base) place))
1049 (defmethod enough-uri ((uri string) (base uri) &optional place)
1050 (enough-uri (parse-uri uri) base place))
1052 (defmethod enough-uri ((uri uri) (base uri) &optional place)
1053 (let ((new-scheme nil)
1056 (new-parsed-path nil))
1058 (when (or (and (uri-scheme uri)
1059 (not (equalp (uri-scheme uri) (uri-scheme base))))
1061 (not (equalp (uri-host uri) (uri-host base))))
1062 (not (equalp (uri-port uri) (uri-port base))))
1063 (return-from enough-uri uri))
1065 (when (null (uri-host uri))
1066 (setq new-host (uri-host base)))
1067 (when (null (uri-port uri))
1068 (setq new-port (uri-port base)))
1070 (when (null (uri-scheme uri))
1071 (setq new-scheme (uri-scheme base)))
1073 ;; Now, for the hard one, path.
1074 ;; We essentially do here what enough-namestring does.
1075 (do* ((base-path (uri-parsed-path base))
1076 (path (uri-parsed-path uri))
1077 (bp base-path (cdr bp))
1079 ((or (null bp) (null p))
1080 ;; If p is nil, that means we have something like
1081 ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
1082 ;; new-parsed-path will be nil.
1084 (setq new-parsed-path (copy-list p))
1085 (when (not (symbolp (car new-parsed-path)))
1086 (push :relative new-parsed-path))))
1087 (if* (equal (car bp) (car p))
1089 else (setq new-parsed-path (copy-list p))
1090 (when (not (symbolp (car new-parsed-path)))
1091 (push :relative new-parsed-path))
1095 (when new-parsed-path
1096 (render-parsed-path new-parsed-path
1097 ;; don't know, so have to assume:
1099 (new-query (uri-query uri))
1100 (new-fragment (uri-fragment uri))
1101 (new-plist (copy-list (uri-plist uri))))
1102 (if* (and (null new-scheme)
1106 (null new-parsed-path)
1108 (null new-fragment))
1109 then ;; can't have a completely empty uri!
1111 :class (class-of uri)
1116 :class (class-of uri)
1122 :parsed-path new-parsed-path
1124 :fragment new-fragment
1125 :plist new-plist)))))
1127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1128 ;; support for interning URIs
1130 (defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
1132 (apply #'make-hash-table :size size
1133 :hash-function 'uri-hash
1134 :test 'uri= :values nil keys)
1136 (apply #'make-hash-table :size size keys))
1138 (defun gethash-uri (uri table)
1139 #+allegro (gethash uri table)
1141 (let* ((hash (uri-hash uri))
1142 (existing (gethash hash table)))
1143 (dolist (u existing)
1145 (return-from gethash-uri (values u t))))
1148 (defun puthash-uri (uri table)
1149 #+allegro (excl:puthash-key uri table)
1151 (let ((existing (gethash (uri-hash uri) table)))
1152 (dolist (u existing)
1154 (return-from puthash-uri u)))
1155 (setf (gethash (uri-hash uri) table)
1156 (cons uri existing))
1160 (defun uri-hash (uri)
1161 (if* (uri-hashcode uri)
1163 else (setf (uri-hashcode uri)
1166 (render-uri uri nil)
1169 (render-uri uri nil))))))
1171 (defvar *uris* (make-uri-space))
1173 (defun uri-space () *uris*)
1175 (defun (setf uri-space) (new-val)
1176 (setq *uris* new-val))
1178 ;; bootstrapping (uri= changed from function to method):
1179 (when (fboundp 'uri=) (fmakunbound 'uri=))
1181 (defmethod uri= ((uri1 uri) (uri2 uri))
1182 (when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
1183 (return-from uri= nil))
1184 ;; RFC2396 says: a URL with an explicit ":port", where the port is
1185 ;; the default for the scheme, is the equivalent to one where the
1186 ;; port is elided. Hmmmm. This means that this function has to be
1187 ;; scheme dependent. Grrrr.
1188 (let ((default-port (case (uri-scheme uri1)
1193 (and (equalp (uri-host uri1) (uri-host uri2))
1194 (eql (or (uri-port uri1) default-port)
1195 (or (uri-port uri2) default-port))
1196 (string= (uri-path uri1) (uri-path uri2))
1197 (string= (uri-query uri1) (uri-query uri2))
1198 (string= (uri-fragment uri1) (uri-fragment uri2)))))
1200 (defmethod uri= ((urn1 urn) (urn2 urn))
1201 (when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
1202 (return-from uri= nil))
1203 (and (equalp (urn-nid urn1) (urn-nid urn2))
1204 (urn-nss-equal (urn-nss urn1) (urn-nss urn2))))
1206 (defun urn-nss-equal (nss1 nss2 &aux len)
1207 ;; Return t iff the nss values are the same.
1208 ;; %2c and %2C are equivalent.
1209 (when (or (null nss1) (null nss2)
1210 (not (= (setq len (length nss1))
1212 (return-from urn-nss-equal nil))
1217 (setq c1 (schar nss1 i))
1218 (setq c2 (schar nss2 i))
1221 (if* (and (char= #\% c1) (char= #\% c2))
1222 then (setq state :percent+1)
1223 elseif (char/= c1 c2)
1226 (when (char-not-equal c1 c2) (return nil))
1227 (setq state :percent+2))
1229 (when (char-not-equal c1 c2) (return nil))
1230 (setq state :char)))))
1232 (defmethod intern-uri ((xuri uri) &optional (uri-space *uris*))
1233 (let ((uri (gethash-uri xuri uri-space)))
1236 else (puthash-uri xuri uri-space))))
1238 (defmethod intern-uri ((uri string) &optional (uri-space *uris*))
1239 (intern-uri (parse-uri uri) uri-space))
1241 (defun unintern-uri (uri &optional (uri-space *uris*))
1243 then (clrhash uri-space)
1245 then (remhash uri uri-space)
1246 else (error "bad uri: ~s." uri)))
1248 (defmacro do-all-uris ((var &optional uri-space result-form)
1251 "do-all-uris (var [[uri-space] result-form])
1252 {declaration}* {tag | statement}*
1253 Executes the forms once for each uri with var bound to the current uri"
1256 (g-uri-space (gensym))
1257 (body #+allegro (third (excl::parse-body forms env))
1259 `(let ((,g-uri-space (or ,uri-space *uris*)))
1261 (flet ((,f (,var &optional ,g-ignore)
1262 (declare (ignore-if-unused ,var ,g-ignore))
1264 (maphash #',f ,g-uri-space))
1265 (return ,result-form)))))
1267 (defun sharp-u (stream chr arg)
1268 (declare (ignore chr arg))
1269 (let ((arg (read stream nil nil t)))
1273 then (parse-uri arg)
1276 (internal-reader-error
1278 "#u takes a string or list argument: ~s" arg)))))
1283 (locally (declare (special std-lisp-readtable))
1284 (let ((*readtable* std-lisp-readtable))
1285 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)))
1287 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)
1289 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1295 ;; (don't run under emacs with M-x fi:common-lisp)
1298 (defun time-uri-module ()
1299 (declare (optimize (speed 3) (safety 0) (debug 0)))
1300 (let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
1301 (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
1302 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1303 (format t "~&;;; starting timing testing 1...~%")
1304 (time (dotimes (i 100000) (parse-uri uri)))
1306 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1307 (format t "~&;;; starting timing testing 2...~%")
1308 (let ((uri (parse-uri uri)))
1309 (time (dotimes (i 100000)
1310 ;; forces no caching of the printed representation:
1311 (setf (uri-string uri) nil)
1312 (format nil "~a" uri))))
1314 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1315 (format t "~&;;; starting timing testing 3...~%")
1318 (dotimes (i 100000) (parse-uri uri2))
1319 (let ((uri (parse-uri uri)))
1321 ;; forces no caching of the printed representation:
1322 (setf (uri-string uri) nil)
1323 (format nil "~a" uri)))))))
1325 ;;******** reference output (ultra, modified 5.0.1):
1326 ;;; starting timing testing 1...
1327 ; cpu time (non-gc) 13,710 msec user, 0 msec system
1328 ; cpu time (gc) 600 msec user, 10 msec system
1329 ; cpu time (total) 14,310 msec user, 10 msec system
1330 ; real time 14,465 msec
1332 ; 1,804,261 cons cells, 7 symbols, 41,628,832 other bytes, 0 static bytes
1333 ;;; starting timing testing 2...
1334 ; cpu time (non-gc) 27,500 msec user, 0 msec system
1335 ; cpu time (gc) 280 msec user, 20 msec system
1336 ; cpu time (total) 27,780 msec user, 20 msec system
1337 ; real time 27,897 msec
1339 ; 1,900,463 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1340 ;;; starting timing testing 3...
1341 ; cpu time (non-gc) 52,290 msec user, 10 msec system
1342 ; cpu time (gc) 1,290 msec user, 30 msec system
1343 ; cpu time (total) 53,580 msec user, 40 msec system
1344 ; real time 54,062 msec
1346 ; 7,800,205 cons cells, 0 symbols, 81,697,496 other bytes, 0 static bytes
1348 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1349 ;;; after improving decode-escaped-encoding/encode-escaped-encoding:
1351 ;;; starting timing testing 1...
1352 ; cpu time (non-gc) 14,520 msec user, 0 msec system
1353 ; cpu time (gc) 400 msec user, 0 msec system
1354 ; cpu time (total) 14,920 msec user, 0 msec system
1355 ; real time 15,082 msec
1357 ; 1,800,270 cons cells, 0 symbols, 41,600,160 other bytes, 0 static bytes
1358 ;;; starting timing testing 2...
1359 ; cpu time (non-gc) 27,490 msec user, 10 msec system
1360 ; cpu time (gc) 300 msec user, 0 msec system
1361 ; cpu time (total) 27,790 msec user, 10 msec system
1362 ; real time 28,025 msec
1364 ; 1,900,436 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1365 ;;; starting timing testing 3...
1366 ; cpu time (non-gc) 47,900 msec user, 20 msec system
1367 ; cpu time (gc) 920 msec user, 10 msec system
1368 ; cpu time (total) 48,820 msec user, 30 msec system
1369 ; real time 49,188 msec
1371 ; 3,700,215 cons cells, 0 symbols, 81,707,144 other bytes, 0 static bytes