;; -*- mode: common-lisp; package: puri -*-
-;; Support for URIs in Allegro.
+;; Support for URIs
;; For general URI information see RFC2396.
;;
-;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
-;; copyright (c) 2003 Kevin Rosenberg (porting changes)
+;; copyright (c) 1999-2002 Franz Inc, Berkeley, CA - All rights reserved.
+;; copyright (c) 2002-2005 Franz Inc, Oakland, CA - All rights reserved.
+;; copyright (c) 2003-2010 Kevin Rosenberg
;;
-;; The software, data and information contained herein are proprietary
-;; to, and comprise valuable trade secrets of, Franz, Inc. They are
-;; given in confidence by Franz, Inc. pursuant to a written license
-;; agreement, and may be stored and used only in accordance with the terms
-;; of such license.
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the
+;; preamble found here:
+;; http://opensource.franz.com/preamble.html
;;
-;; Restricted Rights Legend
-;; ------------------------
-;; Use, duplication, and disclosure of the software, data and information
-;; contained herein by any agency, department or entity of the U.S.
-;; Government are subject to restrictions of Restricted Rights for
-;; Commercial Software developed at private expense as specified in
-;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
-;;
-;; Original version from ACL 6.1:
+;; Versions ported from Franz's opensource release
;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
+;; uri.cl,v 2.9.84.1 2005/08/11 18:38:52 layer
+
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
;;
-;; $Id: src.lisp,v 1.3 2003/07/18 21:00:54 kevin Exp $
+;; $Id$
(defpackage #:puri
(:use #:cl)
+ #-allegro (:nicknames #:net.uri)
(:export
- #:uri ; the type and a function
+ #:uri ; the type and a function
#:uri-p
#:copy-uri
- #:uri-scheme ; and slots
+ #:uri-scheme ; and slots
#:uri-host #:uri-port
#:uri-path
#:uri-query
#:uri-fragment
#:uri-plist
- #:uri-authority ; pseudo-slot accessor
+ #:uri-authority ; pseudo-slot accessor
+
+ #:urn ; class
+ #:urn-nid ; pseudo-slot accessor
+ #:urn-nss ; pseudo-slot accessor
- #:urn ; class
- #:urn-nid ; pseudo-slot accessor
- #:urn-nss ; pseudo-slot accessor
-
#:*strict-parse*
#:parse-uri
#:merge-uris
#:uri-parsed-path
#:render-uri
- #:make-uri-space ; interning...
+ #:make-uri-space ; interning...
#:uri-space
#:uri=
#:intern-uri
#:unintern-uri
- #:do-all-uris))
+ #:do-all-uris
-(in-package #:puri)
+ #:uri-parse-error ;; Added by KMR
+ ))
-(eval-when (compile) (declaim (optimize (speed 3))))
+(in-package #:puri)
+(eval-when (:compile-toplevel) (declaim (optimize (speed 3))))
-(eval-when (:compile-toplevel :load-toplevel :execute)
- (defvar if*-keyword-list '("then" "thenret" "else" "elseif")))
#-allegro
-(define-condition parse-error (error) ())
+(defun parse-body (forms &optional env)
+ "Parses a body, returns (VALUES docstring declarations forms)"
+ (declare (ignore env))
+ ;; fixme -- need to add parsing of multiple declarations
+ (let (docstring declarations)
+ (when (stringp (car forms))
+ (setq docstring (car forms))
+ (setq forms (cdr forms)))
+ (when (and (listp (car forms))
+ (symbolp (caar forms))
+ (string-equal (symbol-name '#:declare)
+ (symbol-name (caar forms))))
+ (setq declarations (car forms))
+ (setq forms (cdr forms)))
+ (values docstring declarations forms)))
+
+
+(defun shrink-vector (str size)
+ #+allegro
+ (excl::.primcall 'sys::shrink-svector str size)
+ #+sbcl
+ (setq str (sb-kernel:shrink-vector str size))
+ #+cmu
+ (lisp::shrink-vector str size)
+ #+lispworks
+ (system::shrink-vector$vector str size)
+ #+scl
+ (common-lisp::shrink-vector str size)
+ #-(or allegro cmu lispworks sbcl scl)
+ (setq str (subseq str 0 size))
+ str)
+
+
+;; KMR: Added new condition to handle cross-implementation variances
+;; in the parse-error condition many implementations define
+
+(define-condition uri-parse-error (parse-error)
+ ((fmt-control :initarg :fmt-control :accessor fmt-control)
+ (fmt-arguments :initarg :fmt-arguments :accessor fmt-arguments ))
+ (:report (lambda (c stream)
+ (format stream "Parse error:")
+ (apply #'format stream (fmt-control c) (fmt-arguments c)))))
(defun .parse-error (fmt &rest args)
- #+allegro (apply #'excl::.parse-error fmt args)
- #-allegro (error
- (make-condition 'parse-error :format-control fmt
- :format-arguments args)))
+ (error 'uri-parse-error :fmt-control fmt :fmt-arguments args))
+#-allegro
(defun internal-reader-error (stream fmt &rest args)
- #+allegro
- (apply #'excl::internal-reader-error stream fmt args)
- #-allegro
- (apply #'format stream
- "#u takes a string or list argument: ~s" args))
+ (apply #'format stream fmt args))
#-allegro (defvar *current-case-mode* :case-insensitive-upper)
+#+allegro (eval-when (:compile-toplevel :load-toplevel :execute)
+ (import '(excl:*current-case-mode*
+ excl:delimited-string-to-list
+ excl::parse-body
+ excl::internal-reader-error
+ excl:if*)))
-(defun position-char (char string start max)
+#-allegro
+(defmethod position-char (char (string string) start max)
(declare (optimize (speed 3) (safety 0) (space 0))
- (fixnum start max) (simple-string string))
+ (fixnum start max) (string string))
(do* ((i start (1+ i)))
((= i max) nil)
(declare (fixnum i))
- (when (char= char (schar string i)) (return i))))
-
-#+allegro
-(defun delimited-string-to-list (string &optional (separator #\space))
- (excl:delimited-string-to-list string))
+ (when (char= char (char string i)) (return i))))
-(defun delimited-string-to-list (string &optional (separator #\space)
- skip-terminal)
+#-allegro
+(defun delimited-string-to-list (string &optional (separator #\space)
+ skip-terminal)
(declare (optimize (speed 3) (safety 0) (space 0)
- (compilation-speed 0))
- (type string string)
- (type character separator))
+ (compilation-speed 0))
+ (type string string)
+ (type character separator))
(do* ((len (length string))
- (output '())
- (pos 0)
- (end (position-char separator string pos len)
- (position-char separator string pos len)))
+ (output '())
+ (pos 0)
+ (end (position-char separator string pos len)
+ (position-char separator string pos len)))
((null end)
- (if (< pos len)
- (push (subseq string pos) output)
- (when (or (not skip-terminal) (zerop len))
- (push "" output)))
- (nreverse output))
+ (if (< pos len)
+ (push (subseq string pos) output)
+ (when (and (plusp len) (not skip-terminal))
+ (push "" output)))
+ (nreverse output))
(declare (type fixnum pos len)
- (type (or null fixnum) end))
+ (type (or null fixnum) end))
(push (subseq string pos end) output)
(setq pos (1+ end))))
-
-(defmacro if* (&rest args)
- (do ((xx (reverse args) (cdr xx))
- (state :init)
- (elseseen nil)
- (totalcol nil)
- (lookat nil nil)
- (col nil))
- ((null xx)
- (cond ((eq state :compl)
- `(cond ,@totalcol))
- (t (error "if*: illegal form ~s" args))))
- (cond ((and (symbolp (car xx))
- (member (symbol-name (car xx))
- if*-keyword-list
- :test #'string-equal))
- (setq lookat (symbol-name (car xx)))))
+
+#-allegro
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (defvar if*-keyword-list '("then" "thenret" "else" "elseif"))
+
+ (defmacro if* (&rest args)
+ (do ((xx (reverse args) (cdr xx))
+ (state :init)
+ (elseseen nil)
+ (totalcol nil)
+ (lookat nil nil)
+ (col nil))
+ ((null xx)
+ (cond ((eq state :compl)
+ `(cond ,@totalcol))
+ (t (error "if*: illegal form ~s" args))))
+ (cond ((and (symbolp (car xx))
+ (member (symbol-name (car xx))
+ if*-keyword-list
+ :test #'string-equal))
+ (setq lookat (symbol-name (car xx)))))
(cond ((eq state :init)
- (cond (lookat (cond ((string-equal lookat "thenret")
- (setq col nil
- state :then))
- (t (error
- "if*: bad keyword ~a" lookat))))
- (t (setq state :col
- col nil)
- (push (car xx) col))))
- ((eq state :col)
- (cond (lookat
- (cond ((string-equal lookat "else")
- (cond (elseseen
- (error
- "if*: multiples elses")))
- (setq elseseen t)
- (setq state :init)
- (push `(t ,@col) totalcol))
- ((string-equal lookat "then")
- (setq state :then))
- (t (error "if*: bad keyword ~s"
- lookat))))
- (t (push (car xx) col))))
- ((eq state :then)
- (cond (lookat
- (error
- "if*: keyword ~s at the wrong place " (car xx)))
- (t (setq state :compl)
- (push `(,(car xx) ,@col) totalcol))))
- ((eq state :compl)
- (cond ((not (string-equal lookat "elseif"))
- (error "if*: missing elseif clause ")))
- (setq state :init)))))
+ (cond (lookat (cond ((string-equal lookat "thenret")
+ (setq col nil
+ state :then))
+ (t (error
+ "if*: bad keyword ~a" lookat))))
+ (t (setq state :col
+ col nil)
+ (push (car xx) col))))
+ ((eq state :col)
+ (cond (lookat
+ (cond ((string-equal lookat "else")
+ (cond (elseseen
+ (error
+ "if*: multiples elses")))
+ (setq elseseen t)
+ (setq state :init)
+ (push `(t ,@col) totalcol))
+ ((string-equal lookat "then")
+ (setq state :then))
+ (t (error "if*: bad keyword ~s"
+ lookat))))
+ (t (push (car xx) col))))
+ ((eq state :then)
+ (cond (lookat
+ (error
+ "if*: keyword ~s at the wrong place " (car xx)))
+ (t (setq state :compl)
+ (push `(,(car xx) ,@col) totalcol))))
+ ((eq state :compl)
+ (cond ((not (string-equal lookat "elseif"))
+ (error "if*: missing elseif clause ")))
+ (setq state :init))))))
(defclass uri ()
((nid :initarg :nid :initform nil :accessor urn-nid)
(nss :initarg :nss :initform nil :accessor urn-nss)))
-(eval-when (compile eval)
+(eval-when (:compile-toplevel :execute)
(defmacro clear-caching-on-slot-change (name)
`(defmethod (setf ,name) :around (new-value (self uri))
(declare (ignore new-value))
(prog1 (call-next-method)
- (setf (uri-string self) nil)
- ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
- (setf (uri-hashcode self) nil))))
+ (setf (uri-string self) nil)
+ ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
+ (setf (uri-hashcode self) nil))))
)
(clear-caching-on-slot-change uri-scheme)
(defmethod uri-p ((thing t)) nil)
(defun copy-uri (uri
- &key place
- (scheme (when uri (uri-scheme uri)))
- (host (when uri (uri-host uri)))
- (port (when uri (uri-port uri)))
- (path (when uri (uri-path uri)))
- (parsed-path
- (when uri (copy-list (.uri-parsed-path uri))))
- (query (when uri (uri-query uri)))
- (fragment (when uri (uri-fragment uri)))
- (plist (when uri (copy-list (uri-plist uri))))
- (class (when uri (class-of uri)))
- &aux (escaped (when uri (uri-escaped uri))))
+ &key place
+ (scheme (when uri (uri-scheme uri)))
+ (host (when uri (uri-host uri)))
+ (port (when uri (uri-port uri)))
+ (path (when uri (uri-path uri)))
+ (parsed-path
+ (when uri (copy-list (.uri-parsed-path uri))))
+ (query (when uri (uri-query uri)))
+ (fragment (when uri (uri-fragment uri)))
+ (plist (when uri (copy-list (uri-plist uri))))
+ (class (when uri (class-of uri)))
+ &aux (escaped (when uri (uri-escaped uri))))
(if* place
then (setf (uri-scheme place) scheme)
- (setf (uri-host place) host)
- (setf (uri-port place) port)
- (setf (uri-path place) path)
- (setf (.uri-parsed-path place) parsed-path)
- (setf (uri-query place) query)
- (setf (uri-fragment place) fragment)
- (setf (uri-plist place) plist)
- (setf (uri-escaped place) escaped)
- (setf (uri-string place) nil)
- (setf (uri-hashcode place) nil)
- place
+ (setf (uri-host place) host)
+ (setf (uri-port place) port)
+ (setf (uri-path place) path)
+ (setf (.uri-parsed-path place) parsed-path)
+ (setf (uri-query place) query)
+ (setf (uri-fragment place) fragment)
+ (setf (uri-plist place) plist)
+ (setf (uri-escaped place) escaped)
+ (setf (uri-string place) nil)
+ (setf (uri-hashcode place) nil)
+ place
elseif (eq 'uri class)
then ;; allow the compiler to optimize the call to make-instance:
- (make-instance 'uri
- :scheme scheme :host host :port port :path path
- :parsed-path parsed-path
- :query query :fragment fragment :plist plist
- :escaped escaped :string nil :hashcode nil)
+ (make-instance 'uri
+ :scheme scheme :host host :port port :path path
+ :parsed-path parsed-path
+ :query query :fragment fragment :plist plist
+ :escaped escaped :string nil :hashcode nil)
else (make-instance class
- :scheme scheme :host host :port port :path path
- :parsed-path parsed-path
- :query query :fragment fragment :plist plist
- :escaped escaped :string nil :hashcode nil)))
+ :scheme scheme :host host :port port :path path
+ :parsed-path parsed-path
+ :query query :fragment fragment :plist plist
+ :escaped escaped :string nil :hashcode nil)))
(defmethod uri-parsed-path ((uri uri))
(when (uri-path uri)
(when (null (.uri-parsed-path uri))
(setf (.uri-parsed-path uri)
- (parse-path (uri-path uri) (uri-escaped uri))))
+ (parse-path (uri-path uri) (uri-escaped uri))))
(.uri-parsed-path uri)))
(defmethod (setf uri-parsed-path) (path-list (uri uri))
(assert (and (consp path-list)
- (or (member (car path-list) '(:absolute :relative)
- :test #'eq))))
+ (or (member (car path-list) '(:absolute :relative)
+ :test #'eq))))
(setf (uri-path uri) (render-parsed-path path-list t))
(setf (.uri-parsed-path uri) path-list)
path-list)
(defun reserved-char-vector (chars &key except)
(do* ((a (make-array 127 :element-type 'bit :initial-element 0))
- (chars chars (cdr chars))
- (c (car chars) (car chars)))
+ (chars chars (cdr chars))
+ (c (car chars) (car chars)))
((null chars) a)
(if* (and except (member c except :test #'char=))
thenret
(defparameter *reserved-characters*
(reserved-char-vector
(append *excluded-characters*
- '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
+ '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
(defparameter *reserved-authority-characters*
(reserved-char-vector
(append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
(defparameter *reserved-path-characters*
(reserved-char-vector
(append *excluded-characters*
- '(#\;
-;;;;The rfc says this should be here, but it doesn't make sense.
- ;; #\=
- #\/ #\?))))
-(defparameter *reserved-path-characters2*
- ;; These are the same characters that are in
- ;; *reserved-path-characters*, minus #\/. Why? Because the parsed
- ;; representation of the path can contain the %2f converted into a /.
- ;; That's the whole point of having the parsed representation, so that
- ;; lisp programs can deal with the path element data in the most
- ;; convenient form.
- (reserved-char-vector
- (append *excluded-characters*
- '(#\;
+ '(#\;
;;;;The rfc says this should be here, but it doesn't make sense.
- ;; #\=
- #\?))))
+ ;; #\=
+ #\/ #\?))))
+
(defparameter *reserved-fragment-characters*
(reserved-char-vector (remove #\# *excluded-characters*)))
-(eval-when (compile eval)
+(eval-when (:compile-toplevel :execute)
(defun gen-char-range-list (start end)
(do* ((res '())
- (endcode (1+ (char-int end)))
- (chcode (char-int start)
- (1+ chcode))
- (hyphen nil))
+ (endcode (1+ (char-int end)))
+ (chcode (char-int start)
+ (1+ chcode))
+ (hyphen nil))
((= chcode endcode)
;; - has to be first, otherwise it signifies a range!
(if* hyphen
- then (setq res (nreverse res))
- (push #\- res)
- res
- else (nreverse res)))
+ then (setq res (nreverse res))
+ (push #\- res)
+ res
+ else (nreverse res)))
(if* (= #.(char-int #\-) chcode)
then (setq hyphen t)
else (push (code-char chcode) res))))
(defparameter *valid-nid-characters*
(reserved-char-vector
'#.(nconc (gen-char-range-list #\a #\z)
- (gen-char-range-list #\A #\Z)
- (gen-char-range-list #\0 #\9)
- '(#\- #\. #\+))))
+ (gen-char-range-list #\A #\Z)
+ (gen-char-range-list #\0 #\9)
+ '(#\- #\. #\+))))
(defparameter *reserved-nss-characters*
(reserved-char-vector
(append *excluded-characters* '(#\& #\~ #\/ #\?))))
(defun parse-uri (thing &key (class 'uri) &aux escape)
(when (uri-p thing) (return-from parse-uri thing))
-
+
(setq escape (escape-p thing))
(multiple-value-bind (scheme host port path query fragment)
(parse-uri-string thing)
(when scheme
(setq scheme
- (intern (funcall
- (case *current-case-mode*
- ((:case-insensitive-upper :case-sensitive-upper)
- #'string-upcase)
- ((:case-insensitive-lower :case-sensitive-lower)
- #'string-downcase))
- (decode-escaped-encoding scheme escape))
- (find-package :keyword))))
-
+ (intern (funcall
+ (case *current-case-mode*
+ ((:case-insensitive-upper :case-sensitive-upper)
+ #'string-upcase)
+ ((:case-insensitive-lower :case-sensitive-lower)
+ #'string-downcase))
+ (decode-escaped-encoding scheme escape))
+ (find-package :keyword))))
+
(when (and scheme (eq :urn scheme))
(return-from parse-uri
- (make-instance 'urn :scheme scheme :nid host :nss path)))
-
+ (make-instance 'urn :scheme scheme :nid host :nss path)))
+
(when host (setq host (decode-escaped-encoding host escape)))
(when port
(setq port (read-from-string port))
(when (not (numberp port)) (error "port is not a number: ~s." port))
(when (not (plusp port))
- (error "port is not a positive integer: ~d." port))
+ (error "port is not a positive integer: ~d." port))
(when (eql port (case scheme
- (:http 80)
- (:https 443)
- (:ftp 21)
- (:telnet 23)))
- (setq port nil)))
+ (:http 80)
+ (:https 443)
+ (:ftp 21)
+ (:telnet 23)))
+ (setq port nil)))
(when (or (string= "" path)
- (and ;; we canonicalize away a reference to just /:
- scheme
- (member scheme '(:http :https :ftp) :test #'eq)
- (string= "/" path)))
+ (and ;; we canonicalize away a reference to just /:
+ scheme
+ (member scheme '(:http :https :ftp) :test #'eq)
+ (string= "/" path)))
(setq path nil))
(when path
(setq path
- (decode-escaped-encoding path escape *reserved-path-characters*)))
+ (decode-escaped-encoding path escape *reserved-path-characters*)))
(when query (setq query (decode-escaped-encoding query escape)))
(when fragment
(setq fragment
- (decode-escaped-encoding fragment escape
- *reserved-fragment-characters*)))
+ (decode-escaped-encoding fragment escape
+ *reserved-fragment-characters*)))
(if* (eq 'uri class)
then ;; allow the compiler to optimize the make-instance call:
- (make-instance 'uri
- :scheme scheme
- :host host
- :port port
- :path path
- :query query
- :fragment fragment
- :escaped escape)
+ (make-instance 'uri
+ :scheme scheme
+ :host host
+ :port port
+ :path path
+ :query query
+ :fragment fragment
+ :escaped escape)
else ;; do it the slow way:
- (make-instance class
- :scheme scheme
- :host host
- :port port
- :path path
- :query query
- :fragment fragment
- :escaped escape))))
+ (make-instance class
+ :scheme scheme
+ :host host
+ :port port
+ :path path
+ :query query
+ :fragment fragment
+ :escaped escape))))
(defmethod uri ((thing uri))
thing)
;; (\?([^#]*))?
;; (#(.*))?
(let* ((state 0)
- (start 0)
- (end (length string))
- (tokval nil)
- (scheme nil)
- (host nil)
- (port nil)
- (path-components '())
- (query nil)
- (fragment nil)
- ;; namespace identifier, for urn parsing only:
- (nid nil))
+ (start 0)
+ (end (length string))
+ (tokval nil)
+ (scheme nil)
+ (host nil)
+ (port nil)
+ (path-components '())
+ (query nil)
+ (fragment nil)
+ ;; namespace identifier, for urn parsing only:
+ (nid nil))
(declare (fixnum state start end))
(flet ((read-token (kind &optional legal-chars)
- (setq tokval nil)
- (if* (>= start end)
- then :end
- else (let ((sindex start)
- (res nil)
- c)
- (declare (fixnum sindex))
- (setq res
- (loop
- (when (>= start end) (return nil))
- (setq c (schar string start))
- (let ((ci (char-int c)))
- (if* legal-chars
- then (if* (and (eq :colon kind) (eq c #\:))
- then (return :colon)
- elseif (= 0 (sbit legal-chars ci))
- then (.parse-error
- "~
+ (setq tokval nil)
+ (if* (>= start end)
+ then :end
+ else (let ((sindex start)
+ (res nil)
+ c)
+ (declare (fixnum sindex))
+ (setq res
+ (loop
+ (when (>= start end) (return nil))
+ (setq c (char string start))
+ (let ((ci (char-int c)))
+ (if* legal-chars
+ then (if* (and (eq :colon kind) (eq c #\:))
+ then (return :colon)
+ elseif (= 0 (sbit legal-chars ci))
+ then (.parse-error
+ "~
URI ~s contains illegal character ~s at position ~d."
- string c start))
- elseif (and (< ci 128)
- *strict-parse*
- (= 1 (sbit illegal-chars ci)))
- then (.parse-error "~
+ string c start))
+ elseif (and (< ci 128)
+ *strict-parse*
+ (= 1 (sbit illegal-chars ci)))
+ then (.parse-error "~
URI ~s contains illegal character ~s at position ~d."
- string c start)))
- (case kind
- (:path (case c
- (#\? (return :question))
- (#\# (return :hash))))
- (:query (case c (#\# (return :hash))))
- (:rest)
- (t (case c
- (#\: (return :colon))
- (#\? (return :question))
- (#\# (return :hash))
- (#\/ (return :slash)))))
- (incf start)))
- (if* (> start sindex)
- then ;; we found some chars
- ;; before we stopped the parse
- (setq tokval (subseq string sindex start))
- :string
- else ;; immediately stopped at a special char
- (incf start)
- res))))
- (failure (&optional why)
- (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
- string state why))
- (impossible ()
- (.parse-error "impossible state: ~d [~s]" state string)))
+ string c start)))
+ (case kind
+ (:path (case c
+ (#\? (return :question))
+ (#\# (return :hash))))
+ (:query (case c (#\# (return :hash))))
+ (:rest)
+ (t (case c
+ (#\: (return :colon))
+ (#\? (return :question))
+ (#\# (return :hash))
+ (#\/ (return :slash)))))
+ (incf start)))
+ (if* (> start sindex)
+ then ;; we found some chars
+ ;; before we stopped the parse
+ (setq tokval (subseq string sindex start))
+ :string
+ else ;; immediately stopped at a special char
+ (incf start)
+ res))))
+ (failure (&optional why)
+ (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
+ string state why))
+ (impossible ()
+ (.parse-error "impossible state: ~d [~s]" state string)))
(loop
- (case state
- (0 ;; starting to parse
- (ecase (read-token t)
- (:colon (failure))
- (:question (setq state 7))
- (:hash (setq state 8))
- (:slash (setq state 3))
- (:string (setq state 1))
- (:end (setq state 9))))
- (1 ;; seen <token><special char>
- (let ((token tokval))
- (ecase (read-token t)
- (:colon (setq scheme token)
- (if* (equalp "urn" scheme)
- then (setq state 15)
- else (setq state 2)))
- (:question (push token path-components)
- (setq state 7))
- (:hash (push token path-components)
- (setq state 8))
- (:slash (push token path-components)
- (push "/" path-components)
- (setq state 6))
- (:string (failure))
- (:end (push token path-components)
- (setq state 9)))))
- (2 ;; seen <scheme>:
- (ecase (read-token t)
- (:colon (failure))
- (:question (setq state 7))
- (:hash (setq state 8))
- (:slash (setq state 3))
- (:string (setq state 10))
- (:end (setq state 9))))
- (10 ;; seen <scheme>:<token>
- (let ((token tokval))
- (ecase (read-token t)
- (:colon (failure))
- (:question (push token path-components)
- (setq state 7))
- (:hash (push token path-components)
- (setq state 8))
- (:slash (push token path-components)
- (setq state 6))
- (:string (failure))
- (:end (push token path-components)
- (setq state 9)))))
- (3 ;; seen / or <scheme>:/
- (ecase (read-token t)
- (:colon (failure))
- (:question (push "/" path-components)
- (setq state 7))
- (:hash (push "/" path-components)
- (setq state 8))
- (:slash (setq state 4))
- (:string (push "/" path-components)
- (push tokval path-components)
- (setq state 6))
- (:end (push "/" path-components)
- (setq state 9))))
- (4 ;; seen [<scheme>:]//
- (ecase (read-token t)
- (:colon (failure))
- (:question (failure))
- (:hash (failure))
- (:slash (failure))
- (:string (setq host tokval)
- (setq state 11))
- (:end (failure))))
- (11 ;; seen [<scheme>:]//<host>
- (ecase (read-token t)
- (:colon (setq state 5))
- (:question (setq state 7))
- (:hash (setq state 8))
- (:slash (push "/" path-components)
- (setq state 6))
- (:string (impossible))
- (:end (setq state 9))))
- (5 ;; seen [<scheme>:]//<host>:
- (ecase (read-token t)
- (:colon (failure))
- (:question (failure))
- (:hash (failure))
- (:slash (push "/" path-components)
- (setq state 6))
- (:string (setq port tokval)
- (setq state 12))
- (:end (failure))))
- (12 ;; seen [<scheme>:]//<host>:[<port>]
- (ecase (read-token t)
- (:colon (failure))
- (:question (setq state 7))
- (:hash (setq state 8))
- (:slash (push "/" path-components)
- (setq state 6))
- (:string (impossible))
- (:end (setq state 9))))
- (6 ;; seen /
- (ecase (read-token :path)
- (:question (setq state 7))
- (:hash (setq state 8))
- (:string (push tokval path-components)
- (setq state 13))
- (:end (setq state 9))))
- (13 ;; seen path
- (ecase (read-token :path)
- (:question (setq state 7))
- (:hash (setq state 8))
- (:string (impossible))
- (:end (setq state 9))))
- (7 ;; seen ?
- (setq illegal-chars
- (if* *strict-parse*
- then *strict-illegal-query-characters*
- else *illegal-query-characters*))
- (ecase (prog1 (read-token :query)
- (setq illegal-chars *illegal-characters*))
- (:hash (setq state 8))
- (:string (setq query tokval)
- (setq state 14))
- (:end (setq state 9))))
- (14 ;; query
- (ecase (read-token :query)
- (:hash (setq state 8))
- (:string (impossible))
- (:end (setq state 9))))
- (8 ;; seen #
- (ecase (read-token :rest)
- (:string (setq fragment tokval)
- (setq state 9))
- (:end (setq state 9))))
- (9 ;; done
- (return
- (values
- scheme host port
- (apply #'concatenate 'simple-string (nreverse path-components))
- query fragment)))
- ;; URN parsing:
- (15 ;; seen urn:, read nid now
- (case (read-token :colon *valid-nid-characters*)
- (:string (setq nid tokval)
- (setq state 16))
- (t (failure "missing namespace identifier"))))
- (16 ;; seen urn:<nid>
- (case (read-token t)
- (:colon (setq state 17))
- (t (failure "missing namespace specific string"))))
- (17 ;; seen urn:<nid>:, rest is nss
- (return (values scheme
- nid
- nil
- (progn
- (setq illegal-chars *reserved-nss-characters*)
- (read-token :rest)
- tokval))))
- (t (.parse-error
- "internal error in parse engine, wrong state: ~s." state)))))))
+ (case state
+ (0 ;; starting to parse
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (setq state 7))
+ (:hash (setq state 8))
+ (:slash (setq state 3))
+ (:string (setq state 1))
+ (:end (setq state 9))))
+ (1 ;; seen <token><special char>
+ (let ((token tokval))
+ (ecase (read-token t)
+ (:colon (setq scheme token)
+ (if* (equalp "urn" scheme)
+ then (setq state 15)
+ else (setq state 2)))
+ (:question (push token path-components)
+ (setq state 7))
+ (:hash (push token path-components)
+ (setq state 8))
+ (:slash (push token path-components)
+ (push "/" path-components)
+ (setq state 6))
+ (:string (failure))
+ (:end (push token path-components)
+ (setq state 9)))))
+ (2 ;; seen <scheme>:
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (setq state 7))
+ (:hash (setq state 8))
+ (:slash (setq state 3))
+ (:string (setq state 10))
+ (:end (setq state 9))))
+ (10 ;; seen <scheme>:<token>
+ (let ((token tokval))
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (push token path-components)
+ (setq state 7))
+ (:hash (push token path-components)
+ (setq state 8))
+ (:slash (push token path-components)
+ (setq state 6))
+ (:string (failure))
+ (:end (push token path-components)
+ (setq state 9)))))
+ (3 ;; seen / or <scheme>:/
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (push "/" path-components)
+ (setq state 7))
+ (:hash (push "/" path-components)
+ (setq state 8))
+ (:slash (setq state 4))
+ (:string (push "/" path-components)
+ (push tokval path-components)
+ (setq state 6))
+ (:end (push "/" path-components)
+ (setq state 9))))
+ (4 ;; seen [<scheme>:]//
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (failure))
+ (:hash (failure))
+ (:slash
+ (if* (and (equalp "file" scheme)
+ (null host))
+ then ;; file:///...
+ (push "/" path-components)
+ (setq state 6)
+ else (failure)))
+ (:string (setq host tokval)
+ (setq state 11))
+ (:end (failure))))
+ (11 ;; seen [<scheme>:]//<host>
+ (ecase (read-token t)
+ (:colon (setq state 5))
+ (:question (setq state 7))
+ (:hash (setq state 8))
+ (:slash (push "/" path-components)
+ (setq state 6))
+ (:string (impossible))
+ (:end (setq state 9))))
+ (5 ;; seen [<scheme>:]//<host>:
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (failure))
+ (:hash (failure))
+ (:slash (push "/" path-components)
+ (setq state 6))
+ (:string (setq port tokval)
+ (setq state 12))
+ (:end (failure))))
+ (12 ;; seen [<scheme>:]//<host>:[<port>]
+ (ecase (read-token t)
+ (:colon (failure))
+ (:question (setq state 7))
+ (:hash (setq state 8))
+ (:slash (push "/" path-components)
+ (setq state 6))
+ (:string (impossible))
+ (:end (setq state 9))))
+ (6 ;; seen /
+ (ecase (read-token :path)
+ (:question (setq state 7))
+ (:hash (setq state 8))
+ (:string (push tokval path-components)
+ (setq state 13))
+ (:end (setq state 9))))
+ (13 ;; seen path
+ (ecase (read-token :path)
+ (:question (setq state 7))
+ (:hash (setq state 8))
+ (:string (impossible))
+ (:end (setq state 9))))
+ (7 ;; seen ?
+ (setq illegal-chars
+ (if* *strict-parse*
+ then *strict-illegal-query-characters*
+ else *illegal-query-characters*))
+ (ecase (prog1 (read-token :query)
+ (setq illegal-chars *illegal-characters*))
+ (:hash (setq state 8))
+ (:string (setq query tokval)
+ (setq state 14))
+ (:end (setq state 9))))
+ (14 ;; query
+ (ecase (read-token :query)
+ (:hash (setq state 8))
+ (:string (impossible))
+ (:end (setq state 9))))
+ (8 ;; seen #
+ (ecase (read-token :rest)
+ (:string (setq fragment tokval)
+ (setq state 9))
+ (:end (setq state 9))))
+ (9 ;; done
+ (return
+ (values
+ scheme host port
+ (apply #'concatenate 'string (nreverse path-components))
+ query fragment)))
+ ;; URN parsing:
+ (15 ;; seen urn:, read nid now
+ (case (read-token :colon *valid-nid-characters*)
+ (:string (setq nid tokval)
+ (setq state 16))
+ (t (failure "missing namespace identifier"))))
+ (16 ;; seen urn:<nid>
+ (case (read-token t)
+ (:colon (setq state 17))
+ (t (failure "missing namespace specific string"))))
+ (17 ;; seen urn:<nid>:, rest is nss
+ (return (values scheme
+ nid
+ nil
+ (progn
+ (setq illegal-chars *reserved-nss-characters*)
+ (read-token :rest)
+ tokval))))
+ (t (.parse-error
+ "internal error in parse engine, wrong state: ~s." state)))))))
(defun escape-p (string)
(declare (optimize (speed 3)))
(do* ((i 0 (1+ i))
- (max (the fixnum (length string))))
+ (max (the fixnum (length string))))
((= i max) nil)
(declare (fixnum i max))
- (when (char= #\% (schar string i))
+ (when (char= #\% (char string i))
(return t))))
(defun parse-path (path-string escape)
(do* ((xpath-list (delimited-string-to-list path-string #\/))
- (path-list
- (progn
- (if* (string= "" (car xpath-list))
- then (setf (car xpath-list) :absolute)
- else (push :relative xpath-list))
- xpath-list))
- (pl (cdr path-list) (cdr pl))
- segments)
+ (path-list
+ (progn
+ (if* (string= "" (car xpath-list))
+ then (setf (car xpath-list) :absolute)
+ else (push :relative xpath-list))
+ xpath-list))
+ (pl (cdr path-list) (cdr pl))
+ segments)
((null pl) path-list)
- (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
+
+ (if* (cdr (setq segments
+ (if* (string= "" (car pl))
+ then '("")
+ else (delimited-string-to-list (car pl) #\;))))
then ;; there is a param
-;;; (setf (car pl) segments)
- (setf (car pl)
- (mapcar #'(lambda (s)
- (decode-escaped-encoding
- s escape *reserved-path-characters2*))
- segments))
+ (setf (car pl)
+ (mapcar #'(lambda (s)
+ (decode-escaped-encoding s escape
+ ;; decode all %xx:
+ nil))
+ segments))
else ;; no param
-;;; (setf (car pl) (car segments))
- (setf (car pl)
- (decode-escaped-encoding
- (car segments) escape *reserved-path-characters2*)))))
+ (setf (car pl)
+ (decode-escaped-encoding (car segments) escape
+ ;; decode all %xx:
+ nil)))))
(defun decode-escaped-encoding (string escape
- &optional (reserved-chars
- *reserved-characters*))
- ;; Return a string with the real characters.
+ &optional (reserved-chars
+ *reserved-characters*))
+ ;;Return a string with the real characters.
(when (null escape) (return-from decode-escaped-encoding string))
- (do* ((i 0 (1+ i))
- (max (length string))
- (new-string (copy-seq string))
- (new-i 0 (1+ new-i))
- ch ch2 chc chc2)
- ((= i max)
- #+allegro
- (excl::.primcall 'sys::shrink-svector new-string new-i)
- #+sbcl
- (sb-kernel:shrink-vector new-string new-i)
- #-(or allegro sbcl)
- (subseq new-string 0 new-i)
- new-string)
- (if* (char= #\% (setq ch (schar string i)))
- then (when (> (+ i 3) max)
- (.parse-error
- "Unsyntactic escaped encoding in ~s." string))
- (setq ch (schar string (incf i)))
- (setq ch2 (schar string (incf i)))
- (when (not (and (setq chc (digit-char-p ch 16))
- (setq chc2 (digit-char-p ch2 16))))
- (.parse-error
- "Non-hexidecimal digits after %: %c%c." ch ch2))
- (let ((ci (+ (* 16 chc) chc2)))
- (if* (or (null reserved-chars)
- (= 0 (sbit reserved-chars ci)))
- then ;; ok as is
- (setf (schar new-string new-i)
- (code-char ci))
- else (setf (schar new-string new-i) #\%)
- (setf (schar new-string (incf new-i)) ch)
- (setf (schar new-string (incf new-i)) ch2)))
- else (setf (schar new-string new-i) ch))))
+ (let ((curpos 0)
+ (maxpos (length string))
+ (strs nil))
+ (flet ((next-ansii-substring ()
+ (let ((pos (or (position #\% string :start curpos)
+ maxpos)))
+ (when (and (= curpos 0)
+ (= pos maxpos))
+ (return-from decode-escaped-encoding string))
+ (when (< curpos pos)
+ (push (subseq string
+ curpos
+ pos)
+ strs)
+ (setf curpos pos))))
+ (next-encoded-substring ()
+ (let ((pos (or (loop for i from curpos below maxpos by 3
+ unless (char= (aref string i)
+ #\%)
+ return i)
+ maxpos)))
+ (when (< curpos pos)
+ (let ((octets (handler-case (make-array (/ (- pos curpos)
+ 3)
+ :element-type '(unsigned-byte 8)
+ :fill-pointer 0)
+ (error ()
+ (.parse-error "Unsyntactic escaped encoding in ~s." string)))))
+ (loop for i from curpos below pos by 3
+ do (vector-push (handler-case
+ (parse-integer string
+ :start (1+ i)
+ :end (+ i 3)
+ :radix 16)
+ (error ()
+ (.parse-error "Non-hexidecimal digits after %: ~c~c."
+ (aref string (1+ i))
+ (aref string (+ 2 i)))))
+ octets))
+
+ (let* ((decoded-string (babel:octets-to-string octets
+ :encoding :utf-8))
+ (rpos (if reserved-chars
+ (position-if #'(lambda (ch)
+ (not (or (> (char-code ch) 127)
+ (= (sbit reserved-chars (char-code ch)) 0))))
+ decoded-string))))
+ (push (if rpos
+ (with-output-to-string (out)
+ (loop for ch across decoded-string
+ with i = curpos
+ do (let ((octet (char-code ch)))
+ (cond
+ ((or (null reserved-chars)
+ (> octet 127)
+ (= (sbit reserved-chars octet) 0))
+ (write-char ch out)
+ (incf i
+ (* (cond
+ ((< octet #x80) 1)
+ ((< octet #x800) 2)
+ ((< octet #x10000) 3)
+ ((< octet #x200000) 4)
+ ((< octet #x4000000) 5)
+ (t 6))
+ 3)))
+ (t (write-string (subseq string i (+ i 3)) out)
+ (incf i 3)
+ )))))
+ decoded-string)
+ strs))))
+ (setf curpos pos))))
+ (loop
+ while (< curpos maxpos)
+ do (next-ansii-substring)
+ while (< curpos maxpos)
+ do (next-encoded-substring)))
+ (if (cdr strs)
+ (apply #'concatenate
+ 'string
+ (nreverse strs))
+ (or (car strs)
+ ""))))
+
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Printing
(defun render-uri (uri stream
- &aux (escape (uri-escaped uri))
- (*print-pretty* nil))
+ &aux (escape (uri-escaped uri))
+ (*print-pretty* nil))
(when (null (uri-string uri))
(setf (uri-string uri)
(let ((scheme (uri-scheme uri))
- (host (uri-host uri))
- (port (uri-port uri))
- (path (uri-path uri))
- (query (uri-query uri))
- (fragment (uri-fragment uri)))
- (concatenate 'simple-string
- (when scheme
- (encode-escaped-encoding
- (string-downcase ;; for upper case lisps
- (symbol-name scheme))
- *reserved-characters* escape))
- (when scheme ":")
- (when host "//")
- (when host
- (encode-escaped-encoding
- host *reserved-authority-characters* escape))
- (when port ":")
- (when port
-;;;; too slow until ACL 6.0:
-;;; (format nil "~d" port)
-;;; (princ-to-string port)
- #-allegro (princ-to-string port)
- #+allegro
- (with-output-to-string (s)
- (excl::maybe-print-fast s port))
- )
- (when path
- (encode-escaped-encoding path
- nil
- ;;*reserved-path-characters*
- escape))
- (when query "?")
- (when query (encode-escaped-encoding query nil escape))
- (when fragment "#")
- (when fragment (encode-escaped-encoding fragment nil escape))))))
+ (host (uri-host uri))
+ (port (uri-port uri))
+ (path (uri-path uri))
+ (query (uri-query uri))
+ (fragment (uri-fragment uri)))
+ (concatenate 'string
+ (when scheme
+ (encode-escaped-encoding
+ (string-downcase ;; for upper case lisps
+ (symbol-name scheme))
+ *reserved-characters* escape))
+ (when scheme ":")
+ (when (or host (eq :file scheme)) "//")
+ (when host
+ (encode-escaped-encoding
+ host *reserved-authority-characters* escape))
+ (when port ":")
+ (when port
+ #-allegro (format nil "~D" port)
+ #+allegro (with-output-to-string (s)
+ (excl::maybe-print-fast s port))
+ )
+ (encode-escaped-encoding (or path "/")
+ nil
+ ;;*reserved-path-characters*
+ escape)
+ (when query "?")
+ (when query (encode-escaped-encoding query nil escape))
+ (when fragment "#")
+ (when fragment (encode-escaped-encoding fragment nil escape))))))
(if* stream
then (format stream "~a" (uri-string uri))
else (uri-string uri)))
(defun render-parsed-path (path-list escape)
(do* ((res '())
- (first (car path-list))
- (pl (cdr path-list) (cdr pl))
- (pe (car pl) (car pl)))
+ (first (car path-list))
+ (pl (cdr path-list) (cdr pl))
+ (pe (car pl) (car pl)))
((null pl)
- (when res (apply #'concatenate 'simple-string (nreverse res))))
+ (when res (apply #'concatenate 'string (nreverse res))))
(when (or (null first)
- (prog1 (eq :absolute first)
- (setq first nil)))
+ (prog1 (eq :absolute first)
+ (setq first nil)))
(push "/" res))
(if* (atom pe)
then (push
- (encode-escaped-encoding pe *reserved-path-characters* escape)
- res)
+ (encode-escaped-encoding pe *reserved-path-characters* escape)
+ res)
else ;; contains params
- (push (encode-escaped-encoding
- (car pe) *reserved-path-characters* escape)
- res)
- (dolist (item (cdr pe))
- (push ";" res)
- (push (encode-escaped-encoding
- item *reserved-path-characters* escape)
- res)))))
+ (push (encode-escaped-encoding
+ (car pe) *reserved-path-characters* escape)
+ res)
+ (dolist (item (cdr pe))
+ (push ";" res)
+ (push (encode-escaped-encoding
+ item *reserved-path-characters* escape)
+ res)))))
(defun render-urn (urn stream
- &aux (*print-pretty* nil))
+ &aux (*print-pretty* nil))
(when (null (uri-string urn))
(setf (uri-string urn)
(let ((nid (urn-nid urn))
- (nss (urn-nss urn)))
- (concatenate 'simple-string "urn:" nid ":" nss))))
+ (nss (urn-nss urn)))
+ (concatenate 'string "urn:" nid ":" nss))))
(if* stream
then (format stream "~a" (uri-string urn))
else (uri-string urn)))
-(defparameter *escaped-encoding*
- (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
-
(defun encode-escaped-encoding (string reserved-chars escape)
(when (null escape) (return-from encode-escaped-encoding string))
- ;; Make a string as big as it possibly needs to be (3 times the original
- ;; size), and truncate it at the end.
- (do* ((max (length string))
- (new-max (* 3 max)) ;; worst case new size
- (new-string (make-string new-max))
- (i 0 (1+ i))
- (new-i -1)
- c ci)
- ((= i max)
- #+allegro
- (excl::.primcall 'sys::shrink-svector new-string (incf new-i))
- #+sbcl
- (sb-kernel:shrink-vector new-string (incf new-i))
- #-(or allegro sbcl)
- (subseq new-string 0 (incf new-i))
- new-string)
- (setq ci (char-int (setq c (schar string i))))
- (if* (or (null reserved-chars)
- (> ci 127)
- (= 0 (sbit reserved-chars ci)))
- then ;; ok as is
- (incf new-i)
- (setf (schar new-string new-i) c)
- else ;; need to escape it
- (multiple-value-bind (q r) (truncate ci 16)
- (setf (schar new-string (incf new-i)) #\%)
- (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
- (setf (schar new-string (incf new-i))
- (elt *escaped-encoding* r))))))
+ (with-output-to-string (out)
+ (loop for ch across string
+ do (let ((code (char-code ch)))
+ (if (and (< code 128)
+ (or (null reserved-chars)
+ (= 0 (sbit reserved-chars code))))
+ (write-char ch out)
+ (loop for octet across (babel:string-to-octets (string ch) :encoding :utf-8)
+ do (format out "%~(~2,'0x~)" octet)))))))
(defmethod print-object ((uri uri) stream)
(if* *print-escape*
- then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
+ then (print-unreadable-object (uri stream :type t) (render-uri uri stream))
else (render-uri uri stream)))
(defmethod print-object ((urn urn) stream)
(if* *print-escape*
- then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
+ then (print-unreadable-object (urn stream :type t) (render-urn urn stream))
else (render-urn urn stream)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defmethod merge-uris ((uri string) (base uri) &optional place)
(merge-uris (parse-uri uri) base place))
+
(defmethod merge-uris ((uri uri) (base uri) &optional place)
- ;; The following is from
- ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
- ;; and is algorithm we use to merge URIs.
- ;;
- ;; For more information, see section 5.2 of the RFC.
+ ;; See ../doc/rfc2396.txt for info on the algorithm we use to merge
+ ;; URIs.
;;
(tagbody
;;;; step 2
(when (and (null (uri-parsed-path uri))
- (null (uri-scheme uri))
- (null (uri-host uri))
- (null (uri-port uri))
- (null (uri-query uri)))
+ (null (uri-scheme uri))
+ (null (uri-host uri))
+ (null (uri-port uri))
+ (null (uri-query uri)))
(return-from merge-uris
- (let ((new (copy-uri base :place place)))
- (when (uri-query uri)
- (setf (uri-query new) (uri-query uri)))
- (when (uri-fragment uri)
- (setf (uri-fragment new) (uri-fragment uri)))
- new)))
+ (let ((new (copy-uri base :place place)))
+ (when (uri-query uri)
+ (setf (uri-query new) (uri-query uri)))
+ (when (uri-fragment uri)
+ (setf (uri-fragment new) (uri-fragment uri)))
+ new)))
(setq uri (copy-uri uri :place place))
(when (uri-scheme uri)
(return-from merge-uris uri))
(setf (uri-scheme uri) (uri-scheme base))
-
+
;;;; step 4
(when (uri-host uri) (go :done))
(setf (uri-host uri) (uri-host base))
(setf (uri-port uri) (uri-port base))
-
+
;;;; step 5
(let ((p (uri-parsed-path uri)))
+
+ ;; bug13133:
+ ;; The following form causes our implementation to be at odds with
+ ;; RFC 2396, however this is apparently what was intended by the
+ ;; authors of the RFC. Specifically, (merge-uris "?y" "/foo")
+ ;; should return #<uri /foo?y> instead of #<uri ?y>, according to
+ ;; this:
+;;; http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query
+ (when (null p)
+ (setf (uri-path uri) (uri-path base))
+ (go :done))
+
(when (and p (eq :absolute (car p)))
- (when (equal '(:absolute "") p)
- ;; Canonicalize the way parsing does:
- (setf (uri-path uri) nil))
- (go :done)))
-
+ (when (equal '(:absolute "") p)
+ ;; Canonicalize the way parsing does:
+ (setf (uri-path uri) nil))
+ (go :done)))
+
;;;; step 6
(let* ((base-path
- (or (uri-parsed-path base)
- ;; needed because we canonicalize away a path of just `/':
- '(:absolute "")))
- (path (uri-parsed-path uri))
- new-path-list)
+ (or (uri-parsed-path base)
+ ;; needed because we canonicalize away a path of just `/':
+ '(:absolute "")))
+ (path (uri-parsed-path uri))
+ new-path-list)
(when (not (eq :absolute (car base-path)))
- (error "Cannot merge ~a and ~a, since latter is not absolute."
- uri base))
+ (error "Cannot merge ~a and ~a, since latter is not absolute."
+ uri base))
;; steps 6a and 6b:
(setq new-path-list
- (append (butlast base-path)
- (if* path then (cdr path) else '(""))))
+ (append (butlast base-path)
+ (if* path then (cdr path) else '(""))))
;; steps 6c and 6d:
(let ((last (last new-path-list)))
- (if* (atom (car last))
- then (when (string= "." (car last))
- (setf (car last) ""))
- else (when (string= "." (caar last))
- (setf (caar last) ""))))
+ (if* (atom (car last))
+ then (when (string= "." (car last))
+ (setf (car last) ""))
+ else (when (string= "." (caar last))
+ (setf (caar last) ""))))
(setq new-path-list
- (delete "." new-path-list :test #'(lambda (a b)
- (if* (atom b)
- then (string= a b)
- else nil))))
+ (delete "." new-path-list :test #'(lambda (a b)
+ (if* (atom b)
+ then (string= a b)
+ else nil))))
;; steps 6e and 6f:
(let ((npl (cdr new-path-list))
- index tmp fix-tail)
- (setq fix-tail
- (string= ".." (let ((l (car (last npl))))
- (if* (atom l)
- then l
- else (car l)))))
- (loop
- (setq index
- (position ".." npl
- :test #'(lambda (a b)
- (string= a
- (if* (atom b)
- then b
- else (car b))))))
- (when (null index) (return))
- (when (= 0 index)
- ;; The RFC says, in 6g, "that the implementation may handle
- ;; this error by retaining these components in the resolved
- ;; path, by removing them from the resolved path, or by
- ;; avoiding traversal of the reference." The examples in C.2
- ;; imply that we should do the first thing (retain them), so
- ;; that's what we'll do.
- (return))
- (if* (= 1 index)
- then (setq npl (cddr npl))
- else (setq tmp npl)
- (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
- (setf (cdr tmp) (cdddr tmp))))
- (setf (cdr new-path-list) npl)
- (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
+ index tmp fix-tail)
+ (setq fix-tail
+ (string= ".." (let ((l (car (last npl))))
+ (if* (atom l)
+ then l
+ else (car l)))))
+ (loop
+ (setq index
+ (position ".." npl
+ :test #'(lambda (a b)
+ (string= a
+ (if* (atom b)
+ then b
+ else (car b))))))
+ (when (null index) (return))
+ (when (= 0 index)
+ ;; The RFC says, in 6g, "that the implementation may handle
+ ;; this error by retaining these components in the resolved
+ ;; path, by removing them from the resolved path, or by
+ ;; avoiding traversal of the reference." The examples in C.2
+ ;; imply that we should do the first thing (retain them), so
+ ;; that's what we'll do.
+ (return))
+ (if* (= 1 index)
+ then (setq npl (cddr npl))
+ else (setq tmp npl)
+ (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
+ (setf (cdr tmp) (cdddr tmp))))
+ (setf (cdr new-path-list) npl)
+ (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
;; step 6g:
;; don't complain if new-path-list starts with `..'. See comment
;; step 6h:
(when (or (equal '(:absolute "") new-path-list)
- (equal '(:absolute) new-path-list))
- (setq new-path-list nil))
+ (equal '(:absolute) new-path-list))
+ (setq new-path-list nil))
(setf (uri-path uri)
- (render-parsed-path new-path-list
- ;; don't know, so have to assume:
- t)))
+ (render-parsed-path new-path-list
+ ;; don't know, so have to assume:
+ t)))
;;;; step 7
:done
(defmethod enough-uri ((uri uri) (base uri) &optional place)
(let ((new-scheme nil)
- (new-host nil)
- (new-port nil)
- (new-parsed-path nil))
+ (new-host nil)
+ (new-port nil)
+ (new-parsed-path nil))
(when (or (and (uri-scheme uri)
- (not (equalp (uri-scheme uri) (uri-scheme base))))
- (and (uri-host uri)
- (not (equalp (uri-host uri) (uri-host base))))
- (not (equalp (uri-port uri) (uri-port base))))
+ (not (equalp (uri-scheme uri) (uri-scheme base))))
+ (and (uri-host uri)
+ (not (equalp (uri-host uri) (uri-host base))))
+ (not (equalp (uri-port uri) (uri-port base))))
(return-from enough-uri uri))
(when (null (uri-host uri))
(setq new-host (uri-host base)))
(when (null (uri-port uri))
(setq new-port (uri-port base)))
-
+
(when (null (uri-scheme uri))
(setq new-scheme (uri-scheme base)))
;; Now, for the hard one, path.
;; We essentially do here what enough-namestring does.
(do* ((base-path (uri-parsed-path base))
- (path (uri-parsed-path uri))
- (bp base-path (cdr bp))
- (p path (cdr p)))
- ((or (null bp) (null p))
- ;; If p is nil, that means we have something like
- ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
- ;; new-parsed-path will be nil.
- (when (null bp)
- (setq new-parsed-path (copy-list p))
- (when (not (symbolp (car new-parsed-path)))
- (push :relative new-parsed-path))))
+ (path (uri-parsed-path uri))
+ (bp base-path (cdr bp))
+ (p path (cdr p)))
+ ((or (null bp) (null p))
+ ;; If p is nil, that means we have something like
+ ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
+ ;; new-parsed-path will be nil.
+ (when (null bp)
+ (setq new-parsed-path (copy-list p))
+ (when (not (symbolp (car new-parsed-path)))
+ (push :relative new-parsed-path))))
(if* (equal (car bp) (car p))
- thenret ;; skip it
- else (setq new-parsed-path (copy-list p))
- (when (not (symbolp (car new-parsed-path)))
- (push :relative new-parsed-path))
- (return)))
-
- (let ((new-path
- (when new-parsed-path
- (render-parsed-path new-parsed-path
- ;; don't know, so have to assume:
- t)))
- (new-query (uri-query uri))
- (new-fragment (uri-fragment uri))
- (new-plist (copy-list (uri-plist uri))))
+ thenret ;; skip it
+ else (setq new-parsed-path (copy-list p))
+ (when (not (symbolp (car new-parsed-path)))
+ (push :relative new-parsed-path))
+ (return)))
+
+ (let ((new-path
+ (when new-parsed-path
+ (render-parsed-path new-parsed-path
+ ;; don't know, so have to assume:
+ t)))
+ (new-query (uri-query uri))
+ (new-fragment (uri-fragment uri))
+ (new-plist (copy-list (uri-plist uri))))
(if* (and (null new-scheme)
- (null new-host)
- (null new-port)
- (null new-path)
- (null new-parsed-path)
- (null new-query)
- (null new-fragment))
- then ;; can't have a completely empty uri!
- (copy-uri nil
- :class (class-of uri)
- :place place
- :path "/"
- :plist new-plist)
- else (copy-uri nil
- :class (class-of uri)
- :place place
- :scheme new-scheme
- :host new-host
- :port new-port
- :path new-path
- :parsed-path new-parsed-path
- :query new-query
- :fragment new-fragment
- :plist new-plist)))))
+ (null new-host)
+ (null new-port)
+ (null new-path)
+ (null new-parsed-path)
+ (null new-query)
+ (null new-fragment))
+ then ;; can't have a completely empty uri!
+ (copy-uri nil
+ :class (class-of uri)
+ :place place
+ :path "/"
+ :plist new-plist)
+ else (copy-uri nil
+ :class (class-of uri)
+ :place place
+ :scheme new-scheme
+ :host new-host
+ :port new-port
+ :path new-path
+ :parsed-path new-parsed-path
+ :query new-query
+ :fragment new-fragment
+ :plist new-plist)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; support for interning URIs
(defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
#+allegro
(apply #'make-hash-table :size size
- :hash-function 'uri-hash
- :test 'uri= :values nil keys)
+ :hash-function 'uri-hash
+ :test 'uri= :values nil keys)
#-allegro
(apply #'make-hash-table :size size keys))
(defun gethash-uri (uri table)
#+allegro (gethash uri table)
- #-allegro
+ #-allegro
(let* ((hash (uri-hash uri))
- (existing (gethash hash table)))
+ (existing (gethash hash table)))
(dolist (u existing)
(when (uri= u uri)
- (return-from gethash-uri (values u t))))
+ (return-from gethash-uri (values u t))))
(values nil nil)))
(defun puthash-uri (uri table)
#+allegro (excl:puthash-key uri table)
- #-allegro
+ #-allegro
(let ((existing (gethash (uri-hash uri) table)))
(dolist (u existing)
(when (uri= u uri)
- (return-from puthash-uri u)))
+ (return-from puthash-uri u)))
(setf (gethash (uri-hash uri) table)
(cons uri existing))
uri))
(if* (uri-hashcode uri)
thenret
else (setf (uri-hashcode uri)
- (sxhash
- #+allegro
- (render-uri uri nil)
- #-allegro
- (string-downcase
- (render-uri uri nil))))))
+ (sxhash
+ #+allegro
+ (render-uri uri nil)
+ #-allegro
+ (string-downcase
+ (render-uri uri nil))))))
(defvar *uris* (make-uri-space))
;; bootstrapping (uri= changed from function to method):
(when (fboundp 'uri=) (fmakunbound 'uri=))
+(defgeneric uri= (uri1 uri2))
(defmethod uri= ((uri1 uri) (uri2 uri))
(when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
(return-from uri= nil))
;; port is elided. Hmmmm. This means that this function has to be
;; scheme dependent. Grrrr.
(let ((default-port (case (uri-scheme uri1)
- (:http 80)
- (:https 443)
- (:ftp 21)
- (:telnet 23))))
+ (:http 80)
+ (:https 443)
+ (:ftp 21)
+ (:telnet 23))))
(and (equalp (uri-host uri1) (uri-host uri2))
- (eql (or (uri-port uri1) default-port)
- (or (uri-port uri2) default-port))
- (string= (uri-path uri1) (uri-path uri2))
- (string= (uri-query uri1) (uri-query uri2))
- (string= (uri-fragment uri1) (uri-fragment uri2)))))
+ (eql (or (uri-port uri1) default-port)
+ (or (uri-port uri2) default-port))
+ (string= (uri-path uri1) (uri-path uri2))
+ (string= (uri-query uri1) (uri-query uri2))
+ (string= (uri-fragment uri1) (uri-fragment uri2)))))
(defmethod uri= ((urn1 urn) (urn2 urn))
(when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
;; Return t iff the nss values are the same.
;; %2c and %2C are equivalent.
(when (or (null nss1) (null nss2)
- (not (= (setq len (length nss1))
- (length nss2))))
+ (not (= (setq len (length nss1))
+ (length nss2))))
(return-from urn-nss-equal nil))
(do* ((i 0 (1+ i))
- (state :char)
- c1 c2)
+ (state :char)
+ c1 c2)
((= i len) t)
- (setq c1 (schar nss1 i))
- (setq c2 (schar nss2 i))
+ (setq c1 (char nss1 i))
+ (setq c2 (char nss2 i))
(ecase state
(:char
(if* (and (char= #\% c1) (char= #\% c2))
- then (setq state :percent+1)
- elseif (char/= c1 c2)
- then (return nil)))
+ then (setq state :percent+1)
+ elseif (char/= c1 c2)
+ then (return nil)))
(:percent+1
(when (char-not-equal c1 c2) (return nil))
(setq state :percent+2))
else (error "bad uri: ~s." uri)))
(defmacro do-all-uris ((var &optional uri-space result-form)
- &rest forms
- &environment env)
+ &rest forms
+ &environment env)
"do-all-uris (var [[uri-space] result-form])
- {declaration}* {tag | statement}*
+ {declaration}* {tag | statement}*
Executes the forms once for each uri with var bound to the current uri"
(let ((f (gensym))
- (g-ignore (gensym))
- (g-uri-space (gensym))
- (body #+allegro (third (excl::parse-body forms env))
- #-allegro forms))
+ (g-ignore (gensym))
+ (g-uri-space (gensym))
+ (body (third (parse-body forms env))))
`(let ((,g-uri-space (or ,uri-space *uris*)))
(prog nil
- (flet ((,f (,var &optional ,g-ignore)
- (declare (ignore-if-unused ,var ,g-ignore))
- (tagbody ,@body)))
- (maphash #',f ,g-uri-space))
- (return ,result-form)))))
+ (flet ((,f (,var &optional ,g-ignore)
+ (declare (ignore-if-unused ,var ,g-ignore))
+ (tagbody ,@body)))
+ (maphash #',f ,g-uri-space))
+ (return ,result-form)))))
(defun sharp-u (stream chr arg)
(declare (ignore chr arg))
(let ((arg (read stream nil nil t)))
(if *read-suppress*
- nil
+ nil
(if* (stringp arg)
- then (parse-uri arg)
- else
+ then (parse-uri arg)
+ else
+
+ (internal-reader-error
+ stream
+ "#u takes a string or list argument: ~s" arg)))))
- (internal-reader-error
- stream
- "#u takes a string or list argument: ~s" arg)))))
-#+allegro
-excl::
-#+allegro
-(locally (declare (special std-lisp-readtable))
- (let ((*readtable* std-lisp-readtable))
- (set-dispatch-macro-character #\# #\u #'puri::sharp-u)))
-#-allegro
(set-dispatch-macro-character #\# #\u #'puri::sharp-u)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; timings
;; (don't run under emacs with M-x fi:common-lisp)
-#+ignore
+#+allegro
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (import 'excl::gc))
+
+#-allegro
+(defun gc (&rest options)
+ (declare (ignore options))
+ #+sbcl (sb-ext::gc)
+ #+cmu (ext::gc)
+ )
+
(defun time-uri-module ()
(declare (optimize (speed 3) (safety 0) (debug 0)))
(let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
- (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
+ (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
(gc t) (gc :tenure) (gc :tenure) (gc :tenure)
(format t "~&;;; starting timing testing 1...~%")
(time (dotimes (i 100000) (parse-uri uri)))
-
+
(gc t) (gc :tenure) (gc :tenure) (gc :tenure)
(format t "~&;;; starting timing testing 2...~%")
(let ((uri (parse-uri uri)))
(time (dotimes (i 100000)
- ;; forces no caching of the printed representation:
- (setf (uri-string uri) nil)
- (format nil "~a" uri))))
-
+ ;; forces no caching of the printed representation:
+ (setf (uri-string uri) nil)
+ (format nil "~a" uri))))
+
(gc t) (gc :tenure) (gc :tenure) (gc :tenure)
(format t "~&;;; starting timing testing 3...~%")
(time
(progn
(dotimes (i 100000) (parse-uri uri2))
(let ((uri (parse-uri uri)))
- (dotimes (i 100000)
- ;; forces no caching of the printed representation:
- (setf (uri-string uri) nil)
- (format nil "~a" uri)))))))
+ (dotimes (i 100000)
+ ;; forces no caching of the printed representation:
+ (setf (uri-string uri) nil)
+ (format nil "~a" uri)))))))
;;******** reference output (ultra, modified 5.0.1):
;;; starting timing testing 1...