;; -*- mode: common-lisp; package: puri -*-
-;; Support for URIs in Allegro.
+;; Support for URIs
;; For general URI information see RFC2396.
;;
-;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
-;; copyright (c) 2003 Kevin Rosenberg (porting changes)
+;; copyright (c) 1999-2002 Franz Inc, Berkeley, CA - All rights reserved.
+;; copyright (c) 2002-2005 Franz Inc, Oakland, CA - All rights reserved.
+;; copyright (c) 2003-2006 Kevin Rosenberg (porting changes)
;;
-;; The software, data and information contained herein are proprietary
-;; to, and comprise valuable trade secrets of, Franz, Inc. They are
-;; given in confidence by Franz, Inc. pursuant to a written license
-;; agreement, and may be stored and used only in accordance with the terms
-;; of such license.
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the
+;; preamble found here:
+;; http://opensource.franz.com/preamble.html
;;
-;; Restricted Rights Legend
-;; ------------------------
-;; Use, duplication, and disclosure of the software, data and information
-;; contained herein by any agency, department or entity of the U.S.
-;; Government are subject to restrictions of Restricted Rights for
-;; Commercial Software developed at private expense as specified in
-;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
-;;
-;; Original version from ACL 6.1:
+;; Versions ported from Franz's opensource release
;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
+;; uri.cl,v 2.9.84.1 2005/08/11 18:38:52 layer
+
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
;;
-;; $Id: src.lisp,v 1.8 2003/07/20 16:25:21 kevin Exp $
+;; $Id$
(defpackage #:puri
(:use #:cl)
+ #-allegro (:nicknames #:net.uri)
(:export
#:uri ; the type and a function
#:uri-p
#:uri=
#:intern-uri
#:unintern-uri
- #:do-all-uris))
+ #:do-all-uris
-(in-package #:puri)
+ #:uri-parse-error ;; Added by KMR
+ ))
-(eval-when (:compile-toplevel)
- (declaim (optimize (speed 3))))
+(in-package #:puri)
+(eval-when (:compile-toplevel) (declaim (optimize (speed 3))))
#-allegro
#+allegro
(excl::.primcall 'sys::shrink-svector str size)
#+sbcl
- (sb-kernel:shrink-vector str size)
+ (setq str (sb-kernel:shrink-vector str size))
#+cmu
(lisp::shrink-vector str size)
#+lispworks
(system::shrink-vector$vector str size)
- #+(or allegro cmu sbcl lispworks)
- str
- #-(or allegro cmu sbcl lispworks)
- (subseq str 0 size))
-
-
-#-(or allegro lispworks)
-(define-condition parse-error (error)
- ((fmt-control :initarg :fmt-control
- :reader fmt-control)
- (fmt-args :initarg :fmt-args
- :reader fmt-args))
+ #+scl
+ (common-lisp::shrink-vector str size)
+ #-(or allegro cmu lispworks sbcl scl)
+ (setq str (subseq str 0 size))
+ str)
+
+
+;; KMR: Added new condition to handle cross-implementation variances
+;; in the parse-error condition many implementations define
+
+(define-condition uri-parse-error (parse-error)
+ ((fmt-control :initarg :fmt-control :accessor fmt-control)
+ (fmt-arguments :initarg :fmt-arguments :accessor fmt-arguments ))
(:report (lambda (c stream)
- (format stream "Parse error: ")
- (apply #'format stream (fmt-control c) (fmt-args c)))))
+ (format stream "Parse error:")
+ (apply #'format stream (fmt-control c) (fmt-arguments c)))))
-#-allegro
(defun .parse-error (fmt &rest args)
- (error (make-condition 'parse-error :fmt-control fmt :fmt-args args)))
+ (error 'uri-parse-error :fmt-control fmt :fmt-arguments args))
#-allegro
(defun internal-reader-error (stream fmt &rest args)
#+allegro (eval-when (:compile-toplevel :load-toplevel :execute)
(import '(excl:*current-case-mode*
excl:delimited-string-to-list
- excl::.parse-error
excl::parse-body
excl::internal-reader-error
excl:if*)))
#-allegro
-(defun position-char (char string start max)
+(defmethod position-char (char (string string) start max)
(declare (optimize (speed 3) (safety 0) (space 0))
- (fixnum start max) (simple-string string))
+ (fixnum start max) (string string))
(do* ((i start (1+ i)))
((= i max) nil)
(declare (fixnum i))
- (when (char= char (schar string i)) (return i))))
+ (when (char= char (char string i)) (return i))))
#-allegro
(defun delimited-string-to-list (string &optional (separator #\space)
- skip-terminal)
+ skip-terminal)
(declare (optimize (speed 3) (safety 0) (space 0)
(compilation-speed 0))
(type string string)
((null end)
(if (< pos len)
(push (subseq string pos) output)
- (when (or (not skip-terminal) (zerop len))
- (push "" output)))
- (nreverse output))
+ (when (and (plusp len) (not skip-terminal))
+ (push "" output)))
+ (nreverse output))
(declare (type fixnum pos len)
(type (or null fixnum) end))
(push (subseq string pos end) output)
;;;;The rfc says this should be here, but it doesn't make sense.
;; #\=
#\/ #\?))))
-(defparameter *reserved-path-characters2*
- ;; These are the same characters that are in
- ;; *reserved-path-characters*, minus #\/. Why? Because the parsed
- ;; representation of the path can contain the %2f converted into a /.
- ;; That's the whole point of having the parsed representation, so that
- ;; lisp programs can deal with the path element data in the most
- ;; convenient form.
- (reserved-char-vector
- (append *excluded-characters*
- '(#\;
-;;;;The rfc says this should be here, but it doesn't make sense.
- ;; #\=
- #\?))))
+
(defparameter *reserved-fragment-characters*
(reserved-char-vector (remove #\# *excluded-characters*)))
(setq res
(loop
(when (>= start end) (return nil))
- (setq c (schar string start))
+ (setq c (char string start))
(let ((ci (char-int c)))
(if* legal-chars
then (if* (and (eq :colon kind) (eq c #\:))
(:colon (failure))
(:question (failure))
(:hash (failure))
- (:slash (failure))
+ (:slash
+ (if* (and (equalp "file" scheme)
+ (null host))
+ then ;; file:///...
+ (push "/" path-components)
+ (setq state 6)
+ else (failure)))
(:string (setq host tokval)
(setq state 11))
(:end (failure))))
(return
(values
scheme host port
- (apply #'concatenate 'simple-string (nreverse path-components))
+ (apply #'concatenate 'string (nreverse path-components))
query fragment)))
;; URN parsing:
(15 ;; seen urn:, read nid now
(max (the fixnum (length string))))
((= i max) nil)
(declare (fixnum i max))
- (when (char= #\% (schar string i))
+ (when (char= #\% (char string i))
(return t))))
(defun parse-path (path-string escape)
(pl (cdr path-list) (cdr pl))
segments)
((null pl) path-list)
- (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
+
+ (if* (cdr (setq segments
+ (if* (string= "" (car pl))
+ then '("")
+ else (delimited-string-to-list (car pl) #\;))))
then ;; there is a param
-;;; (setf (car pl) segments)
(setf (car pl)
(mapcar #'(lambda (s)
- (decode-escaped-encoding
- s escape *reserved-path-characters2*))
- segments))
+ (decode-escaped-encoding s escape
+ ;; decode all %xx:
+ nil))
+ segments))
else ;; no param
-;;; (setf (car pl) (car segments))
(setf (car pl)
- (decode-escaped-encoding
- (car segments) escape *reserved-path-characters2*)))))
+ (decode-escaped-encoding (car segments) escape
+ ;; decode all %xx:
+ nil)))))
(defun decode-escaped-encoding (string escape
&optional (reserved-chars
ch ch2 chc chc2)
((= i max)
(shrink-vector new-string new-i))
- (if* (char= #\% (setq ch (schar string i)))
+ (if* (char= #\% (setq ch (char string i)))
then (when (> (+ i 3) max)
(.parse-error
"Unsyntactic escaped encoding in ~s." string))
- (setq ch (schar string (incf i)))
- (setq ch2 (schar string (incf i)))
+ (setq ch (char string (incf i)))
+ (setq ch2 (char string (incf i)))
(when (not (and (setq chc (digit-char-p ch 16))
(setq chc2 (digit-char-p ch2 16))))
(.parse-error
"Non-hexidecimal digits after %: %c%c." ch ch2))
(let ((ci (+ (* 16 chc) chc2)))
(if* (or (null reserved-chars)
+ (> ci 127) ; bug11527
(= 0 (sbit reserved-chars ci)))
then ;; ok as is
- (setf (schar new-string new-i)
+ (setf (char new-string new-i)
(code-char ci))
- else (setf (schar new-string new-i) #\%)
- (setf (schar new-string (incf new-i)) ch)
- (setf (schar new-string (incf new-i)) ch2)))
- else (setf (schar new-string new-i) ch))))
+ else (setf (char new-string new-i) #\%)
+ (setf (char new-string (incf new-i)) ch)
+ (setf (char new-string (incf new-i)) ch2)))
+ else (setf (char new-string new-i) ch))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Printing
(path (uri-path uri))
(query (uri-query uri))
(fragment (uri-fragment uri)))
- (concatenate 'simple-string
+ (concatenate 'string
(when scheme
(encode-escaped-encoding
(string-downcase ;; for upper case lisps
(symbol-name scheme))
*reserved-characters* escape))
(when scheme ":")
- (when host "//")
+ (when (or host (eq :file scheme)) "//")
(when host
(encode-escaped-encoding
host *reserved-authority-characters* escape))
(pl (cdr path-list) (cdr pl))
(pe (car pl) (car pl)))
((null pl)
- (when res (apply #'concatenate 'simple-string (nreverse res))))
+ (when res (apply #'concatenate 'string (nreverse res))))
(when (or (null first)
(prog1 (eq :absolute first)
(setq first nil)))
(setf (uri-string urn)
(let ((nid (urn-nid urn))
(nss (urn-nss urn)))
- (concatenate 'simple-string "urn:" nid ":" nss))))
+ (concatenate 'string "urn:" nid ":" nss))))
(if* stream
then (format stream "~a" (uri-string urn))
else (uri-string urn)))
c ci)
((= i max)
(shrink-vector new-string (incf new-i)))
- (setq ci (char-int (setq c (schar string i))))
+ (setq ci (char-int (setq c (char string i))))
(if* (or (null reserved-chars)
(> ci 127)
(= 0 (sbit reserved-chars ci)))
then ;; ok as is
(incf new-i)
- (setf (schar new-string new-i) c)
+ (setf (char new-string new-i) c)
else ;; need to escape it
(multiple-value-bind (q r) (truncate ci 16)
- (setf (schar new-string (incf new-i)) #\%)
- (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
- (setf (schar new-string (incf new-i))
+ (setf (char new-string (incf new-i)) #\%)
+ (setf (char new-string (incf new-i)) (elt *escaped-encoding* q))
+ (setf (char new-string (incf new-i))
(elt *escaped-encoding* r))))))
(defmethod print-object ((uri uri) stream)
(if* *print-escape*
- then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
+ then (print-unreadable-object (uri stream :type t) (render-uri uri stream))
else (render-uri uri stream)))
(defmethod print-object ((urn urn) stream)
(if* *print-escape*
- then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
+ then (print-unreadable-object (urn stream :type t) (render-urn urn stream))
else (render-urn urn stream)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defmethod merge-uris ((uri string) (base uri) &optional place)
(merge-uris (parse-uri uri) base place))
+
(defmethod merge-uris ((uri uri) (base uri) &optional place)
- ;; The following is from
- ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
- ;; and is algorithm we use to merge URIs.
- ;;
- ;; For more information, see section 5.2 of the RFC.
+ ;; See ../doc/rfc2396.txt for info on the algorithm we use to merge
+ ;; URIs.
;;
(tagbody
;;;; step 2
(when (uri-fragment uri)
(setf (uri-fragment new) (uri-fragment uri)))
new)))
-
+
(setq uri (copy-uri uri :place place))
;;;; step 3
;;;; step 5
(let ((p (uri-parsed-path uri)))
+
+ ;; bug13133:
+ ;; The following form causes our implementation to be at odds with
+ ;; RFC 2396, however this is apparently what was intended by the
+ ;; authors of the RFC. Specifically, (merge-uris "?y" "/foo")
+ ;; should return #<uri /foo?y> instead of #<uri ?y>, according to
+ ;; this:
+;;; http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query
+ (when (null p)
+ (setf (uri-path uri) (uri-path base))
+ (go :done))
+
(when (and p (eq :absolute (car p)))
(when (equal '(:absolute "") p)
;; Canonicalize the way parsing does:
(state :char)
c1 c2)
((= i len) t)
- (setq c1 (schar nss1 i))
- (setq c2 (schar nss2 i))
+ (setq c1 (char nss1 i))
+ (setq c2 (char nss2 i))
(ecase state
(:char
(if* (and (char= #\% c1) (char= #\% c2))