X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src.lisp;h=9209ba7513876ef428322e6c8b216ef6818a334c;hb=HEAD;hp=8aee8122340183fdfb234e4e2119bb785c9dfebb;hpb=a12c7dce4427ac5a77ebcd4d295a4b18ab7f5cbc;p=puri-unicode.git diff --git a/src.lisp b/src.lisp index 8aee812..9209ba7 100644 --- a/src.lisp +++ b/src.lisp @@ -25,7 +25,7 @@ ;; $Id$ (defpackage #:puri - (:use #:cl) + (:use #:cl #:idna) #-allegro (:nicknames #:net.uri) (:export #:uri ; the type and a function @@ -49,6 +49,7 @@ #:merge-uris #:enough-uri #:uri-parsed-path + #:uri-parsed-host #:render-uri #:make-uri-space ; interning... @@ -240,10 +241,24 @@ :initarg :parsed-path :initform nil :accessor .uri-parsed-path) + (parsed-host + :initarg :parsed-host + :initform nil + :accessor .uri-parsed-host) (hashcode ;; cached sxhash, so we don't have to compute it more than once. :initarg :hashcode :initform nil :accessor uri-hashcode))) +(defmethod initialize-instance :after ((uri puri:uri) &key &allow-other-keys) + (let ((parsed-path (puri:uri-parsed-path uri)) + (parsed-host (puri:uri-parsed-host uri))) + (when parsed-path + (setf (puri:uri-parsed-path uri) + parsed-path)) + (when parsed-host + (setf (puri:uri-parsed-host uri) + parsed-host)))) + (defclass urn (uri) ((nid :initarg :nid :initform nil :accessor urn-nid) (nss :initarg :nss :initform nil :accessor urn-nss))) @@ -255,11 +270,11 @@ (prog1 (call-next-method) (setf (uri-string self) nil) ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil))) + ,@(when (eq name 'uri-host) `((setf (.uri-parsed-host self) nil))) (setf (uri-hashcode self) nil)))) ) (clear-caching-on-slot-change uri-scheme) -(clear-caching-on-slot-change uri-host) (clear-caching-on-slot-change uri-port) (clear-caching-on-slot-change uri-path) (clear-caching-on-slot-change uri-query) @@ -271,6 +286,7 @@ `(make-instance ',(class-name (class-of self)) :scheme ,(uri-scheme self) :host ,(uri-host self) + :parsed-host ',(.uri-parsed-host self) :port ,(uri-port self) :path ',(uri-path self) :query ,(uri-query self) @@ -286,6 +302,8 @@ &key place (scheme (when uri (uri-scheme uri))) (host (when uri (uri-host uri))) + (parsed-host + (when uri (copy-seq (.uri-parsed-host uri)))) (port (when uri (uri-port uri))) (path (when uri (uri-path uri))) (parsed-path @@ -298,6 +316,7 @@ (if* place then (setf (uri-scheme place) scheme) (setf (uri-host place) host) + (setf (.uri-parsed-host place) parsed-host) (setf (uri-port place) port) (setf (uri-path place) path) (setf (.uri-parsed-path place) parsed-path) @@ -312,12 +331,12 @@ then ;; allow the compiler to optimize the call to make-instance: (make-instance 'uri :scheme scheme :host host :port port :path path - :parsed-path parsed-path + :parsed-path parsed-path :parsed-host parsed-host :query query :fragment fragment :plist plist :escaped escaped :string nil :hashcode nil) else (make-instance class :scheme scheme :host host :port port :path path - :parsed-path parsed-path + :parsed-path parsed-path :parsed-host parsed-host :query query :fragment fragment :plist plist :escaped escaped :string nil :hashcode nil))) @@ -336,6 +355,17 @@ (setf (.uri-parsed-path uri) path-list) path-list) +(defmethod uri-parsed-host ((uri uri)) + (when (uri-host uri) + (when (null (.uri-parsed-host uri)) + (setf (.uri-parsed-host uri) + (to-unicode (uri-host uri)))) + (.uri-parsed-host uri))) + +(defmethod (setf uri-parsed-host) (host (uri uri)) + (setf (uri-host uri) (to-ascii host)) + (setf (.uri-parsed-host uri) (to-unicode host))) + (defun uri-authority (uri) (when (uri-host uri) (let ((*print-pretty* nil)) @@ -355,13 +385,19 @@ ;; Parsing (defparameter *excluded-characters* - '(;; `delims' (except #\%, because it's handled specially): + (append + ;; exclude control characters + (loop for i from 0 to #x1f + collect (code-char i)) + '(;; `delims' (except #\%, because it's handled specially): #\< #\> #\" #\space #\# + #\Rubout ;; (code-char #x7f) ;; `unwise': #\{ #\} #\| #\\ #\^ #\[ #\] #\`)) + "Excluded charcters from RFC2369 (http://www.ietf.org/rfc/rfc2396.txt 2.4.3)") (defun reserved-char-vector (chars &key except) - (do* ((a (make-array 127 :element-type 'bit :initial-element 0)) + (do* ((a (make-array 128 :element-type 'bit :initial-element 0)) (chars chars (cdr chars)) (c (car chars) (car chars))) ((null chars) a) @@ -379,7 +415,7 @@ (defparameter *reserved-path-characters* (reserved-char-vector (append *excluded-characters* - '(#\; + '(#\; #\% ;;;;The rfc says this should be here, but it doesn't make sense. ;; #\= #\/ #\?)))) @@ -444,9 +480,9 @@ (when (and scheme (eq :urn scheme)) (return-from parse-uri - (make-instance 'urn :scheme scheme :nid host :nss path))) + (make-instance 'urn :scheme scheme :nid host :nss (to-ascii path)))) - (when host (setq host (decode-escaped-encoding host escape))) + (when host (setq host (to-ascii (decode-escaped-encoding host escape)))) (when port (setq port (read-from-string port)) (when (not (numberp port)) (error "port is not a number: ~s." port)) @@ -837,13 +873,24 @@ URI ~s contains illegal character ~s at position ~d." (push (if rpos (with-output-to-string (out) (loop for ch across decoded-string - for i from curpos by 3 - do (let ((octet (char-code ch))) - (if (or (null reserved-chars) - (> octet 127) - (= (sbit reserved-chars octet) 0)) - (write-char ch out) - (write-string (subseq string i (+ i 3)) out))))) + with i = curpos + do (let ((code (char-code ch))) + (cond + ((or (null reserved-chars) + (> code 127) + (= (sbit reserved-chars code) 0)) + (write-char ch out) + (incf i + (* (cond + ((< code #x80) 1) + ((< code #x800) 2) + ((< code #x10000) 3) + ((< code #x200000) 4) + ((< code #x4000000) 5) + (t 6)) + 3))) + (t (write-string (subseq string i (+ i 3)) out) + (incf i 3)))))) decoded-string) strs)))) (setf curpos pos))))