X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src.lisp;h=9209ba7513876ef428322e6c8b216ef6818a334c;hb=HEAD;hp=6ba041700a3980191f81c9390027cd7bb1767aa8;hpb=feebbfdc402097d14c9a4cd27bf1a7a12120f7c9;p=puri-unicode.git diff --git a/src.lisp b/src.lisp index 6ba0417..9209ba7 100644 --- a/src.lisp +++ b/src.lisp @@ -4,7 +4,7 @@ ;; ;; copyright (c) 1999-2002 Franz Inc, Berkeley, CA - All rights reserved. ;; copyright (c) 2002-2005 Franz Inc, Oakland, CA - All rights reserved. -;; copyright (c) 2003-2006 Kevin Rosenberg (porting changes) +;; copyright (c) 2003-2010 Kevin Rosenberg ;; ;; This code is free software; you can redistribute it and/or ;; modify it under the terms of the version 2.1 of @@ -25,7 +25,7 @@ ;; $Id$ (defpackage #:puri - (:use #:cl) + (:use #:cl #:idna) #-allegro (:nicknames #:net.uri) (:export #:uri ; the type and a function @@ -49,6 +49,7 @@ #:merge-uris #:enough-uri #:uri-parsed-path + #:uri-parsed-host #:render-uri #:make-uri-space ; interning... @@ -240,10 +241,24 @@ :initarg :parsed-path :initform nil :accessor .uri-parsed-path) + (parsed-host + :initarg :parsed-host + :initform nil + :accessor .uri-parsed-host) (hashcode ;; cached sxhash, so we don't have to compute it more than once. :initarg :hashcode :initform nil :accessor uri-hashcode))) +(defmethod initialize-instance :after ((uri puri:uri) &key &allow-other-keys) + (let ((parsed-path (puri:uri-parsed-path uri)) + (parsed-host (puri:uri-parsed-host uri))) + (when parsed-path + (setf (puri:uri-parsed-path uri) + parsed-path)) + (when parsed-host + (setf (puri:uri-parsed-host uri) + parsed-host)))) + (defclass urn (uri) ((nid :initarg :nid :initform nil :accessor urn-nid) (nss :initarg :nss :initform nil :accessor urn-nss))) @@ -255,11 +270,11 @@ (prog1 (call-next-method) (setf (uri-string self) nil) ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil))) + ,@(when (eq name 'uri-host) `((setf (.uri-parsed-host self) nil))) (setf (uri-hashcode self) nil)))) ) (clear-caching-on-slot-change uri-scheme) -(clear-caching-on-slot-change uri-host) (clear-caching-on-slot-change uri-port) (clear-caching-on-slot-change uri-path) (clear-caching-on-slot-change uri-query) @@ -271,6 +286,7 @@ `(make-instance ',(class-name (class-of self)) :scheme ,(uri-scheme self) :host ,(uri-host self) + :parsed-host ',(.uri-parsed-host self) :port ,(uri-port self) :path ',(uri-path self) :query ,(uri-query self) @@ -286,6 +302,8 @@ &key place (scheme (when uri (uri-scheme uri))) (host (when uri (uri-host uri))) + (parsed-host + (when uri (copy-seq (.uri-parsed-host uri)))) (port (when uri (uri-port uri))) (path (when uri (uri-path uri))) (parsed-path @@ -298,6 +316,7 @@ (if* place then (setf (uri-scheme place) scheme) (setf (uri-host place) host) + (setf (.uri-parsed-host place) parsed-host) (setf (uri-port place) port) (setf (uri-path place) path) (setf (.uri-parsed-path place) parsed-path) @@ -312,12 +331,12 @@ then ;; allow the compiler to optimize the call to make-instance: (make-instance 'uri :scheme scheme :host host :port port :path path - :parsed-path parsed-path + :parsed-path parsed-path :parsed-host parsed-host :query query :fragment fragment :plist plist :escaped escaped :string nil :hashcode nil) else (make-instance class :scheme scheme :host host :port port :path path - :parsed-path parsed-path + :parsed-path parsed-path :parsed-host parsed-host :query query :fragment fragment :plist plist :escaped escaped :string nil :hashcode nil))) @@ -336,6 +355,17 @@ (setf (.uri-parsed-path uri) path-list) path-list) +(defmethod uri-parsed-host ((uri uri)) + (when (uri-host uri) + (when (null (.uri-parsed-host uri)) + (setf (.uri-parsed-host uri) + (to-unicode (uri-host uri)))) + (.uri-parsed-host uri))) + +(defmethod (setf uri-parsed-host) (host (uri uri)) + (setf (uri-host uri) (to-ascii host)) + (setf (.uri-parsed-host uri) (to-unicode host))) + (defun uri-authority (uri) (when (uri-host uri) (let ((*print-pretty* nil)) @@ -355,13 +385,19 @@ ;; Parsing (defparameter *excluded-characters* - '(;; `delims' (except #\%, because it's handled specially): + (append + ;; exclude control characters + (loop for i from 0 to #x1f + collect (code-char i)) + '(;; `delims' (except #\%, because it's handled specially): #\< #\> #\" #\space #\# + #\Rubout ;; (code-char #x7f) ;; `unwise': #\{ #\} #\| #\\ #\^ #\[ #\] #\`)) + "Excluded charcters from RFC2369 (http://www.ietf.org/rfc/rfc2396.txt 2.4.3)") (defun reserved-char-vector (chars &key except) - (do* ((a (make-array 127 :element-type 'bit :initial-element 0)) + (do* ((a (make-array 128 :element-type 'bit :initial-element 0)) (chars chars (cdr chars)) (c (car chars) (car chars))) ((null chars) a) @@ -379,7 +415,7 @@ (defparameter *reserved-path-characters* (reserved-char-vector (append *excluded-characters* - '(#\; + '(#\; #\% ;;;;The rfc says this should be here, but it doesn't make sense. ;; #\= #\/ #\?)))) @@ -444,9 +480,9 @@ (when (and scheme (eq :urn scheme)) (return-from parse-uri - (make-instance 'urn :scheme scheme :nid host :nss path))) + (make-instance 'urn :scheme scheme :nid host :nss (to-ascii path)))) - (when host (setq host (decode-escaped-encoding host escape))) + (when host (setq host (to-ascii (decode-escaped-encoding host escape)))) (when port (setq port (read-from-string port)) (when (not (numberp port)) (error "port is not a number: ~s." port)) @@ -784,37 +820,93 @@ URI ~s contains illegal character ~s at position ~d." (defun decode-escaped-encoding (string escape &optional (reserved-chars *reserved-characters*)) - ;; Return a string with the real characters. + ;;Return a string with the real characters. (when (null escape) (return-from decode-escaped-encoding string)) - (do* ((i 0 (1+ i)) - (max (length string)) - (new-string (copy-seq string)) - (new-i 0 (1+ new-i)) - ch ch2 chc chc2) - ((= i max) - (shrink-vector new-string new-i)) - (if* (char= #\% (setq ch (char string i))) - then (when (> (+ i 3) max) - (.parse-error - "Unsyntactic escaped encoding in ~s." string)) - (setq ch (char string (incf i))) - (setq ch2 (char string (incf i))) - (when (not (and (setq chc (digit-char-p ch 16)) - (setq chc2 (digit-char-p ch2 16)))) - (.parse-error - "Non-hexidecimal digits after %: %c%c." ch ch2)) - (let ((ci (+ (* 16 chc) chc2))) - (if* (or (null reserved-chars) - (> ci 127) ; bug11527 - (= 0 (sbit reserved-chars ci))) - then ;; ok as is - (setf (char new-string new-i) - (code-char ci)) - else (setf (char new-string new-i) #\%) - (setf (char new-string (incf new-i)) ch) - (setf (char new-string (incf new-i)) ch2))) - else (setf (char new-string new-i) ch)))) + (let ((curpos 0) + (maxpos (length string)) + (strs nil)) + (flet ((next-ansii-substring () + (let ((pos (or (position #\% string :start curpos) + maxpos))) + (when (and (= curpos 0) + (= pos maxpos)) + (return-from decode-escaped-encoding string)) + (when (< curpos pos) + (push (subseq string + curpos + pos) + strs) + (setf curpos pos)))) + (next-encoded-substring () + (let ((pos (or (loop for i from curpos below maxpos by 3 + unless (char= (aref string i) + #\%) + return i) + maxpos))) + (when (< curpos pos) + (let ((octets (handler-case (make-array (/ (- pos curpos) + 3) + :element-type '(unsigned-byte 8) + :fill-pointer 0) + (error () + (.parse-error "Unsyntactic escaped encoding in ~s." string))))) + (loop for i from curpos below pos by 3 + do (vector-push (handler-case + (parse-integer string + :start (1+ i) + :end (+ i 3) + :radix 16) + (error () + (.parse-error "Non-hexidecimal digits after %: ~c~c." + (aref string (1+ i)) + (aref string (+ 2 i))))) + octets)) + + (let* ((decoded-string (babel:octets-to-string octets + :encoding :utf-8)) + (rpos (if reserved-chars + (position-if #'(lambda (ch) + (not (or (> (char-code ch) 127) + (= (sbit reserved-chars (char-code ch)) 0)))) + decoded-string)))) + (push (if rpos + (with-output-to-string (out) + (loop for ch across decoded-string + with i = curpos + do (let ((code (char-code ch))) + (cond + ((or (null reserved-chars) + (> code 127) + (= (sbit reserved-chars code) 0)) + (write-char ch out) + (incf i + (* (cond + ((< code #x80) 1) + ((< code #x800) 2) + ((< code #x10000) 3) + ((< code #x200000) 4) + ((< code #x4000000) 5) + (t 6)) + 3))) + (t (write-string (subseq string i (+ i 3)) out) + (incf i 3)))))) + decoded-string) + strs)))) + (setf curpos pos)))) + (loop + while (< curpos maxpos) + do (next-ansii-substring) + while (< curpos maxpos) + do (next-encoded-substring))) + (if (cdr strs) + (apply #'concatenate + 'string + (nreverse strs)) + (or (car strs) + "")))) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Printing @@ -846,11 +938,10 @@ URI ~s contains illegal character ~s at position ~d." #+allegro (with-output-to-string (s) (excl::maybe-print-fast s port)) ) - (when path - (encode-escaped-encoding path - nil - ;;*reserved-path-characters* - escape)) + (encode-escaped-encoding (or path "/") + nil + ;;*reserved-path-characters* + escape) (when query "?") (when query (encode-escaped-encoding query nil escape)) (when fragment "#") @@ -895,34 +986,17 @@ URI ~s contains illegal character ~s at position ~d." then (format stream "~a" (uri-string urn)) else (uri-string urn))) -(defparameter *escaped-encoding* - (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f)) - (defun encode-escaped-encoding (string reserved-chars escape) (when (null escape) (return-from encode-escaped-encoding string)) - ;; Make a string as big as it possibly needs to be (3 times the original - ;; size), and truncate it at the end. - (do* ((max (length string)) - (new-max (* 3 max)) ;; worst case new size - (new-string (make-string new-max)) - (i 0 (1+ i)) - (new-i -1) - c ci) - ((= i max) - (shrink-vector new-string (incf new-i))) - (setq ci (char-int (setq c (char string i)))) - (if* (or (null reserved-chars) - (> ci 127) - (= 0 (sbit reserved-chars ci))) - then ;; ok as is - (incf new-i) - (setf (char new-string new-i) c) - else ;; need to escape it - (multiple-value-bind (q r) (truncate ci 16) - (setf (char new-string (incf new-i)) #\%) - (setf (char new-string (incf new-i)) (elt *escaped-encoding* q)) - (setf (char new-string (incf new-i)) - (elt *escaped-encoding* r)))))) + (with-output-to-string (out) + (loop for ch across string + do (let ((code (char-code ch))) + (if (and (< code 128) + (or (null reserved-chars) + (= 0 (sbit reserved-chars code)))) + (write-char ch out) + (loop for octet across (babel:string-to-octets (string ch) :encoding :utf-8) + do (format out "%~(~2,'0x~)" octet))))))) (defmethod print-object ((uri uri) stream) (if* *print-escape* @@ -1315,13 +1389,6 @@ Executes the forms once for each uri with var bound to the current uri" "#u takes a string or list argument: ~s" arg))))) -#+allegro -excl:: -#+allegro -(locally (declare (special std-lisp-readtable)) - (let ((*readtable* std-lisp-readtable)) - (set-dispatch-macro-character #\# #\u #'puri::sharp-u))) -#-allegro (set-dispatch-macro-character #\# #\u #'puri::sharp-u) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;