X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-char.lisp;h=aa52adbceca7b73f3364a379d225418180addc6c;hb=b14a61c6af3e3005c94e633e727177346240066e;hp=e217f8e8dbcb04925974211b044db475512328f0;hpb=88429c4045707ceaf99a10801d5c5efdca765afa;p=sbcl.git diff --git a/src/code/target-char.lisp b/src/code/target-char.lisp index e217f8e..aa52adb 100644 --- a/src/code/target-char.lisp +++ b/src/code/target-char.lisp @@ -19,7 +19,7 @@ (declaim (maybe-inline digit-char-p digit-weight)) (deftype char-code () - `(integer 0 (,char-code-limit))) + `(integer 0 (,sb!xc:char-code-limit))) #!+sb-unicode (progn @@ -45,32 +45,38 @@ array)))) (let ((character-database (read-ub8-vector (file "ucd" "dat"))) (decompositions (read-ub8-vector (file "decomp" "dat"))) - (long-decompositions (read-ub8-vector (file "ldecomp" "dat")))) + (long-decompositions (read-ub8-vector (file "ldecomp" "dat"))) + (primary-compositions (read-ub8-vector (file "comp" "dat")))) `(progn - (declaim (type (simple-array (unsigned-byte 8) (*)) **character-database** **character-decompositions** **character-long-decompositions**)) + (declaim (type (simple-array (unsigned-byte 8) (*)) + **character-database** + **character-decompositions** + **character-long-decompositions**)) (defglobal **character-database** ,character-database) (defglobal **character-decompositions** ,decompositions) (defglobal **character-long-decompositions** ,long-decompositions) - (defglobal **character-primary-compositions** - (let ((table (make-hash-table)) - (info ,(read-ub8-vector (file "comp" "dat")))) - (flet ((code (j) - (dpb (aref info (* 4 j)) - (byte 8 24) - (dpb (aref info (+ (* 4 j) 1)) - (byte 8 16) - (dpb (aref info (+ (* 4 j) 2)) - (byte 8 8) - (aref info (+ (* 4 j) 3))))))) - #!+sb-unicode - (dotimes (i (/ (length info) 12)) - (setf (gethash (dpb (code (* 3 i)) (byte 21 21) - (code (1+ (* 3 i)))) - table) - (code-char (code (+ (* 3 i) 2))))) - table))) + ;; KLUDGE: temporary value, fixed up in cold-load + (defglobal **character-primary-compositions** ,primary-compositions) (defun !character-database-cold-init () - (setf **character-database** ,character-database)) + (setf **character-database** ,character-database) + (setf **character-primary-compositions** + (let ((table (make-hash-table)) + (info ,primary-compositions)) + (flet ((code (j) + (dpb (aref info (* 4 j)) + (byte 8 24) + (dpb (aref info (+ (* 4 j) 1)) + (byte 8 16) + (dpb (aref info (+ (* 4 j) 2)) + (byte 8 8) + (aref info (+ (* 4 j) 3))))))) + #!+sb-unicode + (dotimes (i (/ (length info) 12)) + (setf (gethash (dpb (code (* 3 i)) (byte 21 21) + (code (1+ (* 3 i)))) + table) + (code-char (code (+ (* 3 i) 2))))) + table)))) ,(with-open-file (stream (file "ucd-names" "lisp-expr") :direction :input :element-type 'character) @@ -400,20 +406,20 @@ argument is an alphabetic character, A-Z or a-z; otherwise NIL." #!+sb-doc "The argument must be a character object; UPPER-CASE-P returns T if the argument is an upper-case character, NIL otherwise." - (< (ucd-value-0 char) 4)) + (< (ucd-value-0 char) 5)) (defun lower-case-p (char) #!+sb-doc "The argument must be a character object; LOWER-CASE-P returns T if the argument is a lower-case character, NIL otherwise." - (< 3 (ucd-value-0 char) 8)) + (< 4 (ucd-value-0 char) 9)) (defun both-case-p (char) #!+sb-doc "The argument must be a character object. BOTH-CASE-P returns T if the argument is an alphabetic character and if the character exists in both upper and lower case. For ASCII, this is the same as ALPHA-CHAR-P." - (< (ucd-value-0 char) 8)) + (< (ucd-value-0 char) 9)) (defun digit-char-p (char &optional (radix 10.)) #!+sb-doc @@ -513,12 +519,36 @@ is either numeric or alphabetic." (defmacro equal-char-code (character) (let ((ch (gensym))) `(let ((,ch ,character)) - (if (< (ucd-value-0 ,ch) 4) + (if (< (ucd-value-0 ,ch) 5) (ucd-value-1 ,ch) (char-code ,ch))))) (defun two-arg-char-equal (c1 c2) - (= (equal-char-code c1) (equal-char-code c2))) + (flet ((base-char-equal-p () + (let* ((code1 (char-code c1)) + (code2 (char-code c2)) + (sum (logxor code1 code2))) + (when (eql sum #x20) + (let ((sum (+ code1 code2))) + (or (and (> sum 161) (< sum 213)) + (and (> sum 415) (< sum 461)) + (and (> sum 463) (< sum 477)))))))) + (declare (inline base-char-equal-p)) + (or (eq c1 c2) + #!-sb-unicode + (base-char-equal-p) + #!+sb-unicode + (typecase c1 + (base-char + (and (base-char-p c2) + (base-char-equal-p))) + (t + (= (equal-char-code c1) (equal-char-code c2))))))) + +(defun char-equal-constant (x char reverse-case-char) + (declare (type character x)) + (or (eq char x) + (eq reverse-case-char x))) (defun char-equal (character &rest more-characters) #!+sb-doc @@ -609,14 +639,14 @@ Case is ignored." #!+sb-doc "Return CHAR converted to upper-case if that is possible. Don't convert lowercase eszet (U+DF)." - (if (< 3 (ucd-value-0 char) 8) + (if (< 4 (ucd-value-0 char) 9) (code-char (ucd-value-1 char)) char)) (defun char-downcase (char) #!+sb-doc "Return CHAR converted to lower-case if that is possible." - (if (< (ucd-value-0 char) 4) + (if (< (ucd-value-0 char) 5) (code-char (ucd-value-1 char)) char))