X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fchar.lisp;h=cad81a23e36ad1b35168da657050b4d472a61231;hb=266509b078969a40bded783057fc15a873c75723;hp=adb14f1e15b004aba8565a48d4020bacebf131e0;hpb=a38d48850754e354abe9c99848a087bfd7062214;p=jscl.git diff --git a/src/char.lisp b/src/char.lisp index adb14f1..cad81a2 100644 --- a/src/char.lisp +++ b/src/char.lisp @@ -1,14 +1,123 @@ +(/debug "loading char.lisp!") + +;; These comparison functions heavily borrowed from SBCL/CMUCL (public domain). + +(defun char= (character &rest more-characters) + (dolist (c more-characters t) + (unless (eql c character) (return nil)))) + +(defun char/= (character &rest more-characters) + (do* ((head character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (dolist (c list) + (when (eql head c) (return-from char/= nil))))) + +(defun char< (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (< (char-int c) + (char-int (car list))) + (return nil)))) + +(defun char> (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (> (char-int c) + (char-int (car list))) + (return nil)))) + +(defun char<= (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (<= (char-int c) + (char-int (car list))) + (return nil)))) + +(defun char>= (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (>= (char-int c) + (char-int (car list))) + (return nil)))) + +(defun equal-char-code (character) + (char-code (char-upcase character))) + +(defun two-arg-char-equal (c1 c2) + (= (equal-char-code c1) (equal-char-code c2))) + +(defun char-equal (character &rest more-characters) + (do ((clist more-characters (cdr clist))) + ((null clist) t) + (unless (two-arg-char-equal (car clist) character) + (return nil)))) + +(defun char-not-equal (character &rest more-characters) + (do* ((head character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (do* ((l list (cdr l))) + ((null l) t) + (when (two-arg-char-equal head (car l)) + (return nil))) + (return nil)))) + +(defun two-arg-char-lessp (c1 c2) + (< (equal-char-code c1) (equal-char-code c2))) + +(defun char-lessp (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (two-arg-char-lessp c (car list)) + (return nil)))) + +(defun two-arg-char-greaterp (c1 c2) + (> (equal-char-code c1) (equal-char-code c2))) + +(defun char-greaterp (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (two-arg-char-greaterp c (car list)) + (return nil)))) + +(defun two-arg-char-not-greaterp (c1 c2) + (<= (equal-char-code c1) (equal-char-code c2))) + +(defun char-not-greaterp (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (two-arg-char-not-greaterp c (car list)) + (return nil)))) + +(defun two-arg-char-not-lessp (c1 c2) + (>= (equal-char-code c1) (equal-char-code c2))) + +(defun char-not-lessp (character &rest more-characters) + (do* ((c character (car list)) + (list more-characters (cdr list))) + ((null list) t) + (unless (two-arg-char-not-lessp c (car list)) + (return nil)))) + (defun character (character) (cond ((characterp character) - character) - ((and (stringp character) - (= 1 (length character))) - (char character 0)) - ((and (symbolp character) - (= 1 (length (symbol-name character)))) - (symbol-name character)) - (t - (error "not a valid character designator")))) + character) + ((and (stringp character) + (= 1 (length character))) + (char character 0)) + ((and (symbolp character) + (= 1 (length (symbol-name character)))) + (symbol-name character)) + (t + (error "not a valid character designator")))) ;; This list comes from SBCL: everything that's ALPHA-CHAR-P, but ;; not SB-IMPL::UCD-DECIMAL-DIGIT (to work around ), @@ -112,7 +221,7 @@ (let ((code (char-code char))) (dolist (alpha-pair +unicode-alphas+) (when (<= (car alpha-pair) code (cdr alpha-pair)) - (return-from alpha-char-p t))) + (return-from alpha-char-p t))) nil)) (defun alphanumericp (char) @@ -133,10 +242,10 @@ (defun unicode-digit-value (char) (let ((code (char-code char))) (if (= code 6618) - 1 ;; it's special! + 1 ;; it's special! (dolist (z +unicode-zeroes+) - (when (<= z code (+ z 9)) - (return-from unicode-digit-value (- code z))))))) + (when (<= z code (+ z 9)) + (return-from unicode-digit-value (- code z))))))) ;; from SBCL/CMUCL: (defun digit-char (weight &optional (radix 10)) @@ -147,21 +256,26 @@ character exists." (>= weight 0) (< weight radix) (< weight 36) (code-char (if (< weight 10) (+ 48 weight) (+ 55 weight))))) -;; borrowed from my proposed fix to SBCL: https://bugs.launchpad.net/sbcl/+bug/1177986 +;; From comment #4 on : (defun digit-char-p (char &optional (radix 10)) - (let ((number (unicode-digit-value char)) - (code (char-code char)) - (little-a (char-code #\a)) - (big-a (char-code #\A))) - (cond ((and number (< number radix)) - number) - (number - nil) - ((<= big-a code (+ big-a radix -10 -1)) - (+ code (- big-a) 10)) - ((<= little-a code (+ little-a radix -10 -1)) - (+ code (- little-a) 10)) - (t nil)))) + "Includes ASCII 0-9 a-z A-Z, plus Unicode HexDigit characters (fullwidth variants of 0-9 and A-F)." + (let* ((number (unicode-digit-value char)) + (code (char-code char)) + (upper (char-upcase char)) + (code-upper (char-code upper)) + (potential (cond (number number) + ((char<= #\0 char #\9) + (- code (char-code #\0))) + ((<= 65296 code 65305) ;; FULLWIDTH_DIGIT_ZERO, FULLWIDTH_DIGIT_NINE + (- code 65296)) + ((char<= #\A upper #\Z) + (+ 10 (- code-upper (char-code #\A)))) + ((<= 65313 (char-code upper) 65318) ;; FULLWIDTH_LATIN_CAPITAL_LETTER_A, FULLWIDTH_LATIN_CAPITAL_LETTER_F + (+ 10 (- code-upper 65313))) + (t nil)))) + (if (and potential (< potential radix)) + potential + nil))) (defun graphic-char-p (char) ;; from SBCL/CMUCL: @@ -175,8 +289,68 @@ character exists." (or (< 31 n 127) (= n 10))))) +(defun upper-case-p (character) + (char/= character (char-downcase character))) + +(defun lower-case-p (character) + (char/= character (char-upcase character))) + +(defun both-case-p (character) + (or (upper-case-p character) (lower-case-p character))) + (defun char-int (character) ;; no implementation-defined character attributes (char-code character)) (defconstant char-code-limit 1114111) ;; 0x10FFFF + +(defconstant +ascii-names+ + #("NULL" "START_OF_HEADING" "START_OF_TEXT" "END_OF_TEXT" "END_OF_TRANSMISSION" "ENQUIRY" "ACKNOWLEDGE" + "BELL" "Backspace" "Tab" "Newline" "LINE_TABULATION" "Page" "Return" "SHIFT_OUT" "SHIFT_IN" + "DATA_LINK_ESCAPE" "DEVICE_CONTROL_ONE" "DEVICE_CONTROL_TWO" "DEVICE_CONTROL_THREE" "DEVICE_CONTROL_FOUR" + "NEGATIVE_ACKNOWLEDGE" "SYNCHRONOUS_IDLE" "END_OF_TRANSMISSION_BLOCK" "CANCEL" "END_OF_MEDIUM" "SUBSTITUTE" + "ESCAPE" "INFORMATION_SEPARATOR_FOUR" "INFORMATION_SEPARATOR_THREE" "INFORMATION_SEPARATOR_TWO" + "INFORMATION_SEPARATOR_ONE" "Space" "EXCLAMATION_MARK" "QUOTATION_MARK" "NUMBER_SIGN" "DOLLAR_SIGN" + "PERCENT_SIGN" "AMPERSAND" "APOSTROPHE" "LEFT_PARENTHESIS" "RIGHT_PARENTHESIS" "ASTERISK" "PLUS_SIGN" + "COMMA" "HYPHEN-MINUS" "FULL_STOP" "SOLIDUS" "DIGIT_ZERO" "DIGIT_ONE" "DIGIT_TWO" "DIGIT_THREE" "DIGIT_FOUR" + "DIGIT_FIVE" "DIGIT_SIX" "DIGIT_SEVEN" "DIGIT_EIGHT" "DIGIT_NINE" "COLON" "SEMICOLON" "LESS-THAN_SIGN" + "EQUALS_SIGN" "GREATER-THAN_SIGN" "QUESTION_MARK" "COMMERCIAL_AT" "LATIN_CAPITAL_LETTER_A" + "LATIN_CAPITAL_LETTER_B" "LATIN_CAPITAL_LETTER_C" "LATIN_CAPITAL_LETTER_D" "LATIN_CAPITAL_LETTER_E" + "LATIN_CAPITAL_LETTER_F" "LATIN_CAPITAL_LETTER_G" "LATIN_CAPITAL_LETTER_H" "LATIN_CAPITAL_LETTER_I" + "LATIN_CAPITAL_LETTER_J" "LATIN_CAPITAL_LETTER_K" "LATIN_CAPITAL_LETTER_L" "LATIN_CAPITAL_LETTER_M" + "LATIN_CAPITAL_LETTER_N" "LATIN_CAPITAL_LETTER_O" "LATIN_CAPITAL_LETTER_P" "LATIN_CAPITAL_LETTER_Q" + "LATIN_CAPITAL_LETTER_R" "LATIN_CAPITAL_LETTER_S" "LATIN_CAPITAL_LETTER_T" "LATIN_CAPITAL_LETTER_U" + "LATIN_CAPITAL_LETTER_V" "LATIN_CAPITAL_LETTER_W" "LATIN_CAPITAL_LETTER_X" "LATIN_CAPITAL_LETTER_Y" + "LATIN_CAPITAL_LETTER_Z" "LEFT_SQUARE_BRACKET" "REVERSE_SOLIDUS" "RIGHT_SQUARE_BRACKET" "CIRCUMFLEX_ACCENT" + "LOW_LINE" "GRAVE_ACCENT" "LATIN_SMALL_LETTER_A" "LATIN_SMALL_LETTER_B" "LATIN_SMALL_LETTER_C" + "LATIN_SMALL_LETTER_D" "LATIN_SMALL_LETTER_E" "LATIN_SMALL_LETTER_F" "LATIN_SMALL_LETTER_G" + "LATIN_SMALL_LETTER_H" "LATIN_SMALL_LETTER_I" "LATIN_SMALL_LETTER_J" "LATIN_SMALL_LETTER_K" + "LATIN_SMALL_LETTER_L" "LATIN_SMALL_LETTER_M" "LATIN_SMALL_LETTER_N" "LATIN_SMALL_LETTER_O" + "LATIN_SMALL_LETTER_P" "LATIN_SMALL_LETTER_Q" "LATIN_SMALL_LETTER_R" "LATIN_SMALL_LETTER_S" + "LATIN_SMALL_LETTER_T" "LATIN_SMALL_LETTER_U" "LATIN_SMALL_LETTER_V" "LATIN_SMALL_LETTER_W" + "LATIN_SMALL_LETTER_X" "LATIN_SMALL_LETTER_Y" "LATIN_SMALL_LETTER_Z" "LEFT_CURLY_BRACKET" "VERTICAL_LINE" + "RIGHT_CURLY_BRACKET" "TILDE" "Rubout") + "Names/codepoints of the first 128 characters from Unicode 6.2, +except with Common Lisp's suggested changes. +For the first 32 characters ('C0 controls'), the first +'Commonly used alternative alias' is used -- note that this differs from SBCL, which uses abbreviations.") +;; I hope being slightly different from SBCL doesn't bite me down the road. +;; I'll figure out a good way to add the other 21701 names later. + +(defun char-name (char) + ;; For consistency, I'm using the SBCL convention of the Unicode + ;; name, with spaces as underscores. It would be nice to use + ;; their "Uxxxx" convention for names I don't know, but there's + ;; not much in FORMAT yet. I'm only implementing ASCII names right + ;; now, since Unicode is kind of big. + (let ((code (char-code char))) + (if (<= code 127) + (aref +ascii-names+ code) + nil))) ;; for now, no name + +(defun name-char (name) + (let ((name-upcase (string-upcase (string name)))) + (dotimes (i (length +ascii-names+)) + (when (string= name-upcase (string-upcase (aref +ascii-names+ i))) ;; poor man's STRING-EQUAL + (return-from name-char (code-char i)))) + nil))