1 ;; These comparison functions heavily borrowed from SBCL/CMUCL (public domain).
3 (defun char= (character &rest more-characters)
4 (dolist (c more-characters t)
5 (unless (eql c character) (return nil))))
7 (defun char/= (character &rest more-characters)
8 (do* ((head character (car list))
9 (list more-characters (cdr list)))
12 (when (eql head c) (return-from char/= nil)))))
14 (defun char< (character &rest more-characters)
15 (do* ((c character (car list))
16 (list more-characters (cdr list)))
18 (unless (< (char-int c)
19 (char-int (car list)))
22 (defun char> (character &rest more-characters)
23 (do* ((c character (car list))
24 (list more-characters (cdr list)))
26 (unless (> (char-int c)
27 (char-int (car list)))
30 (defun char<= (character &rest more-characters)
31 (do* ((c character (car list))
32 (list more-characters (cdr list)))
34 (unless (<= (char-int c)
35 (char-int (car list)))
38 (defun char>= (character &rest more-characters)
39 (do* ((c character (car list))
40 (list more-characters (cdr list)))
42 (unless (>= (char-int c)
43 (char-int (car list)))
46 (defun character (character)
47 (cond ((characterp character)
49 ((and (stringp character)
50 (= 1 (length character)))
52 ((and (symbolp character)
53 (= 1 (length (symbol-name character))))
54 (symbol-name character))
56 (error "not a valid character designator"))))
58 ;; This list comes from SBCL: everything that's ALPHA-CHAR-P, but
59 ;; not SB-IMPL::UCD-DECIMAL-DIGIT (to work around <https://bugs.launchpad.net/sbcl/+bug/1177986>),
60 ;; then combined into a much smaller set of ranges. Yes, this can be compressed better.
61 (defconstant +unicode-alphas+
62 '((65 . 90) (97 . 122) (170 . 170) (181 . 181) (186 . 186) (192 . 214)
63 (216 . 246) (248 . 705) (710 . 721) (736 . 740) (748 . 748) (750 . 750)
64 (880 . 884) (886 . 887) (890 . 893) (902 . 902) (904 . 906) (908 . 908)
65 (910 . 929) (931 . 1013) (1015 . 1153) (1162 . 1317) (1329 . 1366)
66 (1369 . 1369) (1377 . 1415) (1488 . 1514) (1520 . 1522) (1569 . 1610)
67 (1646 . 1647) (1649 . 1747) (1749 . 1749) (1765 . 1766) (1774 . 1775)
68 (1786 . 1788) (1791 . 1791) (1808 . 1808) (1810 . 1839) (1869 . 1957)
69 (1969 . 1969) (1994 . 2026) (2036 . 2037) (2042 . 2042) (2048 . 2069)
70 (2074 . 2074) (2084 . 2084) (2088 . 2088) (2308 . 2361) (2365 . 2365)
71 (2384 . 2384) (2392 . 2401) (2417 . 2418) (2425 . 2431) (2437 . 2444)
72 (2447 . 2448) (2451 . 2472) (2474 . 2480) (2482 . 2482) (2486 . 2489)
73 (2493 . 2493) (2510 . 2510) (2524 . 2525) (2527 . 2529) (2544 . 2545)
74 (2565 . 2570) (2575 . 2576) (2579 . 2600) (2602 . 2608) (2610 . 2611)
75 (2613 . 2614) (2616 . 2617) (2649 . 2652) (2654 . 2654) (2674 . 2676)
76 (2693 . 2701) (2703 . 2705) (2707 . 2728) (2730 . 2736) (2738 . 2739)
77 (2741 . 2745) (2749 . 2749) (2768 . 2768) (2784 . 2785) (2821 . 2828)
78 (2831 . 2832) (2835 . 2856) (2858 . 2864) (2866 . 2867) (2869 . 2873)
79 (2877 . 2877) (2908 . 2909) (2911 . 2913) (2929 . 2929) (2947 . 2947)
80 (2949 . 2954) (2958 . 2960) (2962 . 2965) (2969 . 2970) (2972 . 2972)
81 (2974 . 2975) (2979 . 2980) (2984 . 2986) (2990 . 3001) (3024 . 3024)
82 (3077 . 3084) (3086 . 3088) (3090 . 3112) (3114 . 3123) (3125 . 3129)
83 (3133 . 3133) (3160 . 3161) (3168 . 3169) (3205 . 3212) (3214 . 3216)
84 (3218 . 3240) (3242 . 3251) (3253 . 3257) (3261 . 3261) (3294 . 3294)
85 (3296 . 3297) (3333 . 3340) (3342 . 3344) (3346 . 3368) (3370 . 3385)
86 (3389 . 3389) (3424 . 3425) (3450 . 3455) (3461 . 3478) (3482 . 3505)
87 (3507 . 3515) (3517 . 3517) (3520 . 3526) (3585 . 3632) (3634 . 3635)
88 (3648 . 3654) (3713 . 3714) (3716 . 3716) (3719 . 3720) (3722 . 3722)
89 (3725 . 3725) (3732 . 3735) (3737 . 3743) (3745 . 3747) (3749 . 3749)
90 (3751 . 3751) (3754 . 3755) (3757 . 3760) (3762 . 3763) (3773 . 3773)
91 (3776 . 3780) (3782 . 3782) (3804 . 3805) (3840 . 3840) (3904 . 3911)
92 (3913 . 3948) (3976 . 3979) (4096 . 4138) (4159 . 4159) (4176 . 4181)
93 (4186 . 4189) (4193 . 4193) (4197 . 4198) (4206 . 4208) (4213 . 4225)
94 (4238 . 4238) (4256 . 4293) (4304 . 4346) (4348 . 4348) (4352 . 4680)
95 (4682 . 4685) (4688 . 4694) (4696 . 4696) (4698 . 4701) (4704 . 4744)
96 (4746 . 4749) (4752 . 4784) (4786 . 4789) (4792 . 4798) (4800 . 4800)
97 (4802 . 4805) (4808 . 4822) (4824 . 4880) (4882 . 4885) (4888 . 4954)
98 (4992 . 5007) (5024 . 5108) (5121 . 5740) (5743 . 5759) (5761 . 5786)
99 (5792 . 5866) (5888 . 5900) (5902 . 5905) (5920 . 5937) (5952 . 5969)
100 (5984 . 5996) (5998 . 6000) (6016 . 6067) (6103 . 6103) (6108 . 6108)
101 (6176 . 6263) (6272 . 6312) (6314 . 6314) (6320 . 6389) (6400 . 6428)
102 (6480 . 6509) (6512 . 6516) (6528 . 6571) (6593 . 6599) (6656 . 6678)
103 (6688 . 6740) (6823 . 6823) (6917 . 6963) (6981 . 6987) (7043 . 7072)
104 (7086 . 7087) (7168 . 7203) (7245 . 7247) (7258 . 7293) (7401 . 7404)
105 (7406 . 7409) (7424 . 7615) (7680 . 7957) (7960 . 7965) (7968 . 8005)
106 (8008 . 8013) (8016 . 8023) (8025 . 8025) (8027 . 8027) (8029 . 8029)
107 (8031 . 8061) (8064 . 8116) (8118 . 8124) (8126 . 8126) (8130 . 8132)
108 (8134 . 8140) (8144 . 8147) (8150 . 8155) (8160 . 8172) (8178 . 8180)
109 (8182 . 8188) (8305 . 8305) (8319 . 8319) (8336 . 8340) (8450 . 8450)
110 (8455 . 8455) (8458 . 8467) (8469 . 8469) (8473 . 8477) (8484 . 8484)
111 (8486 . 8486) (8488 . 8488) (8490 . 8493) (8495 . 8505) (8508 . 8511)
112 (8517 . 8521) (8526 . 8526) (8579 . 8580) (11264 . 11310) (11312 . 11358)
113 (11360 . 11492) (11499 . 11502) (11520 . 11557) (11568 . 11621)
114 (11631 . 11631) (11648 . 11670) (11680 . 11686) (11688 . 11694)
115 (11696 . 11702) (11704 . 11710) (11712 . 11718) (11720 . 11726)
116 (11728 . 11734) (11736 . 11742) (11823 . 11823) (12293 . 12294)
117 (12337 . 12341) (12347 . 12348) (12353 . 12438) (12445 . 12447)
118 (12449 . 12538) (12540 . 12543) (12549 . 12589) (12593 . 12686)
119 (12704 . 12727) (12784 . 12799) (13312 . 19893) (19968 . 40907)
120 (40960 . 42124) (42192 . 42237) (42240 . 42508) (42512 . 42527)
121 (42538 . 42539) (42560 . 42591) (42594 . 42606) (42623 . 42647)
122 (42656 . 42725) (42775 . 42783) (42786 . 42888) (42891 . 42892)
123 (43003 . 43009) (43011 . 43013) (43015 . 43018) (43020 . 43042)
124 (43072 . 43123) (43138 . 43187) (43250 . 43255) (43259 . 43259)
125 (43274 . 43301) (43312 . 43334) (43360 . 43388) (43396 . 43442)
126 (43471 . 43471) (43520 . 43560) (43584 . 43586) (43588 . 43595)
127 (43616 . 43638) (43642 . 43642) (43648 . 43695) (43697 . 43697)
128 (43701 . 43702) (43705 . 43709) (43712 . 43712) (43714 . 43714)
129 (43739 . 43741) (43968 . 44002) (44032 . 55203) (55216 . 55238)
130 (55243 . 55291) (63744 . 64045) (64048 . 64109) (64112 . 64217)
131 (64256 . 64262) (64275 . 64279) (64285 . 64285) (64287 . 64296)
132 (64298 . 64310) (64312 . 64316) (64318 . 64318) (64320 . 64321)
133 (64323 . 64324) (64326 . 64433) (64467 . 64829) (64848 . 64911)
134 (64914 . 64967) (65008 . 65019) (65136 . 65140) (65142 . 65276)
135 (65313 . 65338) (65345 . 65370) (65382 . 65470) (65474 . 65479)
136 (65482 . 65487) (65490 . 65495) (65498 . 65500) (65536 . 65547)
137 (65549 . 65574) (65576 . 65594) (65596 . 65597) (65599 . 65613)
138 (65616 . 65629) (65664 . 65786) (66176 . 66204) (66208 . 66256)
139 (66304 . 66334) (66352 . 66368) (66370 . 66377) (66432 . 66461)
140 (66464 . 66499) (66504 . 66511) (66560 . 66717) (67584 . 67589)
141 (67592 . 67592) (67594 . 67637) (67639 . 67640) (67644 . 67644)
142 (67647 . 67669) (67840 . 67861) (67872 . 67897) (68096 . 68096)
143 (68112 . 68115) (68117 . 68119) (68121 . 68147) (68192 . 68220)
144 (68352 . 68405) (68416 . 68437) (68448 . 68466) (68608 . 68680)
145 (69763 . 69807) (73728 . 74606) (77824 . 78894) (119808 . 119892)
146 (119894 . 119964) (119966 . 119967) (119970 . 119970) (119973 . 119974)
147 (119977 . 119980) (119982 . 119993) (119995 . 119995) (119997 . 120003)
148 (120005 . 120069) (120071 . 120074) (120077 . 120084) (120086 . 120092)
149 (120094 . 120121) (120123 . 120126) (120128 . 120132) (120134 . 120134)
150 (120138 . 120144) (120146 . 120485) (120488 . 120512) (120514 . 120538)
151 (120540 . 120570) (120572 . 120596) (120598 . 120628) (120630 . 120654)
152 (120656 . 120686) (120688 . 120712) (120714 . 120744) (120746 . 120770)
153 (120772 . 120779) (131072 . 173782) (173824 . 177972) (194560 . 195101))
154 "(Start . End) ranges of codepoints for alphabetic characters, as of Unicode 6.2.")
156 (defun alpha-char-p (char)
157 (let ((code (char-code char)))
158 (dolist (alpha-pair +unicode-alphas+)
159 (when (<= (car alpha-pair) code (cdr alpha-pair))
160 (return-from alpha-char-p t)))
163 (defun alphanumericp (char)
164 ;; from the hyperspec:
165 (or (alpha-char-p char)
166 (not (null (digit-char-p char)))))
168 ;; I made this list by running DIGIT-CHAR-P in SBCL on every codepoint up to CHAR-CODE-LIMIT,
169 ;; filtering on only those with SB-IMPL::UCD-GENERAL-CATEGORY 12 (Nd), and then grouping
170 ;; consecutive sets. There's 37 spans of 10, plus 1 extra digit (6618).
171 (defconstant +unicode-zeroes+
172 '(48 1632 1776 1984 2406 2534 2662 2790 2918 3046 3174 3302 3430 3664
173 3792 3872 4160 4240 6112 6160 6470 6608 6784 6800 6992 7088 7232 7248
174 42528 43216 43264 43472 43600 44016 65296 66720 120782)
175 "Unicode codepoints which have Digit value 0, followed by 1, 2, ..., 9, as of Unicode 6.2")
177 ;; The "Digit value" of a (Unicode) character, or NIL, if it doesn't have one.
178 (defun unicode-digit-value (char)
179 (let ((code (char-code char)))
182 (dolist (z +unicode-zeroes+)
183 (when (<= z code (+ z 9))
184 (return-from unicode-digit-value (- code z)))))))
187 (defun digit-char (weight &optional (radix 10))
188 "All arguments must be integers. Returns a character object that represents
189 a digit of the given weight in the specified radix. Returns NIL if no such
191 (and ;; (typep weight 'fixnum)
192 (>= weight 0) (< weight radix) (< weight 36)
193 (code-char (if (< weight 10) (+ 48 weight) (+ 55 weight)))))
195 ;; borrowed from my proposed fix to SBCL: https://bugs.launchpad.net/sbcl/+bug/1177986
196 (defun digit-char-p (char &optional (radix 10))
197 (let ((number (unicode-digit-value char))
198 (code (char-code char))
199 (little-a (char-code #\a))
200 (big-a (char-code #\A)))
201 (cond ((and number (< number radix))
205 ((<= big-a code (+ big-a radix -10 -1))
206 (+ code (- big-a) 10))
207 ((<= little-a code (+ little-a radix -10 -1))
208 (+ code (- little-a) 10))
211 (defun graphic-char-p (char)
213 (let ((n (char-code char)))
217 (defun standard-char-p (char)
219 (and (let ((n (char-code char)))
223 (defun char-int (character)
224 ;; no implementation-defined character attributes
225 (char-code character))
227 (defconstant char-code-limit 1114111) ;; 0x10FFFF
229 (defconstant +ascii-names+
230 #("NULL" "START_OF_HEADING" "START_OF_TEXT" "END_OF_TEXT" "END_OF_TRANSMISSION" "ENQUIRY" "ACKNOWLEDGE"
231 "BELL" "Backspace" "Tab" "Newline" "LINE_TABULATION" "Page" "Return" "SHIFT_OUT" "SHIFT_IN"
232 "DATA_LINK_ESCAPE" "DEVICE_CONTROL_ONE" "DEVICE_CONTROL_TWO" "DEVICE_CONTROL_THREE" "DEVICE_CONTROL_FOUR"
233 "NEGATIVE_ACKNOWLEDGE" "SYNCHRONOUS_IDLE" "END_OF_TRANSMISSION_BLOCK" "CANCEL" "END_OF_MEDIUM" "SUBSTITUTE"
234 "ESCAPE" "INFORMATION_SEPARATOR_FOUR" "INFORMATION_SEPARATOR_THREE" "INFORMATION_SEPARATOR_TWO"
235 "INFORMATION_SEPARATOR_ONE" "Space" "EXCLAMATION_MARK" "QUOTATION_MARK" "NUMBER_SIGN" "DOLLAR_SIGN"
236 "PERCENT_SIGN" "AMPERSAND" "APOSTROPHE" "LEFT_PARENTHESIS" "RIGHT_PARENTHESIS" "ASTERISK" "PLUS_SIGN"
237 "COMMA" "HYPHEN-MINUS" "FULL_STOP" "SOLIDUS" "DIGIT_ZERO" "DIGIT_ONE" "DIGIT_TWO" "DIGIT_THREE" "DIGIT_FOUR"
238 "DIGIT_FIVE" "DIGIT_SIX" "DIGIT_SEVEN" "DIGIT_EIGHT" "DIGIT_NINE" "COLON" "SEMICOLON" "LESS-THAN_SIGN"
239 "EQUALS_SIGN" "GREATER-THAN_SIGN" "QUESTION_MARK" "COMMERCIAL_AT" "LATIN_CAPITAL_LETTER_A"
240 "LATIN_CAPITAL_LETTER_B" "LATIN_CAPITAL_LETTER_C" "LATIN_CAPITAL_LETTER_D" "LATIN_CAPITAL_LETTER_E"
241 "LATIN_CAPITAL_LETTER_F" "LATIN_CAPITAL_LETTER_G" "LATIN_CAPITAL_LETTER_H" "LATIN_CAPITAL_LETTER_I"
242 "LATIN_CAPITAL_LETTER_J" "LATIN_CAPITAL_LETTER_K" "LATIN_CAPITAL_LETTER_L" "LATIN_CAPITAL_LETTER_M"
243 "LATIN_CAPITAL_LETTER_N" "LATIN_CAPITAL_LETTER_O" "LATIN_CAPITAL_LETTER_P" "LATIN_CAPITAL_LETTER_Q"
244 "LATIN_CAPITAL_LETTER_R" "LATIN_CAPITAL_LETTER_S" "LATIN_CAPITAL_LETTER_T" "LATIN_CAPITAL_LETTER_U"
245 "LATIN_CAPITAL_LETTER_V" "LATIN_CAPITAL_LETTER_W" "LATIN_CAPITAL_LETTER_X" "LATIN_CAPITAL_LETTER_Y"
246 "LATIN_CAPITAL_LETTER_Z" "LEFT_SQUARE_BRACKET" "REVERSE_SOLIDUS" "RIGHT_SQUARE_BRACKET" "CIRCUMFLEX_ACCENT"
247 "LOW_LINE" "GRAVE_ACCENT" "LATIN_SMALL_LETTER_A" "LATIN_SMALL_LETTER_B" "LATIN_SMALL_LETTER_C"
248 "LATIN_SMALL_LETTER_D" "LATIN_SMALL_LETTER_E" "LATIN_SMALL_LETTER_F" "LATIN_SMALL_LETTER_G"
249 "LATIN_SMALL_LETTER_H" "LATIN_SMALL_LETTER_I" "LATIN_SMALL_LETTER_J" "LATIN_SMALL_LETTER_K"
250 "LATIN_SMALL_LETTER_L" "LATIN_SMALL_LETTER_M" "LATIN_SMALL_LETTER_N" "LATIN_SMALL_LETTER_O"
251 "LATIN_SMALL_LETTER_P" "LATIN_SMALL_LETTER_Q" "LATIN_SMALL_LETTER_R" "LATIN_SMALL_LETTER_S"
252 "LATIN_SMALL_LETTER_T" "LATIN_SMALL_LETTER_U" "LATIN_SMALL_LETTER_V" "LATIN_SMALL_LETTER_W"
253 "LATIN_SMALL_LETTER_X" "LATIN_SMALL_LETTER_Y" "LATIN_SMALL_LETTER_Z" "LEFT_CURLY_BRACKET" "VERTICAL_LINE"
254 "RIGHT_CURLY_BRACKET" "TILDE" "Rubout")
255 "Names/codepoints of the first 128 characters from Unicode 6.2,
256 except with Common Lisp's suggested changes.
257 For the first 32 characters ('C0 controls'), the first
258 'Commonly used alternative alias' is used -- note that this differs from SBCL, which uses abbreviations.")
259 ;; I hope being slightly different from SBCL doesn't bite me down the road.
260 ;; I'll figure out a good way to add the other 21701 names later.
262 (defun char-name (char)
263 ;; For consistency, I'm using the SBCL convention of the Unicode
264 ;; name, with spaces as underscores. It would be nice to use
265 ;; their "Uxxxx" convention for names I don't know, but there's
266 ;; not much in FORMAT yet. I'm only implementing ASCII names right
267 ;; now, since Unicode is kind of big.
269 (let ((code (char-code char)))
271 (aref +ascii-names+ code)
272 nil))) ;; for now, no name
274 (defun name-char (name)
275 (let ((name-upcase (string-upcase (string name))))
276 (dotimes (i (length +ascii-names+))
277 (when (string= name-upcase (string-upcase (aref +ascii-names+ i))) ;; poor man's STRING-EQUAL
278 (return-from name-char (code-char i))))