5 (defparameter *output-directory*
7 (make-pathname :directory '(:relative :up "output"))
8 (make-pathname :directory (pathname-directory *load-truename*))))
10 (defparameter *page-size-exponent* 8)
13 (ash cp (- *page-size-exponent*)))
16 (ldb (byte *page-size-exponent* 0) cp))
20 (defstruct ucd misc transform)
22 (defparameter *unicode-character-database*
23 (make-pathname :directory (pathname-directory *load-truename*)))
25 (defparameter *ucd-base* nil)
26 (defparameter *unicode-names* (make-hash-table))
28 (defparameter *last-uppercase* nil)
29 (defparameter *uppercase-transition-count* 0)
30 (defparameter *different-titlecases* nil)
31 (defparameter *different-numerics* nil)
32 (defparameter *name-size* 0)
33 (defparameter *misc-hash* (make-hash-table :test #'equal))
34 (defparameter *misc-index* -1)
35 (defparameter *misc-table* nil)
36 (defparameter *misc-mapping* nil)
37 (defparameter *both-cases* nil)
38 (defparameter *long-decompositions* nil)
39 (defparameter *decomposition-types* nil)
40 (defparameter *decomposition-base* nil)
42 (defun hash-misc (gc-index bidi-index ccc-index decimal-digit digit
43 bidi-mirrored cl-both-case-p decomposition-info)
44 (let* ((list (list gc-index bidi-index ccc-index decimal-digit digit
45 bidi-mirrored cl-both-case-p decomposition-info))
46 (index (gethash list *misc-hash*)))
49 (vector-push list *misc-table*)
50 (setf (gethash list *misc-hash*)
51 (incf *misc-index*))))))
53 (defun gc-index-sort-key (gc-index)
54 (or (cdr (assoc gc-index '((1 . 2) (2 . 1)))) gc-index))
56 (defun compare-misc-entry (left right)
57 (destructuring-bind (left-gc-index left-bidi-index left-ccc-index
58 left-decimal-digit left-digit left-bidi-mirrored
59 left-cl-both-case-p left-decomposition-info)
61 (destructuring-bind (right-gc-index right-bidi-index right-ccc-index
62 right-decimal-digit right-digit right-bidi-mirrored
63 right-cl-both-case-p right-decomposition-info)
65 (or (and left-cl-both-case-p (not right-cl-both-case-p))
66 (and (or left-cl-both-case-p (not right-cl-both-case-p))
67 (or (< (gc-index-sort-key left-gc-index)
68 (gc-index-sort-key right-gc-index))
69 (and (= left-gc-index right-gc-index)
70 (or (< left-decomposition-info right-decomposition-info)
71 (and (= left-decomposition-info right-decomposition-info)
72 (or (< left-bidi-index right-bidi-index)
73 (and (= left-bidi-index right-bidi-index)
74 (or (< left-ccc-index right-ccc-index)
75 (and (= left-ccc-index right-ccc-index)
76 (or (string< left-decimal-digit
78 (and (string= left-decimal-digit
80 (or (string< left-digit right-digit)
81 (and (string= left-digit
83 (string< left-bidi-mirrored
84 right-bidi-mirrored))))))))))))))))))
86 (defun build-misc-table ()
87 (let ((table (sort *misc-table* #'compare-misc-entry)))
88 ;; after sorting, insert at the end a special entry to handle
89 ;; unallocated characters.
90 (setf *misc-table* (make-array (1+ (length table))))
91 (replace *misc-table* table)
92 (setf (aref *misc-table* (length table))
93 ;; unallocated characters have a GC index of 31 (not
94 ;; colliding with any other GC), are not digits or decimal
95 ;; digits, aren't BOTH-CASE-P, don't decompose, and aren't
96 ;; interestingly bidi or combining.
97 '(31 0 0 "" "" "" nil 0)))
98 (setq *misc-mapping* (make-array (1+ *misc-index*)))
99 (loop for i from 0 to *misc-index*
100 do (setf (aref *misc-mapping*
101 (gethash (aref *misc-table* i) *misc-hash*))
104 (defvar *comp-table*)
107 (with-open-file (s (make-pathname :name "CompositionExclusions" :type "txt"
108 :defaults *unicode-character-database*))
109 (do ((line (read-line s nil nil) (read-line s nil nil))
112 (when (and (> (length line) 0)
113 (char/= (char line 0) #\#))
114 (push (parse-integer line :end (position #\Space line) :radix 16)
118 (setf *comp-table* (make-hash-table :test 'equal))
119 (setq *last-uppercase* nil)
120 (setq *uppercase-transition-count* 0)
121 (setq *different-titlecases* nil)
122 (setq *different-numerics* nil)
124 (setq *misc-hash* (make-hash-table :test #'equal))
125 (setq *misc-index* -1)
126 (setq *misc-table* (make-array 2048 :fill-pointer 0))
127 (setq *both-cases* nil)
128 (setq *long-decompositions*
129 (make-array 2048 :fill-pointer 0 :adjustable t))
130 (setq *decomposition-types*
131 (let ((array (make-array 256 :initial-element nil :fill-pointer 1)))
132 (vector-push "" array)
133 (vector-push "<compat>" array)
135 (setq *decomposition-base* (make-array (ash #x110000
136 (- *page-size-exponent*))
137 :initial-element nil))
138 (setq *ucd-base* (make-array (ash #x110000 (- *page-size-exponent*))
139 :initial-element nil))
140 (with-open-file (*standard-input*
141 (make-pathname :name "UnicodeData"
143 :defaults *unicode-character-database*)
145 (loop for line = (read-line nil nil)
147 do (slurp-ucd-line line)))
150 (fixup-hangul-syllables)
152 (length *long-decompositions*))
154 (defun fixup-compositions ()
157 (ucd (aref (aref *ucd-base* (cp-high cp)) (cp-low cp)))
158 (misc (aref *misc-table* (ucd-misc ucd)))
159 (ccc-index (third misc)))
160 ;; we can do everything in the first pass except for
161 ;; accounting for decompositions where the first
162 ;; character of the decomposition is not a starter.
163 (when (/= ccc-index 0)
164 (remhash k *comp-table*)))))
165 (maphash #'fixup *comp-table*)))
167 (defun fixup-hangul-syllables ()
168 ;; "Hangul Syllable Composition, Unicode 5.1 section 3-12"
169 (let* ((sbase #xac00)
177 (ncount (* vcount tcount))
178 (table (make-hash-table)))
179 (with-open-file (*standard-input*
180 (make-pathname :name "Jamo" :type "txt"
181 :defaults *unicode-character-database*))
182 (loop for line = (read-line nil nil)
184 if (position #\; line)
185 do (add-jamo-information line table)))
186 (dotimes (sindex scount)
187 (let* ((l (+ lbase (floor sindex ncount)))
188 (v (+ vbase (floor (mod sindex ncount) tcount)))
189 (tee (+ tbase (mod sindex tcount)))
190 (code-point (+ sbase sindex))
191 (name (format nil "HANGUL_SYLLABLE_~A~A~:[~A~;~]"
192 (gethash l table) (gethash v table)
193 (= tee tbase) (gethash tee table))))
194 (setf (gethash code-point *unicode-names*) name)
195 (unless (aref *decomposition-base* (cp-high code-point))
196 (setf (aref *decomposition-base* (cp-high code-point))
197 (make-array (ash 1 *page-size-exponent*)
198 :initial-element nil)))
199 (setf (aref (aref *decomposition-base* (cp-high code-point))
201 (cons (if (= tee tbase) 2 3) 0))))))
203 (defun add-jamo-information (line table)
204 (let* ((split (split-string line #\;))
205 (code (parse-integer (first split) :radix 16))
206 (syllable (string-trim '(#\Space)
207 (subseq (second split) 0 (position #\# (second split))))))
208 (setf (gethash code table) syllable)))
210 (defun split-string (line character)
211 (loop for prev-position = 0 then (1+ position)
212 for position = (position character line :start prev-position)
213 collect (subseq line prev-position position)
217 (defun init-indices (strings)
218 (let ((hash (make-hash-table :test #'equal)))
219 (loop for string in strings
221 do (setf (gethash string hash) index))
224 (defparameter *general-categories*
225 (init-indices '("Lu" "Ll" "Lt" "Lm" "Lo" "Cc" "Cf" "Co" "Cs" "Mc"
226 "Me" "Mn" "Nd" "Nl" "No" "Pc" "Pd" "Pe" "Pf" "Pi"
227 "Po" "Ps" "Sc" "Sk" "Sm" "So" "Zl" "Zp" "Zs")))
228 (defparameter *bidi-classes*
229 (init-indices '("AL" "AN" "B" "BN" "CS" "EN" "ES" "ET" "L" "LRE" "LRO"
230 "NSM" "ON" "PDF" "R" "RLE" "RLO" "S" "WS")))
233 (defparameter *block-first* nil)
235 (defun normalize-character-name (name)
236 (when (find #\_ name)
237 (error "Bad name for a character: ~A" name))
238 (unless (or (zerop (length name)) (find #\< name) (find #\> name))
239 (substitute #\_ #\Space name)))
241 ;;; 3400 -- 4DB5 : cjk ideograph extension a ;Lo;0;L;;;;;N;;;;;
242 ;;; AC00 -- D7A3 : hangul syllables ;Lo;0;L;;;;;N;;;;;
243 ;;; D800 -- F8FF : surrogates and private use
244 ;;; 20000 -- 2A6D6 : cjk ideograph extension b ;Lo;0;L;;;;;N;;;;;
245 ;;; F0000 -- FFFFD : private use
246 ;;; 100000 -- 10FFFD: private use
247 (defun encode-ucd-line (line code-point)
248 (destructuring-bind (name general-category canonical-combining-class
249 bidi-class decomposition-type-and-mapping
250 decimal-digit digit numeric bidi-mirrored
251 unicode-1-name iso-10646-comment simple-uppercase
252 simple-lowercase simple-titlecase)
254 (declare (ignore unicode-1-name iso-10646-comment))
255 (if (and (> (length name) 8)
256 (string= ", First>" name :start2 (- (length name) 8)))
258 (setq *block-first* code-point)
260 (let* ((gc-index (or (gethash general-category *general-categories*)
261 (error "unknown general category ~A"
263 (bidi-index (or (gethash bidi-class *bidi-classes*)
264 (error "unknown bidirectional class ~A"
266 (ccc-index (parse-integer canonical-combining-class))
267 (digit-index (unless (string= "" decimal-digit)
268 (parse-integer decimal-digit)))
269 (upper-index (unless (string= "" simple-uppercase)
270 (parse-integer simple-uppercase :radix 16)))
271 (lower-index (unless (string= "" simple-lowercase)
272 (parse-integer simple-lowercase :radix 16)))
273 (title-index (unless (string= "" simple-titlecase)
274 (parse-integer simple-titlecase :radix 16)))
276 (not (null (or (and (= gc-index 0) lower-index)
277 (and (= gc-index 1) upper-index)
278 ;; deal with prosgegrammeni / titlecase
280 (typep code-point '(integer #x1000 #x1fff))
282 (decomposition-info 0))
283 (declare (ignore digit-index))
284 (when (and (not cl-both-case-p)
286 (format t "~A~%" name))
287 (incf *name-size* (length name))
288 (when (string/= "" decomposition-type-and-mapping)
289 (let ((split (split-string decomposition-type-and-mapping #\Space)))
291 ((char= #\< (aref (first split) 0))
292 (unless (position (first split) *decomposition-types*
294 (vector-push (first split) *decomposition-types*))
295 (setf decomposition-info (position (pop split) *decomposition-types* :test #'equal)))
296 (t (setf decomposition-info 1)))
297 (unless (aref *decomposition-base* (cp-high code-point))
298 (setf (aref *decomposition-base* (cp-high code-point))
299 (make-array (ash 1 *page-size-exponent*)
300 :initial-element nil)))
301 (setf (aref (aref *decomposition-base* (cp-high code-point))
304 (mapcar #'(lambda (string)
305 (parse-integer string :radix 16))
307 (when (= decomposition-info 1)
308 ;; Primary composition excludes:
309 ;; * singleton decompositions;
310 ;; * decompositions of non-starters;
311 ;; * script-specific decompositions;
312 ;; * later-version decompositions;
313 ;; * decompositions whose first character is a
315 ;; All but the last case can be handled here;
316 ;; for the fixup, see FIXUP-COMPOSITIONS
317 (when (and (> (length decomposition) 1)
319 (not (member code-point *exclusions*)))
320 (unless (= (length decomposition) 2)
321 (error "canonical decomposition unexpectedly long"))
322 (setf (gethash (cons (first decomposition)
323 (second decomposition))
326 (if (= (length decomposition) 1)
327 (cons 1 (car decomposition))
328 (cons (length decomposition)
329 (prog1 (fill-pointer *long-decompositions*)
330 (dolist (code decomposition)
331 (vector-push-extend code *long-decompositions*)))))))))
332 ;; Hangul decomposition; see Unicode 6.2 section 3-12
333 (when (= code-point #xd7a3)
334 ;; KLUDGE: it's a bit ugly to do this here when we've got
335 ;; a reasonable function to do this in
336 ;; (FIXUP-HANGUL-SYLLABLES). The problem is that the
337 ;; fixup would be somewhat tedious to do, what with all
338 ;; the careful hashing of misc data going on.
339 (setf decomposition-info 1)
340 ;; the construction of *decomposition-base* entries is,
341 ;; however, easy to handle within FIXUP-HANGUL-SYLLABLES.
343 (when (and (string/= "" simple-uppercase)
344 (string/= "" simple-lowercase))
345 (push (list code-point upper-index lower-index) *both-cases*))
346 (when (string/= simple-uppercase simple-titlecase)
347 (push (cons code-point title-index) *different-titlecases*))
348 (when (string/= digit numeric)
349 (push (cons code-point numeric) *different-numerics*))
352 (unless *last-uppercase*
353 (incf *uppercase-transition-count*))
354 (setq *last-uppercase* t))
356 (when *last-uppercase*
357 (incf *uppercase-transition-count*))
358 (setq *last-uppercase* nil)))
359 (when (> ccc-index 255)
360 (error "canonical combining class too large ~A" ccc-index))
361 (let* ((misc-index (hash-misc gc-index bidi-index ccc-index
362 decimal-digit digit bidi-mirrored
363 cl-both-case-p decomposition-info))
364 (result (make-ucd :misc misc-index
365 :transform (or upper-index lower-index 0))))
366 (when (and (> (length name) 7)
367 (string= ", Last>" name :start2 (- (length name) 7)))
368 (let ((page-start (ash (+ *block-first*
369 (ash 1 *page-size-exponent*)
371 (- *page-size-exponent*)))
372 (page-end (ash code-point (- *page-size-exponent*))))
373 (loop for point from *block-first*
374 below (ash page-start *page-size-exponent*)
375 do (setf (aref (aref *ucd-base* (cp-high point))
378 (loop for page from page-start below page-end
379 do (setf (aref *ucd-base* page)
380 (make-array (ash 1 *page-size-exponent*)
381 :initial-element result)))
382 (loop for point from (ash page-end *page-size-exponent*)
384 do (setf (aref (aref *ucd-base* (cp-high point))
387 (values result (normalize-character-name name)))))))
389 (defun slurp-ucd-line (line)
390 (let* ((split-line (split-string line #\;))
391 (code-point (parse-integer (first split-line) :radix 16))
392 (code-high (ash code-point (- *page-size-exponent*)))
393 (code-low (ldb (byte *page-size-exponent* 0) code-point)))
394 (unless (aref *ucd-base* code-high)
395 (setf (aref *ucd-base* code-high)
396 (make-array (ash 1 *page-size-exponent*)
397 :initial-element nil)))
398 (multiple-value-bind (encoding name)
399 (encode-ucd-line (cdr split-line) code-point)
400 (setf (aref (aref *ucd-base* code-high) code-low) encoding
401 (gethash code-point *unicode-names*) name))))
403 ;;; this fixes up the case conversion discrepancy between CL and
404 ;;; Unicode: CL operators depend on char-downcase / char-upcase being
405 ;;; inverses, which is not true in general in Unicode even for
406 ;;; characters which change case to single characters.
407 (defun second-pass ()
408 (dotimes (i (length *ucd-base*))
409 (let ((base (aref *ucd-base* i)))
410 (dotimes (j (length base)) ; base is NIL or an array
411 (let ((result (aref base j)))
413 ;; fixup case mappings for CL/Unicode mismatch
414 (let* ((transform-point (ucd-transform result))
415 (transform-high (ash transform-point
416 (- *page-size-exponent*)))
417 (transform-low (ldb (byte *page-size-exponent* 0)
419 (when (and (plusp transform-point)
421 (aref (aref *ucd-base* transform-high)
423 (+ (ash i *page-size-exponent*) j)))
424 (destructuring-bind (gc-index bidi-index ccc-index
425 decimal-digit digit bidi-mirrored
426 cl-both-case-p decomposition-info)
427 (aref *misc-table* (ucd-misc result))
428 (declare (ignore cl-both-case-p))
429 (format t "~A~%" (+ (ash i *page-size-exponent*) j))
430 (setf (ucd-misc result)
431 (hash-misc gc-index bidi-index ccc-index
432 decimal-digit digit bidi-mirrored
433 nil decomposition-info)))))))))))
435 (defun write-4-byte (quadruplet stream)
436 (write-byte (ldb (byte 8 24) quadruplet) stream)
437 (write-byte (ldb (byte 8 16) quadruplet) stream)
438 (write-byte (ldb (byte 8 8) quadruplet) stream)
439 (write-byte (ldb (byte 8 0) quadruplet) stream))
441 (defun digit-to-byte (digit)
442 (if (string= "" digit)
444 (parse-integer digit)))
446 (defun output-ucd-data ()
447 (let ((hash (make-hash-table :test #'equalp))
449 (loop for page across *ucd-base*
451 (unless (gethash page hash)
452 (setf (gethash page hash)
454 (let ((array (make-array (1+ index))))
455 (maphash #'(lambda (key value)
456 (setf (aref array value) key))
459 (make-array (ash 1 *page-size-exponent*) :initial-element nil))
460 (with-open-file (stream (make-pathname :name "ucd"
462 :defaults *output-directory*)
464 :element-type '(unsigned-byte 8)
465 :if-exists :supersede
466 :if-does-not-exist :create)
467 (loop for (gc-index bidi-index ccc-index decimal-digit digit
468 bidi-mirrored nil decomposition-info)
470 ;; three bits spare here
471 do (write-byte gc-index stream)
472 ;; three bits spare here
473 do (write-byte bidi-index stream)
474 do (write-byte ccc-index stream)
475 ;; we could save some space here: decimal-digit and
476 ;; digit are constrained (CHECKME) to be between 0 and
477 ;; 9, so we could encode the pair in a single byte.
478 ;; (Also, decimal-digit is equal to digit or undefined,
479 ;; so we could encode decimal-digit as a single bit,
480 ;; meaning that we could save 11 bits here.
481 do (write-byte (digit-to-byte decimal-digit) stream)
482 do (write-byte (digit-to-byte digit) stream)
483 ;; there's an easy 7 bits to spare here
484 do (write-byte (if (string= "N" bidi-mirrored) 0 1) stream)
485 ;; at the moment we store information about which type
486 ;; of compatibility decomposition is used, costing c.3
487 ;; bits. We could elide that.
488 do (write-byte decomposition-info stream)
489 do (write-byte 0 stream))
490 (loop for page across *ucd-base*
491 do (write-byte (if page (gethash page hash) 0) stream))
492 (loop for page across array
493 do (loop for entry across page
496 (aref *misc-mapping* (ucd-misc entry))
497 ;; the last entry in *MISC-TABLE* (see
498 ;; BUILD-MISC-TABLE) is special,
499 ;; reserved for the information for
500 ;; characters unallocated by Unicode.
501 (1- (length *misc-table*)))
503 (if entry (ucd-transform entry) 0))
506 ;;; KLUDGE: this code, to write out decomposition information, is a
507 ;;; little bit very similar to the ucd entries above. Try factoring
508 ;;; out the common stuff?
509 (defun output-decomposition-data ()
510 (let ((hash (make-hash-table :test #'equalp))
512 (loop for page across *decomposition-base*
514 (unless (gethash page hash)
515 (setf (gethash page hash)
516 (prog1 index (incf index))))))
517 (let ((array (make-array index)))
518 (maphash #'(lambda (key value)
519 (setf (aref array value) key))
521 (with-open-file (stream (make-pathname :name "decomp" :type "dat"
522 :defaults *output-directory*)
524 :element-type '(unsigned-byte 8)
525 :if-exists :supersede
526 :if-does-not-exist :create)
527 (loop for page across *decomposition-base*
528 do (write-byte (if page (gethash page hash) 0) stream))
529 (loop for page across array
530 do (loop for entry across page
532 (dpb (if entry (car entry) 0)
534 (if entry (cdr entry) 0))
536 (with-open-file (stream (make-pathname :name "ldecomp" :type "dat"
537 :defaults *output-directory*)
539 :element-type '(unsigned-byte 8)
540 :if-exists :supersede
541 :if-does-not-exist :create)
542 (loop for code across (copy-seq *long-decompositions*)
543 do (write-4-byte code stream))))))
545 (defun output-composition-data ()
547 (let (firsts seconds)
550 (pushnew (car k) firsts)
551 (pushnew (cdr k) seconds)))
552 (maphash #'frob *comp-table*)))
553 (with-open-file (stream (make-pathname :name "comp" :type "dat"
554 :defaults *output-directory*)
556 :element-type '(unsigned-byte 8)
557 :if-exists :supersede :if-does-not-exist :create)
558 (maphash (lambda (k v)
559 (write-4-byte (car k) stream)
560 (write-4-byte (cdr k) stream)
561 (write-4-byte v stream))
566 (output-decomposition-data)
567 (output-composition-data)
568 (with-open-file (f (make-pathname :name "ucd-names" :type "lisp-expr"
569 :defaults *output-directory*)
571 :if-exists :supersede
572 :if-does-not-exist :create)
573 (with-standard-io-syntax
574 (write-string ";;; Do not edit by hand: generated by ucd.lisp" f)
575 (maphash (lambda (code name)
580 (setf *unicode-names* nil))
581 (with-open-file (*standard-output*
582 (make-pathname :name "numerics"
584 :defaults *output-directory*)
586 :if-exists :supersede
587 :if-does-not-exist :create)
588 (with-standard-io-syntax
589 (let ((*print-pretty* t))
590 (prin1 (mapcar #'(lambda (x) (cons (car x) (read-from-string (cdr x))))
591 *different-numerics*)))))
592 (with-open-file (*standard-output*
593 (make-pathname :name "titlecases"
595 :defaults *output-directory*)
597 :if-exists :supersede
598 :if-does-not-exist :create)
599 (with-standard-io-syntax
600 (let ((*print-pretty* t))
601 (prin1 *different-titlecases*))))
602 (with-open-file (*standard-output*
603 (make-pathname :name "misc"
605 :defaults *output-directory*)
607 :if-exists :supersede
608 :if-does-not-exist :create)
609 (with-standard-io-syntax
610 (let ((*print-pretty* t))
611 (prin1 `(:length ,(length *misc-table*)
612 :uppercase ,(loop for (gc-index) across *misc-table*
616 :lowercase ,(loop for (gc-index) across *misc-table*
620 :titlecase ,(loop for (gc-index) across *misc-table*
626 ;;; Use of the generated files
628 (defparameter *compiled-ucd* nil)
630 (defun read-compiled-ucd ()
631 (with-open-file (stream (make-pathname :name "ucd"
633 :defaults *output-directory*)
635 :element-type '(unsigned-byte 8))
636 (let ((length (file-length stream)))
638 (make-array length :element-type '(unsigned-byte 8)))
639 (read-sequence *compiled-ucd* stream)))
642 ;;; The stuff below is dependent on misc.lisp-expr being
644 ;;; (:LENGTH 395 :UPPERCASE (0 1 2 3 8 9 10 11) :LOWERCASE (4 5 6 7 12 13 14 15) :TITLECASE (16 17))
646 ;;; There are two groups of entries for UPPERCASE and LOWERCASE
647 ;;; because some characters have case (by Unicode standards) but are
648 ;;; not transformable character-by-character in a locale-independent
649 ;;; way (as CL requires for its standard operators).
651 ;;; for more details on these debugging functions, see the description
652 ;;; of the character database format in src/code/target-char.lisp
654 (defparameter *length* 395)
657 (let* ((cp-high (cp-high cp))
658 (page (aref *compiled-ucd* (+ (* 8 *length*) cp-high))))
660 (ash #x110000 (- *page-size-exponent*))
661 (* (ash 4 *page-size-exponent*) page)
664 (defun cp-value-0 (cp)
665 (let ((index (cp-index cp)))
666 (dpb (aref *compiled-ucd* index)
668 (ldb (byte 3 5) (aref *compiled-ucd* (1+ index))))))
670 (defun cp-value-1 (cp)
671 (let ((index (cp-index cp)))
672 (dpb (aref *compiled-ucd* (1+ index)) (byte 5 16)
673 (dpb (aref *compiled-ucd* (+ index 2)) (byte 8 8)
674 (aref *compiled-ucd* (+ index 3))))))
676 (defun cp-general-category (cp)
677 (aref *compiled-ucd* (* 8 (cp-value-0 cp))))
679 (defun cp-decimal-digit (cp)
680 (let ((decimal-digit (aref *compiled-ucd* (+ 3 (* 8 (cp-value-0 cp))))))
681 (and (< decimal-digit 10)
684 (defun cp-alpha-char-p (cp)
685 (< (cp-general-category cp) 5))
687 (defun cp-alphanumericp (cp)
688 (let ((gc (cp-general-category cp)))
692 (defun cp-digit-char-p (cp &optional (radix 10))
693 (let ((number (or (cp-decimal-digit cp)
698 (when (and number (< number radix))
701 (defun cp-graphic-char-p (cp)
705 (defun cp-char-upcase (cp)
706 (if (< 3 (cp-value-0 cp) 8)
710 (defun cp-char-downcase (cp)
711 (if (< (cp-value-0 cp) 4)
715 (defun cp-upper-case-p (cp)
716 (< (cp-value-0 cp) 4))
718 (defun cp-lower-case-p (cp)
719 (< 3 (cp-value-0 cp) 8))
721 (defun cp-both-case-p (cp)
722 (< (cp-value-0 cp) 8))