do (slurp-ucd-line line)))
(second-pass)
(build-misc-table)
+ (fixup-hangul-syllables)
*decompositions*)
+(defun fixup-hangul-syllables ()
+ ;; "Hangul Syllable Composition, Unicode 5.1 section 3-12"
+ (let* ((sbase #xac00)
+ (lbase #x1100)
+ (vbase #x1161)
+ (tbase #x11a7)
+ (scount 11172)
+ (lcount 19)
+ (vcount 21)
+ (tcount 28)
+ (ncount (* vcount tcount))
+ (table (make-hash-table)))
+ (with-open-file (*standard-input*
+ (make-pathname :name "Jamo" :type "txt"
+ :defaults *unicode-character-database*))
+ (loop for line = (read-line nil nil)
+ while line
+ if (position #\; line)
+ do (add-jamo-information line table)))
+ (dotimes (sindex scount)
+ (let* ((l (+ lbase (floor sindex ncount)))
+ (v (+ vbase (floor (mod sindex ncount) tcount)))
+ (tee (+ tbase (mod sindex tcount)))
+ (name (format nil "HANGUL_SYLLABLE_~A~A~:[~A~;~]"
+ (gethash l table) (gethash v table)
+ (= tee tbase) (gethash tee table))))
+ (setf (gethash (+ sbase sindex) *unicode-names*) name)))))
+
+(defun add-jamo-information (line table)
+ (let* ((split (split-string line #\;))
+ (code (parse-integer (first split) :radix 16))
+ (syllable (string-trim '(#\Space)
+ (subseq (second split) 0 (position #\# (second split))))))
+ (setf (gethash code table) syllable)))
+
(defun split-string (line character)
(loop for prev-position = 0 then (1+ position)
for position = (position character line :start prev-position)
decimal-digit digit bidi-mirrored
cl-both-case-p)))
(declare (ignore digit-index))
+ (when (and (not cl-both-case-p)
+ (< gc-index 2))
+ (format t "~A~%" name))
(incf *name-size* (length name))
(when (string/= "" decomposition-type-and-mapping)
(let ((split (split-string decomposition-type-and-mapping
:direction :output
:if-exists :supersede
:if-does-not-exist :create)
- (let ((*print-pretty* t))
- (prin1 (mapcar #'(lambda (x) (cons (car x) (read-from-string (cdr x))))
- *different-numerics*))))
+ (with-standard-io-syntax
+ (let ((*print-pretty* t))
+ (prin1 (mapcar #'(lambda (x) (cons (car x) (read-from-string (cdr x))))
+ *different-numerics*)))))
(with-open-file (*standard-output*
(make-pathname :name "titlecases"
:type "lisp-expr"
:direction :output
:if-exists :supersede
:if-does-not-exist :create)
- (let ((*print-pretty* t))
- (prin1 *different-titlecases*)))
+ (with-standard-io-syntax
+ (let ((*print-pretty* t))
+ (prin1 *different-titlecases*))))
(with-open-file (*standard-output*
(make-pathname :name "misc"
:type "lisp-expr"
:direction :output
:if-exists :supersede
:if-does-not-exist :create)
- (let ((*print-pretty* t))
- (prin1 `(:length ,(length *misc-table*)
- :uppercase ,(loop for (gc-index) across *misc-table*
- for i from 0
- when (= gc-index 0)
- collect i)
- :lowercase ,(loop for (gc-index) across *misc-table*
- for i from 0
- when (= gc-index 1)
- collect i)
- :titlecase ,(loop for (gc-index) across *misc-table*
- for i from 0
- when (= gc-index 2)
- collect i)))))
+ (with-standard-io-syntax
+ (let ((*print-pretty* t))
+ (prin1 `(:length ,(length *misc-table*)
+ :uppercase ,(loop for (gc-index) across *misc-table*
+ for i from 0
+ when (= gc-index 0)
+ collect i)
+ :lowercase ,(loop for (gc-index) across *misc-table*
+ for i from 0
+ when (= gc-index 1)
+ collect i)
+ :titlecase ,(loop for (gc-index) across *misc-table*
+ for i from 0
+ when (= gc-index 2)
+ collect i))))))
(values))
;;; Use of the generated files
(values))
;;; The stuff below is dependent on misc.lisp-expr being
-;;; (:LENGTH 186 :UPPERCASE (0) :LOWERCASE (1) :TITLECASE (2))
-
-(defparameter *length* 186)
+;;; (:LENGTH 215 :UPPERCASE (0 2) :LOWERCASE (1 3) :TITLECASE (4)).
+;;;
+;;; There are two entries for UPPERCASE and LOWERCASE because some
+;;; characters have case (by Unicode standards) but are not
+;;; transformable character-by-character in a locale-independent way
+;;; (as CL requires for its standard operators).
+;;;
+;;; for more details on these debugging functions, see the description
+;;; of the character database format in src/code/target-char.lisp
+
+(defparameter *length* 215)
(defun cp-index (cp)
(let* ((cp-high (cp-high cp))