+ (length *long-decompositions*))
+
+(defun fixup-compositions ()
+ (flet ((fixup (k v)
+ (let* ((cp (car k))
+ (ucd (aref (aref *ucd-base* (cp-high cp)) (cp-low cp)))
+ (misc (aref *misc-table* (ucd-misc ucd)))
+ (ccc-index (third misc)))
+ ;; we can do everything in the first pass except for
+ ;; accounting for decompositions where the first
+ ;; character of the decomposition is not a starter.
+ (when (/= ccc-index 0)
+ (remhash k *comp-table*)))))
+ (maphash #'fixup *comp-table*)))
+
+(defun fixup-hangul-syllables ()
+ ;; "Hangul Syllable Composition, Unicode 5.1 section 3-12"
+ (let* ((sbase #xac00)
+ (lbase #x1100)
+ (vbase #x1161)
+ (tbase #x11a7)
+ (scount 11172)
+ (lcount 19)
+ (vcount 21)
+ (tcount 28)
+ (ncount (* vcount tcount))
+ (table (make-hash-table)))
+ (with-open-file (*standard-input*
+ (make-pathname :name "Jamo" :type "txt"
+ :defaults *unicode-character-database*))
+ (loop for line = (read-line nil nil)
+ while line
+ if (position #\; line)
+ do (add-jamo-information line table)))
+ (dotimes (sindex scount)
+ (let* ((l (+ lbase (floor sindex ncount)))
+ (v (+ vbase (floor (mod sindex ncount) tcount)))
+ (tee (+ tbase (mod sindex tcount)))
+ (code-point (+ sbase sindex))
+ (name (format nil "HANGUL_SYLLABLE_~A~A~:[~A~;~]"
+ (gethash l table) (gethash v table)
+ (= tee tbase) (gethash tee table))))
+ (setf (gethash code-point *unicode-names*) name)
+ (unless (aref *decomposition-base* (cp-high code-point))
+ (setf (aref *decomposition-base* (cp-high code-point))
+ (make-array (ash 1 *page-size-exponent*)
+ :initial-element nil)))
+ (setf (aref (aref *decomposition-base* (cp-high code-point))
+ (cp-low code-point))
+ (cons (if (= tee tbase) 2 3) 0))))))
+
+(defun add-jamo-information (line table)
+ (let* ((split (split-string line #\;))
+ (code (parse-integer (first split) :radix 16))
+ (syllable (string-trim '(#\Space)
+ (subseq (second split) 0 (position #\# (second split))))))
+ (setf (gethash code table) syllable)))