(dpb v2 (byte 8 8) v3))))
(if (= length 1)
(string (code-char entry))
- (let ((result (make-string length))
- (e (* 4 entry)))
- (dotimes (i length result)
- (let ((code (dpb (aref long-decompositions (+ e 1))
- (byte 8 16)
- (dpb (aref long-decompositions (+ e 2))
- (byte 8 8)
- (aref long-decompositions (+ e 3))))))
- (setf (char result i) (code-char code)))
- (incf e 4))))))
+ (if (<= #xac00 cp #xd7a3)
+ ;; see Unicode 6.2, section 3-12
+ (let* ((sbase #xac00)
+ (lbase #x1100)
+ (vbase #x1161)
+ (tbase #x11a7)
+ (lcount 19)
+ (vcount 21)
+ (tcount 28)
+ (ncount (* vcount tcount))
+ (scount (* lcount ncount))
+ (sindex (- cp sbase))
+ (lindex (floor sindex ncount))
+ (vindex (floor (mod sindex ncount) tcount))
+ (tindex (mod sindex tcount))
+ (result (make-string length)))
+ (declare (ignore scount))
+ (setf (char result 0) (code-char (+ lbase lindex)))
+ (setf (char result 1) (code-char (+ vbase vindex)))
+ (when (> tindex 0)
+ (setf (char result 2) (code-char (+ tbase tindex))))
+ result)
+ (let ((result (make-string length))
+ (e (* 4 entry)))
+ (dotimes (i length result)
+ (let ((code (dpb (aref long-decompositions (+ e 1))
+ (byte 8 16)
+ (dpb (aref long-decompositions (+ e 2))
+ (byte 8 8)
+ (aref long-decompositions (+ e 3))))))
+ (setf (char result i) (code-char code)))
+ (incf e 4)))))))
(defun decompose-char (char)
(if (= (char-decomposition-info char) 0)
while line
do (slurp-ucd-line line)))
(second-pass)
- (build-misc-table)
(fixup-hangul-syllables)
+ (build-misc-table)
(length *long-decompositions*))
(defun fixup-hangul-syllables ()
(let* ((l (+ lbase (floor sindex ncount)))
(v (+ vbase (floor (mod sindex ncount) tcount)))
(tee (+ tbase (mod sindex tcount)))
+ (code-point (+ sbase sindex))
(name (format nil "HANGUL_SYLLABLE_~A~A~:[~A~;~]"
(gethash l table) (gethash v table)
(= tee tbase) (gethash tee table))))
- (setf (gethash (+ sbase sindex) *unicode-names*) name)))))
+ (setf (gethash code-point *unicode-names*) name)
+ (unless (aref *decomposition-base* (cp-high code-point))
+ (setf (aref *decomposition-base* (cp-high code-point))
+ (make-array (ash 1 *page-size-exponent*)
+ :initial-element nil)))
+ (setf (aref (aref *decomposition-base* (cp-high code-point))
+ (cp-low code-point))
+ (cons (if (= tee tbase) 2 3) 0))))))
(defun add-jamo-information (line table)
(let* ((split (split-string line #\;))
(defun split-string (line character)
(loop for prev-position = 0 then (1+ position)
- for position = (position character line :start prev-position)
- collect (subseq line prev-position position)
- do (unless position
- (loop-finish))))
+ for position = (position character line :start prev-position)
+ collect (subseq line prev-position position)
+ do (unless position
+ (loop-finish))))
(defun init-indices (strings)
(let ((hash (make-hash-table :test #'equal)))
(loop for string in strings
- for index from 0
- do (setf (gethash string hash) index))
+ for index from 0
+ do (setf (gethash string hash) index))
hash))
(defparameter *general-categories*
(prog1 (fill-pointer *long-decompositions*)
(dolist (code decomposition)
(vector-push-extend code *long-decompositions*)))))))))
+ ;; Hangul decomposition; see Unicode 6.2 section 3-12
+ (when (= code-point #xd7a3)
+ ;; KLUDGE: it's a bit ugly to do this here when we've got
+ ;; a reasonable function to do this in
+ ;; (FIXUP-HANGUL-SYLLABLES). The problem is that the
+ ;; fixup would be somewhat tedious to do, what with all
+ ;; the careful hashing of misc data going on.
+ (setf decomposition-info 1)
+ ;; the construction of *decomposition-base* entries is,
+ ;; however, easy to handle within FIXUP-HANGUL-SYLLABLES.
+ )
(when (and (string/= "" simple-uppercase)
(string/= "" simple-lowercase))
(push (list code-point upper-index lower-index) *both-cases*))