- (string= ", First>" name :start2 (- (length name) 8)))
- (progn
- (setq *block-first* code-point)
- nil)
- (let* ((gc-index (or (gethash general-category *general-categories*)
- (error "unknown general category ~A"
- general-category)))
- (bidi-index (or (gethash bidi-class *bidi-classes*)
- (error "unknown bidirectional class ~A"
- bidi-class)))
- (ccc-index (parse-integer canonical-combining-class))
- (digit-index (unless (string= "" decimal-digit)
- (parse-integer decimal-digit)))
- (upper-index (unless (string= "" simple-uppercase)
- (parse-integer simple-uppercase :radix 16)))
- (lower-index (unless (string= "" simple-lowercase)
- (parse-integer simple-lowercase :radix 16)))
- (title-index (unless (string= "" simple-titlecase)
- (parse-integer simple-titlecase :radix 16)))
- (cl-both-case-p
- (not (null (or (and (= gc-index 0) lower-index)
- (and (= gc-index 1) upper-index)))))
- (misc-index (hash-misc gc-index bidi-index ccc-index
- decimal-digit digit bidi-mirrored
- cl-both-case-p)))
- (declare (ignore digit-index))
- (incf *name-size* (length name))
- (when (string/= "" decomposition-type-and-mapping)
- (let ((split (split-string decomposition-type-and-mapping
- #\Space)))
- (when (char= #\< (aref (first split) 0))
- (setf (gethash (pop split) *decomposition-types*) t))
- (unless (aref *decomposition-base* (cp-high code-point))
- (setf (aref *decomposition-base* (cp-high code-point))
- (make-array (ash 1 *page-size-exponent*)
- :initial-element nil)))
- (setf (aref (aref *decomposition-base* (cp-high code-point))
- (cp-low code-point))
- (mapcar #'(lambda (string)
- (parse-integer string :radix 16))
- split))
- (setq *decomposition-length-max*
- (max *decomposition-length-max* (length split)))
- (incf *decompositions* (length split))))
- (when (and (string/= "" simple-uppercase)
- (string/= "" simple-lowercase))
- (push (list code-point upper-index lower-index) *both-cases*))
- (when (string/= simple-uppercase simple-titlecase)
- (push (cons code-point title-index) *different-titlecases*))
- (when (string/= digit numeric)
- (push (cons code-point numeric) *different-numerics*))
- (cond
- ((= gc-index 8)
- (unless *last-uppercase*
- (incf *uppercase-transition-count*))
- (setq *last-uppercase* t))
- (t
- (when *last-uppercase*
- (incf *uppercase-transition-count*))
- (setq *last-uppercase* nil)))
- (when (> ccc-index 255)
- (error "canonical combining class too large ~A" ccc-index))
- (let ((result (vector misc-index (or upper-index lower-index 0))))
- (when (and (> (length name) 7)
- (string= ", Last>" name :start2 (- (length name) 7)))
- (let ((page-start (ash (+ *block-first*
- (ash 1 *page-size-exponent*)
- -1)
- (- *page-size-exponent*)))
- (page-end (ash code-point (- *page-size-exponent*))))
- (loop for point from *block-first*
- below (ash page-start *page-size-exponent*)
- do (setf (aref (aref *ucd-base* (cp-high point))
- (cp-low point))
- result))
- (loop for page from page-start below page-end
- do (setf (aref *ucd-base* page)
- (make-array (ash 1 *page-size-exponent*)
- :initial-element result)))
- (loop for point from (ash page-end *page-size-exponent*)
- below code-point
- do (setf (aref (aref *ucd-base* (cp-high point))
- (cp-low point))
- result))))
- result)))))
+ (string= ", First>" name :start2 (- (length name) 8)))
+ (progn
+ (setq *block-first* code-point)
+ nil)
+ (let* ((gc-index (or (gethash general-category *general-categories*)
+ (error "unknown general category ~A"
+ general-category)))
+ (bidi-index (or (gethash bidi-class *bidi-classes*)
+ (error "unknown bidirectional class ~A"
+ bidi-class)))
+ (ccc-index (parse-integer canonical-combining-class))
+ (digit-index (unless (string= "" decimal-digit)
+ (parse-integer decimal-digit)))
+ (upper-index (unless (string= "" simple-uppercase)
+ (parse-integer simple-uppercase :radix 16)))
+ (lower-index (unless (string= "" simple-lowercase)
+ (parse-integer simple-lowercase :radix 16)))
+ (title-index (unless (string= "" simple-titlecase)
+ (parse-integer simple-titlecase :radix 16)))
+ (cl-both-case-p
+ (not (null (or (and (= gc-index 0) lower-index)
+ (and (= gc-index 1) upper-index)))))
+ (decomposition-info 0))
+ (declare (ignore digit-index))
+ (when (and (not cl-both-case-p)
+ (< gc-index 2))
+ (format t "~A~%" name))
+ (incf *name-size* (length name))
+ (when (string/= "" decomposition-type-and-mapping)
+ (let ((split (split-string decomposition-type-and-mapping #\Space)))
+ (cond
+ ((char= #\< (aref (first split) 0))
+ (unless (position (first split) *decomposition-types*
+ :test #'equal)
+ (vector-push (first split) *decomposition-types*))
+ (setf decomposition-info (position (pop split) *decomposition-types* :test #'equal)))
+ (t (setf decomposition-info 1)))
+ (unless (aref *decomposition-base* (cp-high code-point))
+ (setf (aref *decomposition-base* (cp-high code-point))
+ (make-array (ash 1 *page-size-exponent*)
+ :initial-element nil)))
+ (setf (aref (aref *decomposition-base* (cp-high code-point))
+ (cp-low code-point))
+ (let ((decomposition
+ (mapcar #'(lambda (string)
+ (parse-integer string :radix 16))
+ split)))
+ (when (= decomposition-info 1)
+ ;; Primary composition excludes:
+ ;; * singleton decompositions;
+ ;; * decompositions of non-starters;
+ ;; * script-specific decompositions;
+ ;; * later-version decompositions;
+ ;; * decompositions whose first character is a
+ ;; non-starter.
+ ;; All but the last case can be handled here;
+ ;; for the fixup, see FIXUP-COMPOSITIONS
+ (when (and (> (length decomposition) 1)
+ (= ccc-index 0)
+ (not (member code-point *exclusions*)))
+ (unless (= (length decomposition) 2)
+ (error "canonical decomposition unexpectedly long"))
+ (setf (gethash (cons (first decomposition)
+ (second decomposition))
+ *comp-table*)
+ code-point)))
+ (if (= (length decomposition) 1)
+ (cons 1 (car decomposition))
+ (cons (length decomposition)
+ (prog1 (fill-pointer *long-decompositions*)
+ (dolist (code decomposition)
+ (vector-push-extend code *long-decompositions*)))))))))
+ ;; Hangul decomposition; see Unicode 6.2 section 3-12
+ (when (= code-point #xd7a3)
+ ;; KLUDGE: it's a bit ugly to do this here when we've got
+ ;; a reasonable function to do this in
+ ;; (FIXUP-HANGUL-SYLLABLES). The problem is that the
+ ;; fixup would be somewhat tedious to do, what with all
+ ;; the careful hashing of misc data going on.
+ (setf decomposition-info 1)
+ ;; the construction of *decomposition-base* entries is,
+ ;; however, easy to handle within FIXUP-HANGUL-SYLLABLES.
+ )
+ (when (and (string/= "" simple-uppercase)
+ (string/= "" simple-lowercase))
+ (push (list code-point upper-index lower-index) *both-cases*))
+ (when (string/= simple-uppercase simple-titlecase)
+ (push (cons code-point title-index) *different-titlecases*))
+ (when (string/= digit numeric)
+ (push (cons code-point numeric) *different-numerics*))
+ (cond
+ ((= gc-index 8)
+ (unless *last-uppercase*
+ (incf *uppercase-transition-count*))
+ (setq *last-uppercase* t))
+ (t
+ (when *last-uppercase*
+ (incf *uppercase-transition-count*))
+ (setq *last-uppercase* nil)))
+ (when (> ccc-index 255)
+ (error "canonical combining class too large ~A" ccc-index))
+ (let* ((misc-index (hash-misc gc-index bidi-index ccc-index
+ decimal-digit digit bidi-mirrored
+ cl-both-case-p decomposition-info))
+ (result (make-ucd :misc misc-index
+ :transform (or upper-index lower-index 0))))
+ (when (and (> (length name) 7)
+ (string= ", Last>" name :start2 (- (length name) 7)))
+ (let ((page-start (ash (+ *block-first*
+ (ash 1 *page-size-exponent*)
+ -1)
+ (- *page-size-exponent*)))
+ (page-end (ash code-point (- *page-size-exponent*))))
+ (loop for point from *block-first*
+ below (ash page-start *page-size-exponent*)
+ do (setf (aref (aref *ucd-base* (cp-high point))
+ (cp-low point))
+ result))
+ (loop for page from page-start below page-end
+ do (setf (aref *ucd-base* page)
+ (make-array (ash 1 *page-size-exponent*)
+ :initial-element result)))
+ (loop for point from (ash page-end *page-size-exponent*)
+ below code-point
+ do (setf (aref (aref *ucd-base* (cp-high point))
+ (cp-low point))
+ result))))
+ (values result (normalize-character-name name)))))))