beginnings of decomposition
[sbcl.git] / src / code / target-char.lisp
index 262cead..0c2b5f7 100644 (file)
@@ -33,9 +33,9 @@
                                   :directory
                                   '(:relative :up :up "output")
                                   :name name :type type)
-                                 sb!xc:*compile-file-truename*)))
-         (let ((character-database
-                (with-open-file (stream (file "ucd" "dat")
+                                 sb!xc:*compile-file-truename*))
+              (read-ub8-vector (pathname)
+                (with-open-file (stream pathname
                                         :direction :input
                                         :element-type '(unsigned-byte 8))
                   (let* ((length (file-length stream))
                                  length :element-type '(unsigned-byte 8))))
                     (read-sequence array stream)
                     array))))
+         (let ((character-database (read-ub8-vector (file "ucd" "dat")))
+               (decompositions (read-ub8-vector (file "decomp" "dat")))
+               (long-decompositions (read-ub8-vector (file "ldecomp" "dat"))))
            `(progn
-              (declaim (type (simple-array (unsigned-byte 8) (*)) **character-database**))
+              (declaim (type (simple-array (unsigned-byte 8) (*)) **character-database** **character-decompositions** **character-long-decompositions**))
               (defglobal **character-database** ,character-database)
+              (defglobal **character-decompositions** ,decompositions)
+              (defglobal **character-long-decompositions** ,long-decompositions)
               (defun !character-database-cold-init ()
                 (setf **character-database** ,character-database))
               ,(with-open-file (stream (file "ucd-names" "lisp-expr")
@@ -602,3 +607,39 @@ character exists."
   (and (typep weight 'fixnum)
        (>= weight 0) (< weight radix) (< weight 36)
        (code-char (if (< weight 10) (+ 48 weight) (+ 55 weight)))))
+\f
+(defun char-decomposition-info (char)
+  "Return the element of **CHARACTER-DATABASE** found at index
+6 + 8 * (UCD-VALUE-0 CHAR).  DECOMPOSE-CHAR compares this value with 0
+to decide whether CHAR has a recorded decomposition."
+  ;; NOTE(review): the stride of 8 and offset of 6 presumably pick one
+  ;; field out of fixed-size database records -- confirm against the
+  ;; generator of ucd.dat.
+  (let ((slot (+ (* (ucd-value-0 char) 8) 6)))
+    (aref **character-database** slot)))
+
+(defun char-decomposition (char)
+  "Look up CHAR in **CHARACTER-DECOMPOSITIONS** and return its decomposition
+as a string.  When the recorded length is 1 the record itself carries the
+single replacement code point; otherwise the record carries an index into
+**CHARACTER-LONG-DECOMPOSITIONS**, from which that many code points are read."
+  (let* ((code (char-code char))
+         (table **character-decompositions**)
+         (long-table **character-long-decompositions**)
+         ;; TABLE[code >> 8] selects a 1024-byte page (256 records of 4
+         ;; bytes each); pages are addressed relative to offset #x1100 and
+         ;; the low 8 bits of CODE select the record within the page.
+         (page (aref table (ash code -8)))
+         (base (+ #x1100 (ash page 10) (ash (logand code #xff) 2)))
+         (b0 (aref table base))
+         (b1 (aref table (+ base 1)))
+         (b2 (aref table (+ base 2)))
+         (b3 (aref table (+ base 3)))
+         ;; Record layout, most significant bit first:
+         ;;   11 bits of length (all of B0, then the top 3 bits of B1),
+         ;;   21 bits of entry  (low 5 bits of B1, then B2, then B3).
+         (len (logior (ash b0 3) (ash b1 -5)))
+         (entry (logior (ash (logand b1 #x1f) 16) (ash b2 8) b3)))
+    (cond ((= len 1)
+           ;; ENTRY is the code point of the one-character decomposition.
+           (string (code-char entry)))
+          (t
+           (let ((result (make-string len)))
+             ;; Each long-decomposition element occupies 4 bytes starting
+             ;; at 4 * ENTRY; only the last three bytes are combined (most
+             ;; significant first) into the code point.
+             (loop for i from 0 below len
+                   for pos from (* 4 entry) by 4
+                   do (setf (char result i)
+                            (code-char
+                             (logior (ash (aref long-table (+ pos 1)) 16)
+                                     (ash (aref long-table (+ pos 2)) 8)
+                                     (aref long-table (+ pos 3))))))
+             result)))))
+
+(defun decompose-char (char)
+  "Return the decomposition of CHAR as a string.  When
+CHAR-DECOMPOSITION-INFO reports 0 for CHAR the result is just CHAR as a
+one-character string; otherwise it is whatever CHAR-DECOMPOSITION returns."
+  (cond ((zerop (char-decomposition-info char))
+         (string char))
+        (t
+         (char-decomposition char))))