0.9.10.38:
authorJuho Snellman <jsnell@iki.fi>
Thu, 16 Mar 2006 03:24:13 +0000 (03:24 +0000)
committerJuho Snellman <jsnell@iki.fi>
Thu, 16 Mar 2006 03:24:13 +0000 (03:24 +0000)
        Add #\Uxxxx and #\Uxxxxxxxx read-syntax for characters. Make all
        characters readably printable even on non-Unicode streams. Patch by
        Robert Macomber (sbcl-devel "Unicode character names", 2006-03-06).

        * Also add a test.

src/code/print.lisp
src/code/target-char.lisp
tests/print.impure.lisp
version.lisp-expr

index 9d2ea8e..c17846b 100644 (file)
 ;;; the character name or the character in the #\char format.
 (defun output-character (char stream)
   (if (or *print-escape* *print-readably*)
-      (let ((graphicp (graphic-char-p char))
+      (let ((graphicp (and (graphic-char-p char)
+                           (standard-char-p char)))
             (name (char-name char)))
         (write-string "#\\" stream)
         (if (and name (not graphicp))
index 680afcc..18ea349 100644 (file)
         (let ((h-code (cdr (binary-search char-code
                                           (car *unicode-character-name-database*)
                                           :key #'car))))
-          (when h-code
-            (huffman-decode h-code *unicode-character-name-huffman-tree*))))))
+          (cond
+            (h-code
+             (huffman-decode h-code *unicode-character-name-huffman-tree*))
+            ((< char-code #x10000)
+             (format nil "U~4,'0X" char-code))
+            (t
+             (format nil "U~8,'0X" char-code)))))))
 
 (defun name-char (name)
   #!+sb-doc
           (let ((char-code
                  (car (binary-search encoding
                                      (cdr *unicode-character-name-database*)
-                                     :key #'cdr))))
-            (when char-code
-              (code-char char-code)))))))
+                                     :key #'cdr)))
+                (name-length (length name)))
+            (cond
+              (char-code
+               (code-char char-code))
+              ((and (or (= name-length 9)
+                        (= name-length 5))
+                    (char-equal (char name 0) #\U)
+                    (loop for i from 1 below name-length
+                          always (digit-char-p (char name i) 16)))
+               (code-char (parse-integer name :start 1 :radix 16)))
+              (t
+               nil)))))))
 \f
 ;;;; predicates
 
index c6b23e9..5aca94f 100644 (file)
                         (prin1 table))))
                   print-not-readable)))
 
+;; Test that we can print characters readably regardless of the external format
+;; of the stream.
+
+(defun test-readable-character (character external-format)
+  (let ((file "print.impure.tmp"))
+    (unwind-protect
+         (progn
+           (with-open-file (stream file
+                                   :direction :output
+                                   :external-format external-format
+                                   :if-exists :supersede)
+             (write character :stream stream :readably t))
+           (with-open-file (stream file
+                                   :direction :input
+                                   :external-format external-format
+                                   :if-does-not-exist :error)
+             (assert (char= (read stream) character))))
+      (ignore-errors
+        (delete-file file)))))
+
+#+sb-unicode
+(with-test (:name (:print-readable :character :utf-8))
+  (test-readable-character (code-char #xfffe) :utf-8))
+
+#+sb-unicode
+(with-test (:name (:print-readable :character :iso-8859-1))
+  (test-readable-character (code-char #xfffe) :iso-8859-1))
+
 ;;; success
index 7bdcb65..a7bca37 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.10.37"
+"0.9.10.38"