From: Christophe Rhodes Date: Wed, 11 Nov 2009 17:21:05 +0000 (+0000) Subject: 1.0.32.20: bug fixes in unibyte external formats X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=314ebe7a98015e25eb6dc750962b0726feac1b2c;p=sbcl.git 1.0.32.20: bug fixes in unibyte external formats * handling undefined codepoints: There's a difference between "unassigned codepoint", represented as ( nil), and "codepoint mapping to character with that codepoint", represented by the absence of an entry in the exceptions list. Simply testing for trueness of (cadr (assoc exceptions)) isn't good enough; test for trueness of (cdr (assoc ...)) and use the car if so. * undefined codepoints as the first exception: If the first exception to code<->byte is an undefined character, as for example in iso-8859-8, the lowest-code exception was computed wrongly, leading to incorrect encoding. --- diff --git a/NEWS b/NEWS index 4ebd920..55377ca 100644 --- a/NEWS +++ b/NEWS @@ -30,9 +30,10 @@ changes relative to sbcl-1.0.32: ** fix the bug underlying the expected failure in the FORCE-END-OF-FILE restart on fd-stream decoding errors. ** fix a bug in the ATTEMPT-RESYNC fd-stream decoding restart when the - error is near the end of file + error is near the end of file. ** fix a double-error case in unibyte octet conversions, when the first use of USE-VALUE is ignored. + ** fix bugs in handling of undefined code points in unibyte encodings. * enhancement: SB-INTROSPECT:ALLOCATION-INFORMATION also reports if the object is allocated in a boxed region of dynamic space. * bug fix: uses of slot accessors on specialized method parameters within diff --git a/src/code/octets.lisp b/src/code/octets.lisp index d8c0376..bb63673 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -183,9 +183,8 @@ one-past-the-end" finally (return elements))) ;; Find the smallest character code such that the corresponding ;; byte is != to the code. - (lowest-non-equivalent-code (position-if-not #'(lambda (pair) - (apply #'= pair)) - pairs)) + (lowest-non-equivalent-code + (caar (sort (copy-seq exceptions) #'< :key #'car))) ;; Sort them for our lookup table. (sorted-pairs (sort (subseq pairs lowest-non-equivalent-code) #'< :key #'car)) @@ -199,9 +198,9 @@ one-past-the-end" ,(make-array 256 :element-type t #+nil 'char-code :initial-contents (loop for byte below 256 collect - (let ((exception (cadr (assoc byte exceptions)))) + (let ((exception (cdr (assoc byte exceptions)))) (if exception - exception + (car exception) byte))))) (code-to-byte-table ,(make-array (length sorted-lookup-table) diff --git a/tests/external-format.impure.lisp b/tests/external-format.impure.lisp index 0845e6b..ee8aed1 100644 --- a/tests/external-format.impure.lisp +++ b/tests/external-format.impure.lisp @@ -366,4 +366,13 @@ (assert (or (= i (char-code #\?)) (> i 127)))) (t (assert (and (not (= i (char-code #\?))) (< i 128))))))))) +(with-test (:name (:unibyte-invalid-codepoints :cp857)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format :cp857) + (handler-case (read-char s) + (error () (assert (member i '(#xd5 #xe7 #xf2)))) + (:no-error (char) (assert (not (member i '(#xd5 #xe7 #xf2))))))))) + ;;;; success diff --git a/version.lisp-expr b/version.lisp-expr index 7a8e631..6ac0cbb 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.32.19" +"1.0.32.20"