1 ;;;; This file is for testing external-format functionality, using
2 ;;;; test machinery which might have side effects (e.g. executing
3 ;;;; DEFUN, writing files). Note that the tests here reach into
4 ;;;; unexported functionality, and should not be used as a guide for
7 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; While most of SBCL is derived from the CMU CL system, the test
11 ;;;; files (like this one) were written from scratch after the fork
14 ;;;; This software is in the public domain and is provided with
15 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
16 ;;;; more information.
18 (defmacro do-external-formats ((xf &optional result) &body body)
20 `(loop for ,nxf being the hash-values of sb-impl::*external-formats*
21 do (let ((,xf (first (sb-impl::ef-names ,nxf))))
24 (defvar *test-path* "external-format-test.tmp")
26 (do-external-formats (xf)
27 (with-open-file (s #-win32 "/dev/null" #+win32 "nul" :direction :input :external-format xf)
28 (assert (eq (read-char s nil s) s))))
30 ;;; Test standard character read-write equivalency over all external formats.
31 (let ((standard-characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'(),_-./:;?+<=>#%&*@[\\]{|}`^~"))
32 (do-external-formats (xf)
33 (with-open-file (s *test-path* :direction :output
34 :if-exists :supersede :external-format xf)
35 (loop for character across standard-characters
36 do (write-char character s)))
37 (with-open-file (s *test-path* :direction :input
39 (loop for character across standard-characters
40 do (let ((got (read-char s)))
41 (unless (eql character got)
42 (error "wanted ~S, got ~S" character got)))))))
44 (delete-file *test-path*)
47 (test-util:report-test-status)
48 (sb-ext:quit :unix-status 104))
50 ;;; Test UTF-8 writing and reading of 1, 2, 3 and 4 octet characters with
51 ;;; all possible offsets. Tests for buffer edge bugs. fd-stream buffers are
54 (let ((character (code-char (elt '(1 #x81 #x801 #x10001) width-1))))
55 (dotimes (offset (+ width-1 1))
56 (with-open-file (s *test-path* :direction :output
57 :if-exists :supersede :external-format :utf-8)
60 (dotimes (n (+ 4 sb-impl::+bytes-per-buffer+))
61 (write-char character s)))
62 (with-open-file (s *test-path* :direction :input
63 :external-format :utf-8)
65 (assert (eql (read-char s) #\a)))
66 (dotimes (n (+ 4 sb-impl::+bytes-per-buffer+))
67 (let ((got (read-char s)))
68 (unless (eql got character)
69 (error "wanted ~S, got ~S (~S)" character got n))))
70 (assert (eql (read-char s nil s) s))))))
72 ;;; Test character decode restarts.
73 (with-open-file (s *test-path* :direction :output
74 :if-exists :supersede :element-type '(unsigned-byte 8))
79 (with-open-file (s *test-path* :direction :input
80 :external-format :utf-8)
83 ((sb-int:character-decoding-error #'(lambda (decoding-error)
84 (declare (ignore decoding-error))
85 (when (> (incf count) 1)
86 (error "too many errors"))
88 'sb-int:attempt-resync))))
89 (assert (equal (read-line s nil s) "ABC"))
90 (assert (equal (read-line s nil s) s)))))
91 (with-open-file (s *test-path* :direction :input
92 :external-format :utf-8)
95 ((sb-int:character-decoding-error #'(lambda (decoding-error)
96 (declare (ignore decoding-error))
97 (when (> (incf count) 1)
98 (error "too many errors"))
100 'sb-int:force-end-of-file))))
101 (assert (equal (read-line s nil s) "AB"))
103 (assert (equal (read-line s nil s) s)))))
105 ;;; And again with more data to account for buffering (this was briefly)
106 ;;; broken in early 0.9.6.
107 (with-open-file (s *test-path* :direction :output
108 :if-exists :supersede :element-type '(unsigned-byte 8))
109 (let ((a (make-array 50
110 :element-type '(unsigned-byte 64)
111 :initial-contents (map 'list #'char-code
112 "1234567890123456789012345678901234567890123456789."))))
113 (setf (aref a 49) (char-code #\Newline))
115 (write-sequence a s))
118 (write-sequence a s))))
119 (with-test (:name (:character-decode-large :attempt-resync))
120 (with-open-file (s *test-path* :direction :input
121 :external-format :utf-8)
124 ((sb-int:character-decoding-error (lambda (decoding-error)
125 (declare (ignore decoding-error))
126 (when (> (incf count) 1)
127 (error "too many errors"))
129 'sb-int:attempt-resync)))
130 ;; The failure mode is an infinite loop, add a timeout to
132 (sb-ext:timeout (lambda () (error "Timeout"))))
133 (sb-ext:with-timeout 5
135 (assert (equal (read-line s nil s)
136 "1234567890123456789012345678901234567890123456789"))))))))
138 (with-test (:name (:character-decode-large :force-end-of-file))
139 (with-open-file (s *test-path* :direction :input
140 :external-format :utf-8)
143 ((sb-int:character-decoding-error (lambda (decoding-error)
144 (declare (ignore decoding-error))
145 (when (> (incf count) 1)
146 (error "too many errors"))
148 'sb-int:force-end-of-file)))
149 ;; The failure mode is an infinite loop, add a timeout to detetct it.
150 (sb-ext:timeout (lambda () (error "Timeout"))))
151 (sb-ext:with-timeout 5
153 (assert (equal (read-line s nil s)
154 "1234567890123456789012345678901234567890123456789")))
156 (assert (equal (read-line s nil s) s)))))))
158 ;;; Test character encode restarts.
159 (with-open-file (s *test-path* :direction :output
160 :if-exists :supersede :external-format :latin-1)
162 ((sb-int:character-encoding-error #'(lambda (encoding-error)
163 (declare (ignore encoding-error))
165 'sb-impl::output-nothing))))
168 (write-char (code-char 322) s)
170 (with-open-file (s *test-path* :direction :input
171 :external-format :latin-1)
172 (assert (equal (read-line s nil s) "ABC"))
173 (assert (equal (read-line s nil s) s)))
175 (with-open-file (s *test-path* :direction :output
176 :if-exists :supersede :external-format :latin-1)
178 ((sb-int:character-encoding-error #'(lambda (encoding-error)
179 (declare (ignore encoding-error))
181 'sb-impl::output-nothing))))
182 (let ((string (make-array 4 :element-type 'character
183 :initial-contents `(#\A #\B ,(code-char 322)
185 (write-string string s))))
186 (with-open-file (s *test-path* :direction :input
187 :external-format :latin-1)
188 (assert (equal (read-line s nil s) "ABC"))
189 (assert (equal (read-line s nil s) s)))
191 ;;; Test skipping character-decode-errors in comments.
192 (let ((s (open "external-format-test.lisp" :direction :output
193 :if-exists :supersede :external-format :latin-1)))
196 (write-string ";;; ABCD" s)
197 (write-char (code-char 233) s)
200 (compile-file "external-format-test.lisp" :external-format :utf-8))
202 (let ((p (probe-file (compile-file-pathname "external-format-test.lisp"))))
207 ;;;; KOI8-R external format
208 (with-open-file (s *test-path* :direction :output
209 :if-exists :supersede :external-format :koi8-r)
210 (write-char (code-char #xB0) s)
214 (write-char (code-char #xBAAD) s)
216 (sb-int:character-encoding-error ()
219 (with-open-file (s *test-path* :direction :input
220 :element-type '(unsigned-byte 8))
221 (let ((byte (read-byte s)))
222 (assert (= (eval byte) #x9C))))
223 (with-open-file (s *test-path* :direction :input
224 :external-format :koi8-r)
225 (let ((char (read-char s)))
226 (assert (= (char-code (eval char)) #xB0))))
227 (delete-file *test-path*)
229 (let* ((koi8-r-codes (coerce '(240 210 201 215 197 212 33) '(vector (unsigned-byte 8))))
230 (uni-codes #(1055 1088 1080 1074 1077 1090 33))
232 (string (octets-to-string koi8-r-codes :external-format :koi8-r))
233 (uni-decoded (map 'vector #'char-code string)))
234 (assert (equalp (map 'vector #'char-code (octets-to-string koi8-r-codes :external-format :koi8-r))
236 (assert (equalp (string-to-octets (map 'string #'code-char uni-codes) :external-format :koi8-r)
239 ;;; tests of FILE-STRING-LENGTH
240 (let ((standard-characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'(),_-./:;?+<=>#%&*@[\\]{|}`^~"))
241 (do-external-formats (xf)
242 (with-open-file (s *test-path* :direction :output
244 (loop for x across standard-characters
245 for position = (file-position s)
246 for char-length = (file-string-length s x)
248 do (assert (= (file-position s) (+ position char-length))))
249 (let ((position (file-position s))
250 (string-length (file-string-length s standard-characters)))
251 (write-string standard-characters s)
252 (assert (= (file-position s) (+ position string-length)))))
253 (delete-file *test-path*)))
255 (let ((char-codes '(0 1 255 256 511 512 1023 1024 2047 2048 4095 4096
256 8191 8192 16383 16384 32767 32768 65535 65536 131071
257 131072 262143 262144)))
258 (with-open-file (s *test-path* :direction :output
259 :external-format :utf-8)
260 (dolist (code char-codes)
261 (let* ((char (code-char code))
262 (position (file-position s))
263 (char-length (file-string-length s char)))
265 (assert (= (file-position s) (+ position char-length)))))
266 (let* ((string (map 'string #'code-char char-codes))
267 (position (file-position s))
268 (string-length (file-string-length s string)))
269 (write-string string s)
270 (assert (= (file-position s) (+ position string-length))))))
273 ;;; See sbcl-devel "Subject: Bug in FILE-POSITION on UTF-8-encoded files"
274 ;;; by Lutz Euler on 2006-03-05 for more details.
275 (with-test (:name (:file-position :utf-8))
276 (let ((path *test-path*))
277 (with-open-file (s path
279 :if-exists :supersede
280 :element-type '(unsigned-byte 8))
281 ;; Write #\*, encoded in UTF-8, to the file.
283 ;; Append #\adiaeresis, encoded in UTF-8, to the file.
284 (write-sequence '(195 164) s))
285 (with-open-file (s path :external-format :utf-8)
287 (let ((pos (file-position s))
288 (char (read-char s)))
289 (format t "read character with code ~a successfully from file position ~a~%"
290 (char-code char) pos)
291 (file-position s pos)
292 (format t "set file position back to ~a, trying to read-char again~%" pos)
293 (let ((new-char (read-char s)))
294 (assert (char= char new-char)))))
296 (delete-file *test-path*)
298 ;;; We used to call STREAM-EXTERNAL-FORMAT on the stream in the error
299 ;;; when printing a coding error, but that didn't work if the stream
300 ;;; was closed by the time the error was printed. See sbcl-devel
301 ;;; "Subject: Printing coding errors for closed streams" by Zach Beane
302 ;;; on 2008-10-16 for more info.
303 (with-test (:name (:character-coding-error-stream-external-format))
304 (flet ((first-file-character ()
305 (with-open-file (stream *test-path* :external-format :utf-8)
306 (read-char stream))))
307 (with-open-file (stream *test-path*
309 :if-exists :supersede
310 :element-type '(unsigned-byte 8))
311 (write-byte 192 stream))
312 (princ-to-string (nth-value 1 (ignore-errors (first-file-character))))))
313 (delete-file *test-path*)
315 ;;; External format support in SB-ALIEN
317 (with-test (:name (:sb-alien :vanilla))
318 (define-alien-routine strdup c-string (str c-string))
319 (assert (equal "foo" (strdup "foo"))))
321 (with-test (:name (:sb-alien :utf-8 :utf-8))
322 (define-alien-routine strdup (c-string :external-format :utf-8)
323 (str (c-string :external-format :utf-8)))
324 (assert (equal "foo" (strdup "foo"))))
326 (with-test (:name (:sb-alien :latin-1 :utf-8))
327 (define-alien-routine strdup (c-string :external-format :latin-1)
328 (str (c-string :external-format :utf-8)))
329 (assert (= (length (strdup (string (code-char 246))))
332 (with-test (:name (:sb-alien :utf-8 :latin-1))
333 (define-alien-routine strdup (c-string :external-format :utf-8)
334 (str (c-string :external-format :latin-1)))
335 (assert (equal (string (code-char 228))
336 (strdup (concatenate 'string
337 (list (code-char 195))
338 (list (code-char 164)))))))
340 (with-test (:name (:sb-alien :ebcdic :ebcdic))
341 (define-alien-routine strdup (c-string :external-format :ebcdic-us)
342 (str (c-string :external-format :ebcdic-us)))
343 (assert (equal "foo" (strdup "foo"))))
345 (with-test (:name (:sb-alien :latin-1 :ebcdic))
346 (define-alien-routine strdup (c-string :external-format :latin-1)
347 (str (c-string :external-format :ebcdic-us)))
348 (assert (not (equal "foo" (strdup "foo")))))
350 (with-test (:name (:sb-alien :simple-base-string))
351 (define-alien-routine strdup (c-string :external-format :ebcdic-us
352 :element-type base-char)
353 (str (c-string :external-format :ebcdic-us)))
354 (assert (typep (strdup "foo") 'simple-base-string)))
356 (with-test (:name (:input-replacement :at-end-of-file))
358 (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8))
360 (handler-bind ((sb-int:character-decoding-error
362 (invoke-restart 'sb-impl::input-replacement #\?))))
363 (with-open-file (s *test-path* :external-format :utf-8)
365 ((char= (read-char s) #\?)
366 (assert (or (= i (char-code #\?)) (> i 127))))
367 (t (assert (and (not (= i (char-code #\?))) (< i 128)))))))))
369 (with-test (:name (:unibyte-invalid-codepoints :cp857))
371 (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8))
373 (with-open-file (s *test-path* :external-format :cp857)
374 (handler-case (read-char s)
375 (error () (assert (member i '(#xd5 #xe7 #xf2))))
376 (:no-error (char) (assert (not (member i '(#xd5 #xe7 #xf2)))))))))