From a18894dbea4495b885e1747babf4e2593dfb705e Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Wed, 11 Nov 2009 17:34:09 +0000 Subject: [PATCH] 1.0.32.21: compress most unibyte-external-format definitions All the unibyte-mapper-based external-formats had huge amounts of cut-and-pasted code, differing only in names of functions. This is, oddly enough, a clear case for abstracting away the repeated code into a macro. In the process, convert them to the multibyte apparatus, which has support for the nice restarts, and remove the too-simple unibyte DEFINE-EXTERNAL-FORMAT (and EXTERNAL-FORMAT-DECODING-ERROR) which are now unused. Include a far-from-comprehensive set of tests, which are mostly for iso-8859-x formats --- OPTIMIZATIONS | 20 + src/code/external-formats/enc-basic.lisp | 12 +- src/code/external-formats/enc-cyr.lisp | 132 +------ src/code/external-formats/enc-dos.lisp | 616 +---------------------------- src/code/external-formats/enc-ebcdic.lisp | 17 +- src/code/external-formats/enc-iso.lisp | 585 ++------------------------- src/code/external-formats/enc-win.lisp | 405 +------------------ src/code/fd-stream.lisp | 258 +++--------- tests/external-format.impure.lisp | 473 ++++++++++++++++++++++ version.lisp-expr | 2 +- 10 files changed, 628 insertions(+), 1892 deletions(-) diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS index 581291c..81ab61d 100644 --- a/OPTIMIZATIONS +++ b/OPTIMIZATIONS @@ -405,3 +405,23 @@ can be transformed into which allows compiler-macro-expansion for FOO. (Only constant arguments can be moved inside the new lambda -- otherwise evaluation order is altered.) + +-------------------------------------------------------------------------------- +#41 + +The unibyte external formats are written in a very generic way. Three +optimizations immediately applicable that could be automatically +generated: + +(a) if the external format merely permutes the first 256 characters, a + constant-time lookup (rather than a binary search) could be + performed on output. This applies at least to EBCDIC, which + currently has a hand-rolled mapper instead. + +(b) if there are no undefined characters corresponding to the 256 + codes, then no error checking need be done on input. + +(c) if there is a way to use particular bits of the exceptional + characters, constant-time output (rather than binary search) can + still be achieved as used to be done by the latin-9 external + format before 1.0.31. diff --git a/src/code/external-formats/enc-basic.lisp b/src/code/external-formats/enc-basic.lisp index dee5211..9ae5271 100644 --- a/src/code/external-formats/enc-basic.lisp +++ b/src/code/external-formats/enc-basic.lisp @@ -59,13 +59,14 @@ finally (return (coerce string 'simple-string)))))))) (instantiate-octets-definition define-ascii->string) -(define-external-format (:ascii :us-ascii :ansi_x3.4-1968 - :iso-646 :iso-646-us :|646|) - 1 t +(define-unibyte-external-format :ascii + (:us-ascii :ansi_x3.4-1968 :iso-646 :iso-646-us :|646|) (if (>= bits 128) (external-format-encoding-error stream bits) (setf (sap-ref-8 sap tail) bits)) - (code-char byte) + (if (>= byte 128) + (return-from decode-break-reason 1) + (code-char byte)) ascii->string-aref string->ascii) @@ -101,8 +102,7 @@ ;;; Multiple names for the :ISO{,-}8859-* families are needed because on ;;; FreeBSD (and maybe other BSD systems), nl_langinfo("LATIN-1") will ;;; return "ISO8859-1" instead of "ISO-8859-1". -(define-external-format (:latin-1 :latin1 :iso-8859-1 :iso8859-1) - 1 t +(define-unibyte-external-format :latin-1 (:latin1 :iso-8859-1 :iso8859-1) (if (>= bits 256) (external-format-encoding-error stream bits) (setf (sap-ref-8 sap tail) bits)) diff --git a/src/code/external-formats/enc-cyr.lisp b/src/code/external-formats/enc-cyr.lisp index 7883f06..96dfc58 100644 --- a/src/code/external-formats/enc-cyr.lisp +++ b/src/code/external-formats/enc-cyr.lisp @@ -1,6 +1,6 @@ (in-package "SB!IMPL") -(define-unibyte-mapper koi8-r->code-mapper code->koi8-r-mapper +(define-unibyte-mapping-external-format :koi8-r (:|koi8-r|) (#x80 #x2500) ; BOX DRAWINGS LIGHT HORIZONTAL (#x81 #x2502) ; BOX DRAWINGS LIGHT VERTICAL (#x82 #x250C) ; BOX DRAWINGS LIGHT DOWN AND RIGHT @@ -131,49 +131,7 @@ (#xFF #x042A) ; CYRILLIC CAPITAL LETTER HARD SIGN ) -(declaim (inline get-koi8-r-bytes)) -(defun get-koi8-r-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->koi8-r-mapper :koi8-r string pos)) - -(defun string->koi8-r (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-koi8-r-bytes null-padding))) - -(defmacro define-koi8-r->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'koi8-r->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'koi8-r->code-mapper))))) - -(instantiate-octets-definition define-koi8-r->string*) - -(defmacro define-koi8-r->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'koi8-r->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'koi8-r->code-mapper))) - -(instantiate-octets-definition define-koi8-r->string) - -(define-external-format (:koi8-r :|koi8-r|) - 1 t - (let ((koi8-r-byte (code->koi8-r-mapper bits))) - (if koi8-r-byte - (setf (sap-ref-8 sap tail) koi8-r-byte) - (external-format-encoding-error stream bits))) - (let ((code (koi8-r->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - koi8-r->string-aref - string->koi8-r) ;; TODO -- error check - -(define-unibyte-mapper koi8-u->code-mapper code->koi8-u-mapper +(define-unibyte-mapping-external-format :koi8-u (:|koi8-u|) (#x80 #x2500) ; BOX DRAWINGS LIGHT HORIZONTAL (#x81 #x2502) ; BOX DRAWINGS LIGHT VERTICAL (#x82 #x250C) ; BOX DRAWINGS LIGHT DOWN AND RIGHT @@ -304,49 +262,7 @@ (#xFF #x042A) ; CYRILLIC CAPITAL LETTER HARD SIGN ) -(declaim (inline get-koi8-u-bytes)) -(defun get-koi8-u-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->koi8-u-mapper :koi8-u string pos)) - -(defun string->koi8-u (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-koi8-u-bytes null-padding))) - -(defmacro define-koi8-u->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'koi8-u->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'koi8-u->code-mapper))))) - -(instantiate-octets-definition define-koi8-u->string*) - -(defmacro define-koi8-u->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'koi8-u->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'koi8-u->code-mapper))) - -(instantiate-octets-definition define-koi8-u->string) - -(define-external-format (:koi8-u :|koi8-u|) - 1 t - (let ((koi8-u-byte (code->koi8-u-mapper bits))) - (if koi8-u-byte - (setf (sap-ref-8 sap tail) koi8-u-byte) - (external-format-encoding-error stream bits))) - (let ((code (koi8-u->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - koi8-u->string-aref - string->koi8-u) ;; TODO -- error check - -(define-unibyte-mapper x-mac-cyrillic->code-mapper code->x-mac-cyrillic-mapper +(define-unibyte-mapping-external-format :x-mac-cyrillic (:|x-mac-cyrillic|) (#x80 #x0410) ; CYRILLIC CAPITAL LETTER A (#x81 #x0411) ; CYRILLIC CAPITAL LETTER BE (#x82 #x0412) ; CYRILLIC CAPITAL LETTER VE @@ -471,45 +387,3 @@ (#xFE #x044E) ; CYRILLIC SMALL LETTER YU (#xFF #x00A4) ; CURRENCY SIGN ) - -(declaim (inline get-x-mac-cyrillic-bytes)) -(defun get-x-mac-cyrillic-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->x-mac-cyrillic-mapper :x-mac-cyrillic string pos)) - -(defun string->x-mac-cyrillic (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-x-mac-cyrillic-bytes null-padding))) - -(defmacro define-x-mac-cyrillic->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'x-mac-cyrillic->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'x-mac-cyrillic->code-mapper))))) - -(instantiate-octets-definition define-x-mac-cyrillic->string*) - -(defmacro define-x-mac-cyrillic->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'x-mac-cyrillic->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'x-mac-cyrillic->code-mapper))) - -(instantiate-octets-definition define-x-mac-cyrillic->string) - -(define-external-format (:x-mac-cyrillic :|x-mac-cyrillic|) - 1 t - (let ((x-mac-cyrillic-byte (code->x-mac-cyrillic-mapper bits))) - (if x-mac-cyrillic-byte - (setf (sap-ref-8 sap tail) x-mac-cyrillic-byte) - (external-format-encoding-error stream bits))) - (let ((code (x-mac-cyrillic->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - x-mac-cyrillic->string-aref - string->x-mac-cyrillic) ;; TODO -- error check diff --git a/src/code/external-formats/enc-dos.lisp b/src/code/external-formats/enc-dos.lisp index 0b8ea99..224b921 100644 --- a/src/code/external-formats/enc-dos.lisp +++ b/src/code/external-formats/enc-dos.lisp @@ -1,6 +1,6 @@ (in-package "SB!IMPL") -(define-unibyte-mapper cp437->code-mapper code->cp437-mapper +(define-unibyte-mapping-external-format :cp437 (:|cp437|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -131,49 +131,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp437-bytes)) -(defun get-cp437-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp437-mapper :cp437 string pos)) - -(defun string->cp437 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp437-bytes null-padding))) - -(defmacro define-cp437->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp437->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp437->code-mapper))))) - -(instantiate-octets-definition define-cp437->string*) - -(defmacro define-cp437->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp437->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp437->code-mapper))) - -(instantiate-octets-definition define-cp437->string) - -(define-external-format (:cp437 :|cp437|) - 1 t - (let ((cp437-byte (code->cp437-mapper bits))) - (if cp437-byte - (setf (sap-ref-8 sap tail) cp437-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp437->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp437->string-aref - string->cp437) ;; TODO -- error check - -(define-unibyte-mapper cp850->code-mapper code->cp850-mapper +(define-unibyte-mapping-external-format :cp850 (:|cp850|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -304,49 +262,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp850-bytes)) -(defun get-cp850-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp850-mapper :cp850 string pos)) - -(defun string->cp850 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp850-bytes null-padding))) - -(defmacro define-cp850->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp850->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp850->code-mapper))))) - -(instantiate-octets-definition define-cp850->string*) - -(defmacro define-cp850->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp850->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp850->code-mapper))) - -(instantiate-octets-definition define-cp850->string) - -(define-external-format (:cp850 :|cp850|) - 1 t - (let ((cp850-byte (code->cp850-mapper bits))) - (if cp850-byte - (setf (sap-ref-8 sap tail) cp850-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp850->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp850->string-aref - string->cp850) ;; TODO -- error check - -(define-unibyte-mapper cp852->code-mapper code->cp852-mapper +(define-unibyte-mapping-external-format :cp852 (:|cp852|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -477,49 +393,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp852-bytes)) -(defun get-cp852-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp852-mapper :cp852 string pos)) - -(defun string->cp852 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp852-bytes null-padding))) - -(defmacro define-cp852->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp852->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp852->code-mapper))))) - -(instantiate-octets-definition define-cp852->string*) - -(defmacro define-cp852->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp852->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp852->code-mapper))) - -(instantiate-octets-definition define-cp852->string) - -(define-external-format (:cp852 :|cp852|) - 1 t - (let ((cp852-byte (code->cp852-mapper bits))) - (if cp852-byte - (setf (sap-ref-8 sap tail) cp852-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp852->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp852->string-aref - string->cp852) ;; TODO -- error check - -(define-unibyte-mapper cp855->code-mapper code->cp855-mapper +(define-unibyte-mapping-external-format :cp855 (:|cp855|) (#x80 #x0452) ; CYRILLIC SMALL LETTER DJE (#x81 #x0402) ; CYRILLIC CAPITAL LETTER DJE (#x82 #x0453) ; CYRILLIC SMALL LETTER GJE @@ -650,49 +524,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp855-bytes)) -(defun get-cp855-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp855-mapper :cp855 string pos)) - -(defun string->cp855 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp855-bytes null-padding))) - -(defmacro define-cp855->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp855->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp855->code-mapper))))) - -(instantiate-octets-definition define-cp855->string*) - -(defmacro define-cp855->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp855->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp855->code-mapper))) - -(instantiate-octets-definition define-cp855->string) - -(define-external-format (:cp855 :|cp855|) - 1 t - (let ((cp855-byte (code->cp855-mapper bits))) - (if cp855-byte - (setf (sap-ref-8 sap tail) cp855-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp855->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp855->string-aref - string->cp855) ;; TODO -- error check - -(define-unibyte-mapper cp857->code-mapper code->cp857-mapper +(define-unibyte-mapping-external-format :cp857 (:|cp857|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -822,49 +654,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp857-bytes)) -(defun get-cp857-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp857-mapper :cp857 string pos)) - -(defun string->cp857 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp857-bytes null-padding))) - -(defmacro define-cp857->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp857->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp857->code-mapper))))) - -(instantiate-octets-definition define-cp857->string*) - -(defmacro define-cp857->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp857->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp857->code-mapper))) - -(instantiate-octets-definition define-cp857->string) - -(define-external-format (:cp857 :|cp857|) - 1 t - (let ((cp857-byte (code->cp857-mapper bits))) - (if cp857-byte - (setf (sap-ref-8 sap tail) cp857-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp857->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp857->string-aref - string->cp857) ;; TODO -- error check - -(define-unibyte-mapper cp860->code-mapper code->cp860-mapper +(define-unibyte-mapping-external-format :cp860 (:|cp860|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -995,49 +785,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp860-bytes)) -(defun get-cp860-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp860-mapper :cp860 string pos)) - -(defun string->cp860 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp860-bytes null-padding))) - -(defmacro define-cp860->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp860->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp860->code-mapper))))) - -(instantiate-octets-definition define-cp860->string*) - -(defmacro define-cp860->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp860->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp860->code-mapper))) - -(instantiate-octets-definition define-cp860->string) - -(define-external-format (:cp860 :|cp860|) - 1 t - (let ((cp860-byte (code->cp860-mapper bits))) - (if cp860-byte - (setf (sap-ref-8 sap tail) cp860-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp860->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp860->string-aref - string->cp860) ;; TODO -- error check - -(define-unibyte-mapper cp861->code-mapper code->cp861-mapper +(define-unibyte-mapping-external-format :cp861 (:|cp861|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -1168,49 +916,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp861-bytes)) -(defun get-cp861-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp861-mapper :cp861 string pos)) - -(defun string->cp861 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp861-bytes null-padding))) - -(defmacro define-cp861->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp861->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp861->code-mapper))))) - -(instantiate-octets-definition define-cp861->string*) - -(defmacro define-cp861->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp861->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp861->code-mapper))) - -(instantiate-octets-definition define-cp861->string) - -(define-external-format (:cp861 :|cp861|) - 1 t - (let ((cp861-byte (code->cp861-mapper bits))) - (if cp861-byte - (setf (sap-ref-8 sap tail) cp861-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp861->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp861->string-aref - string->cp861) ;; TODO -- error check - -(define-unibyte-mapper cp862->code-mapper code->cp862-mapper +(define-unibyte-mapping-external-format :cp862 (:|cp862|) (#x80 #x05D0) ; HEBREW LETTER ALEF (#x81 #x05D1) ; HEBREW LETTER BET (#x82 #x05D2) ; HEBREW LETTER GIMEL @@ -1341,49 +1047,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp862-bytes)) -(defun get-cp862-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp862-mapper :cp862 string pos)) - -(defun string->cp862 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp862-bytes null-padding))) - -(defmacro define-cp862->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp862->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp862->code-mapper))))) - -(instantiate-octets-definition define-cp862->string*) - -(defmacro define-cp862->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp862->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp862->code-mapper))) - -(instantiate-octets-definition define-cp862->string) - -(define-external-format (:cp862 :|cp862|) - 1 t - (let ((cp862-byte (code->cp862-mapper bits))) - (if cp862-byte - (setf (sap-ref-8 sap tail) cp862-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp862->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp862->string-aref - string->cp862) ;; TODO -- error check - -(define-unibyte-mapper cp863->code-mapper code->cp863-mapper +(define-unibyte-mapping-external-format :cp863 (:|cp863|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -1514,49 +1178,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp863-bytes)) -(defun get-cp863-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp863-mapper :cp863 string pos)) - -(defun string->cp863 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp863-bytes null-padding))) - -(defmacro define-cp863->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp863->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp863->code-mapper))))) - -(instantiate-octets-definition define-cp863->string*) - -(defmacro define-cp863->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp863->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp863->code-mapper))) - -(instantiate-octets-definition define-cp863->string) - -(define-external-format (:cp863 :|cp863|) - 1 t - (let ((cp863-byte (code->cp863-mapper bits))) - (if cp863-byte - (setf (sap-ref-8 sap tail) cp863-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp863->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp863->string-aref - string->cp863) ;; TODO -- error check - -(define-unibyte-mapper cp864->code-mapper code->cp864-mapper +(define-unibyte-mapping-external-format :cp864 (:|cp864|) (#x80 #x00B0) ; DEGREE SIGN (#x81 #x00B7) ; MIDDLE DOT (#x82 #x2219) ; BULLET OPERATOR @@ -1684,49 +1306,7 @@ (#xFF nil) ) -(declaim (inline get-cp864-bytes)) -(defun get-cp864-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp864-mapper :cp864 string pos)) - -(defun string->cp864 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp864-bytes null-padding))) - -(defmacro define-cp864->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp864->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp864->code-mapper))))) - -(instantiate-octets-definition define-cp864->string*) - -(defmacro define-cp864->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp864->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp864->code-mapper))) - -(instantiate-octets-definition define-cp864->string) - -(define-external-format (:cp864 :|cp864|) - 1 t - (let ((cp864-byte (code->cp864-mapper bits))) - (if cp864-byte - (setf (sap-ref-8 sap tail) cp864-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp864->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp864->string-aref - string->cp864) ;; TODO -- error check - -(define-unibyte-mapper cp865->code-mapper code->cp865-mapper +(define-unibyte-mapping-external-format :cp865 (:|cp865|) (#x80 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA (#x81 #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS (#x82 #x00E9) ; LATIN SMALL LETTER E WITH ACUTE @@ -1857,49 +1437,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp865-bytes)) -(defun get-cp865-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp865-mapper :cp865 string pos)) - -(defun string->cp865 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp865-bytes null-padding))) - -(defmacro define-cp865->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp865->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp865->code-mapper))))) - -(instantiate-octets-definition define-cp865->string*) - -(defmacro define-cp865->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp865->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp865->code-mapper))) - -(instantiate-octets-definition define-cp865->string) - -(define-external-format (:cp865 :|cp865|) - 1 t - (let ((cp865-byte (code->cp865-mapper bits))) - (if cp865-byte - (setf (sap-ref-8 sap tail) cp865-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp865->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp865->string-aref - string->cp865) ;; TODO -- error check - -(define-unibyte-mapper cp866->code-mapper code->cp866-mapper +(define-unibyte-mapping-external-format :cp866 (:|cp866|) (#x80 #x0410) ; CYRILLIC CAPITAL LETTER A (#x81 #x0411) ; CYRILLIC CAPITAL LETTER BE (#x82 #x0412) ; CYRILLIC CAPITAL LETTER VE @@ -2030,49 +1568,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp866-bytes)) -(defun get-cp866-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp866-mapper :cp866 string pos)) - -(defun string->cp866 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp866-bytes null-padding))) - -(defmacro define-cp866->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp866->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp866->code-mapper))))) - -(instantiate-octets-definition define-cp866->string*) - -(defmacro define-cp866->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp866->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp866->code-mapper))) - -(instantiate-octets-definition define-cp866->string) - -(define-external-format (:cp866 :|cp866|) - 1 t - (let ((cp866-byte (code->cp866-mapper bits))) - (if cp866-byte - (setf (sap-ref-8 sap tail) cp866-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp866->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp866->string-aref - string->cp866) ;; TODO -- error check - -(define-unibyte-mapper cp869->code-mapper code->cp869-mapper +(define-unibyte-mapping-external-format :cp869 (:|cp869|) (#x80 nil) (#x81 nil) (#x82 nil) @@ -2203,49 +1699,7 @@ (#xFF #x00A0) ; NO-BREAK SPACE ) -(declaim (inline get-cp869-bytes)) -(defun get-cp869-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp869-mapper :cp869 string pos)) - -(defun string->cp869 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp869-bytes null-padding))) - -(defmacro define-cp869->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp869->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp869->code-mapper))))) - -(instantiate-octets-definition define-cp869->string*) - -(defmacro define-cp869->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp869->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp869->code-mapper))) - -(instantiate-octets-definition define-cp869->string) - -(define-external-format (:cp869 :|cp869|) - 1 t - (let ((cp869-byte (code->cp869-mapper bits))) - (if cp869-byte - (setf (sap-ref-8 sap tail) cp869-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp869->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp869->string-aref - string->cp869) ;; TODO -- error check - -(define-unibyte-mapper cp874->code-mapper code->cp874-mapper +(define-unibyte-mapping-external-format :cp874 (:|cp874|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 nil) @@ -2374,45 +1828,3 @@ (#xFE nil) (#xFF nil) ) - -(declaim (inline get-cp874-bytes)) -(defun get-cp874-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp874-mapper :cp874 string pos)) - -(defun string->cp874 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp874-bytes null-padding))) - -(defmacro define-cp874->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp874->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp874->code-mapper))))) - -(instantiate-octets-definition define-cp874->string*) - -(defmacro define-cp874->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp874->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp874->code-mapper))) - -(instantiate-octets-definition define-cp874->string) - -(define-external-format (:cp874 :|cp874|) - 1 t - (let ((cp874-byte (code->cp874-mapper bits))) - (if cp874-byte - (setf (sap-ref-8 sap tail) cp874-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp874->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp874->string-aref - string->cp874) ;; TODO -- error check diff --git a/src/code/external-formats/enc-ebcdic.lisp b/src/code/external-formats/enc-ebcdic.lisp index b54e630..e78ed5e 100644 --- a/src/code/external-formats/enc-ebcdic.lisp +++ b/src/code/external-formats/enc-ebcdic.lisp @@ -61,12 +61,11 @@ (,(make-od-name 'latin->string accessor) array astart aend #'ebcdic-us->code-mapper))) (instantiate-octets-definition define-ebcdic-us->string) -(define-external-format (:ebcdic-us :cp037 :|cp037| :ibm-037 :ibm037) - 1 t - (let ((ebcdic-us-byte (code->ebcdic-us-mapper bits))) - (if ebcdic-us-byte - (setf (sap-ref-8 sap tail) ebcdic-us-byte) - (external-format-encoding-error stream bits))) - (code-char (ebcdic-us->code-mapper byte)) - ebcdic-us->string-aref - string->ebcdic-us) +(define-unibyte-external-format :ebcdic-us (:cp037 :|cp037| :ibm-037 :ibm037) + (let ((ebcdic-us-byte (code->ebcdic-us-mapper bits))) + (if ebcdic-us-byte + (setf (sap-ref-8 sap tail) ebcdic-us-byte) + (external-format-encoding-error stream bits))) + (code-char (ebcdic-us->code-mapper byte)) + ebcdic-us->string-aref + string->ebcdic-us) diff --git a/src/code/external-formats/enc-iso.lisp b/src/code/external-formats/enc-iso.lisp index eacffb4..b5bdadb 100644 --- a/src/code/external-formats/enc-iso.lisp +++ b/src/code/external-formats/enc-iso.lisp @@ -1,6 +1,7 @@ (in-package "SB!IMPL") -(define-unibyte-mapper iso-8859-2->code-mapper code->iso-8859-2-mapper +(define-unibyte-mapping-external-format :iso-8859-2 + (:|iso-8859-2| :latin-2 :|latin-2|) (#xA1 #x0104) ; LATIN CAPITAL LETTER A WITH OGONEK (#xA2 #x02D8) ; BREVE (#xA3 #x0141) ; LATIN CAPITAL LETTER L WITH STROKE @@ -60,49 +61,8 @@ (#xFF #x02D9) ; DOT ABOVE ) -(declaim (inline get-iso-8859-2-bytes)) -(defun get-iso-8859-2-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-2-mapper :iso-8859-2 string pos)) - -(defun string->iso-8859-2 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-2-bytes null-padding))) - -(defmacro define-iso-8859-2->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-2->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-2->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-2->string*) - -(defmacro define-iso-8859-2->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-2->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-2->code-mapper))) - -(instantiate-octets-definition define-iso-8859-2->string) - -(define-external-format (:iso-8859-2 :|iso-8859-2| :latin-2 :|latin-2|) - 1 t - (let ((iso-8859-2-byte (code->iso-8859-2-mapper bits))) - (if iso-8859-2-byte - (setf (sap-ref-8 sap tail) iso-8859-2-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-2->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-2->string-aref - string->iso-8859-2) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-3->code-mapper code->iso-8859-3-mapper +(define-unibyte-mapping-external-format :iso-8859-3 + (:|iso-8859-3| :latin-3 :|latin-3|) (#xA1 #x0126) ; LATIN CAPITAL LETTER H WITH STROKE (#xA2 #x02D8) ; BREVE (#xA5 nil) @@ -140,49 +100,8 @@ (#xFF #x02D9) ; DOT ABOVE ) -(declaim (inline get-iso-8859-3-bytes)) -(defun get-iso-8859-3-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-3-mapper :iso-8859-3 string pos)) - -(defun string->iso-8859-3 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-3-bytes null-padding))) - -(defmacro define-iso-8859-3->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-3->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-3->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-3->string*) - -(defmacro define-iso-8859-3->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-3->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-3->code-mapper))) - -(instantiate-octets-definition define-iso-8859-3->string) - -(define-external-format (:iso-8859-3 :|iso-8859-3| :latin-3 :|latin-3|) - 1 t - (let ((iso-8859-3-byte (code->iso-8859-3-mapper bits))) - (if iso-8859-3-byte - (setf (sap-ref-8 sap tail) iso-8859-3-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-3->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-3->string-aref - string->iso-8859-3) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-4->code-mapper code->iso-8859-4-mapper +(define-unibyte-mapping-external-format :iso-8859-4 + (:|iso-8859-4| :latin-4 :|latin-4|) (#xA1 #x0104) ; LATIN CAPITAL LETTER A WITH OGONEK (#xA2 #x0138) ; LATIN SMALL LETTER KRA (#xA3 #x0156) ; LATIN CAPITAL LETTER R WITH CEDILLA @@ -235,49 +154,7 @@ (#xFF #x02D9) ; DOT ABOVE ) -(declaim (inline get-iso-8859-4-bytes)) -(defun get-iso-8859-4-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-4-mapper :iso-8859-4 string pos)) - -(defun string->iso-8859-4 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-4-bytes null-padding))) - -(defmacro define-iso-8859-4->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-4->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-4->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-4->string*) - -(defmacro define-iso-8859-4->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-4->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-4->code-mapper))) - -(instantiate-octets-definition define-iso-8859-4->string) - -(define-external-format (:iso-8859-4 :|iso-8859-4| :latin-4 :|latin-4|) - 1 t - (let ((iso-8859-4-byte (code->iso-8859-4-mapper bits))) - (if iso-8859-4-byte - (setf (sap-ref-8 sap tail) iso-8859-4-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-4->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-4->string-aref - string->iso-8859-4) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-5->code-mapper code->iso-8859-5-mapper +(define-unibyte-mapping-external-format :iso-8859-5 (:|iso-8859-5|) (#xA1 #x0401) ; CYRILLIC CAPITAL LETTER IO (#xA2 #x0402) ; CYRILLIC CAPITAL LETTER DJE (#xA3 #x0403) ; CYRILLIC CAPITAL LETTER GJE @@ -374,49 +251,7 @@ (#xFF #x045F) ; CYRILLIC SMALL LETTER DZHE ) -(declaim (inline get-iso-8859-5-bytes)) -(defun get-iso-8859-5-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-5-mapper :iso-8859-5 string pos)) - -(defun string->iso-8859-5 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-5-bytes null-padding))) - -(defmacro define-iso-8859-5->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-5->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-5->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-5->string*) - -(defmacro define-iso-8859-5->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-5->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-5->code-mapper))) - -(instantiate-octets-definition define-iso-8859-5->string) - -(define-external-format (:iso-8859-5 :|iso-8859-5|) - 1 t - (let ((iso-8859-5-byte (code->iso-8859-5-mapper bits))) - (if iso-8859-5-byte - (setf (sap-ref-8 sap tail) iso-8859-5-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-5->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-5->string-aref - string->iso-8859-5) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-6->code-mapper code->iso-8859-6-mapper +(define-unibyte-mapping-external-format :iso-8859-6 (:|iso-8859-6|) (#xA1 nil) (#xA2 nil) (#xA3 nil) @@ -512,49 +347,7 @@ (#xFF nil) ) -(declaim (inline get-iso-8859-6-bytes)) -(defun get-iso-8859-6-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-6-mapper :iso-8859-6 string pos)) - -(defun string->iso-8859-6 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-6-bytes null-padding))) - -(defmacro define-iso-8859-6->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-6->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-6->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-6->string*) - -(defmacro define-iso-8859-6->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-6->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-6->code-mapper))) - -(instantiate-octets-definition define-iso-8859-6->string) - -(define-external-format (:iso-8859-6 :|iso-8859-6|) - 1 t - (let ((iso-8859-6-byte (code->iso-8859-6-mapper bits))) - (if iso-8859-6-byte - (setf (sap-ref-8 sap tail) iso-8859-6-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-6->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-6->string-aref - string->iso-8859-6) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-7->code-mapper code->iso-8859-7-mapper +(define-unibyte-mapping-external-format :iso-8859-7 (:|iso-8859-7|) (#xA1 #x02BD) ; MODIFIER LETTER REVERSED COMMA (#xA2 #x02BC) ; MODIFIER LETTER APOSTROPHE (#xA4 nil) @@ -637,49 +430,7 @@ (#xFF nil) ) -(declaim (inline get-iso-8859-7-bytes)) -(defun get-iso-8859-7-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-7-mapper :iso-8859-7 string pos)) - -(defun string->iso-8859-7 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-7-bytes null-padding))) - -(defmacro define-iso-8859-7->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-7->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-7->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-7->string*) - -(defmacro define-iso-8859-7->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-7->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-7->code-mapper))) - -(instantiate-octets-definition define-iso-8859-7->string) - -(define-external-format (:iso-8859-7 :|iso-8859-7|) - 1 t - (let ((iso-8859-7-byte (code->iso-8859-7-mapper bits))) - (if iso-8859-7-byte - (setf (sap-ref-8 sap tail) iso-8859-7-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-7->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-7->string-aref - string->iso-8859-7) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-8->code-mapper code->iso-8859-8-mapper +(define-unibyte-mapping-external-format :iso-8859-8 (:|iso-8859-8|) (#xA1 nil) (#xAA #x00D7) ; MULTIPLICATION SIGN (#xAF #x203E) ; OVERLINE @@ -751,49 +502,8 @@ (#xFF nil) ) -(declaim (inline get-iso-8859-8-bytes)) -(defun get-iso-8859-8-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-8-mapper :iso-8859-8 string pos)) - -(defun string->iso-8859-8 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-8-bytes null-padding))) - -(defmacro define-iso-8859-8->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-8->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-8->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-8->string*) - -(defmacro define-iso-8859-8->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-8->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-8->code-mapper))) - -(instantiate-octets-definition define-iso-8859-8->string) - -(define-external-format (:iso-8859-8 :|iso-8859-8|) - 1 t - (let ((iso-8859-8-byte (code->iso-8859-8-mapper bits))) - (if iso-8859-8-byte - (setf (sap-ref-8 sap tail) iso-8859-8-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-8->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-8->string-aref - string->iso-8859-8) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-9->code-mapper code->iso-8859-9-mapper +(define-unibyte-mapping-external-format :iso-8859-9 + (:|iso-8859-9| :latin-5 :|latin-5|) (#xD0 #x011E) ; LATIN CAPITAL LETTER G WITH BREVE (#xDD #x0130) ; LATIN CAPITAL LETTER I WITH DOT ABOVE (#xDE #x015E) ; LATIN CAPITAL LETTER S WITH CEDILLA @@ -802,49 +512,8 @@ (#xFE #x015F) ; LATIN SMALL LETTER S WITH CEDILLA ) -(declaim (inline get-iso-8859-9-bytes)) -(defun get-iso-8859-9-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-9-mapper :iso-8859-9 string pos)) - -(defun string->iso-8859-9 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-9-bytes null-padding))) - -(defmacro define-iso-8859-9->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-9->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-9->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-9->string*) - -(defmacro define-iso-8859-9->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-9->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-9->code-mapper))) - -(instantiate-octets-definition define-iso-8859-9->string) - -(define-external-format (:iso-8859-9 :|iso-8859-9| :latin-5 :|latin-5|) - 1 t - (let ((iso-8859-9-byte (code->iso-8859-9-mapper bits))) - (if iso-8859-9-byte - (setf (sap-ref-8 sap tail) iso-8859-9-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-9->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-9->string-aref - string->iso-8859-9) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-10->code-mapper code->iso-8859-10-mapper +(define-unibyte-mapping-external-format :iso-8859-10 + (:|iso-8859-10| :latin-6 :|latin-6|) (#xA1 #x0104) ; LATIN CAPITAL LETTER A WITH OGONEK (#xA2 #x0112) ; LATIN CAPITAL LETTER E WITH MACRON (#xA3 #x0122) ; LATIN CAPITAL LETTER G WITH CEDILLA @@ -893,49 +562,7 @@ (#xFF #x0138) ; LATIN SMALL LETTER KRA ) -(declaim (inline get-iso-8859-10-bytes)) -(defun get-iso-8859-10-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-10-mapper :iso-8859-10 string pos)) - -(defun string->iso-8859-10 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-10-bytes null-padding))) - -(defmacro define-iso-8859-10->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-10->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-10->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-10->string*) - -(defmacro define-iso-8859-10->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-10->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-10->code-mapper))) - -(instantiate-octets-definition define-iso-8859-10->string) - -(define-external-format (:iso-8859-10 :|iso-8859-10| :latin-6 :|latin-6|) - 1 t - (let ((iso-8859-10-byte (code->iso-8859-10-mapper bits))) - (if iso-8859-10-byte - (setf (sap-ref-8 sap tail) iso-8859-10-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-10->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-10->string-aref - string->iso-8859-10) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-11->code-mapper code->iso-8859-11-mapper +(define-unibyte-mapping-external-format :iso-8859-11 (:|iso-8859-11|) (#xA1 #x0E01) ; THAI CHARACTER KO KAI (#xA2 #x0E02) ; THAI CHARACTER KHO KHAI (#xA3 #x0E03) ; THAI CHARACTER KHO KHUAT @@ -1033,49 +660,8 @@ (#xFF nil) ) -(declaim (inline get-iso-8859-11-bytes)) -(defun get-iso-8859-11-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-11-mapper :iso-8859-11 string pos)) - -(defun string->iso-8859-11 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-11-bytes null-padding))) - -(defmacro define-iso-8859-11->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-11->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-11->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-11->string*) - -(defmacro define-iso-8859-11->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-11->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-11->code-mapper))) - -(instantiate-octets-definition define-iso-8859-11->string) - -(define-external-format (:iso-8859-11 :|iso-8859-11|) - 1 t - (let ((iso-8859-11-byte (code->iso-8859-11-mapper bits))) - (if iso-8859-11-byte - (setf (sap-ref-8 sap tail) iso-8859-11-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-11->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-11->string-aref - string->iso-8859-11) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-13->code-mapper code->iso-8859-13-mapper +(define-unibyte-mapping-external-format :iso-8859-13 + (:|iso-8859-13| :latin-7 :|latin-7|) (#xA1 #x201D) ; RIGHT DOUBLE QUOTATION MARK (#xA5 #x201E) ; DOUBLE LOW-9 QUOTATION MARK (#xA8 #x00D8) ; LATIN CAPITAL LETTER O WITH STROKE @@ -1134,49 +720,8 @@ (#xFF #x2019) ; RIGHT SINGLE QUOTATION MARK ) -(declaim (inline get-iso-8859-13-bytes)) -(defun get-iso-8859-13-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-13-mapper :iso-8859-13 string pos)) - -(defun string->iso-8859-13 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-13-bytes null-padding))) - -(defmacro define-iso-8859-13->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-13->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-13->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-13->string*) - -(defmacro define-iso-8859-13->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-13->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-13->code-mapper))) - -(instantiate-octets-definition define-iso-8859-13->string) - -(define-external-format (:iso-8859-13 :|iso-8859-13| :latin-7 :|latin-7|) - 1 t - (let ((iso-8859-13-byte (code->iso-8859-13-mapper bits))) - (if iso-8859-13-byte - (setf (sap-ref-8 sap tail) iso-8859-13-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-13->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-13->string-aref - string->iso-8859-13) ;; TODO -- error check - -(define-unibyte-mapper iso-8859-14->code-mapper code->iso-8859-14-mapper +(define-unibyte-mapping-external-format :iso-8859-14 + (:|iso-8859-14| :latin-8 :|latin-8|) (#xA1 #x1E02) ; LATIN CAPITAL LETTER B WITH DOT ABOVE (#xA2 #x1E03) ; LATIN SMALL LETTER B WITH DOT ABOVE (#xA4 #x010A) ; LATIN CAPITAL LETTER C WITH DOT ABOVE @@ -1210,51 +755,9 @@ (#xFE #x0177) ; LATIN SMALL LETTER Y WITH CIRCUMFLEX ) -(declaim (inline get-iso-8859-14-bytes)) -(defun get-iso-8859-14-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->iso-8859-14-mapper :iso-8859-14 string pos)) - -(defun string->iso-8859-14 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-iso-8859-14-bytes null-padding))) - -(defmacro define-iso-8859-14->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'iso-8859-14->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'iso-8859-14->code-mapper))))) - -(instantiate-octets-definition define-iso-8859-14->string*) - -(defmacro define-iso-8859-14->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'iso-8859-14->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'iso-8859-14->code-mapper))) - -(instantiate-octets-definition define-iso-8859-14->string) - -(define-external-format (:iso-8859-14 :|iso-8859-14| :latin-8 :|latin-8|) - 1 t - (let ((iso-8859-14-byte (code->iso-8859-14-mapper bits))) - (if iso-8859-14-byte - (setf (sap-ref-8 sap tail) iso-8859-14-byte) - (external-format-encoding-error stream bits))) - (let ((code (iso-8859-14->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - iso-8859-14->string-aref - string->iso-8859-14) ;; TODO -- error check - -(define-unibyte-mapper - latin9->code-mapper - code->latin9-mapper +;;; The names for latin9 are different due to a historical accident. +(define-unibyte-mapping-external-format :latin-9 + (:latin9 :iso-8859-15 :iso8859-15) (#xA4 #x20AC) (#xA6 #x0160) (#xA8 #x0161) @@ -1262,45 +765,5 @@ (#xB8 #x017E) (#xBC #x0152) (#xBD #x0153) - (#xBE #x0178)) - -(declaim (inline get-latin9-bytes)) -(defun get-latin9-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->latin9-mapper :latin-9 string pos)) - -(defun string->latin9 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-latin9-bytes null-padding))) - -(defmacro define-latin9->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'latin9->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'latin9->code-mapper))))) -(instantiate-octets-definition define-latin9->string*) - -(defmacro define-latin9->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'latin9->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'latin9->code-mapper))) - (instantiate-octets-definition define-latin9->string) - -;;; The names for latin9 are different due to a historical accident. -(define-external-format (:latin-9 :latin9 :iso-8859-15 :iso8859-15) - 1 t - (let ((latin-9-byte (code->latin9-mapper bits))) - (if latin-9-byte - (setf (sap-ref-8 sap tail) latin-9-byte) - (external-format-encoding-error stream bits))) - (let ((code (latin9->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - latin9->string-aref - string->latin9) + (#xBE #x0178) +) diff --git a/src/code/external-formats/enc-win.lisp b/src/code/external-formats/enc-win.lisp index 82e293a..8876680 100644 --- a/src/code/external-formats/enc-win.lisp +++ b/src/code/external-formats/enc-win.lisp @@ -1,6 +1,7 @@ (in-package "SB!IMPL") -(define-unibyte-mapper cp1250->code-mapper code->cp1250-mapper +(define-unibyte-mapping-external-format :cp1250 + (:|cp1250| :windows-1250 :|windows-1250|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -82,49 +83,8 @@ (#xFF #x02D9) ; DOT ABOVE ) -(declaim (inline get-cp1250-bytes)) -(defun get-cp1250-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1250-mapper :cp1250 string pos)) - -(defun string->cp1250 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1250-bytes null-padding))) - -(defmacro define-cp1250->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1250->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1250->code-mapper))))) - -(instantiate-octets-definition define-cp1250->string*) - -(defmacro define-cp1250->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1250->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1250->code-mapper))) - -(instantiate-octets-definition define-cp1250->string) - -(define-external-format (:cp1250 :|cp1250| :windows-1250 :|windows-1250|) - 1 t - (let ((cp1250-byte (code->cp1250-mapper bits))) - (if cp1250-byte - (setf (sap-ref-8 sap tail) cp1250-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1250->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1250->string-aref - string->cp1250) ;; TODO -- error check - -(define-unibyte-mapper cp1251->code-mapper code->cp1251-mapper +(define-unibyte-mapping-external-format :cp1251 + (:|cp1251| :windows-1251 :|windows-1251|) (#x80 #x0402) ; CYRILLIC CAPITAL LETTER DJE (#x81 #x0403) ; CYRILLIC CAPITAL LETTER GJE (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -240,49 +200,8 @@ (#xFF #x044F) ; CYRILLIC SMALL LETTER YA ) -(declaim (inline get-cp1251-bytes)) -(defun get-cp1251-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1251-mapper :cp1251 string pos)) - -(defun string->cp1251 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1251-bytes null-padding))) - -(defmacro define-cp1251->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1251->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1251->code-mapper))))) - -(instantiate-octets-definition define-cp1251->string*) - -(defmacro define-cp1251->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1251->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1251->code-mapper))) - -(instantiate-octets-definition define-cp1251->string) - -(define-external-format (:cp1251 :|cp1251| :windows-1251 :|windows-1251|) - 1 t - (let ((cp1251-byte (code->cp1251-mapper bits))) - (if cp1251-byte - (setf (sap-ref-8 sap tail) cp1251-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1251->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1251->string-aref - string->cp1251) ;; TODO -- error check - -(define-unibyte-mapper cp1252->code-mapper code->cp1252-mapper +(define-unibyte-mapping-external-format :cp1252 + (:|cp1252| :windows-1252 :|windows-1252|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -317,49 +236,8 @@ (#x9F #x0178) ; LATIN CAPITAL LETTER Y WITH DIAERESIS ) -(declaim (inline get-cp1252-bytes)) -(defun get-cp1252-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1252-mapper :cp1252 string pos)) - -(defun string->cp1252 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1252-bytes null-padding))) - -(defmacro define-cp1252->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1252->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1252->code-mapper))))) - -(instantiate-octets-definition define-cp1252->string*) - -(defmacro define-cp1252->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1252->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1252->code-mapper))) - -(instantiate-octets-definition define-cp1252->string) - -(define-external-format (:cp1252 :|cp1252| :windows-1252 :|windows-1252|) - 1 t - (let ((cp1252-byte (code->cp1252-mapper bits))) - (if cp1252-byte - (setf (sap-ref-8 sap tail) cp1252-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1252->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1252->string-aref - string->cp1252) ;; TODO -- error check - -(define-unibyte-mapper cp1253->code-mapper code->cp1253-mapper +(define-unibyte-mapping-external-format :cp1253 + (:|cp1253| :windows-1253 :|windows-1253|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -469,49 +347,7 @@ (#xFF nil) ) -(declaim (inline get-cp1253-bytes)) -(defun get-cp1253-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1253-mapper :cp1253 string pos)) - -(defun string->cp1253 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1253-bytes null-padding))) - -(defmacro define-cp1253->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1253->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1253->code-mapper))))) - -(instantiate-octets-definition define-cp1253->string*) - -(defmacro define-cp1253->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1253->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1253->code-mapper))) - -(instantiate-octets-definition define-cp1253->string) - -(define-external-format (:cp1253 :|cp1253| :windows-1253 :|windows-1253|) - 1 t - (let ((cp1253-byte (code->cp1253-mapper bits))) - (if cp1253-byte - (setf (sap-ref-8 sap tail) cp1253-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1253->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1253->string-aref - string->cp1253) ;; TODO -- error check - -(define-unibyte-mapper cp1254->code-mapper code->cp1254-mapper +(define-unibyte-mapping-external-format :cp1254 (:|cp1254|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -552,49 +388,8 @@ (#xFE #x015F) ; LATIN SMALL LETTER S WITH CEDILLA ) -(declaim (inline get-cp1254-bytes)) -(defun get-cp1254-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1254-mapper :cp1254 string pos)) - -(defun string->cp1254 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1254-bytes null-padding))) - -(defmacro define-cp1254->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1254->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1254->code-mapper))))) - -(instantiate-octets-definition define-cp1254->string*) - -(defmacro define-cp1254->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1254->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1254->code-mapper))) - -(instantiate-octets-definition define-cp1254->string) - -(define-external-format (:cp1254 :|cp1254|) - 1 t - (let ((cp1254-byte (code->cp1254-mapper bits))) - (if cp1254-byte - (setf (sap-ref-8 sap tail) cp1254-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1254->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1254->string-aref - string->cp1254) ;; TODO -- error check - -(define-unibyte-mapper cp1255->code-mapper code->cp1255-mapper +(define-unibyte-mapping-external-format :cp1255 + (:|cp1255| :windows-1255 :|windows-1255|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -696,49 +491,8 @@ (#xFF nil) ) -(declaim (inline get-cp1255-bytes)) -(defun get-cp1255-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1255-mapper :cp1255 string pos)) - -(defun string->cp1255 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1255-bytes null-padding))) - -(defmacro define-cp1255->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1255->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1255->code-mapper))))) - -(instantiate-octets-definition define-cp1255->string*) - -(defmacro define-cp1255->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1255->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1255->code-mapper))) - -(instantiate-octets-definition define-cp1255->string) - -(define-external-format (:cp1255 :|cp1255| :windows-1255 :|windows-1255|) - 1 t - (let ((cp1255-byte (code->cp1255-mapper bits))) - (if cp1255-byte - (setf (sap-ref-8 sap tail) cp1255-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1255->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1255->string-aref - string->cp1255) ;; TODO -- error check - -(define-unibyte-mapper cp1256->code-mapper code->cp1256-mapper +(define-unibyte-mapping-external-format :cp1256 + (:|cp1256| :windows-1256 :|windows-1256|) (#x80 #x20AC) ; EURO SIGN (#x81 #x067E) ; ARABIC LETTER PEH (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -826,49 +580,8 @@ (#xFF nil) ) -(declaim (inline get-cp1256-bytes)) -(defun get-cp1256-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1256-mapper :cp1256 string pos)) - -(defun string->cp1256 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1256-bytes null-padding))) - -(defmacro define-cp1256->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1256->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1256->code-mapper))))) - -(instantiate-octets-definition define-cp1256->string*) - -(defmacro define-cp1256->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1256->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1256->code-mapper))) - -(instantiate-octets-definition define-cp1256->string) - -(define-external-format (:cp1256 :|cp1256| :windows-1256 :|windows-1256|) - 1 t - (let ((cp1256-byte (code->cp1256-mapper bits))) - (if cp1256-byte - (setf (sap-ref-8 sap tail) cp1256-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1256->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1256->string-aref - string->cp1256) ;; TODO -- error check - -(define-unibyte-mapper cp1257->code-mapper code->cp1257-mapper +(define-unibyte-mapping-external-format :cp1257 + (:|cp1257| :windows-1257 :|windows-1257|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -958,49 +671,8 @@ (#xFF #x02D9) ; DOT ABOVE ) -(declaim (inline get-cp1257-bytes)) -(defun get-cp1257-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1257-mapper :cp1257 string pos)) - -(defun string->cp1257 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1257-bytes null-padding))) - -(defmacro define-cp1257->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1257->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1257->code-mapper))))) - -(instantiate-octets-definition define-cp1257->string*) - -(defmacro define-cp1257->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1257->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1257->code-mapper))) - -(instantiate-octets-definition define-cp1257->string) - -(define-external-format (:cp1257 :|cp1257| :windows-1257 :|windows-1257|) - 1 t - (let ((cp1257-byte (code->cp1257-mapper bits))) - (if cp1257-byte - (setf (sap-ref-8 sap tail) cp1257-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1257->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1257->string-aref - string->cp1257) ;; TODO -- error check - -(define-unibyte-mapper cp1258->code-mapper code->cp1258-mapper +(define-unibyte-mapping-external-format :cp1258 + (:|cp1258| :windows-1258 :|windows-1258|) (#x80 #x20AC) ; EURO SIGN (#x81 nil) (#x82 #x201A) ; SINGLE LOW-9 QUOTATION MARK @@ -1048,46 +720,3 @@ (#xFD #x01B0) ; LATIN SMALL LETTER U WITH HORN (#xFE #x20AB) ; DONG SIGN ) - -(declaim (inline get-cp1258-bytes)) -(defun get-cp1258-bytes (string pos) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range pos)) - (get-latin-bytes #'code->cp1258-mapper :cp1258 string pos)) - -(defun string->cp1258 (string sstart send null-padding) - (declare (optimize speed (safety 0)) - (type simple-string string) - (type array-range sstart send)) - (values (string->latin% string sstart send #'get-cp1258-bytes null-padding))) - -(defmacro define-cp1258->string* (accessor type) - (declare (ignore type)) - (let ((name (make-od-name 'cp1258->string* accessor))) - `(progn - (defun ,name (string sstart send array astart aend) - (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'cp1258->code-mapper))))) - -(instantiate-octets-definition define-cp1258->string*) - -(defmacro define-cp1258->string (accessor type) - (declare (ignore type)) - `(defun ,(make-od-name 'cp1258->string accessor) (array astart aend) - (,(make-od-name 'latin->string accessor) array astart aend #'cp1258->code-mapper))) - -(instantiate-octets-definition define-cp1258->string) - -(define-external-format (:cp1258 :|cp1258| :windows-1258 :|windows-1258|) - 1 t - (let ((cp1258-byte (code->cp1258-mapper bits))) - (if cp1258-byte - (setf (sap-ref-8 sap tail) cp1258-byte) - (external-format-encoding-error stream bits))) - (let ((code (cp1258->code-mapper byte))) - (if code - (code-char code) - (external-format-decoding-error stream byte))) - cp1258->string-aref - string->cp1258) ;; TODO -- error check - diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 534c42c..0761456 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -505,11 +505,6 @@ (stream-encoding-error-and-handle stream code) (c-string-encoding-error stream code))) -(defun external-format-decoding-error (stream octet-count) - (if (streamp stream) - (stream-decoding-error stream octet-count) - (c-string-decoding-error stream octet-count))) - (defun synchronize-stream-output (stream) ;; If we're reading and writing on the same file, flush buffered ;; input and rewind file position accordingly. @@ -1336,198 +1331,69 @@ (defun bytes-for-char-fun (ef-entry) (if ef-entry (ef-bytes-for-char-fun ef-entry) (constantly 1))) -(defmacro define-external-format (external-format size output-restart - out-expr in-expr - octets-to-string-sym - string-to-octets-sym) - (let* ((name (first external-format)) - (out-function (symbolicate "OUTPUT-BYTES/" name)) - (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" (string name))) - (in-function (symbolicate "FD-STREAM-READ-N-CHARACTERS/" name)) - (in-char-function (symbolicate "INPUT-CHAR/" name)) - (size-function (symbolicate "BYTES-FOR-CHAR/" name)) - (read-c-string-function (symbolicate "READ-FROM-C-STRING/" name)) - (output-c-string-function (symbolicate "OUTPUT-TO-C-STRING/" name)) - (n-buffer (gensym "BUFFER"))) +(defmacro define-unibyte-mapping-external-format + (canonical-name (&rest other-names) &body exceptions) + (let ((->code-name (symbolicate canonical-name '->code-mapper)) + (code->-name (symbolicate 'code-> canonical-name '-mapper)) + (get-bytes-name (symbolicate 'get- canonical-name '-bytes)) + (string->-name (symbolicate 'string-> canonical-name)) + (define-string*-name (symbolicate 'define- canonical-name '->string*)) + (string*-name (symbolicate canonical-name '->string*)) + (define-string-name (symbolicate 'define- canonical-name '->string)) + (string-name (symbolicate canonical-name '->string)) + (->string-aref-name (symbolicate canonical-name '->string-aref))) `(progn - (defun ,size-function (byte) - (declare (ignore byte)) - ,size) - (defun ,out-function (stream string flush-p start end) - (let ((start (or start 0)) - (end (or end (length string)))) - (declare (type index start end)) - (synchronize-stream-output stream) - (unless (<= 0 start end (length string)) - (sequence-bounding-indices-bad-error string start end)) - (do () - ((= end start)) - (let ((obuf (fd-stream-obuf stream))) - (string-dispatch (simple-base-string - #!+sb-unicode - (simple-array character (*)) - string) - string - (let ((sap (buffer-sap obuf)) - (len (buffer-length obuf)) - ;; FIXME: rename - (tail (buffer-tail obuf))) - (declare (type index tail) - ;; STRING bounds have already been checked. - (optimize (safety 0))) - (,@(if output-restart - `(catch 'output-nothing) - `(progn)) - (do* () - ((or (= start end) (< (- len tail) 4))) - (let* ((byte (aref string start)) - (bits (char-code byte))) - ,out-expr - (incf tail ,size) - (setf (buffer-tail obuf) tail) - (incf start))) - ;; Exited from the loop normally - (go flush)) - ;; Exited via CATCH. Skip the current character. - (incf start)))) - flush - (when (< start end) - (flush-output-buffer stream))) - (when flush-p - (flush-output-buffer stream)))) - (def-output-routines (,format - ,size - ,output-restart - (:none character) - (:line character) - (:full character)) - (if (eql byte #\Newline) - (setf (fd-stream-char-pos stream) 0) - (incf (fd-stream-char-pos stream))) - (let* ((obuf (fd-stream-obuf stream)) - (bits (char-code byte)) - (sap (buffer-sap obuf)) - (tail (buffer-tail obuf))) - ,out-expr)) - (defun ,in-function (stream buffer start requested eof-error-p - &aux (index start) (end (+ start requested))) - (declare (type fd-stream stream) - (type index start requested index end) - (type - (simple-array character (#.+ansi-stream-in-buffer-length+)) - buffer)) - (when (fd-stream-eof-forced-p stream) - (setf (fd-stream-eof-forced-p stream) nil) - (return-from ,in-function 0)) - (do ((instead (fd-stream-instead stream))) - ((= (fill-pointer instead) 0) - (setf (fd-stream-listen stream) nil)) - (setf (aref buffer index) (vector-pop instead)) - (incf index) - (when (= index end) - (return-from ,in-function (- index start)))) - (do () - (nil) - (let* ((ibuf (fd-stream-ibuf stream)) - (head (buffer-head ibuf)) - (tail (buffer-tail ibuf)) - (sap (buffer-sap ibuf))) - (declare (type index head tail) - (type system-area-pointer sap)) - ;; Copy data from stream buffer into user's buffer. - (dotimes (i (min (truncate (- tail head) ,size) - (- end index))) - (declare (optimize speed)) - (let* ((byte (sap-ref-8 sap head))) - (setf (aref buffer index) ,in-expr) - (incf index) - (incf head ,size))) - (setf (buffer-head ibuf) head) - ;; Maybe we need to refill the stream buffer. - (cond ( ;; If there was enough data in the stream buffer, we're done. - (= index end) - (return (- index start))) - ( ;; If EOF, we're done in another way. - (null (catch 'eof-input-catcher (refill-input-buffer stream))) - (if eof-error-p - (error 'end-of-file :stream stream) - (return (- index start)))) - ;; Otherwise we refilled the stream buffer, so fall - ;; through into another pass of the loop. - )))) - (def-input-routine ,in-char-function (character ,size sap head) - (let ((byte (sap-ref-8 sap head))) - ,in-expr)) - (defun ,read-c-string-function (sap element-type) - (declare (type system-area-pointer sap) - (type (member character base-char) element-type)) - (locally - (declare (optimize (speed 3) (safety 0))) - (let* ((stream ,name) - (length - (loop for head of-type index upfrom 0 by ,size - for count of-type index upto (1- array-dimension-limit) - for byte = (sap-ref-8 sap head) - for char of-type character = ,in-expr - until (zerop (char-code char)) - finally (return count))) - ;; Inline the common cases - (string (make-string length :element-type element-type))) - (declare (ignorable stream) - (type index length) - (type simple-string string)) - (/show0 before-copy-loop) - (loop for head of-type index upfrom 0 by ,size - for index of-type index below length - for byte = (sap-ref-8 sap head) - for char of-type character = ,in-expr - do (setf (aref string index) char)) - string))) ;; last loop rewrite to dotimes? - (defun ,output-c-string-function (string) - (declare (type simple-string string)) - (locally - (declare (optimize (speed 3) (safety 0))) - (let* ((length (length string)) - (,n-buffer (make-array (* (1+ length) ,size) - :element-type '(unsigned-byte 8))) - (tail 0) - (stream ,name)) - (declare (type index length tail)) - (with-pinned-objects (,n-buffer) - (let ((sap (vector-sap ,n-buffer))) - (declare (system-area-pointer sap)) - (dotimes (i length) - (let* ((byte (aref string i)) - (bits (char-code byte))) - (declare (ignorable byte bits)) - ,out-expr) - (incf tail ,size)) - (let* ((bits 0) - (byte (code-char bits))) - (declare (ignorable bits byte)) - ,out-expr))) - ,n-buffer))) - (let ((entry (%make-external-format - :names ',external-format - :read-n-chars-fun #',in-function - :read-char-fun #',in-char-function - :write-n-bytes-fun #',out-function - ,@(mapcan #'(lambda (buffering) - (list (intern (format nil "WRITE-CHAR-~A-BUFFERED-FUN" buffering) :keyword) - `#',(intern (format nil format (string buffering))))) - '(:none :line :full)) - :resync-fun nil - :bytes-for-char-fun #',size-function - :read-c-string-fun #',read-c-string-function - :write-c-string-fun #',output-c-string-function - :octets-to-string-fun (lambda (&rest rest) - (declare (dynamic-extent rest)) - (apply ',octets-to-string-sym rest)) - :string-to-octets-fun (lambda (&rest rest) - (declare (dynamic-extent rest)) - (apply ',string-to-octets-sym rest))))) - (dolist (ef ',external-format) - (setf (gethash ef *external-formats*) entry)))))) + (define-unibyte-mapper ,->code-name ,code->-name + ,@exceptions) + (declaim (inline ,get-bytes-name)) + (defun ,get-bytes-name (string pos) + (declare (optimize speed (safety 0)) + (type simple-string string) + (type array-range pos)) + (get-latin-bytes #',code->-name ,canonical-name string pos)) + (defun ,string->-name (string sstart send null-padding) + (declare (optimize speed (safety 0)) + (type simple-string string) + (type array-range sstart send)) + (values (string->latin% string sstart send #',get-bytes-name null-padding))) + (defmacro ,define-string*-name (accessor type) + (declare (ignore type)) + (let ((name (make-od-name ',string*-name accessor))) + `(progn + (defun ,name (string sstart send array astart aend) + (,(make-od-name 'latin->string* accessor) + string sstart send array astart aend #',',->code-name))))) + (instantiate-octets-definition ,define-string*-name) + (defmacro ,define-string-name (accessor type) + (declare (ignore type)) + (let ((name (make-od-name ',string-name accessor))) + `(progn + (defun ,name (array astart aend) + (,(make-od-name 'latin->string accessor) + array astart aend #',',->code-name))))) + (instantiate-octets-definition ,define-string-name) + (define-unibyte-external-format ,canonical-name ,other-names + (let ((octet (,code->-name bits))) + (if octet + (setf (sap-ref-8 sap tail) octet) + (external-format-encoding-error stream bits))) + (let ((code (,->code-name byte))) + (if code + (code-char code) + (return-from decode-break-reason 1))) + ,->string-aref-name + ,string->-name)))) + +(defmacro define-unibyte-external-format + (canonical-name (&rest other-names) + out-form in-form octets-to-string-symbol string-to-octets-symbol) + `(define-external-format/variable-width (,canonical-name ,@other-names) + t 1 + ,out-form + 1 + ,in-form + ,octets-to-string-symbol + ,string-to-octets-symbol)) (defmacro define-external-format/variable-width (external-format output-restart out-size-expr diff --git a/tests/external-format.impure.lisp b/tests/external-format.impure.lisp index ee8aed1..9311320 100644 --- a/tests/external-format.impure.lisp +++ b/tests/external-format.impure.lisp @@ -374,5 +374,478 @@ (handler-case (read-char s) (error () (assert (member i '(#xd5 #xe7 #xf2)))) (:no-error (char) (assert (not (member i '(#xd5 #xe7 #xf2))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-input-replacement :cp857)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:cp857 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert (member i `(,(char-code #\?) #xd5 #xe7 #xf2)))) + (t (assert (not (member i `(,(char-code #\?) #xd5 #xe7 #xf2)))))))))) +(delete-file *test-path*) +(with-test (:name (:unibyte-output-replacement :cp857)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:cp857 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:cp857)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i 128) + (assert (= (char-code (char string i)) i))) + (assert (= 38 (count #\? string :start 128)))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-input-replacement :ascii)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:ascii :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert (or (= i (char-code #\?)) (> i 127)))) + (t (assert (and (< i 128) (not (= i (char-code #\?))))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :ascii)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:ascii :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:ascii)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i 128) + (assert (= (char-code (char string i)) i))) + (assert (= 128 (count #\? string :start 128)))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-input-replacement :latin-1)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-1 :replacement #\?)) + (let ((char (read-char s))) + (assert (= (char-code char) i)))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-1)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-1 :replacement #\?)) + (dotimes (i 257) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-1)) + (let ((string (make-string 257))) + (read-sequence string s) + (dotimes (i 256) + (assert (= (char-code (char string i)) i))) + (assert (char= #\? (char string 256)))))) +(delete-file *test-path*) + +;;; latin-2 tests +(with-test (:name (:unibyte-input-replacement :latin-2)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-2 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((< i #xa1) (assert (= (char-code char) i))) + ;; FIXME: more tests + ))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-2)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-2 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-2)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 57 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; latin-3 tests +(with-test (:name (:unibyte-input-replacement :latin-3)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-3 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert #1=(or (= i (char-code #\?)) + (member i '(#xa5 #xae #xbe #xc3 #xd0 #xe3 #xf0))))) + (t (assert (not #1#)))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-3)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-3 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-3)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 35 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; latin-4 tests +(with-test (:name (:unibyte-input-replacement :latin-4)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-4 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((< i #xa1) (assert (= (char-code char) i))) + ;; FIXME: more tests + ))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-4)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-4 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-4)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 50 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; iso-8859-5 tests +(with-test (:name (:unibyte-input-replacement :iso-8859-5)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:iso-8859-5 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((= (char-code char) i) + (assert (or (< i #xa1) (= i #xad)))) + (t (assert (and (>= i #xa1) (/= i #xad))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :iso-8859-5)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:iso-8859-5 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:iso-8859-5)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 93 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; iso-8859-6 tests +(with-test (:name (:unibyte-input-replacement :iso-8859-6)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:iso-8859-6 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert #1=(or (= i (char-code #\?)) + (<= #xa1 i #xa3) (<= #xa5 i #xab) (<= #xae i #xba) + (<= #xbc i #xbe) (= i #xc0) (<= #xdb i #xdf) + (<= #xf3 i)))) + (t (assert (not #1#)))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :iso-8859-6)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:iso-8859-6 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:iso-8859-6)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 93 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; iso-8859-7 tests +(with-test (:name (:unibyte-input-replacement :iso-8859-7)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:iso-8859-7 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert #1=(or (= i (char-code #\?)) + (member i '(#xa4 #xa5 #xaa #xae #xd2 #xff))))) + (t (assert (not #1#)))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :iso-8859-7)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:iso-8859-7 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:iso-8859-7)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 80 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; iso-8859-8 tests +(with-test (:name (:unibyte-input-replacement :iso-8859-8)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:iso-8859-8 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert #1=(or (= i (char-code #\?)) + (= i #xa1) (<= #xbf i #xde) (>= i #xfb)))) + (t (assert (not #1#)))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :iso-8859-8)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:iso-8859-8 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:iso-8859-8)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 67 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; latin-5 tests +(with-test (:name (:unibyte-input-replacement :latin-5)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-5 :replacement #\?)) + (let ((char (read-char s))) + (assert (or (and (= (char-code char) i) + (not (member i '(#xd0 #xdd #xde #xf0 #xfd #xfe)))) + (and (member i '(#xd0 #xdd #xde #xf0 #xfd #xfe)) + (not (char= char #\?))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-5)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-5 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-5)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xd0) + (assert (= (char-code (char string i)) i))) + (assert (= 6 (count #\? string :start #xd0)))))) +(delete-file *test-path*) + +;;; latin-6 tests +(with-test (:name (:unibyte-input-replacement :latin-6)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-6 :replacement #\?)) + (let ((char (read-char s))) + (assert (or (= (char-code char) i) + (and (<= #xa1 i #xff) + (not (char= char #\?))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-6)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-6 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-6)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 46 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; iso-8859-11 tests +(with-test (:name (:unibyte-input-replacement :iso-8859-11)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:iso-8859-11 :replacement #\?)) + (let ((char (read-char s))) + (cond + ((eq char #\?) + (assert (member i #1=`(,(char-code #\?) #xdb #xdc #xdd #xde #xfc #xfd #xfe #xff)))) + (t (assert (not (member i #1#))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :iso-8859-11)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:iso-8859-11 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:iso-8859-11)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 95 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; latin-7 tests +(with-test (:name (:unibyte-input-replacement :latin-7)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-7 :replacement #\?)) + (let ((char (read-char s))) + (assert (or (= (char-code char) i) + (and (<= #xa1 i #xff) + (not (char= char #\?))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-7)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-7 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-7)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (dolist (i '(#xd8 #xc6 #xf8 #xe6)) + (assert (char/= (char string i) #\?))) + (assert (= 52 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; latin-8 tests +(with-test (:name (:unibyte-input-replacement :latin-8)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-8 :replacement #\?)) + (let ((char (read-char s))) + (assert (or (= (char-code char) i) + (and (<= #xa1 i #xfe) + (not (char= char #\?))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-8)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-8 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-8)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa1) + (assert (= (char-code (char string i)) i))) + (assert (= 31 (count #\? string :start #xa1)))))) +(delete-file *test-path*) + +;;; latin-9 tests +(with-test (:name (:unibyte-input-replacement :latin-9)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:latin-9 :replacement #\?)) + (let ((char (read-char s))) + (assert (or (and (= (char-code char) i) + (not (member i '(#xa4 #xa6 #xa8 #xb4 #xb8 #xbc #xbd #xbe)))) + (and (member i '(#xa4 #xa6 #xa8 #xb4 #xb8 #xbc #xbd #xbe)) + (not (char= char #\?))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :latin-9)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:latin-9 :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:latin-9)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #xa4) + (assert (= (char-code (char string i)) i))) + (assert (= 8 (count #\? string :start #xa4)))))) +(delete-file *test-path*) + +;;; koi8-r tests +(with-test (:name (:unibyte-input-replacement :koi8-r)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:koi8-r :replacement #\?)) + (let ((char (read-char s))) + (cond ((= (char-code char) i) + (assert (< i 128))) + (t (assert (> i 127)))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :koi8-r)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:koi8-r :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:koi8-r)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #x80) + (assert (= (char-code (char string i)) i))) + (assert (= 122 (count #\? string :start #x80)))))) +(delete-file *test-path*) + +;;; koi8-u tests +(with-test (:name (:unibyte-input-replacement :koi8-u)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:koi8-u :replacement #\?)) + (let ((char (read-char s))) + (cond ((= (char-code char) i) + (assert (< i 128))) + (t (assert (> i 127)))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :koi8-u)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:koi8-u :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:koi8-u)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #x80) + (assert (= (char-code (char string i)) i))) + (assert (= 122 (count #\? string :start #x80)))))) +(delete-file *test-path*) + +;;; x-mac-cyrillic tests +(with-test (:name (:unibyte-input-replacement :x-mac-cyrillic)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (with-open-file (s *test-path* :external-format '(:x-mac-cyrillic :replacement #\?)) + (let ((char (read-char s))) + (cond ((= (char-code char) i) + (assert (or (< i 128) (member i '(#xa2 #xa3 #xa9 #xb1 #xb5))))) + (t (assert (and (> i 127) + (not (member i '(#xa2 #xa3 #xa9 #xb1 #xb5))))))))))) +(delete-file *test-path*) + +(with-test (:name (:unibyte-output-replacement :x-mac-cyrillic)) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :external-format '(:x-mac-cyrillic :replacement #\?)) + (dotimes (i 256) + (write-char (code-char i) s))) + (with-open-file (s *test-path* :external-format '(:x-mac-cyrillic)) + (let ((string (make-string 256))) + (read-sequence string s) + (dotimes (i #x80) + (assert (= (char-code (char string i)) i))) + (assert (= 113 (count #\? string :start #x80)))))) +(delete-file *test-path*) + ;;;; success diff --git a/version.lisp-expr b/version.lisp-expr index 6ac0cbb..eb84f98 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.32.20" +"1.0.32.21" -- 1.7.10.4