From ba008a77f32b2ef30e8492757afb8b54731c76cf Mon Sep 17 00:00:00 2001
From: Alastair Bridgewater
Date: Sun, 22 Dec 2013 18:42:43 -0500
Subject: [PATCH] external-formats: Add support for MacRoman character encoding.

  * This is rarely used these days, having been supplanted by the
    use of UTF-8 with the adoption of OSX, but it is occasionally
    seen "in the wild", or if someone has an interoperability
    concern with antique macs. I was rather surprised to encounter
    a CSV file recently, with data for an event in May of 2013,
    that happened to be MacRoman encoded, and then further surprised
    to find that SBCL had no support for the encoding.

  * I have tested this interactively only, using OCTETS-TO-STRING
    to verify that a sequence of the upper half of the 8-bit
    encoding space was correctly mapped to unicode characters.
---
 NEWS                                   |    1 +
 build-order.lisp-expr                  |    2 +
 src/code/external-formats/enc-mac.lisp |  128 ++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 src/code/external-formats/enc-mac.lisp

Reviewer notes (text between the three-dash line and the first diff
header is not part of the commit message):

  * The table in enc-mac.lisp lists 123 of the 128 octets in the range
    #x80-#xFF. The five octets that are not listed are #xA2, #xA3, #xA9,
    #xB1 and #xB5; MacRoman assigns each of them to the code point with
    the same number (U+00A2, U+00A3, U+00A9, U+00B1, U+00B5). Presumably
    DEFINE-UNIBYTE-MAPPING-EXTERNAL-FORMAT maps unlisted octets to
    themselves, so only the exceptions need entries -- confirm against
    the macro definition.

  * This copy was re-flowed from a whitespace-collapsed original. The
    leading whitespace of the NEWS and build-order.lisp-expr context
    lines was reconstructed from those files' usual layout; if a hunk
    fails to match, retry with "git apply --ignore-whitespace".

diff --git a/NEWS b/NEWS
index a46e058..75355ee 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,6 @@
 ;;;; -*- coding: utf-8; fill-column: 78 -*-
 changes relative to sbcl-1.1.14:
+  * improvement: support for "Mac Roman" external format.
   * new feature: the iterative spilling/coloring register allocator developed
     by Alexandra Barchunova during Google Summer of Code 2013 is now merged in.
     By default, it only activates for functions that optimize with
diff --git a/build-order.lisp-expr b/build-order.lisp-expr
index 70648ee..afc2770 100644
--- a/build-order.lisp-expr
+++ b/build-order.lisp-expr
@@ -705,6 +705,8 @@
  #!+sb-unicode
  ("src/code/external-formats/enc-win" :not-host)
  #!+sb-unicode
+ ("src/code/external-formats/enc-mac" :not-host)
+ #!+sb-unicode
  ("src/code/external-formats/mb-util" :not-host)
  #!+sb-unicode
  ("src/code/external-formats/enc-cn-tbl" :not-host)
diff --git a/src/code/external-formats/enc-mac.lisp b/src/code/external-formats/enc-mac.lisp
new file mode 100644
index 0000000..8ad4fe3
--- /dev/null
+++ b/src/code/external-formats/enc-mac.lisp
@@ -0,0 +1,128 @@
+(in-package "SB!IMPL")
+
+(define-unibyte-mapping-external-format :mac-roman
+    (:|mac-roman| :|MacRoman| :mac :|mac| :macintosh :|macintosh|)
+  (#x80 #x00C4) ; LATIN CAPITAL LETTER A WITH DIAERESIS
+  (#x81 #x00C5) ; LATIN CAPITAL LETTER A WITH RING ABOVE
+  (#x82 #x00C7) ; LATIN CAPITAL LETTER C WITH CEDILLA
+  (#x83 #x00C9) ; LATIN CAPITAL LETTER E WITH ACUTE
+  (#x84 #x00D1) ; LATIN CAPITAL LETTER N WITH TILDE
+  (#x85 #x00D6) ; LATIN CAPITAL LETTER O WITH DIAERESIS
+  (#x86 #x00DC) ; LATIN CAPITAL LETTER U WITH DIAERESIS
+  (#x87 #x00E1) ; LATIN SMALL LETTER A WITH ACUTE
+  (#x88 #x00E0) ; LATIN SMALL LETTER A WITH GRAVE
+  (#x89 #x00E2) ; LATIN SMALL LETTER A WITH CIRCUMFLEX
+  (#x8A #x00E4) ; LATIN SMALL LETTER A WITH DIAERESIS
+  (#x8B #x00E3) ; LATIN SMALL LETTER A WITH TILDE
+  (#x8C #x00E5) ; LATIN SMALL LETTER A WITH RING ABOVE
+  (#x8D #x00E7) ; LATIN SMALL LETTER C WITH CEDILLA
+  (#x8E #x00E9) ; LATIN SMALL LETTER E WITH ACUTE
+  (#x8F #x00E8) ; LATIN SMALL LETTER E WITH GRAVE
+  (#x90 #x00EA) ; LATIN SMALL LETTER E WITH CIRCUMFLEX
+  (#x91 #x00EB) ; LATIN SMALL LETTER E WITH DIAERESIS
+  (#x92 #x00ED) ; LATIN SMALL LETTER I WITH ACUTE
+  (#x93 #x00EC) ; LATIN SMALL LETTER I WITH GRAVE
+  (#x94 #x00EE) ; LATIN SMALL LETTER I WITH CIRCUMFLEX
+  (#x95 #x00EF) ; LATIN SMALL LETTER I WITH DIAERESIS
+  (#x96 #x00F1) ; LATIN SMALL LETTER N WITH TILDE
+  (#x97 #x00F3) ; LATIN SMALL LETTER O WITH ACUTE
+  (#x98 #x00F2) ; LATIN SMALL LETTER O WITH GRAVE
+  (#x99 #x00F4) ; LATIN SMALL LETTER O WITH CIRCUMFLEX
+  (#x9A #x00F6) ; LATIN SMALL LETTER O WITH DIAERESIS
+  (#x9B #x00F5) ; LATIN SMALL LETTER O WITH TILDE
+  (#x9C #x00FA) ; LATIN SMALL LETTER U WITH ACUTE
+  (#x9D #x00F9) ; LATIN SMALL LETTER U WITH GRAVE
+  (#x9E #x00FB) ; LATIN SMALL LETTER U WITH CIRCUMFLEX
+  (#x9F #x00FC) ; LATIN SMALL LETTER U WITH DIAERESIS
+  (#xA0 #x2020) ; DAGGER
+  (#xA1 #x00B0) ; DEGREE SIGN
+  (#xA4 #x00A7) ; SECTION SIGN
+  (#xA5 #x2022) ; BULLET
+  (#xA6 #x00B6) ; PILCROW SIGN
+  (#xA7 #x00DF) ; LATIN SMALL LETTER SHARP S
+  (#xA8 #x00AE) ; REGISTERED SIGN
+  (#xAA #x2122) ; TRADE MARK SIGN
+  (#xAB #x00B4) ; ACUTE ACCENT
+  (#xAC #x00A8) ; DIAERESIS
+  (#xAD #x2260) ; NOT EQUAL TO
+  (#xAE #x00C6) ; LATIN CAPITAL LETTER AE
+  (#xAF #x00D8) ; LATIN CAPITAL LETTER O WITH STROKE
+  (#xB0 #x221E) ; INFINITY
+  (#xB2 #x2264) ; LESS-THAN OR EQUAL TO
+  (#xB3 #x2265) ; GREATER-THAN OR EQUAL TO
+  (#xB4 #x00A5) ; YEN SIGN
+  (#xB6 #x2202) ; PARTIAL DIFFERENTIAL
+  (#xB7 #x2211) ; N-ARY SUMMATION
+  (#xB8 #x220F) ; N-ARY PRODUCT
+  (#xB9 #x03C0) ; GREEK SMALL LETTER PI
+  (#xBA #x222B) ; INTEGRAL
+  (#xBB #x00AA) ; FEMININE ORDINAL INDICATOR
+  (#xBC #x00BA) ; MASCULINE ORDINAL INDICATOR
+  (#xBD #x03A9) ; GREEK CAPITAL LETTER OMEGA
+  (#xBE #x00E6) ; LATIN SMALL LETTER AE
+  (#xBF #x00F8) ; LATIN SMALL LETTER O WITH STROKE
+  (#xC0 #x00BF) ; INVERTED QUESTION MARK
+  (#xC1 #x00A1) ; INVERTED EXCLAMATION MARK
+  (#xC2 #x00AC) ; NOT SIGN
+  (#xC3 #x221A) ; SQUARE ROOT
+  (#xC4 #x0192) ; LATIN SMALL LETTER F WITH HOOK
+  (#xC5 #x2248) ; ALMOST EQUAL TO
+  (#xC6 #x2206) ; INCREMENT
+  (#xC7 #x00AB) ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+  (#xC8 #x00BB) ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+  (#xC9 #x2026) ; HORIZONTAL ELLIPSIS
+  (#xCA #x00A0) ; NO-BREAK SPACE
+  (#xCB #x00C0) ; LATIN CAPITAL LETTER A WITH GRAVE
+  (#xCC #x00C3) ; LATIN CAPITAL LETTER A WITH TILDE
+  (#xCD #x00D5) ; LATIN CAPITAL LETTER O WITH TILDE
+  (#xCE #x0152) ; LATIN CAPITAL LIGATURE OE
+  (#xCF #x0153) ; LATIN SMALL LIGATURE OE
+  (#xD0 #x2013) ; EN DASH
+  (#xD1 #x2014) ; EM DASH
+  (#xD2 #x201C) ; LEFT DOUBLE QUOTATION MARK
+  (#xD3 #x201D) ; RIGHT DOUBLE QUOTATION MARK
+  (#xD4 #x2018) ; LEFT SINGLE QUOTATION MARK
+  (#xD5 #x2019) ; RIGHT SINGLE QUOTATION MARK
+  (#xD6 #x00F7) ; DIVISION SIGN
+  (#xD7 #x25CA) ; LOZENGE
+  (#xD8 #x00FF) ; LATIN SMALL LETTER Y WITH DIAERESIS
+  (#xD9 #x0178) ; LATIN CAPITAL LETTER Y WITH DIAERESIS
+  (#xDA #x2044) ; FRACTION SLASH
+  (#xDB #x20AC) ; EURO SIGN
+  (#xDC #x2039) ; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+  (#xDD #x203A) ; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+  (#xDE #xFB01) ; LATIN SMALL LIGATURE FI
+  (#xDF #xFB02) ; LATIN SMALL LIGATURE FL
+  (#xE0 #x2021) ; DOUBLE DAGGER
+  (#xE1 #x00B7) ; MIDDLE DOT
+  (#xE2 #x201A) ; SINGLE LOW-9 QUOTATION MARK
+  (#xE3 #x201E) ; DOUBLE LOW-9 QUOTATION MARK
+  (#xE4 #x2030) ; PER MILLE SIGN
+  (#xE5 #x00C2) ; LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+  (#xE6 #x00CA) ; LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+  (#xE7 #x00C1) ; LATIN CAPITAL LETTER A WITH ACUTE
+  (#xE8 #x00CB) ; LATIN CAPITAL LETTER E WITH DIAERESIS
+  (#xE9 #x00C8) ; LATIN CAPITAL LETTER E WITH GRAVE
+  (#xEA #x00CD) ; LATIN CAPITAL LETTER I WITH ACUTE
+  (#xEB #x00CE) ; LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+  (#xEC #x00CF) ; LATIN CAPITAL LETTER I WITH DIAERESIS
+  (#xED #x00CC) ; LATIN CAPITAL LETTER I WITH GRAVE
+  (#xEE #x00D3) ; LATIN CAPITAL LETTER O WITH ACUTE
+  (#xEF #x00D4) ; LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+  (#xF0 #xF8FF) ; (solid Apple logo)
+  (#xF1 #x00D2) ; LATIN CAPITAL LETTER O WITH GRAVE
+  (#xF2 #x00DA) ; LATIN CAPITAL LETTER U WITH ACUTE
+  (#xF3 #x00DB) ; LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+  (#xF4 #x00D9) ; LATIN CAPITAL LETTER U WITH GRAVE
+  (#xF5 #x0131) ; LATIN SMALL LETTER DOTLESS I
+  (#xF6 #x02C6) ; MODIFIER LETTER CIRCUMFLEX ACCENT
+  (#xF7 #x02DC) ; SMALL TILDE
+  (#xF8 #x00AF) ; MACRON
+  (#xF9 #x02D8) ; BREVE
+  (#xFA #x02D9) ; DOT ABOVE
+  (#xFB #x02DA) ; RING ABOVE
+  (#xFC #x00B8) ; CEDILLA
+  (#xFD #x02DD) ; DOUBLE ACUTE ACCENT
+  (#xFE #x02DB) ; OGONEK
+  (#xFF #x02C7) ; CARON
+)
-- 
1.7.10.4