0.8.16.25:
authorChristophe Rhodes <csr21@cam.ac.uk>
Tue, 2 Nov 2004 08:37:50 +0000 (08:37 +0000)
committerChristophe Rhodes <csr21@cam.ac.uk>
Tue, 2 Nov 2004 08:37:50 +0000 (08:37 +0000)
Merge the rest of character_branch under #!+sb-unicode
... untested with #!+sb-unicode, but it seems to work OK without.
One more build/test cycle to go.

This patch brought to you by --ifdef

33 files changed:
package-data-list.lisp-expr
src/code/array.lisp
src/code/char.lisp
src/code/class.lisp
src/code/fd-stream.lisp
src/code/fop.lisp
src/code/interr.lisp
src/code/late-type.lisp
src/code/print.lisp
src/code/room.lisp
src/code/seq.lisp
src/code/stream.lisp
src/compiler/alpha/array.lisp
src/compiler/dump.lisp
src/compiler/generic/early-objdef.lisp
src/compiler/generic/genesis.lisp
src/compiler/generic/interr.lisp
src/compiler/generic/late-type-vops.lisp
src/compiler/generic/vm-array.lisp
src/compiler/generic/vm-fndb.lisp
src/compiler/generic/vm-tran.lisp
src/compiler/generic/vm-typetran.lisp
src/compiler/hppa/array.lisp
src/compiler/mips/array.lisp
src/compiler/ppc/array.lisp
src/compiler/seqtran.lisp
src/compiler/sparc/array.lisp
src/compiler/target-dump.lisp
src/compiler/typetran.lisp
src/compiler/x86/array.lisp
src/compiler/x86/char.lisp
src/compiler/x86/vm.lisp
version.lisp-expr

index 303bdb7..e8c2302 100644 (file)
@@ -1123,14 +1123,17 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "ARRAY-TYPE" "ARRAY-TYPE-COMPLEXP"
                "ARRAY-TYPE-DIMENSIONS" "ARRAY-TYPE-ELEMENT-TYPE"
                "ARRAY-TYPE-P" "ARRAY-TYPE-SPECIALIZED-ELEMENT-TYPE"
-               "ASH-INDEX" "ASSERT-ERROR" "BASE-STRING-P"
+               "ASH-INDEX" "ASSERT-ERROR"
+               #!+sb-unicode "BASE-CHAR-P"
+               "BASE-STRING-P"
                "BINDING-STACK-POINTER-SAP" "BIT-BASH-COPY" "BIT-INDEX"
                "BOGUS-ARG-TO-VALUES-LIST-ERROR" "BOOLE-CODE"
                "BOUNDING-INDICES-BAD-ERROR" "BYTE-SPECIFIER" "%BYTE-BLT"
                "CALLABLE" "CASE-BODY-ERROR"
-              "CHARACTER-SET" "CHARACTER-SET-TYPE"
-              "CHARACTER-SET-TYPE-PAIRS"
-              "CHARPOS"
+               "CHARACTER-SET" "CHARACTER-SET-TYPE"
+               "CHARACTER-SET-TYPE-PAIRS"
+              #!+sb-unicode "CHARACTER-STRING-P"
+               "CHARPOS"
                "CHECK-FOR-CIRCULARITY" "CHECK-TYPE-ERROR" "CLOSED-FLAME"
                "CODE-COMPONENT" "CODE-COMPONENT-P" "CODE-DEBUG-INFO"
                "CODE-HEADER-REF" "CODE-HEADER-SET" "CODE-INSTRUCTIONS"
@@ -1233,7 +1236,9 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "NUMERIC-TYPE-HIGH" "NUMERIC-TYPE-LOW" "NUMERIC-TYPE-P"
                "OBJECT-NOT-ARRAY-ERROR" "OBJECT-NOT-CHARACTER-ERROR"
                "OBJECT-NOT-BASE-STRING-ERROR" "OBJECT-NOT-BIGNUM-ERROR"
-               "OBJECT-NOT-BIT-VECTOR-ERROR" "OBJECT-NOT-COMPLEX-ERROR"
+               "OBJECT-NOT-BIT-VECTOR-ERROR"
+              #!+sb-unicode "OBJECT-NOT-CHARACTER-STRING-ERROR"
+               "OBJECT-NOT-COMPLEX-ERROR"
                "OBJECT-NOT-COMPLEX-FLOAT-ERROR"
                "OBJECT-NOT-COMPLEX-SINGLE-FLOAT-ERROR"
                #!+long-float "OBJECT-NOT-COMPLEX-LONG-FLOAT-ERROR"
@@ -1299,6 +1304,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "OBJECT-NOT-SIMPLE-ARRAY-SIGNED-BYTE-8-ERROR"
                "OBJECT-NOT-SIMPLE-BIT-VECTOR-ERROR"
                "OBJECT-NOT-SIMPLE-BASE-STRING-ERROR"
+              #!+sb-unicode "OBJECT-NOT-SIMPLE-CHARACTER-STRING-ERROR"
                "OBJECT-NOT-SIMPLE-STRING-ERROR"
                "OBJECT-NOT-SIMPLE-VECTOR-ERROR"
                "OBJECT-NOT-SINGLE-FLOAT-ERROR" "OBJECT-NOT-STRING-ERROR"
@@ -1307,7 +1313,9 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "OBJECT-NOT-UNSIGNED-BYTE-32-ERROR"
                ;; FIXME: 32/64-bit issues
                "OBJECT-NOT-UNSIGNED-BYTE-64-ERROR"
-               "OBJECT-NOT-VECTOR-ERROR" "OBJECT-NOT-WEAK-POINTER-ERROR"
+               "OBJECT-NOT-VECTOR-ERROR"
+              "OBJECT-NOT-VECTOR-NIL-ERROR"
+              "OBJECT-NOT-WEAK-POINTER-ERROR"
                "ODD-KEY-ARGS-ERROR" "OUTPUT-OBJECT" "OUTPUT-UGLY-OBJECT"
                "PACKAGE-DESIGNATOR" "PACKAGE-DOC-STRING"
                "PACKAGE-HASHTABLE-SIZE" "PACKAGE-HASHTABLE-FREE"
@@ -1354,6 +1362,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "SIMPLE-ARRAY-SIGNED-BYTE-61-P"
                "SIMPLE-ARRAY-SIGNED-BYTE-64-P"
                "SIMPLE-ARRAY-SIGNED-BYTE-8-P" "SIMPLE-BASE-STRING-P"
+              #!+sb-unicode "SIMPLE-CHARACTER-STRING-P"
                "SIMPLE-PACKAGE-ERROR" "SIMPLE-UNBOXED-ARRAY"
                "SINGLE-FLOAT-BITS" "SINGLE-FLOAT-EXPONENT"
                "SINGLE-FLOAT-INT-EXPONENT" "SINGLE-FLOAT-SIGNIFICAND"
@@ -1998,7 +2007,9 @@ structure representations"
               "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT"
               "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG"
               "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER"
-              "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" "COMPLEX-WIDETAG"
+              "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG"
+              #!+sb-unicode "COMPLEX-CHARACTER-STRING-WIDETAG"
+               "COMPLEX-WIDETAG"
               "COMPLEX-VECTOR-NIL-WIDETAG"
               "COMPLEX-VECTOR-WIDETAG" "CONS-CAR-SLOT" "CONS-CDR-SLOT"
               "CONS-SIZE" "CONSTANT-SC-NUMBER"
@@ -2138,6 +2149,7 @@ structure representations"
               "SIMPLE-ARRAY-SIGNED-BYTE-8-WIDETAG"
               "SIMPLE-BIT-VECTOR-WIDETAG"
               "SIMPLE-BASE-STRING-WIDETAG"
+              #!+sb-unicode "SIMPLE-CHARACTER-STRING-WIDETAG"
               "SIMPLE-VECTOR-WIDETAG" "SINGLE-FLOAT-BIAS"
               "SINGLE-FLOAT-DIGITS" "SINGLE-FLOAT-EXPONENT-BYTE"
               "SINGLE-FLOAT-HIDDEN-BIT" "SINGLE-FLOAT-NORMAL-EXPONENT-MAX"
index f463c5f..d3c09c1 100644 (file)
     ;; and for all in any reasonable user programs.)
     ((t)
      (values #.sb!vm:simple-vector-widetag #.sb!vm:n-word-bits))
-    ((base-char standard-char character)
+    ((base-char standard-char #!-sb-unicode character)
      (values #.sb!vm:simple-base-string-widetag #.sb!vm:n-byte-bits))
+    #!+sb-unicode
+    ((character)
+     (values #.sb!vm:simple-character-string-widetag #.sb!vm:n-word-bits))
     ((bit)
      (values #.sb!vm:simple-bit-vector-widetag 1))
     ;; OK, we have to wade into SUBTYPEPing after all.
     ;; Pick off some easy common cases.
     ((t)
      #.sb!vm:complex-vector-widetag)
-    ((base-char character)
+    ((base-char #!-sb-unicode character)
      #.sb!vm:complex-base-string-widetag)
+    #!+sb-unicode
+    ((character)
+     #.sb!vm:complex-character-string-widetag)
     ((nil)
      #.sb!vm:complex-vector-nil-widetag)
     ((bit)
     (t
      (pick-vector-type type
        (nil #.sb!vm:complex-vector-nil-widetag)
+       #!-sb-unicode
        (character #.sb!vm:complex-base-string-widetag)
+       #!+sb-unicode
+       (base-char #.sb!vm:complex-base-string-widetag)
+       #!+sb-unicode
+       (character #.sb!vm:complex-character-string-widetag)
        (bit #.sb!vm:complex-bit-vector-widetag)
        (t #.sb!vm:complex-vector-widetag)))))
 
                 (array (allocate-vector
                         type
                         length
-                        (ceiling (* (if (= type sb!vm:simple-base-string-widetag)
-                                        (1+ length)
-                                        length)
-                                    n-bits)
-                                 sb!vm:n-word-bits))))
+                        (ceiling
+                          (* (if (or (= type sb!vm:simple-base-string-widetag)
+                                     #!+sb-unicode
+                                     (= type
+                                        sb!vm:simple-character-string-widetag))
+                                 (1+ length)
+                                 length)
+                             n-bits)
+                          sb!vm:n-word-bits))))
            (declare (type index length))
            (when initial-element-p
              (fill array initial-element))
          ,@(map 'list
                 (lambda (saetp)
                   `((simple-array ,(sb!vm:saetp-specifier saetp) (*))
-                    ,(if (eq (sb!vm:saetp-specifier saetp) 'character)
+                    ,(if (or (eq (sb!vm:saetp-specifier saetp) 'character)
+                              #!+sb-unicode
+                             (eq (sb!vm:saetp-specifier saetp) 'base-char))
                          *default-init-char-form*
                          (sb!vm:saetp-initial-element-default saetp))))
                 (remove-if-not
index 0a89c81..56b3d2c 100644 (file)
@@ -12,8 +12,8 @@
 
 (in-package "SB!IMPL")
 
-(def!constant sb!xc:char-code-limit 256
+(def!constant sb!xc:char-code-limit #!-sb-unicode 256 #!+sb-unicode #x110000
   #!+sb-doc
   "the upper exclusive bound on values produced by CHAR-CODE")
 
-(def!constant base-char-code-limit 256)
+(def!constant base-char-code-limit #!-sb-unicode 256 #!+sb-unicode 128)
index d9db35f..57ef609 100644 (file)
@@ -1243,6 +1243,21 @@ NIL is returned when no such class exists."
       :inherits (base-string simple-string string vector simple-array
                 array sequence)
       :prototype-form (make-array 0 :element-type 'base-char))
+     #!+sb-unicode
+     (character-string
+      :translation (vector character)
+      :codes (#.sb!vm:complex-character-string-widetag)
+      :direct-superclasses (string)
+      :inherits (string vector array sequence)
+      :prototype-form (make-array 0 :element-type 'character :fill-pointer t))
+     #!+sb-unicode
+     (simple-character-string
+      :translation (simple-array character (*))
+      :codes (#.sb!vm:simple-character-string-widetag)
+      :direct-superclasses (character-string simple-string)
+      :inherits (character-string simple-string string vector simple-array
+                array sequence)
+      :prototype-form (make-array 0 :element-type 'character))
      (list
       :translation (or cons (member nil))
       :inherits (sequence))
index 528bd52..6ae5f06 100644 (file)
   (setf (sap-ref-8 sap tail) bits)
   (code-char byte))
 
+#!+sb-unicode
+(let ((latin-9-table (let ((table (make-string 256)))
+                       (do ((i 0 (1+ i)))
+                           ((= i 256))
+                         (setf (aref table i) (code-char i)))
+                       (setf (aref table #xa4) (code-char #x20ac))
+                       (setf (aref table #xa6) (code-char #x0160))
+                       (setf (aref table #xa8) (code-char #x0161))
+                       (setf (aref table #xb4) (code-char #x017d))
+                       (setf (aref table #xb8) (code-char #x017e))
+                       (setf (aref table #xbc) (code-char #x0152))
+                       (setf (aref table #xbd) (code-char #x0153))
+                       (setf (aref table #xbe) (code-char #x0178))
+                       table))
+      (latin-9-reverse-1 (make-array 16
+                                     :element-type '(unsigned-byte 21)
+                                     :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0)))
+      (latin-9-reverse-2 (make-array 16
+                                     :element-type '(unsigned-byte 8)
+                                     :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0))))
+  (define-external-format (:latin-9 :latin9 :iso-8859-15)
+      1
+    (setf (sap-ref-8 sap tail)
+          (if (< bits 256)
+              (if (= bits (char-code (aref latin-9-table bits)))
+                  bits
+                  (error "cannot encode ~A in latin-9" bits))
+              (if (= (aref latin-9-reverse-1 (logand bits 15)) bits)
+                  (aref latin-9-reverse-2 (logand bits 15))
+                  (error "cannot encode ~A in latin-9" bits))))
+    (aref latin-9-table byte)))
+
 (define-external-format/variable-width (:utf-8 :utf8)
   (let ((bits (char-code byte)))
     (cond ((< bits #x80) 1)
index 7f58e99..a981179 100644 (file)
@@ -71,7 +71,8 @@
 ;;; of like READ-SEQUENCE specialized for files of (UNSIGNED-BYTE 8),
 ;;; with an automatic conversion from (UNSIGNED-BYTE 8) into CHARACTER
 ;;; for each element read
-(declaim (ftype (function (stream simple-string &optional index) (values)) read-string-as-bytes))
+(declaim (ftype (function (stream simple-string &optional index) (values))
+                read-string-as-bytes #!+sb-unicode read-string-as-words))
 (defun read-string-as-bytes (stream string &optional (length (length string)))
   (dotimes (i length)
     (setf (aref string i)
   ;; significantly better than the portable version here. If it is, then use
   ;; it as an alternate definition, protected with #-SB-XC-HOST.
   (values))
+#!+sb-unicode
+(defun read-string-as-words (stream string &optional (length (length string)))
+  #+sb-xc-host (bug "READ-STRING-AS-WORDS called")
+  (dotimes (i length)
+    (setf (aref string i)
+         (sb!xc:code-char (logior
+                            (read-byte stream)
+                            (ash (read-byte stream) 8)
+                            (ash (read-byte stream) 16)
+                            (ash (read-byte stream) 24)))))
+  (values))
 \f
 ;;;; miscellaneous fops
 
                              (make-string (* ,n-size 2))))
                      (done-with-fast-read-byte)
                      (let ((,n-buffer *fasl-symbol-buffer*))
+                        #+sb-xc-host
                        (read-string-as-bytes *fasl-input-stream*
                                              ,n-buffer
                                              ,n-size)
+                        #-sb-xc-host
+                       (#!+sb-unicode read-string-as-words
+                         #!-sb-unicode read-string-as-bytes
+                         *fasl-input-stream*
+                         ,n-buffer
+                         ,n-size)
                        (push-fop-table (without-package-locks
                                         (intern* ,n-buffer
                                                  ,n-size
                    (fop-uninterned-small-symbol-save 13)
   (let* ((arg (clone-arg))
         (res (make-string arg)))
+    #!-sb-unicode
     (read-string-as-bytes *fasl-input-stream* res)
+    #!+sb-unicode
+    (read-string-as-words *fasl-input-stream* res)
     (push-fop-table (make-symbol res))))
 
 (define-fop (fop-package 14)
     (read-string-as-bytes *fasl-input-stream* res)
     res))
 
+#!+sb-unicode
+(progn
+  #+sb-xc-host
+  (define-cloned-fops (fop-character-string 161) (fop-small-character-string 162)
+    (bug "CHARACTER-STRING FOP encountered"))
+
+  #-sb-xc-host
+  (define-cloned-fops (fop-character-string 161) (fop-small-character-string 162)
+    (let* ((arg (clone-arg))
+           (res (make-string arg)))
+      (read-string-as-words *fasl-input-stream* res)
+      res)))
+
 (define-cloned-fops (fop-vector 39) (fop-small-vector 40)
   (let* ((size (clone-arg))
         (res (make-array size)))
index 66f69f4..0d6ceeb 100644 (file)
         :datum object
         :expected-type 'base-string))
 
+(deferr object-not-vector-nil-error (object)
+  (error 'type-error
+        :datum object
+        :expected-type '(vector nil)))
+
+#!+sb-unicode
+(deferr object-not-character-string-error (object)
+  (error 'type-error
+        :datum object
+        :expected-type '(vector character)))
+
 (deferr object-not-bit-vector-error (object)
   (error 'type-error
         :datum object
index 5a1e0e8..e6f9931 100644 (file)
               (if (eq (car dims) '*)
                   (case eltype
                     (bit 'bit-vector)
-                    ((base-char character) 'base-string)
+                    ((base-char #!-sb-unicode character) 'base-string)
                     (* 'vector)
                     (t `(vector ,eltype)))
                   (case eltype
                     (bit `(bit-vector ,(car dims)))
-                    ((base-char character) `(base-string ,(car dims)))
+                    ((base-char #!-sb-unicode character)
+                      `(base-string ,(car dims)))
                     (t `(vector ,eltype ,(car dims)))))
               (if (eq (car dims) '*)
                   (case eltype
                     (bit 'simple-bit-vector)
-                    ((base-char character) 'simple-base-string)
+                    ((base-char #!-sb-unicode character) 'simple-base-string)
                     ((t) 'simple-vector)
                     (t `(simple-array ,eltype (*))))
                   (case eltype
                     (bit `(simple-bit-vector ,(car dims)))
-                    ((base-char character) `(simple-base-string ,(car dims)))
+                    ((base-char #!-sb-unicode character)
+                      `(simple-base-string ,(car dims)))
                     ((t) `(simple-vector ,(car dims)))
                     (t `(simple-array ,eltype ,dims))))))
          (t
index 057df86..02a3ca0 100644 (file)
 ;;; [CMUC]<steele>tradix.press. DO NOT EVEN THINK OF ATTEMPTING TO
 ;;; UNDERSTAND THIS CODE WITHOUT READING THE PAPER!
 
-(declaim (type (simple-array character (10)) *digits*))
-(defvar *digits* "0123456789")
-
 (defun flonum-to-string (x &optional width fdigits scale fmin)
   (cond ((zerop x)
         ;; Zero is a special case which FLOAT-STRING cannot handle.
 (defun float-string (fraction exponent precision width fdigits scale fmin)
   (let ((r fraction) (s 1) (m- 1) (m+ 1) (k 0)
        (digits 0) (decpnt 0) (cutoff nil) (roundup nil) u low high
+        (digit-characters "0123456789")
        (digit-string (make-array 50
                                  :element-type 'base-char
                                  :fill-pointer 0
       ;; Stop when either precision is exhausted or we have printed as
       ;; many fraction digits as permitted.
       (when (or low high (and cutoff (<= k cutoff))) (return))
-      (vector-push-extend (char *digits* u) digit-string)
+      (vector-push-extend (char digit-characters u) digit-string)
       (incf digits))
     ;; If cutoff occurred before first digit, then no digits are
     ;; generated at all.
     (when (or (not cutoff) (>= k cutoff))
       ;; Last digit may need rounding
-      (vector-push-extend (char *digits*
+      (vector-push-extend (char digit-characters
                                (cond ((and low (not high)) u)
                                      ((and high (not low)) (1+ u))
                                      (t (if (<= (ash r 1) s) u (1+ u)))))
   (let ((print-base 10) ; B
        (float-radix 2) ; b
        (float-digits (float-digits v)) ; p
+        (digit-characters "0123456789")
        (min-e
         (etypecase v
           (single-float single-float-min-e)
                                      (and high-ok (= (+ r m+) s))))
                        (when (or tc1 tc2)
                          (go end))
-                       (vector-push-extend (char *digits* d) result)
+                       (vector-push-extend (char digit-characters d) result)
                        (go loop)
                      end
                        (let ((d (cond
                                   ((and tc1 (not tc2)) d)
                                   (t ; (and tc1 tc2)
                                    (if (< (* r 2) s) d (1+ d))))))
-                         (vector-push-extend (char *digits* d) result)
+                         (vector-push-extend (char digit-characters d) result)
                          (return-from generate result))))))
          (if (>= e 0)
              (if (/= f (expt float-radix (1- float-digits)))
index cf18258..c7696b2 100644 (file)
@@ -72,7 +72,8 @@
                            :kind :fixed
                            :length size))))))
 
-(dolist (code (list complex-base-string-widetag simple-array-widetag
+(dolist (code (list #!+sb-unicode complex-character-string-widetag
+                    complex-base-string-widetag simple-array-widetag
                    complex-bit-vector-widetag complex-vector-widetag
                    complex-array-widetag complex-vector-nil-widetag))
   (setf (svref *meta-room-info* code)
                      :kind :string
                      :length 0))
 
+#!+sb-unicode
+(setf (svref *meta-room-info* simple-character-string-widetag)
+      (make-room-info :name 'simple-character-string
+                     :kind :string
+                     :length 2))
+
 (setf (svref *meta-room-info* simple-array-nil-widetag)
       (make-room-info :name 'simple-array-nil
                      :kind :fixed
             #.single-float-widetag
             #.double-float-widetag
             #.simple-base-string-widetag
+             #!+sb-unicode #.simple-character-string-widetag
             #.simple-array-nil-widetag
             #.simple-bit-vector-widetag
             #.simple-array-unsigned-byte-2-widetag
index 50eec6b..b05450e 100644 (file)
   (when (null source-end) (setq source-end (length source-sequence)))
   (mumble-replace-from-mumble))
 
+#!+sb-unicode
+(defun simple-character-string-replace-from-simple-character-string*
+    (target-sequence source-sequence
+     target-start target-end source-start source-end)
+  (declare (type (simple-array character (*)) target-sequence source-sequence))
+  (when (null target-end) (setq target-end (length target-sequence)))
+  (when (null source-end) (setq source-end (length source-sequence)))
+  (mumble-replace-from-mumble))
+
 (define-sequence-traverser replace
     (sequence1 sequence2 &key start1 end1 start2 end2)
   #!+sb-doc
index b33dfb9..6fc1ac8 100644 (file)
 ;;; and hence must be an N-BIN method.
 (defun fast-read-char-refill (stream eof-error-p eof-value)
   (let* ((ibuf (ansi-stream-cin-buffer stream))
-        (count (funcall (ansi-stream-n-bin stream)
-                        stream
-                        ibuf
-                        +ansi-stream-in-buffer-extra+
-                        (- +ansi-stream-in-buffer-length+
-                           +ansi-stream-in-buffer-extra+)
-                        nil))
-        (start (- +ansi-stream-in-buffer-length+ count)))
+         (count (funcall (ansi-stream-n-bin stream)
+                         stream
+                         ibuf
+                         +ansi-stream-in-buffer-extra+
+                         (- +ansi-stream-in-buffer-length+
+                            +ansi-stream-in-buffer-extra+)
+                         nil))
+         (start (- +ansi-stream-in-buffer-length+ count))
+         (n-character-array-bytes
+          #.(/ (sb!vm:saetp-n-bits
+                (find 'character
+                      sb!vm:*specialized-array-element-type-properties*
+                      :key #'sb!vm:saetp-specifier))
+               sb!vm:n-byte-bits)))
     (declare (type index start count))
     (cond ((zerop count)
-          (setf (ansi-stream-in-index stream) +ansi-stream-in-buffer-length+)
-          (funcall (ansi-stream-in stream) stream eof-error-p eof-value))
-         (t
-          (when (/= start +ansi-stream-in-buffer-extra+)
-            (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+
-                                      sb!vm:n-byte-bits)
-                                   (* sb!vm:vector-data-offset
-                                      sb!vm:n-word-bits))
-                           ibuf (+ (the index (* start sb!vm:n-byte-bits))
-                                   (* sb!vm:vector-data-offset
-                                      sb!vm:n-word-bits))
-                           (* count sb!vm:n-byte-bits)))
-          (setf (ansi-stream-in-index stream) (1+ start))
-          (aref ibuf start)))))
+           (setf (ansi-stream-in-index stream)
+                 +ansi-stream-in-buffer-length+)
+           (funcall (ansi-stream-in stream) stream eof-error-p eof-value))
+          (t
+           (when (/= start +ansi-stream-in-buffer-extra+)
+             (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+
+                                       sb!vm:n-byte-bits
+                                       n-character-array-bytes)
+                                    (* sb!vm:vector-data-offset
+                                       sb!vm:n-word-bits))
+                            ibuf (+ (the index (* start
+                                                  sb!vm:n-byte-bits
+                                                  n-character-array-bytes))
+                                    (* sb!vm:vector-data-offset
+                                       sb!vm:n-word-bits))
+                            (* count
+                               sb!vm:n-byte-bits
+                               n-character-array-bytes)))
+           (setf (ansi-stream-in-index stream) (1+ start))
+           (aref ibuf start)))))
 
 ;;; This is similar to FAST-READ-CHAR-REFILL, but we don't have to
 ;;; leave room for unreading.
index e8e2a9b..fa0674c 100644 (file)
@@ -12,7 +12,6 @@
 (in-package "SB!VM")
 \f
 ;;;; allocator for the array header
-
 (define-vop (make-array-header)
   (:policy :fast-safe)
   (:translate make-array-header)
@@ -36,8 +35,6 @@
       (inst bis alloc-tn other-pointer-lowtag result)
       (storew header result 0 other-pointer-lowtag)
       (inst addq alloc-tn bytes alloc-tn))))
-
-
 \f
 ;;;; additional accessors and setters for the array header
 (define-full-reffer %array-dimension *
@@ -61,7 +58,6 @@
     (inst sll temp n-fixnum-tag-bits res)))
 \f
 ;;;; bounds checking routine
-
 (define-vop (check-bound)
   (:translate %check-bound)
   (:policy :fast-safe)
   
   (def-partial-data-vector-frobs simple-base-string character :byte nil
     character-reg)
+  #!+sb-unicode ; FIXME: what about when a word is 64 bits?
+  (def-full-data-vector-frobs simple-character-string character character-reg)
   
   (def-partial-data-vector-frobs simple-array-unsigned-byte-7 positive-fixnum
     :byte nil unsigned-reg signed-reg)
index a66ac81..ede2e77 100644 (file)
        (t
         (unless *cold-load-dump*
           (dump-fop 'fop-normal-load file))
+         #+sb-xc-host
         (dump-simple-base-string
           (coerce (package-name pkg) 'simple-base-string)
           file)
+         #-sb-xc-host
+        (#!+sb-unicode dump-simple-character-string
+          #!-sb-unicode dump-simple-base-string
+         (coerce (package-name pkg) '(simple-array character (*)))
+         file)
         (dump-fop 'fop-package file)
         (unless *cold-load-dump*
           (dump-fop 'fop-maybe-cold-load file))
                                        (*)))
                            x)))
     (typecase simple-version
+      #+sb-xc-host
+      (simple-string
+       (unless (string-check-table x file)
+         (dump-simple-base-string simple-version file)
+         (string-save-object x file)))
+      #-sb-xc-host
       (simple-base-string
-       (unless (equal-check-table x file)
+       (unless (string-check-table x file)
         (dump-simple-base-string simple-version file)
-        (equal-save-object x file)))
+        (string-save-object x file)))
+      #-sb-xc-host
+      ((simple-array character (*))
+       #!+sb-unicode
+       (unless (string-check-table x file)
+        (dump-simple-character-string simple-version file)
+        (string-save-object x file))
+       #!-sb-unicode
+       (bug "how did we get here?"))
       (simple-vector
        (dump-simple-vector simple-version file)
        (eq-save-object x file))
                      file)
           (dump-word pname-length file)))
 
-    (dump-base-chars-of-string pname file)
+    #+sb-xc-host (dump-base-chars-of-string pname file)
+    #-sb-xc-host (#!+sb-unicode dump-characters-of-string
+                  #!-sb-unicode dump-base-chars-of-string
+                  pname file)
 
     (unless *cold-load-dump*
       (setf (gethash s (fasl-output-eq-table file))
index 831cea2..e182880 100644 (file)
   simple-array-unsigned-byte-16     ; 10011110
   simple-array-nil                  ; 10100010
   simple-base-string                ; 10100110
+  #!+sb-unicode simple-character-string
   simple-bit-vector                 ; 10101010
   simple-vector                     ; 10101110
   #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or))
   simple-array                      ; 11011110
   complex-vector-nil                ; 11100010
   complex-base-string               ; 11100110
+  #!+sb-unicode complex-character-string
   complex-bit-vector                ; 11101010
   complex-vector                    ; 11101110
   complex-array                     ; 11110010
 
   #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or))
   unused12                          ; 11110110
-  #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or))
+  #!+(and #.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or))
+          (not sb-unicode))
   unused13                          ; 11111010
-  #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or))
+  #!+(and #.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or))
+          (not sb-unicode))
   unused14                          ; 11111110
 )
 
index 674db25..b1dee69 100644 (file)
@@ -2106,6 +2106,11 @@ core and return a descriptor to it."
     (read-string-as-bytes *fasl-input-stream* string)
     (base-string-to-core string)))
 
+#!+sb-unicode
+(clone-cold-fop (fop-character-string)
+               (fop-small-character-string)
+  (bug "CHARACTER-STRING dumped by cross-compiler."))
+
 (clone-cold-fop (fop-vector)
                (fop-small-vector)
   (let* ((size (clone-arg))
index a2edee8..8fde951 100644 (file)
    "Object is not of type STRING.")
   (object-not-base-string
    "Object is not of type BASE-STRING.")
+  (object-not-vector-nil
+   "Object is not of type (VECTOR NIL).")
+  #!+sb-unicode
+  (object-not-character-string
+   "Object is not of type (VECTOR CHARACTER).")
   (object-not-bit-vector
    "Object is not of type BIT-VECTOR.")
   (object-not-array
index b782be5..c9a9602 100644 (file)
@@ -66,7 +66,8 @@
 
 (!define-type-vops simple-string-p check-simple-string nil
     object-not-simple-string-error
-  (simple-base-string-widetag simple-array-nil-widetag))
+  (#!+sb-unicode simple-character-string-widetag
+   simple-base-string-widetag simple-array-nil-widetag))
 
 (macrolet
     ((define-simple-array-type-vops ()
   (funcallable-instance-header-widetag))
 
 (!define-type-vops array-header-p nil nil nil
-  (simple-array-widetag complex-base-string-widetag complex-bit-vector-widetag
+  (simple-array-widetag
+   #!+sb-unicode complex-character-string-widetag
+   complex-base-string-widetag complex-bit-vector-widetag
    complex-vector-widetag complex-array-widetag complex-vector-nil-widetag))
 
 (!define-type-vops stringp check-string nil object-not-string-error
-  (simple-base-string-widetag complex-base-string-widetag
+  (#!+sb-unicode simple-character-string-widetag
+   #!+sb-unicode complex-character-string-widetag
+   simple-base-string-widetag complex-base-string-widetag
    simple-array-nil-widetag complex-vector-nil-widetag))
 
 (!define-type-vops base-string-p check-base-string nil object-not-base-string-error
     object-not-vector-nil-error
   (simple-array-nil-widetag complex-vector-nil-widetag))
 
+#!+sb-unicode
+(!define-type-vops character-string-p check-character-string nil
+    object-not-character-string-error
+  (simple-character-string-widetag complex-character-string-widetag))
+
 (!define-type-vops vectorp check-vector nil object-not-vector-error
   (complex-vector-widetag .
    #.(append
index 503f1bb..f81b183 100644 (file)
@@ -64,6 +64,7 @@
         (nil #:mu 0 simple-array-nil
              :complex-typecode #.sb!vm:complex-vector-nil-widetag
              :importance 0)
+         #!-sb-unicode
         (character ,(code-char 0) 8 simple-base-string
                    ;; (SIMPLE-BASE-STRINGs are stored with an extra
                    ;; trailing #\NULL for convenience in calling out
                    :n-pad-elements 1
                    :complex-typecode #.sb!vm:complex-base-string-widetag
                    :importance 17)
+         #!+sb-unicode
+        (base-char ,(code-char 0) 8 simple-base-string
+                   ;; (SIMPLE-BASE-STRINGs are stored with an extra
+                   ;; trailing #\NULL for convenience in calling out
+                   ;; to C.)
+                   :n-pad-elements 1
+                   :complex-typecode #.sb!vm:complex-base-string-widetag
+                   :importance 17)
+         #!+sb-unicode
+        (character ,(code-char 0) 32 simple-character-string
+                   :n-pad-elements 1
+                   :complex-typecode #.sb!vm:complex-character-string-widetag
+                   :importance 17)
         (single-float 0.0f0 32 simple-array-single-float
          :importance 6)
         (double-float 0.0d0 64 simple-array-double-float
index de26bcb..e0a4899 100644 (file)
@@ -22,6 +22,8 @@
           complex-vector-p
           base-char-p %standard-char-p %instancep
           base-string-p simple-base-string-p
+           #!+sb-unicode character-string-p
+           #!+sb-unicode simple-character-string-p
           array-header-p
           simple-array-p simple-array-nil-p vector-nil-p
           simple-array-unsigned-byte-2-p
index 0a97ccf..e47aec5 100644 (file)
@@ -49,6 +49,8 @@
        (give-up-ir1-transform)
        `(etypecase string
          ((simple-array character (*)) (data-vector-ref string index))
+          #!+sb-unicode
+         ((simple-array base-char (*)) (data-vector-ref string index))
          ((simple-array nil (*)) (data-vector-ref string index))))))
 
 (deftransform hairy-data-vector-ref ((array index) (array t) *)
        `(etypecase string
          ((simple-array character (*))
           (data-vector-set string index new-value))
+          #!+sb-unicode
+         ((simple-array base-char (*))
+          (data-vector-set string index new-value))
          ((simple-array nil (*))
           (data-vector-set string index new-value))))))
 
index c1328e1..2595ed0 100644 (file)
@@ -19,6 +19,7 @@
 ;;; They shouldn't be used explicitly.
 (define-type-predicate base-string-p base-string)
 (define-type-predicate bignump bignum)
+#!+sb-unicode (define-type-predicate character-string-p (vector character))
 (define-type-predicate complex-double-float-p (complex double-float))
 (define-type-predicate complex-single-float-p (complex single-float))
 #!+long-float
@@ -92,6 +93,8 @@
 (define-type-predicate simple-array-complex-long-float-p
                       (simple-array (complex long-float) (*)))
 (define-type-predicate simple-base-string-p simple-base-string)
+#!+sb-unicode (define-type-predicate simple-character-string-p
+                  (simple-array character (*)))
 (define-type-predicate system-area-pointer-p system-area-pointer)
 (define-type-predicate unsigned-byte-32-p (unsigned-byte 32))
 (define-type-predicate signed-byte-32-p (signed-byte 32))
index 2c675f9..0aa28cb 100644 (file)
 ;;;; files for more information.
 
 (in-package "SB!VM")
-
 \f
 ;;;; Allocator for the array header.
-
 (define-vop (make-array-header)
   (:translate make-array-header)
   (:policy :fast-safe)
     (loadw res x 0 other-pointer-lowtag)
     (inst srl res n-widetag-bits res)
     (inst addi (- (1- array-dimensions-offset)) res res)))
-
-
 \f
 ;;;; Bounds checking routine.
-
-
 (define-vop (check-bound)
   (:translate %check-bound)
   (:policy :fast-safe)
@@ -84,7 +78,6 @@
 ;;; Variants built on top of word-index-ref, etc.  I.e. those vectors whos
 ;;; elements are represented in integer registers and are built out of
 ;;; 8, 16, or 32 bit elements.
-
 (macrolet ((def-full-data-vector-frobs (type element-type &rest scs)
   `(progn
      (define-full-reffer ,(symbolicate "DATA-VECTOR-REF/" type) ,type
        ,element-type data-vector-set))))
 
   (def-full-data-vector-frobs simple-vector * descriptor-reg any-reg)
-  
+
   (def-partial-data-vector-frobs simple-base-string character :byte nil character-reg)
-  
+  #!+sb-unicode
+  (def-full-data-vector-frobs simple-character-string character character-reg)
+
   (def-partial-data-vector-frobs simple-array-unsigned-byte-7 positive-fixnum
     :byte nil unsigned-reg signed-reg)
   (def-partial-data-vector-frobs simple-array-unsigned-byte-8 positive-fixnum
 
 ;;; Integer vectors whos elements are smaller than a byte.  I.e. bit, 2-bit,
 ;;; and 4-bit vectors.
-;;; 
-
 (macrolet ((def-small-data-vector-frobs (type bits)
   (let* ((elements-per-word (floor n-word-bits bits))
         (bit-shift (1- (integer-length elements-per-word))))
   (def-small-data-vector-frobs simple-array-unsigned-byte-4 4))
 
 ;;; And the float variants.
-;;; 
-
 (define-vop (data-vector-ref/simple-array-single-float)
   (:note "inline array access")
   (:translate data-vector-ref)
 
 \f
 ;;; Complex float arrays.
-
 (define-vop (data-vector-ref/simple-array-complex-single-float)
   (:note "inline array access")
   (:translate data-vector-ref)
 \f
 ;;; These VOPs are used for implementing float slots in structures (whose raw
 ;;; data is an unsigned-32 vector.
-;;;
 (define-vop (raw-ref-single data-vector-ref/simple-array-single-float)
   (:translate %raw-ref-single)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-single data-vector-set/simple-array-single-float)
   (:translate %raw-set-single)
   (:arg-types sb!c::raw-vector positive-fixnum single-float))
-;;;
 (define-vop (raw-ref-double data-vector-ref/simple-array-double-float)
   (:translate %raw-ref-double)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-double data-vector-set/simple-array-double-float)
   (:translate %raw-set-double)
   (:arg-types sb!c::raw-vector positive-fixnum double-float))
-
 (define-vop (raw-ref-complex-single
             data-vector-ref/simple-array-complex-single-float)
   (:translate %raw-ref-complex-single)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-complex-single
             data-vector-set/simple-array-complex-single-float)
   (:translate %raw-set-complex-single)
   (:arg-types sb!c::raw-vector positive-fixnum complex-single-float))
-;;;
 (define-vop (raw-ref-complex-double
             data-vector-ref/simple-array-complex-double-float)
   (:translate %raw-ref-complex-double)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-complex-double
             data-vector-set/simple-array-complex-double-float)
   (:translate %raw-set-complex-double)
 
 ;;; These vops are useful for accessing the bits of a vector irrespective of
 ;;; what type of vector it is.
-;;; 
-
 (define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num
   %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
   unsigned-num %set-raw-bits)
-
-
 \f
 ;;;; Misc. Array VOPs.
-
 (define-vop (get-vector-subtype get-header-data))
 (define-vop (set-vector-subtype set-header-data))
-
index 2265cf4..7a0e774 100644 (file)
 ;;;; files for more information.
 
 (in-package "SB!VM")
-
 \f
 ;;;; Allocator for the array header.
-
 (define-vop (make-array-header)
   (:policy :fast-safe)
   (:translate make-array-header)
@@ -37,7 +35,6 @@
       (inst or result alloc-tn other-pointer-lowtag)
       (storew header result 0 other-pointer-lowtag)
       (inst addu alloc-tn bytes))))
-
 \f
 ;;;; Additional accessors and setters for the array header.
 (define-full-reffer %array-dimension *
     (inst sra temp n-widetag-bits)
     (inst subu temp (1- array-dimensions-offset))
     (inst sll res temp 2)))
-
-
 \f
 ;;;; Bounds checking routine.
-
-
 (define-vop (check-bound)
   (:translate %check-bound)
   (:policy :fast-safe)
       (inst beq temp zero-tn error)
       (inst nop)
       (move result index))))
-
-
 \f
 ;;;; Accessors/Setters
 
 ;;; Variants built on top of word-index-ref, etc.  I.e. those vectors whos
 ;;; elements are represented in integer registers and are built out of
 ;;; 8, 16, or 32 bit elements.
-
 (macrolet ((def-full-data-vector-frobs (type element-type &rest scs)
   `(progn
      (define-full-reffer ,(symbolicate "DATA-VECTOR-REF/" type) ,type
   
   (def-partial-data-vector-frobs simple-base-string base-char 
     :byte nil base-char-reg)
+  #!+sb-unicode
+  (def-full-data-vector-frobs simple-character-string character character-reg)
   
   (def-partial-data-vector-frobs simple-array-unsigned-byte-7 positive-fixnum
     :byte nil unsigned-reg signed-reg)
   (def-full-data-vector-frobs simple-array-signed-byte-32 signed-num
     signed-reg))
 
-
-
 ;;; Integer vectors whos elements are smaller than a byte.  I.e. bit, 2-bit,
 ;;; and 4-bit vectors.
-;;; 
-
 (macrolet ((def-small-data-vector-frobs (type bits)
   (let* ((elements-per-word (floor n-word-bits bits))
         (bit-shift (1- (integer-length elements-per-word))))
   (def-small-data-vector-frobs simple-array-unsigned-byte-2 2)
   (def-small-data-vector-frobs simple-array-unsigned-byte-4 4))
 
-
 ;;; And the float variants.
-;;; 
-
 (define-vop (data-vector-ref/simple-array-single-float)
   (:note "inline array access")
   (:translate data-vector-ref)
                n-word-bytes))))
     (unless (location= result value)
       (inst fmove :double result value))))
-
 \f
 ;;; Complex float arrays.
-
 (define-vop (data-vector-ref/simple-array-complex-single-float)
   (:note "inline array access")
   (:translate data-vector-ref)
                                other-pointer-lowtag)))
     (inst nop)))
 
-
 (define-vop (data-vector-set/simple-array-complex-single-float)
   (:note "inline array store")
   (:translate data-vector-set)
 \f
 ;;; These VOPs are used for implementing float slots in structures (whose raw
 ;;; data is an unsigned-32 vector.
-;;;
 (define-vop (raw-ref-single data-vector-ref/simple-array-single-float)
   (:translate %raw-ref-single)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-single data-vector-set/simple-array-single-float)
   (:translate %raw-set-single)
   (:arg-types sb!c::raw-vector positive-fixnum single-float))
-;;;
 (define-vop (raw-ref-double data-vector-ref/simple-array-double-float)
   (:translate %raw-ref-double)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-double data-vector-set/simple-array-double-float)
   (:translate %raw-set-double)
   (:arg-types sb!c::raw-vector positive-fixnum double-float))
-
 (define-vop (raw-ref-complex-single
             data-vector-ref/simple-array-complex-single-float)
   (:translate %raw-ref-complex-single)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-complex-single
             data-vector-set/simple-array-complex-single-float)
   (:translate %raw-set-complex-single)
   (:arg-types sb!c::raw-vector positive-fixnum complex-single-float))
-;;;
 (define-vop (raw-ref-complex-double
             data-vector-ref/simple-array-complex-double-float)
   (:translate %raw-ref-complex-double)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-complex-double
             data-vector-set/simple-array-complex-double-float)
   (:translate %raw-set-complex-double)
 
 ;;; These vops are useful for accessing the bits of a vector irrespective of
 ;;; what type of vector it is.
-;;; 
-
 (define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num
   %raw-bits)
 (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg)
   unsigned-num %set-raw-bits)
-
-
 \f
 ;;;; Misc. Array VOPs.
-
 (define-vop (get-vector-subtype get-header-data))
 (define-vop (set-vector-subtype set-header-data))
-
index b546630..6eaef08 100644 (file)
        (:result-types ,element-type)))))
   (def-data-vector-frobs simple-base-string byte-index
     character character-reg)
+  #!+sb-unicode
+  (def-data-vector-frobs simple-character-string word-index
+    character character-reg)
   (def-data-vector-frobs simple-vector word-index
     * descriptor-reg any-reg)
-
   (def-data-vector-frobs simple-array-unsigned-byte-7 byte-index
     positive-fixnum unsigned-reg)
   (def-data-vector-frobs simple-array-unsigned-byte-8 byte-index
index b1fb39d..58585aa 100644 (file)
                            sb!vm:n-byte-bits)))
      string1))
 
+;;; KLUDGE: This isn't the nicest way of achieving efficient string
+;;; streams, but it does work; a more general framework for this kind
+;;; of optimization, as well as better handling of the possible
+;;; keyword arguments, would be nice.
+#!+sb-unicode
+(deftransform replace ((string1 string2 &key (start1 0) (start2 0)
+                               end1 end2)
+                      ((simple-array character (*))
+                        (simple-array character (*))
+                        &rest t)
+                      *
+                      ;; FIXME: consider replacing this policy test
+                      ;; with some tests for the STARTx and ENDx
+                      ;; indices being valid, conditional on high
+                      ;; SAFETY code.
+                      ;;
+                      ;; FIXME: It turns out that this transform is
+                      ;; critical for the performance of string
+                      ;; streams.  Make this more explicit.
+                      :policy (< (max safety space) 3))
+  `(sb!impl::simple-character-string-replace-from-simple-character-string*
+    string1 string2 start1 end1 start2 end2))
+
 ;;; FIXME: this would be a valid transform for certain excluded cases:
 ;;;   * :TEST 'CHAR= or :TEST #'CHAR=
 ;;;   * :TEST 'EQL   or :TEST #'EQL
 ;;;   * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity)
-;;;
-;;; also, it should be noted that there's nothing much in this
-;;; transform (as opposed to the ones for REPLACE and CONCATENATE)
-;;; that particularly limits it to SIMPLE-BASE-STRINGs.
 (deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2)
-                     (simple-base-string simple-base-string &rest t)
+                     (simple-string simple-string &rest t)
                      *
                      :policy (> speed (max space safety)))
   `(block search
 ;;; at least once DYNAMIC-EXTENT works.
 ;;;
 ;;; FIXME: currently KLUDGEed because of bug 188
+;;;
+;;; FIXME: disabled for sb-unicode: probably want it back
+#!-sb-unicode
 (deftransform concatenate ((rtype &rest sequences)
                           (t &rest (or simple-base-string
                                        (simple-array nil (*))))
index 9fdb1ff..0d56f1a 100644 (file)
@@ -12,7 +12,6 @@
 (in-package "SB!VM")
 \f
 ;;;; allocator for the array header.
-
 (define-vop (make-array-header)
   (:translate make-array-header)
   (:policy :fast-safe)
@@ -36,7 +35,6 @@
       (inst srl ndescr ndescr n-fixnum-tag-bits)
       (storew ndescr header 0 other-pointer-lowtag))
     (move result header)))
-
 \f
 ;;;; Additional accessors and setters for the array header.
 (define-vop (%array-dimension word-index-ref)
@@ -84,7 +82,6 @@
 ;;; Variants built on top of word-index-ref, etc.  I.e. those vectors whos
 ;;; elements are represented in integer registers and are built out of
 ;;; 8, 16, or 32 bit elements.
-
 (macrolet ((def-data-vector-frobs (type variant element-type &rest scs)
   `(progn
      (define-vop (,(symbolicate "DATA-VECTOR-REF/" (string type))
 
   (def-data-vector-frobs simple-base-string byte-index
     character character-reg)
+  #!+sb-unicode
+  (def-data-vector-frobs simple-character-string word-index
+    character character-reg)
   (def-data-vector-frobs simple-vector word-index
     * descriptor-reg any-reg)
 
 \f
 ;;; These VOPs are used for implementing float slots in structures (whose raw
 ;;; data is an unsigned-32 vector.
-;;;
 (define-vop (raw-ref-single data-vector-ref/simple-array-single-float)
   (:translate %raw-ref-single)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-single data-vector-set/simple-array-single-float)
   (:translate %raw-set-single)
   (:arg-types sb!c::raw-vector positive-fixnum single-float))
-;;;
 (define-vop (raw-ref-double data-vector-ref/simple-array-double-float)
   (:translate %raw-ref-double)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-double data-vector-set/simple-array-double-float)
   (:translate %raw-set-double)
   (:arg-types sb!c::raw-vector positive-fixnum double-float))
-;;;
 #!+long-float
 (define-vop (raw-ref-long data-vector-ref/simple-array-long-float)
   (:translate %raw-ref-long)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 #!+long-float
 (define-vop (raw-set-double data-vector-set/simple-array-long-float)
   (:translate %raw-set-long)
   (:arg-types sb!c::raw-vector positive-fixnum long-float))
-
 (define-vop (raw-ref-complex-single
             data-vector-ref/simple-array-complex-single-float)
   (:translate %raw-ref-complex-single)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-complex-single
             data-vector-set/simple-array-complex-single-float)
   (:translate %raw-set-complex-single)
   (:arg-types sb!c::raw-vector positive-fixnum complex-single-float))
-;;;
 (define-vop (raw-ref-complex-double
             data-vector-ref/simple-array-complex-double-float)
   (:translate %raw-ref-complex-double)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 (define-vop (raw-set-complex-double
             data-vector-set/simple-array-complex-double-float)
   (:translate %raw-set-complex-double)
   (:arg-types sb!c::raw-vector positive-fixnum complex-double-float))
-;;;
 #!+long-float
 (define-vop (raw-ref-complex-long
             data-vector-ref/simple-array-complex-long-float)
   (:translate %raw-ref-complex-long)
   (:arg-types sb!c::raw-vector positive-fixnum))
-;;;
 #!+long-float
 (define-vop (raw-set-complex-long
             data-vector-set/simple-array-complex-long-float)
   (:translate %raw-set-complex-long)
   (:arg-types sb!c::raw-vector positive-fixnum complex-long-float))
 
-
 ;;; These vops are useful for accessing the bits of a vector irrespective of
 ;;; what type of vector it is.
-;;; 
-
 (define-vop (raw-bits word-index-ref)
   (:note "raw-bits VOP")
   (:translate %raw-bits)
index 01eba24..26c7380 100644 (file)
 
 (in-package "SB!FASL")
 
+;;; a helper function shared by DUMP-SIMPLE-CHARACTER-STRING and
+;;; DUMP-SYMBOL (in the target compiler: the cross-compiler uses the
+;;; portability knowledge and always dumps BASE-STRINGS).
+#!+sb-unicode
+(defun dump-characters-of-string (s fasl-output)
+  (declare (type string s) (type fasl-output fasl-output))
+  (dovector (c s)
+    (dump-word (char-code c) fasl-output))
+  (values))
+#!+sb-unicode
+(defun dump-simple-character-string (s file)
+  (declare (type (simple-array character (*)) s))
+  (dump-fop* (length s) fop-small-character-string fop-character-string file)
+  (dump-characters-of-string s file)
+  (values))
+
 ;;; Dump the first N bytes of VEC out to FILE. VEC is some sort of unboxed
 ;;; vector-like thing that we can BLT from.
 (defun dump-raw-bytes (vec n fasl-output)
index c1b1908..89d1568 100644 (file)
 
 (define-source-transform atom (x)
   `(not (consp ,x)))
+#!+sb-unicode
+(define-source-transform base-char-p (x)
+  `(typep ,x 'base-char))
 \f
 ;;;; TYPEP source transform
 
index 5a56465..642a535 100644 (file)
 
 ;;; simple-string
 
+#!+sb-unicode
+(progn
+(define-vop (data-vector-ref/simple-base-string)
+  (:translate data-vector-ref)
+  (:policy :fast-safe)
+  (:args (object :scs (descriptor-reg))
+        (index :scs (unsigned-reg)))
+  (:arg-types simple-base-string positive-fixnum)
+  (:results (value :scs (character-reg)))
+  (:result-types character)
+  (:generator 5
+    (inst movzx value
+         (make-ea :byte :base object :index index :scale 1
+                  :disp (- (* vector-data-offset n-word-bytes)
+                           other-pointer-lowtag)))))
+
+(define-vop (data-vector-ref-c/simple-base-string)
+  (:translate data-vector-ref)
+  (:policy :fast-safe)
+  (:args (object :scs (descriptor-reg)))
+  (:info index)
+  (:arg-types simple-base-string (:constant (signed-byte 30)))
+  (:results (value :scs (character-reg)))
+  (:result-types character)
+  (:generator 4
+    (inst movzx value
+         (make-ea :byte :base object
+                  :disp (- (+ (* vector-data-offset n-word-bytes) index)
+                           other-pointer-lowtag)))))
+
+(define-vop (data-vector-set/simple-base-string)
+  (:translate data-vector-set)
+  (:policy :fast-safe)
+  (:args (object :scs (descriptor-reg) :to (:eval 0))
+        (index :scs (unsigned-reg) :to (:eval 0))
+        (value :scs (character-reg) :target eax))
+  (:arg-types simple-base-string positive-fixnum character)
+  (:temporary (:sc character-reg :offset eax-offset :target result
+                   :from (:argument 2) :to (:result 0))
+              eax)
+  (:results (result :scs (character-reg)))
+  (:result-types character)
+  (:generator 5
+    (move eax value)
+    (inst mov (make-ea :byte :base object :index index :scale 1
+                      :disp (- (* vector-data-offset n-word-bytes)
+                               other-pointer-lowtag))
+         al-tn)
+    (move result eax)))
+
+(define-vop (data-vector-set-c/simple-base-string)
+  (:translate data-vector-set)
+  (:policy :fast-safe)
+  (:args (object :scs (descriptor-reg) :to (:eval 0))
+        (value :scs (character-reg)))
+  (:info index)
+  (:arg-types simple-base-string (:constant (signed-byte 30)) character)
+  (:temporary (:sc unsigned-reg :offset eax-offset :target result
+                   :from (:argument 1) :to (:result 0))
+              eax)
+  (:results (result :scs (character-reg)))
+  (:result-types character)
+  (:generator 4
+    (move eax value)
+    (inst mov (make-ea :byte :base object
+                       :disp (- (+ (* vector-data-offset n-word-bytes) index)
+                                other-pointer-lowtag))
+          al-tn)
+    (move result eax)))
+) ; PROGN
+
+#!-sb-unicode
+(progn
 (define-vop (data-vector-ref/simple-base-string)
   (:translate data-vector-ref)
   (:policy :fast-safe)
          value)
     (move result value)))
 
-(define-vop (data-vector-set/simple-base-string-c)
+(define-vop (data-vector-set-c/simple-base-string)
   (:translate data-vector-set)
   (:policy :fast-safe)
   (:args (object :scs (descriptor-reg) :to (:eval 0))
                               other-pointer-lowtag))
         value)
    (move result value)))
+) ; PROGN
+
+#!+sb-unicode
+(define-full-reffer data-vector-ref/simple-character-string
+    simple-character-string vector-data-offset other-pointer-lowtag
+    (character-reg) character data-vector-ref)
+#!+sb-unicode
+(define-full-setter data-vector-ref/simple-character-string
+    simple-character-string vector-data-offset other-pointer-lowtag
+    (character-reg) character data-vector-set)
 
 ;;; signed-byte-8
 
index 9b8d2c0..c0ee144 100644 (file)
 ;;;; moves and coercions
 
 ;;; Move a tagged char to an untagged representation.
+#!+sb-unicode
+(define-vop (move-to-character)
+  (:args (x :scs (any-reg descriptor-reg) :target y
+            :load-if (not (location= x y))))
+  (:results (y :scs (character-reg)
+               :load-if (not (location= x y))))
+  (:note "character untagging")
+  (:generator 1
+    (move y x)
+    (inst shr y n-widetag-bits)))
+#!-sb-unicode
 (define-vop (move-to-character)
   (:args (x :scs (any-reg control-stack) :target al))
   (:temporary (:sc byte-reg :offset al-offset
     (move eax-tn x)
     (move y ah)))
 (define-move-vop move-to-character :move
-  (any-reg control-stack) (character-reg character-stack))
+  (any-reg #!-sb-unicode control-stack)
+  (character-reg #!-sb-unicode character-stack))
 
 ;;; Move an untagged char to a tagged representation.
+#!+sb-unicode
+(define-vop (move-from-character)
+  (:args (x :scs (character-reg)))
+  (:results (y :scs (any-reg descriptor-reg)))
+  (:note "character tagging")
+  (:generator 1
+    ;; FIXME: is this inefficient?  Is there a better way of writing
+    ;; it?  (fixnum tagging is done with LEA).  We can't use SHL
+    ;; because we either scribble over the source register or briefly
+    ;; have a non-descriptor in a descriptor register, unless we
+    ;; introduce a temporary.
+    (inst imul y x (ash 1 n-widetag-bits))
+    (inst or y character-widetag)))
+#!-sb-unicode
 (define-vop (move-from-character)
   (:args (x :scs (character-reg character-stack) :target ah))
   (:temporary (:sc byte-reg :offset al-offset :target y
@@ -44,7 +70,8 @@
     (inst and eax-tn #xffff)           ; Remove any junk bits.
     (move y eax-tn)))
 (define-move-vop move-from-character :move
-  (character-reg character-stack) (any-reg descriptor-reg control-stack))
+  (character-reg #!-sb-unicode character-stack)
+  (any-reg descriptor-reg #!-sb-unicode control-stack))
 
 ;;; Move untagged character values.
 (define-vop (character-move)
       (character-reg
        (move y x))
       (character-stack
+       #!-sb-unicode
        (inst mov
             (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) 4)))
-            x)))))
+            x)
+       #!+sb-unicode
+       (if (= (tn-offset fp) esp-offset)
+          (storew x fp (tn-offset y))  ; c-call
+          (storew x fp (- (1+ (tn-offset y)))))))))
 (define-move-vop move-character-arg :move-arg
   (any-reg character-reg) (character-reg))
 
   (:results (res :scs (unsigned-reg)))
   (:result-types positive-fixnum)
   (:generator 1
-    (inst movzx res ch)))
+    #!-sb-unicode
+    (inst movzx res ch)
+    #!+sb-unicode
+    (inst mov res ch)))
 
+#!+sb-unicode
+(define-vop (code-char)
+  (:translate code-char)
+  (:policy :fast-safe)
+  (:args (code :scs (unsigned-reg unsigned-stack)))
+  (:arg-types positive-fixnum)
+  (:results (res :scs (character-reg)))
+  (:result-types character)
+  (:generator 1
+    (inst mov res code)))
+#!-sb-unicode
 (define-vop (code-char)
   (:translate code-char)
   (:policy :fast-safe)
index 48fc7ad..377b196 100644 (file)
 
   ;; non-descriptor characters
   (character-reg registers
-                :locations #.*byte-regs*
+                :locations #!-sb-unicode #.*byte-regs*
+                            #!+sb-unicode #.*dword-regs*
+                 #!-sb-unicode #!-sb-unicode
                 :reserve-locations (#.ah-offset #.al-offset)
                 :constant-scs (immediate)
                 :save-p t
   (catch-block stack :element-size kludge-nondeterministic-catch-block-size))
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
-(defparameter *byte-sc-names* '(character-reg byte-reg character-stack))
+(defparameter *byte-sc-names*
+  '(#!-sb-unicode character-reg byte-reg #!-sb-unicode character-stack))
 (defparameter *word-sc-names* '(word-reg))
 (defparameter *dword-sc-names*
   '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack
-    signed-stack unsigned-stack sap-stack single-stack constant))
+    signed-stack unsigned-stack sap-stack single-stack
+    #!+sb-unicode character-reg #!+sb-unicode character-stack constant))
 ;;; added by jrd. I guess the right thing to do is to treat floats
 ;;; as a separate size...
 ;;;
index 9678c2e..b1795c0 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.8.16.24"
+"0.8.16.25"