From: Christophe Rhodes Date: Tue, 2 Nov 2004 08:37:50 +0000 (+0000) Subject: 0.8.16.25: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=bf27595fb567015495b7131707cc85af361567fe;p=sbcl.git 0.8.16.25: Merge the rest of character_branch under #!+sb-unicode ... untested with #!+sb-unicode, but it seems to work OK without. One more build/test cycle to go. This patch brought to you by --ifdef --- diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 303bdb7..e8c2302 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1123,14 +1123,17 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "ARRAY-TYPE" "ARRAY-TYPE-COMPLEXP" "ARRAY-TYPE-DIMENSIONS" "ARRAY-TYPE-ELEMENT-TYPE" "ARRAY-TYPE-P" "ARRAY-TYPE-SPECIALIZED-ELEMENT-TYPE" - "ASH-INDEX" "ASSERT-ERROR" "BASE-STRING-P" + "ASH-INDEX" "ASSERT-ERROR" + #!+sb-unicode "BASE-CHAR-P" + "BASE-STRING-P" "BINDING-STACK-POINTER-SAP" "BIT-BASH-COPY" "BIT-INDEX" "BOGUS-ARG-TO-VALUES-LIST-ERROR" "BOOLE-CODE" "BOUNDING-INDICES-BAD-ERROR" "BYTE-SPECIFIER" "%BYTE-BLT" "CALLABLE" "CASE-BODY-ERROR" - "CHARACTER-SET" "CHARACTER-SET-TYPE" - "CHARACTER-SET-TYPE-PAIRS" - "CHARPOS" + "CHARACTER-SET" "CHARACTER-SET-TYPE" + "CHARACTER-SET-TYPE-PAIRS" + #!+sb-unicode "CHARACTER-STRING-P" + "CHARPOS" "CHECK-FOR-CIRCULARITY" "CHECK-TYPE-ERROR" "CLOSED-FLAME" "CODE-COMPONENT" "CODE-COMPONENT-P" "CODE-DEBUG-INFO" "CODE-HEADER-REF" "CODE-HEADER-SET" "CODE-INSTRUCTIONS" @@ -1233,7 +1236,9 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "NUMERIC-TYPE-HIGH" "NUMERIC-TYPE-LOW" "NUMERIC-TYPE-P" "OBJECT-NOT-ARRAY-ERROR" "OBJECT-NOT-CHARACTER-ERROR" "OBJECT-NOT-BASE-STRING-ERROR" "OBJECT-NOT-BIGNUM-ERROR" - "OBJECT-NOT-BIT-VECTOR-ERROR" "OBJECT-NOT-COMPLEX-ERROR" + "OBJECT-NOT-BIT-VECTOR-ERROR" + #!+sb-unicode "OBJECT-NOT-CHARACTER-STRING-ERROR" + "OBJECT-NOT-COMPLEX-ERROR" "OBJECT-NOT-COMPLEX-FLOAT-ERROR" "OBJECT-NOT-COMPLEX-SINGLE-FLOAT-ERROR" #!+long-float "OBJECT-NOT-COMPLEX-LONG-FLOAT-ERROR" @@ -1299,6 +1304,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "OBJECT-NOT-SIMPLE-ARRAY-SIGNED-BYTE-8-ERROR" "OBJECT-NOT-SIMPLE-BIT-VECTOR-ERROR" "OBJECT-NOT-SIMPLE-BASE-STRING-ERROR" + #!+sb-unicode "OBJECT-NOT-SIMPLE-CHARACTER-STRING-ERROR" "OBJECT-NOT-SIMPLE-STRING-ERROR" "OBJECT-NOT-SIMPLE-VECTOR-ERROR" "OBJECT-NOT-SINGLE-FLOAT-ERROR" "OBJECT-NOT-STRING-ERROR" @@ -1307,7 +1313,9 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "OBJECT-NOT-UNSIGNED-BYTE-32-ERROR" ;; FIXME: 32/64-bit issues "OBJECT-NOT-UNSIGNED-BYTE-64-ERROR" - "OBJECT-NOT-VECTOR-ERROR" "OBJECT-NOT-WEAK-POINTER-ERROR" + "OBJECT-NOT-VECTOR-ERROR" + "OBJECT-NOT-VECTOR-NIL-ERROR" + "OBJECT-NOT-WEAK-POINTER-ERROR" "ODD-KEY-ARGS-ERROR" "OUTPUT-OBJECT" "OUTPUT-UGLY-OBJECT" "PACKAGE-DESIGNATOR" "PACKAGE-DOC-STRING" "PACKAGE-HASHTABLE-SIZE" "PACKAGE-HASHTABLE-FREE" @@ -1354,6 +1362,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "SIMPLE-ARRAY-SIGNED-BYTE-61-P" "SIMPLE-ARRAY-SIGNED-BYTE-64-P" "SIMPLE-ARRAY-SIGNED-BYTE-8-P" "SIMPLE-BASE-STRING-P" + #!+sb-unicode "SIMPLE-CHARACTER-STRING-P" "SIMPLE-PACKAGE-ERROR" "SIMPLE-UNBOXED-ARRAY" "SINGLE-FLOAT-BITS" "SINGLE-FLOAT-EXPONENT" "SINGLE-FLOAT-INT-EXPONENT" "SINGLE-FLOAT-SIGNIFICAND" @@ -1998,7 +2007,9 @@ structure representations" "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT" "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG" "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER" - "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" "COMPLEX-WIDETAG" + "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" + #!+sb-unicode "COMPLEX-CHARACTER-STRING-WIDETAG" + "COMPLEX-WIDETAG" "COMPLEX-VECTOR-NIL-WIDETAG" "COMPLEX-VECTOR-WIDETAG" "CONS-CAR-SLOT" "CONS-CDR-SLOT" "CONS-SIZE" "CONSTANT-SC-NUMBER" @@ -2138,6 +2149,7 @@ structure representations" "SIMPLE-ARRAY-SIGNED-BYTE-8-WIDETAG" "SIMPLE-BIT-VECTOR-WIDETAG" "SIMPLE-BASE-STRING-WIDETAG" + #!+sb-unicode "SIMPLE-CHARACTER-STRING-WIDETAG" "SIMPLE-VECTOR-WIDETAG" "SINGLE-FLOAT-BIAS" "SINGLE-FLOAT-DIGITS" "SINGLE-FLOAT-EXPONENT-BYTE" "SINGLE-FLOAT-HIDDEN-BIT" "SINGLE-FLOAT-NORMAL-EXPONENT-MAX" diff --git a/src/code/array.lisp b/src/code/array.lisp index f463c5f..d3c09c1 100644 --- a/src/code/array.lisp +++ b/src/code/array.lisp @@ -91,8 +91,11 @@ ;; and for all in any reasonable user programs.) ((t) (values #.sb!vm:simple-vector-widetag #.sb!vm:n-word-bits)) - ((base-char standard-char character) + ((base-char standard-char #!-sb-unicode character) (values #.sb!vm:simple-base-string-widetag #.sb!vm:n-byte-bits)) + #!+sb-unicode + ((character) + (values #.sb!vm:simple-character-string-widetag #.sb!vm:n-word-bits)) ((bit) (values #.sb!vm:simple-bit-vector-widetag 1)) ;; OK, we have to wade into SUBTYPEPing after all. @@ -110,8 +113,11 @@ ;; Pick off some easy common cases. ((t) #.sb!vm:complex-vector-widetag) - ((base-char character) + ((base-char #!-sb-unicode character) #.sb!vm:complex-base-string-widetag) + #!+sb-unicode + ((character) + #.sb!vm:complex-character-string-widetag) ((nil) #.sb!vm:complex-vector-nil-widetag) ((bit) @@ -120,7 +126,12 @@ (t (pick-vector-type type (nil #.sb!vm:complex-vector-nil-widetag) + #!-sb-unicode (character #.sb!vm:complex-base-string-widetag) + #!+sb-unicode + (base-char #.sb!vm:complex-base-string-widetag) + #!+sb-unicode + (character #.sb!vm:complex-character-string-widetag) (bit #.sb!vm:complex-bit-vector-widetag) (t #.sb!vm:complex-vector-widetag))))) @@ -148,11 +159,15 @@ (array (allocate-vector type length - (ceiling (* (if (= type sb!vm:simple-base-string-widetag) - (1+ length) - length) - n-bits) - sb!vm:n-word-bits)))) + (ceiling + (* (if (or (= type sb!vm:simple-base-string-widetag) + #!+sb-unicode + (= type + sb!vm:simple-character-string-widetag)) + (1+ length) + length) + n-bits) + sb!vm:n-word-bits)))) (declare (type index length)) (when initial-element-p (fill array initial-element)) @@ -854,7 +869,9 @@ ,@(map 'list (lambda (saetp) `((simple-array ,(sb!vm:saetp-specifier saetp) (*)) - ,(if (eq (sb!vm:saetp-specifier saetp) 'character) + ,(if (or (eq (sb!vm:saetp-specifier saetp) 'character) + #!+sb-unicode + (eq (sb!vm:saetp-specifier saetp) 'base-char)) *default-init-char-form* (sb!vm:saetp-initial-element-default saetp)))) (remove-if-not diff --git a/src/code/char.lisp b/src/code/char.lisp index 0a89c81..56b3d2c 100644 --- a/src/code/char.lisp +++ b/src/code/char.lisp @@ -12,8 +12,8 @@ (in-package "SB!IMPL") -(def!constant sb!xc:char-code-limit 256 +(def!constant sb!xc:char-code-limit #!-sb-unicode 256 #!+sb-unicode #x110000 #!+sb-doc "the upper exclusive bound on values produced by CHAR-CODE") -(def!constant base-char-code-limit 256) +(def!constant base-char-code-limit #!-sb-unicode 256 #!+sb-unicode 128) diff --git a/src/code/class.lisp b/src/code/class.lisp index d9db35f..57ef609 100644 --- a/src/code/class.lisp +++ b/src/code/class.lisp @@ -1243,6 +1243,21 @@ NIL is returned when no such class exists." :inherits (base-string simple-string string vector simple-array array sequence) :prototype-form (make-array 0 :element-type 'base-char)) + #!+sb-unicode + (character-string + :translation (vector character) + :codes (#.sb!vm:complex-character-string-widetag) + :direct-superclasses (string) + :inherits (string vector array sequence) + :prototype-form (make-array 0 :element-type 'character :fill-pointer t)) + #!+sb-unicode + (simple-character-string + :translation (simple-array character (*)) + :codes (#.sb!vm:simple-character-string-widetag) + :direct-superclasses (character-string simple-string) + :inherits (character-string simple-string string vector simple-array + array sequence) + :prototype-form (make-array 0 :element-type 'character)) (list :translation (or cons (member nil)) :inherits (sequence)) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 528bd52..6ae5f06 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -1112,6 +1112,38 @@ (setf (sap-ref-8 sap tail) bits) (code-char byte)) +#!+sb-unicode +(let ((latin-9-table (let ((table (make-string 256))) + (do ((i 0 (1+ i))) + ((= i 256)) + (setf (aref table i) (code-char i))) + (setf (aref table #xa4) (code-char #x20ac)) + (setf (aref table #xa6) (code-char #x0160)) + (setf (aref table #xa8) (code-char #x0161)) + (setf (aref table #xb4) (code-char #x017d)) + (setf (aref table #xb8) (code-char #x017e)) + (setf (aref table #xbc) (code-char #x0152)) + (setf (aref table #xbd) (code-char #x0153)) + (setf (aref table #xbe) (code-char #x0178)) + table)) + (latin-9-reverse-1 (make-array 16 + :element-type '(unsigned-byte 21) + :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0))) + (latin-9-reverse-2 (make-array 16 + :element-type '(unsigned-byte 8) + :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0)))) + (define-external-format (:latin-9 :latin9 :iso-8859-15) + 1 + (setf (sap-ref-8 sap tail) + (if (< bits 256) + (if (= bits (char-code (aref latin-9-table bits))) + bits + (error "cannot encode ~A in latin-9" bits)) + (if (= (aref latin-9-reverse-1 (logand bits 15)) bits) + (aref latin-9-reverse-2 (logand bits 15)) + (error "cannot encode ~A in latin-9" bits)))) + (aref latin-9-table byte))) + (define-external-format/variable-width (:utf-8 :utf8) (let ((bits (char-code byte))) (cond ((< bits #x80) 1) diff --git a/src/code/fop.lisp b/src/code/fop.lisp index 7f58e99..a981179 100644 --- a/src/code/fop.lisp +++ b/src/code/fop.lisp @@ -71,7 +71,8 @@ ;;; of like READ-SEQUENCE specialized for files of (UNSIGNED-BYTE 8), ;;; with an automatic conversion from (UNSIGNED-BYTE 8) into CHARACTER ;;; for each element read -(declaim (ftype (function (stream simple-string &optional index) (values)) read-string-as-bytes)) +(declaim (ftype (function (stream simple-string &optional index) (values)) + read-string-as-bytes #!+sb-unicode read-string-as-words)) (defun read-string-as-bytes (stream string &optional (length (length string))) (dotimes (i length) (setf (aref string i) @@ -83,6 +84,17 @@ ;; significantly better than the portable version here. If it is, then use ;; it as an alternate definition, protected with #-SB-XC-HOST. (values)) +#!+sb-unicode +(defun read-string-as-words (stream string &optional (length (length string))) + #+sb-xc-host (bug "READ-STRING-AS-WORDS called") + (dotimes (i length) + (setf (aref string i) + (sb!xc:code-char (logior + (read-byte stream) + (ash (read-byte stream) 8) + (ash (read-byte stream) 16) + (ash (read-byte stream) 24))))) + (values)) ;;;; miscellaneous fops @@ -180,9 +192,16 @@ (make-string (* ,n-size 2)))) (done-with-fast-read-byte) (let ((,n-buffer *fasl-symbol-buffer*)) + #+sb-xc-host (read-string-as-bytes *fasl-input-stream* ,n-buffer ,n-size) + #-sb-xc-host + (#!+sb-unicode read-string-as-words + #!-sb-unicode read-string-as-bytes + *fasl-input-stream* + ,n-buffer + ,n-size) (push-fop-table (without-package-locks (intern* ,n-buffer ,n-size @@ -229,7 +248,10 @@ (fop-uninterned-small-symbol-save 13) (let* ((arg (clone-arg)) (res (make-string arg))) + #!-sb-unicode (read-string-as-bytes *fasl-input-stream* res) + #!+sb-unicode + (read-string-as-words *fasl-input-stream* res) (push-fop-table (make-symbol res)))) (define-fop (fop-package 14) @@ -347,6 +369,19 @@ (read-string-as-bytes *fasl-input-stream* res) res)) +#!+sb-unicode +(progn + #+sb-xc-host + (define-cloned-fops (fop-character-string 161) (fop-small-character-string 162) + (bug "CHARACTER-STRING FOP encountered")) + + #-sb-xc-host + (define-cloned-fops (fop-character-string 161) (fop-small-character-string 162) + (let* ((arg (clone-arg)) + (res (make-string arg))) + (read-string-as-words *fasl-input-stream* res) + res))) + (define-cloned-fops (fop-vector 39) (fop-small-vector 40) (let* ((size (clone-arg)) (res (make-array size))) diff --git a/src/code/interr.lisp b/src/code/interr.lisp index 66f69f4..0d6ceeb 100644 --- a/src/code/interr.lisp +++ b/src/code/interr.lisp @@ -123,6 +123,17 @@ :datum object :expected-type 'base-string)) +(deferr object-not-vector-nil-error (object) + (error 'type-error + :datum object + :expected-type '(vector nil))) + +#!+sb-unicode +(deferr object-not-character-string-error (object) + (error 'type-error + :datum object + :expected-type '(vector character))) + (deferr object-not-bit-vector-error (object) (error 'type-error :datum object diff --git a/src/code/late-type.lisp b/src/code/late-type.lisp index 5a1e0e8..e6f9931 100644 --- a/src/code/late-type.lisp +++ b/src/code/late-type.lisp @@ -2096,22 +2096,24 @@ (if (eq (car dims) '*) (case eltype (bit 'bit-vector) - ((base-char character) 'base-string) + ((base-char #!-sb-unicode character) 'base-string) (* 'vector) (t `(vector ,eltype))) (case eltype (bit `(bit-vector ,(car dims))) - ((base-char character) `(base-string ,(car dims))) + ((base-char #!-sb-unicode character) + `(base-string ,(car dims))) (t `(vector ,eltype ,(car dims))))) (if (eq (car dims) '*) (case eltype (bit 'simple-bit-vector) - ((base-char character) 'simple-base-string) + ((base-char #!-sb-unicode character) 'simple-base-string) ((t) 'simple-vector) (t `(simple-array ,eltype (*)))) (case eltype (bit `(simple-bit-vector ,(car dims))) - ((base-char character) `(simple-base-string ,(car dims))) + ((base-char #!-sb-unicode character) + `(simple-base-string ,(car dims))) ((t) `(simple-vector ,(car dims))) (t `(simple-array ,eltype ,dims)))))) (t diff --git a/src/code/print.lisp b/src/code/print.lisp index 057df86..02a3ca0 100644 --- a/src/code/print.lisp +++ b/src/code/print.lisp @@ -1262,9 +1262,6 @@ ;;; [CMUC]tradix.press. DO NOT EVEN THINK OF ATTEMPTING TO ;;; UNDERSTAND THIS CODE WITHOUT READING THE PAPER! -(declaim (type (simple-array character (10)) *digits*)) -(defvar *digits* "0123456789") - (defun flonum-to-string (x &optional width fdigits scale fmin) (cond ((zerop x) ;; Zero is a special case which FLOAT-STRING cannot handle. @@ -1285,6 +1282,7 @@ (defun float-string (fraction exponent precision width fdigits scale fmin) (let ((r fraction) (s 1) (m- 1) (m+ 1) (k 0) (digits 0) (decpnt 0) (cutoff nil) (roundup nil) u low high + (digit-characters "0123456789") (digit-string (make-array 50 :element-type 'base-char :fill-pointer 0 @@ -1375,13 +1373,13 @@ ;; Stop when either precision is exhausted or we have printed as ;; many fraction digits as permitted. (when (or low high (and cutoff (<= k cutoff))) (return)) - (vector-push-extend (char *digits* u) digit-string) + (vector-push-extend (char digit-characters u) digit-string) (incf digits)) ;; If cutoff occurred before first digit, then no digits are ;; generated at all. (when (or (not cutoff) (>= k cutoff)) ;; Last digit may need rounding - (vector-push-extend (char *digits* + (vector-push-extend (char digit-characters (cond ((and low (not high)) u) ((and high (not low)) (1+ u)) (t (if (<= (ash r 1) s) u (1+ u))))) @@ -1430,6 +1428,7 @@ (let ((print-base 10) ; B (float-radix 2) ; b (float-digits (float-digits v)) ; p + (digit-characters "0123456789") (min-e (etypecase v (single-float single-float-min-e) @@ -1468,7 +1467,7 @@ (and high-ok (= (+ r m+) s)))) (when (or tc1 tc2) (go end)) - (vector-push-extend (char *digits* d) result) + (vector-push-extend (char digit-characters d) result) (go loop) end (let ((d (cond @@ -1476,7 +1475,7 @@ ((and tc1 (not tc2)) d) (t ; (and tc1 tc2) (if (< (* r 2) s) d (1+ d)))))) - (vector-push-extend (char *digits* d) result) + (vector-push-extend (char digit-characters d) result) (return-from generate result)))))) (if (>= e 0) (if (/= f (expt float-radix (1- float-digits))) diff --git a/src/code/room.lisp b/src/code/room.lisp index cf18258..c7696b2 100644 --- a/src/code/room.lisp +++ b/src/code/room.lisp @@ -72,7 +72,8 @@ :kind :fixed :length size)))))) -(dolist (code (list complex-base-string-widetag simple-array-widetag +(dolist (code (list #!+sb-unicode complex-character-string-widetag + complex-base-string-widetag simple-array-widetag complex-bit-vector-widetag complex-vector-widetag complex-array-widetag complex-vector-nil-widetag)) (setf (svref *meta-room-info* code) @@ -122,6 +123,12 @@ :kind :string :length 0)) +#!+sb-unicode +(setf (svref *meta-room-info* simple-character-string-widetag) + (make-room-info :name 'simple-character-string + :kind :string + :length 2)) + (setf (svref *meta-room-info* simple-array-nil-widetag) (make-room-info :name 'simple-array-nil :kind :fixed @@ -467,6 +474,7 @@ #.single-float-widetag #.double-float-widetag #.simple-base-string-widetag + #!+sb-unicode #.simple-character-string-widetag #.simple-array-nil-widetag #.simple-bit-vector-widetag #.simple-array-unsigned-byte-2-widetag diff --git a/src/code/seq.lisp b/src/code/seq.lisp index 50eec6b..b05450e 100644 --- a/src/code/seq.lisp +++ b/src/code/seq.lisp @@ -569,6 +569,15 @@ (when (null source-end) (setq source-end (length source-sequence))) (mumble-replace-from-mumble)) +#!+sb-unicode +(defun simple-character-string-replace-from-simple-character-string* + (target-sequence source-sequence + target-start target-end source-start source-end) + (declare (type (simple-array character (*)) target-sequence source-sequence)) + (when (null target-end) (setq target-end (length target-sequence))) + (when (null source-end) (setq source-end (length source-sequence))) + (mumble-replace-from-mumble)) + (define-sequence-traverser replace (sequence1 sequence2 &key start1 end1 start2 end2) #!+sb-doc diff --git a/src/code/stream.lisp b/src/code/stream.lisp index b33dfb9..6fc1ac8 100644 --- a/src/code/stream.lisp +++ b/src/code/stream.lisp @@ -419,30 +419,42 @@ ;;; and hence must be an N-BIN method. (defun fast-read-char-refill (stream eof-error-p eof-value) (let* ((ibuf (ansi-stream-cin-buffer stream)) - (count (funcall (ansi-stream-n-bin stream) - stream - ibuf - +ansi-stream-in-buffer-extra+ - (- +ansi-stream-in-buffer-length+ - +ansi-stream-in-buffer-extra+) - nil)) - (start (- +ansi-stream-in-buffer-length+ count))) + (count (funcall (ansi-stream-n-bin stream) + stream + ibuf + +ansi-stream-in-buffer-extra+ + (- +ansi-stream-in-buffer-length+ + +ansi-stream-in-buffer-extra+) + nil)) + (start (- +ansi-stream-in-buffer-length+ count)) + (n-character-array-bytes + #.(/ (sb!vm:saetp-n-bits + (find 'character + sb!vm:*specialized-array-element-type-properties* + :key #'sb!vm:saetp-specifier)) + sb!vm:n-byte-bits))) (declare (type index start count)) (cond ((zerop count) - (setf (ansi-stream-in-index stream) +ansi-stream-in-buffer-length+) - (funcall (ansi-stream-in stream) stream eof-error-p eof-value)) - (t - (when (/= start +ansi-stream-in-buffer-extra+) - (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+ - sb!vm:n-byte-bits) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - ibuf (+ (the index (* start sb!vm:n-byte-bits)) - (* sb!vm:vector-data-offset - sb!vm:n-word-bits)) - (* count sb!vm:n-byte-bits))) - (setf (ansi-stream-in-index stream) (1+ start)) - (aref ibuf start))))) + (setf (ansi-stream-in-index stream) + +ansi-stream-in-buffer-length+) + (funcall (ansi-stream-in stream) stream eof-error-p eof-value)) + (t + (when (/= start +ansi-stream-in-buffer-extra+) + (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+ + sb!vm:n-byte-bits + n-character-array-bytes) + (* sb!vm:vector-data-offset + sb!vm:n-word-bits)) + ibuf (+ (the index (* start + sb!vm:n-byte-bits + n-character-array-bytes)) + (* sb!vm:vector-data-offset + sb!vm:n-word-bits)) + (* count + sb!vm:n-byte-bits + n-character-array-bytes))) + (setf (ansi-stream-in-index stream) (1+ start)) + (aref ibuf start))))) ;;; This is similar to FAST-READ-CHAR-REFILL, but we don't have to ;;; leave room for unreading. diff --git a/src/compiler/alpha/array.lisp b/src/compiler/alpha/array.lisp index e8e2a9b..fa0674c 100644 --- a/src/compiler/alpha/array.lisp +++ b/src/compiler/alpha/array.lisp @@ -12,7 +12,6 @@ (in-package "SB!VM") ;;;; allocator for the array header - (define-vop (make-array-header) (:policy :fast-safe) (:translate make-array-header) @@ -36,8 +35,6 @@ (inst bis alloc-tn other-pointer-lowtag result) (storew header result 0 other-pointer-lowtag) (inst addq alloc-tn bytes alloc-tn)))) - - ;;;; additional accessors and setters for the array header (define-full-reffer %array-dimension * @@ -61,7 +58,6 @@ (inst sll temp n-fixnum-tag-bits res))) ;;;; bounds checking routine - (define-vop (check-bound) (:translate %check-bound) (:policy :fast-safe) @@ -302,6 +298,8 @@ (def-partial-data-vector-frobs simple-base-string character :byte nil character-reg) + #!+sb-unicode ; FIXME: what about when a word is 64 bits? + (def-full-data-vector-frobs simple-character-string character character-reg) (def-partial-data-vector-frobs simple-array-unsigned-byte-7 positive-fixnum :byte nil unsigned-reg signed-reg) diff --git a/src/compiler/dump.lisp b/src/compiler/dump.lisp index a66ac81..ede2e77 100644 --- a/src/compiler/dump.lisp +++ b/src/compiler/dump.lisp @@ -600,9 +600,15 @@ (t (unless *cold-load-dump* (dump-fop 'fop-normal-load file)) + #+sb-xc-host (dump-simple-base-string (coerce (package-name pkg) 'simple-base-string) file) + #-sb-xc-host + (#!+sb-unicode dump-simple-character-string + #!-sb-unicode dump-simple-base-string + (coerce (package-name pkg) '(simple-array character (*))) + file) (dump-fop 'fop-package file) (unless *cold-load-dump* (dump-fop 'fop-maybe-cold-load file)) @@ -733,10 +739,24 @@ (*))) x))) (typecase simple-version + #+sb-xc-host + (simple-string + (unless (string-check-table x file) + (dump-simple-base-string simple-version file) + (string-save-object x file))) + #-sb-xc-host (simple-base-string - (unless (equal-check-table x file) + (unless (string-check-table x file) (dump-simple-base-string simple-version file) - (equal-save-object x file))) + (string-save-object x file))) + #-sb-xc-host + ((simple-array character (*)) + #!+sb-unicode + (unless (string-check-table x file) + (dump-simple-character-string simple-version file) + (string-save-object x file)) + #!-sb-unicode + (bug "how did we get here?")) (simple-vector (dump-simple-vector simple-version file) (eq-save-object x file)) @@ -979,7 +999,10 @@ file) (dump-word pname-length file))) - (dump-base-chars-of-string pname file) + #+sb-xc-host (dump-base-chars-of-string pname file) + #-sb-xc-host (#!+sb-unicode dump-characters-of-string + #!-sb-unicode dump-base-chars-of-string + pname file) (unless *cold-load-dump* (setf (gethash s (fasl-output-eq-table file)) diff --git a/src/compiler/generic/early-objdef.lisp b/src/compiler/generic/early-objdef.lisp index 831cea2..e182880 100644 --- a/src/compiler/generic/early-objdef.lisp +++ b/src/compiler/generic/early-objdef.lisp @@ -167,6 +167,7 @@ simple-array-unsigned-byte-16 ; 10011110 simple-array-nil ; 10100010 simple-base-string ; 10100110 + #!+sb-unicode simple-character-string simple-bit-vector ; 10101010 simple-vector ; 10101110 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) @@ -195,15 +196,18 @@ simple-array ; 11011110 complex-vector-nil ; 11100010 complex-base-string ; 11100110 + #!+sb-unicode complex-character-string complex-bit-vector ; 11101010 complex-vector ; 11101110 complex-array ; 11110010 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused12 ; 11110110 - #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) + #!+(and #.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) + (not sb-unicode)) unused13 ; 11111010 - #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) + #!+(and #.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) + (not sb-unicode)) unused14 ; 11111110 ) diff --git a/src/compiler/generic/genesis.lisp b/src/compiler/generic/genesis.lisp index 674db25..b1dee69 100644 --- a/src/compiler/generic/genesis.lisp +++ b/src/compiler/generic/genesis.lisp @@ -2106,6 +2106,11 @@ core and return a descriptor to it." (read-string-as-bytes *fasl-input-stream* string) (base-string-to-core string))) +#!+sb-unicode +(clone-cold-fop (fop-character-string) + (fop-small-character-string) + (bug "CHARACTER-STRING dumped by cross-compiler.")) + (clone-cold-fop (fop-vector) (fop-small-vector) (let* ((size (clone-arg)) diff --git a/src/compiler/generic/interr.lisp b/src/compiler/generic/interr.lisp index a2edee8..8fde951 100644 --- a/src/compiler/generic/interr.lisp +++ b/src/compiler/generic/interr.lisp @@ -61,6 +61,11 @@ "Object is not of type STRING.") (object-not-base-string "Object is not of type BASE-STRING.") + (object-not-vector-nil + "Object is not of type (VECTOR NIL).") + #!+sb-unicode + (object-not-character-string + "Object is not of type (VECTOR CHARACTER).") (object-not-bit-vector "Object is not of type BIT-VECTOR.") (object-not-array diff --git a/src/compiler/generic/late-type-vops.lisp b/src/compiler/generic/late-type-vops.lisp index b782be5..c9a9602 100644 --- a/src/compiler/generic/late-type-vops.lisp +++ b/src/compiler/generic/late-type-vops.lisp @@ -66,7 +66,8 @@ (!define-type-vops simple-string-p check-simple-string nil object-not-simple-string-error - (simple-base-string-widetag simple-array-nil-widetag)) + (#!+sb-unicode simple-character-string-widetag + simple-base-string-widetag simple-array-nil-widetag)) (macrolet ((define-simple-array-type-vops () @@ -109,11 +110,15 @@ (funcallable-instance-header-widetag)) (!define-type-vops array-header-p nil nil nil - (simple-array-widetag complex-base-string-widetag complex-bit-vector-widetag + (simple-array-widetag + #!+sb-unicode complex-character-string-widetag + complex-base-string-widetag complex-bit-vector-widetag complex-vector-widetag complex-array-widetag complex-vector-nil-widetag)) (!define-type-vops stringp check-string nil object-not-string-error - (simple-base-string-widetag complex-base-string-widetag + (#!+sb-unicode simple-character-string-widetag + #!+sb-unicode complex-character-string-widetag + simple-base-string-widetag complex-base-string-widetag simple-array-nil-widetag complex-vector-nil-widetag)) (!define-type-vops base-string-p check-base-string nil object-not-base-string-error @@ -127,6 +132,11 @@ object-not-vector-nil-error (simple-array-nil-widetag complex-vector-nil-widetag)) +#!+sb-unicode +(!define-type-vops character-string-p check-character-string nil + object-not-character-string-error + (simple-character-string-widetag complex-character-string-widetag)) + (!define-type-vops vectorp check-vector nil object-not-vector-error (complex-vector-widetag . #.(append diff --git a/src/compiler/generic/vm-array.lisp b/src/compiler/generic/vm-array.lisp index 503f1bb..f81b183 100644 --- a/src/compiler/generic/vm-array.lisp +++ b/src/compiler/generic/vm-array.lisp @@ -64,6 +64,7 @@ (nil #:mu 0 simple-array-nil :complex-typecode #.sb!vm:complex-vector-nil-widetag :importance 0) + #!-sb-unicode (character ,(code-char 0) 8 simple-base-string ;; (SIMPLE-BASE-STRINGs are stored with an extra ;; trailing #\NULL for convenience in calling out @@ -71,6 +72,19 @@ :n-pad-elements 1 :complex-typecode #.sb!vm:complex-base-string-widetag :importance 17) + #!+sb-unicode + (base-char ,(code-char 0) 8 simple-base-string + ;; (SIMPLE-BASE-STRINGs are stored with an extra + ;; trailing #\NULL for convenience in calling out + ;; to C.) + :n-pad-elements 1 + :complex-typecode #.sb!vm:complex-base-string-widetag + :importance 17) + #!+sb-unicode + (character ,(code-char 0) 32 simple-character-string + :n-pad-elements 1 + :complex-typecode #.sb!vm:complex-character-string-widetag + :importance 17) (single-float 0.0f0 32 simple-array-single-float :importance 6) (double-float 0.0d0 64 simple-array-double-float diff --git a/src/compiler/generic/vm-fndb.lisp b/src/compiler/generic/vm-fndb.lisp index de26bcb..e0a4899 100644 --- a/src/compiler/generic/vm-fndb.lisp +++ b/src/compiler/generic/vm-fndb.lisp @@ -22,6 +22,8 @@ complex-vector-p base-char-p %standard-char-p %instancep base-string-p simple-base-string-p + #!+sb-unicode character-string-p + #!+sb-unicode simple-character-string-p array-header-p simple-array-p simple-array-nil-p vector-nil-p simple-array-unsigned-byte-2-p diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp index 0a97ccf..e47aec5 100644 --- a/src/compiler/generic/vm-tran.lisp +++ b/src/compiler/generic/vm-tran.lisp @@ -49,6 +49,8 @@ (give-up-ir1-transform) `(etypecase string ((simple-array character (*)) (data-vector-ref string index)) + #!+sb-unicode + ((simple-array base-char (*)) (data-vector-ref string index)) ((simple-array nil (*)) (data-vector-ref string index)))))) (deftransform hairy-data-vector-ref ((array index) (array t) *) @@ -99,6 +101,9 @@ `(etypecase string ((simple-array character (*)) (data-vector-set string index new-value)) + #!+sb-unicode + ((simple-array base-char (*)) + (data-vector-set string index new-value)) ((simple-array nil (*)) (data-vector-set string index new-value)))))) diff --git a/src/compiler/generic/vm-typetran.lisp b/src/compiler/generic/vm-typetran.lisp index c1328e1..2595ed0 100644 --- a/src/compiler/generic/vm-typetran.lisp +++ b/src/compiler/generic/vm-typetran.lisp @@ -19,6 +19,7 @@ ;;; They shouldn't be used explicitly. (define-type-predicate base-string-p base-string) (define-type-predicate bignump bignum) +#!+sb-unicode (define-type-predicate character-string-p (vector character)) (define-type-predicate complex-double-float-p (complex double-float)) (define-type-predicate complex-single-float-p (complex single-float)) #!+long-float @@ -92,6 +93,8 @@ (define-type-predicate simple-array-complex-long-float-p (simple-array (complex long-float) (*))) (define-type-predicate simple-base-string-p simple-base-string) +#!+sb-unicode (define-type-predicate simple-character-string-p + (simple-array character (*))) (define-type-predicate system-area-pointer-p system-area-pointer) (define-type-predicate unsigned-byte-32-p (unsigned-byte 32)) (define-type-predicate signed-byte-32-p (signed-byte 32)) diff --git a/src/compiler/hppa/array.lisp b/src/compiler/hppa/array.lisp index 2c675f9..0aa28cb 100644 --- a/src/compiler/hppa/array.lisp +++ b/src/compiler/hppa/array.lisp @@ -10,10 +10,8 @@ ;;;; files for more information. (in-package "SB!VM") - ;;;; Allocator for the array header. - (define-vop (make-array-header) (:translate make-array-header) (:policy :fast-safe) @@ -57,12 +55,8 @@ (loadw res x 0 other-pointer-lowtag) (inst srl res n-widetag-bits res) (inst addi (- (1- array-dimensions-offset)) res res))) - - ;;;; Bounds checking routine. - - (define-vop (check-bound) (:translate %check-bound) (:policy :fast-safe) @@ -84,7 +78,6 @@ ;;; Variants built on top of word-index-ref, etc. I.e. those vectors whos ;;; elements are represented in integer registers and are built out of ;;; 8, 16, or 32 bit elements. - (macrolet ((def-full-data-vector-frobs (type element-type &rest scs) `(progn (define-full-reffer ,(symbolicate "DATA-VECTOR-REF/" type) ,type @@ -105,9 +98,11 @@ ,element-type data-vector-set)))) (def-full-data-vector-frobs simple-vector * descriptor-reg any-reg) - + (def-partial-data-vector-frobs simple-base-string character :byte nil character-reg) - + #!+sb-unicode + (def-full-data-vector-frobs simple-character-string character character-reg) + (def-partial-data-vector-frobs simple-array-unsigned-byte-7 positive-fixnum :byte nil unsigned-reg signed-reg) (def-partial-data-vector-frobs simple-array-unsigned-byte-8 positive-fixnum @@ -137,8 +132,6 @@ ;;; Integer vectors whos elements are smaller than a byte. I.e. bit, 2-bit, ;;; and 4-bit vectors. -;;; - (macrolet ((def-small-data-vector-frobs (type bits) (let* ((elements-per-word (floor n-word-bits bits)) (bit-shift (1- (integer-length elements-per-word)))) @@ -252,8 +245,6 @@ (def-small-data-vector-frobs simple-array-unsigned-byte-4 4)) ;;; And the float variants. -;;; - (define-vop (data-vector-ref/simple-array-single-float) (:note "inline array access") (:translate data-vector-ref) @@ -324,7 +315,6 @@ ;;; Complex float arrays. - (define-vop (data-vector-ref/simple-array-complex-single-float) (:note "inline array access") (:translate data-vector-ref) @@ -424,38 +414,30 @@ ;;; These VOPs are used for implementing float slots in structures (whose raw ;;; data is an unsigned-32 vector. -;;; (define-vop (raw-ref-single data-vector-ref/simple-array-single-float) (:translate %raw-ref-single) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-single data-vector-set/simple-array-single-float) (:translate %raw-set-single) (:arg-types sb!c::raw-vector positive-fixnum single-float)) -;;; (define-vop (raw-ref-double data-vector-ref/simple-array-double-float) (:translate %raw-ref-double) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-double data-vector-set/simple-array-double-float) (:translate %raw-set-double) (:arg-types sb!c::raw-vector positive-fixnum double-float)) - (define-vop (raw-ref-complex-single data-vector-ref/simple-array-complex-single-float) (:translate %raw-ref-complex-single) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-complex-single data-vector-set/simple-array-complex-single-float) (:translate %raw-set-complex-single) (:arg-types sb!c::raw-vector positive-fixnum complex-single-float)) -;;; (define-vop (raw-ref-complex-double data-vector-ref/simple-array-complex-double-float) (:translate %raw-ref-complex-double) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-complex-double data-vector-set/simple-array-complex-double-float) (:translate %raw-set-complex-double) @@ -463,17 +445,11 @@ ;;; These vops are useful for accessing the bits of a vector irrespective of ;;; what type of vector it is. -;;; - (define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %set-raw-bits) - - ;;;; Misc. Array VOPs. - (define-vop (get-vector-subtype get-header-data)) (define-vop (set-vector-subtype set-header-data)) - diff --git a/src/compiler/mips/array.lisp b/src/compiler/mips/array.lisp index 2265cf4..7a0e774 100644 --- a/src/compiler/mips/array.lisp +++ b/src/compiler/mips/array.lisp @@ -10,10 +10,8 @@ ;;;; files for more information. (in-package "SB!VM") - ;;;; Allocator for the array header. - (define-vop (make-array-header) (:policy :fast-safe) (:translate make-array-header) @@ -37,7 +35,6 @@ (inst or result alloc-tn other-pointer-lowtag) (storew header result 0 other-pointer-lowtag) (inst addu alloc-tn bytes)))) - ;;;; Additional accessors and setters for the array header. (define-full-reffer %array-dimension * @@ -59,12 +56,8 @@ (inst sra temp n-widetag-bits) (inst subu temp (1- array-dimensions-offset)) (inst sll res temp 2))) - - ;;;; Bounds checking routine. - - (define-vop (check-bound) (:translate %check-bound) (:policy :fast-safe) @@ -82,15 +75,12 @@ (inst beq temp zero-tn error) (inst nop) (move result index)))) - - ;;;; Accessors/Setters ;;; Variants built on top of word-index-ref, etc. I.e. those vectors whos ;;; elements are represented in integer registers and are built out of ;;; 8, 16, or 32 bit elements. - (macrolet ((def-full-data-vector-frobs (type element-type &rest scs) `(progn (define-full-reffer ,(symbolicate "DATA-VECTOR-REF/" type) ,type @@ -116,6 +106,8 @@ (def-partial-data-vector-frobs simple-base-string base-char :byte nil base-char-reg) + #!+sb-unicode + (def-full-data-vector-frobs simple-character-string character character-reg) (def-partial-data-vector-frobs simple-array-unsigned-byte-7 positive-fixnum :byte nil unsigned-reg signed-reg) @@ -146,12 +138,8 @@ (def-full-data-vector-frobs simple-array-signed-byte-32 signed-num signed-reg)) - - ;;; Integer vectors whos elements are smaller than a byte. I.e. bit, 2-bit, ;;; and 4-bit vectors. -;;; - (macrolet ((def-small-data-vector-frobs (type bits) (let* ((elements-per-word (floor n-word-bits bits)) (bit-shift (1- (integer-length elements-per-word)))) @@ -317,10 +305,7 @@ (def-small-data-vector-frobs simple-array-unsigned-byte-2 2) (def-small-data-vector-frobs simple-array-unsigned-byte-4 4)) - ;;; And the float variants. -;;; - (define-vop (data-vector-ref/simple-array-single-float) (:note "inline array access") (:translate data-vector-ref) @@ -422,10 +407,8 @@ n-word-bytes)))) (unless (location= result value) (inst fmove :double result value)))) - ;;; Complex float arrays. - (define-vop (data-vector-ref/simple-array-complex-single-float) (:note "inline array access") (:translate data-vector-ref) @@ -447,7 +430,6 @@ other-pointer-lowtag))) (inst nop))) - (define-vop (data-vector-set/simple-array-complex-single-float) (:note "inline array store") (:translate data-vector-set) @@ -530,38 +512,30 @@ ;;; These VOPs are used for implementing float slots in structures (whose raw ;;; data is an unsigned-32 vector. -;;; (define-vop (raw-ref-single data-vector-ref/simple-array-single-float) (:translate %raw-ref-single) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-single data-vector-set/simple-array-single-float) (:translate %raw-set-single) (:arg-types sb!c::raw-vector positive-fixnum single-float)) -;;; (define-vop (raw-ref-double data-vector-ref/simple-array-double-float) (:translate %raw-ref-double) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-double data-vector-set/simple-array-double-float) (:translate %raw-set-double) (:arg-types sb!c::raw-vector positive-fixnum double-float)) - (define-vop (raw-ref-complex-single data-vector-ref/simple-array-complex-single-float) (:translate %raw-ref-complex-single) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-complex-single data-vector-set/simple-array-complex-single-float) (:translate %raw-set-complex-single) (:arg-types sb!c::raw-vector positive-fixnum complex-single-float)) -;;; (define-vop (raw-ref-complex-double data-vector-ref/simple-array-complex-double-float) (:translate %raw-ref-complex-double) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-complex-double data-vector-set/simple-array-complex-double-float) (:translate %raw-set-complex-double) @@ -569,17 +543,11 @@ ;;; These vops are useful for accessing the bits of a vector irrespective of ;;; what type of vector it is. -;;; - (define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %raw-bits) (define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) unsigned-num %set-raw-bits) - - ;;;; Misc. Array VOPs. - (define-vop (get-vector-subtype get-header-data)) (define-vop (set-vector-subtype set-header-data)) - diff --git a/src/compiler/ppc/array.lisp b/src/compiler/ppc/array.lisp index b546630..6eaef08 100644 --- a/src/compiler/ppc/array.lisp +++ b/src/compiler/ppc/array.lisp @@ -111,9 +111,11 @@ (:result-types ,element-type))))) (def-data-vector-frobs simple-base-string byte-index character character-reg) + #!+sb-unicode + (def-data-vector-frobs simple-character-string word-index + character character-reg) (def-data-vector-frobs simple-vector word-index * descriptor-reg any-reg) - (def-data-vector-frobs simple-array-unsigned-byte-7 byte-index positive-fixnum unsigned-reg) (def-data-vector-frobs simple-array-unsigned-byte-8 byte-index diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index b1fb39d..58585aa 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -712,16 +712,35 @@ sb!vm:n-byte-bits))) string1)) +;;; KLUDGE: This isn't the nicest way of achieving efficient string +;;; streams, but it does work; a more general framework for this kind +;;; of optimization, as well as better handling of the possible +;;; keyword arguments, would be nice. +#!+sb-unicode +(deftransform replace ((string1 string2 &key (start1 0) (start2 0) + end1 end2) + ((simple-array character (*)) + (simple-array character (*)) + &rest t) + * + ;; FIXME: consider replacing this policy test + ;; with some tests for the STARTx and ENDx + ;; indices being valid, conditional on high + ;; SAFETY code. + ;; + ;; FIXME: It turns out that this transform is + ;; critical for the performance of string + ;; streams. Make this more explicit. + :policy (< (max safety space) 3)) + `(sb!impl::simple-character-string-replace-from-simple-character-string* + string1 string2 start1 end1 start2 end2)) + ;;; FIXME: this would be a valid transform for certain excluded cases: ;;; * :TEST 'CHAR= or :TEST #'CHAR= ;;; * :TEST 'EQL or :TEST #'EQL ;;; * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity) -;;; -;;; also, it should be noted that there's nothing much in this -;;; transform (as opposed to the ones for REPLACE and CONCATENATE) -;;; that particularly limits it to SIMPLE-BASE-STRINGs. (deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2) - (simple-base-string simple-base-string &rest t) + (simple-string simple-string &rest t) * :policy (> speed (max space safety))) `(block search @@ -744,6 +763,9 @@ ;;; at least once DYNAMIC-EXTENT works. ;;; ;;; FIXME: currently KLUDGEed because of bug 188 +;;; +;;; FIXME: disabled for sb-unicode: probably want it back +#!-sb-unicode (deftransform concatenate ((rtype &rest sequences) (t &rest (or simple-base-string (simple-array nil (*)))) diff --git a/src/compiler/sparc/array.lisp b/src/compiler/sparc/array.lisp index 9fdb1ff..0d56f1a 100644 --- a/src/compiler/sparc/array.lisp +++ b/src/compiler/sparc/array.lisp @@ -12,7 +12,6 @@ (in-package "SB!VM") ;;;; allocator for the array header. - (define-vop (make-array-header) (:translate make-array-header) (:policy :fast-safe) @@ -36,7 +35,6 @@ (inst srl ndescr ndescr n-fixnum-tag-bits) (storew ndescr header 0 other-pointer-lowtag)) (move result header))) - ;;;; Additional accessors and setters for the array header. (define-vop (%array-dimension word-index-ref) @@ -84,7 +82,6 @@ ;;; Variants built on top of word-index-ref, etc. I.e. those vectors whos ;;; elements are represented in integer registers and are built out of ;;; 8, 16, or 32 bit elements. - (macrolet ((def-data-vector-frobs (type variant element-type &rest scs) `(progn (define-vop (,(symbolicate "DATA-VECTOR-REF/" (string type)) @@ -109,6 +106,9 @@ (def-data-vector-frobs simple-base-string byte-index character character-reg) + #!+sb-unicode + (def-data-vector-frobs simple-character-string word-index + character character-reg) (def-data-vector-frobs simple-vector word-index * descriptor-reg any-reg) @@ -607,70 +607,55 @@ ;;; These VOPs are used for implementing float slots in structures (whose raw ;;; data is an unsigned-32 vector. -;;; (define-vop (raw-ref-single data-vector-ref/simple-array-single-float) (:translate %raw-ref-single) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-single data-vector-set/simple-array-single-float) (:translate %raw-set-single) (:arg-types sb!c::raw-vector positive-fixnum single-float)) -;;; (define-vop (raw-ref-double data-vector-ref/simple-array-double-float) (:translate %raw-ref-double) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-double data-vector-set/simple-array-double-float) (:translate %raw-set-double) (:arg-types sb!c::raw-vector positive-fixnum double-float)) -;;; #!+long-float (define-vop (raw-ref-long data-vector-ref/simple-array-long-float) (:translate %raw-ref-long) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; #!+long-float (define-vop (raw-set-double data-vector-set/simple-array-long-float) (:translate %raw-set-long) (:arg-types sb!c::raw-vector positive-fixnum long-float)) - (define-vop (raw-ref-complex-single data-vector-ref/simple-array-complex-single-float) (:translate %raw-ref-complex-single) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-complex-single data-vector-set/simple-array-complex-single-float) (:translate %raw-set-complex-single) (:arg-types sb!c::raw-vector positive-fixnum complex-single-float)) -;;; (define-vop (raw-ref-complex-double data-vector-ref/simple-array-complex-double-float) (:translate %raw-ref-complex-double) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; (define-vop (raw-set-complex-double data-vector-set/simple-array-complex-double-float) (:translate %raw-set-complex-double) (:arg-types sb!c::raw-vector positive-fixnum complex-double-float)) -;;; #!+long-float (define-vop (raw-ref-complex-long data-vector-ref/simple-array-complex-long-float) (:translate %raw-ref-complex-long) (:arg-types sb!c::raw-vector positive-fixnum)) -;;; #!+long-float (define-vop (raw-set-complex-long data-vector-set/simple-array-complex-long-float) (:translate %raw-set-complex-long) (:arg-types sb!c::raw-vector positive-fixnum complex-long-float)) - ;;; These vops are useful for accessing the bits of a vector irrespective of ;;; what type of vector it is. -;;; - (define-vop (raw-bits word-index-ref) (:note "raw-bits VOP") (:translate %raw-bits) diff --git a/src/compiler/target-dump.lisp b/src/compiler/target-dump.lisp index 01eba24..26c7380 100644 --- a/src/compiler/target-dump.lisp +++ b/src/compiler/target-dump.lisp @@ -13,6 +13,22 @@ (in-package "SB!FASL") +;;; a helper function shared by DUMP-SIMPLE-CHARACTER-STRING and +;;; DUMP-SYMBOL (in the target compiler: the cross-compiler uses the +;;; portability knowledge and always dumps BASE-STRINGS). +#!+sb-unicode +(defun dump-characters-of-string (s fasl-output) + (declare (type string s) (type fasl-output fasl-output)) + (dovector (c s) + (dump-word (char-code c) fasl-output)) + (values)) +#!+sb-unicode +(defun dump-simple-character-string (s file) + (declare (type (simple-array character (*)) s)) + (dump-fop* (length s) fop-small-character-string fop-character-string file) + (dump-characters-of-string s file) + (values)) + ;;; Dump the first N bytes of VEC out to FILE. VEC is some sort of unboxed ;;; vector-like thing that we can BLT from. (defun dump-raw-bytes (vec n fasl-output) diff --git a/src/compiler/typetran.lisp b/src/compiler/typetran.lisp index c1b1908..89d1568 100644 --- a/src/compiler/typetran.lisp +++ b/src/compiler/typetran.lisp @@ -148,6 +148,9 @@ (define-source-transform atom (x) `(not (consp ,x))) +#!+sb-unicode +(define-source-transform base-char-p (x) + `(typep ,x 'base-char)) ;;;; TYPEP source transform diff --git a/src/compiler/x86/array.lisp b/src/compiler/x86/array.lisp index 5a56465..642a535 100644 --- a/src/compiler/x86/array.lisp +++ b/src/compiler/x86/array.lisp @@ -1239,6 +1239,79 @@ ;;; simple-string +#!+sb-unicode +(progn +(define-vop (data-vector-ref/simple-base-string) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types simple-base-string positive-fixnum) + (:results (value :scs (character-reg))) + (:result-types character) + (:generator 5 + (inst movzx value + (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-base-string) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-base-string (:constant (signed-byte 30))) + (:results (value :scs (character-reg))) + (:result-types character) + (:generator 4 + (inst movzx value + (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag))))) + +(define-vop (data-vector-set/simple-base-string) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (character-reg) :target eax)) + (:arg-types simple-base-string positive-fixnum character) + (:temporary (:sc character-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (character-reg))) + (:result-types character) + (:generator 5 + (move eax value) + (inst mov (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + al-tn) + (move result eax))) + +(define-vop (data-vector-set-c/simple-base-string) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (character-reg))) + (:info index) + (:arg-types simple-base-string (:constant (signed-byte 30)) character) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (character-reg))) + (:result-types character) + (:generator 4 + (move eax value) + (inst mov (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag)) + al-tn) + (move result eax))) +) ; PROGN + +#!-sb-unicode +(progn (define-vop (data-vector-ref/simple-base-string) (:translate data-vector-ref) (:policy :fast-safe) @@ -1283,7 +1356,7 @@ value) (move result value))) -(define-vop (data-vector-set/simple-base-string-c) +(define-vop (data-vector-set-c/simple-base-string) (:translate data-vector-set) (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) @@ -1298,6 +1371,16 @@ other-pointer-lowtag)) value) (move result value))) +) ; PROGN + +#!+sb-unicode +(define-full-reffer data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-ref) +#!+sb-unicode +(define-full-setter data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-set) ;;; signed-byte-8 diff --git a/src/compiler/x86/char.lisp b/src/compiler/x86/char.lisp index 9b8d2c0..c0ee144 100644 --- a/src/compiler/x86/char.lisp +++ b/src/compiler/x86/char.lisp @@ -14,6 +14,17 @@ ;;;; moves and coercions ;;; Move a tagged char to an untagged representation. +#!+sb-unicode +(define-vop (move-to-character) + (:args (x :scs (any-reg descriptor-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (character-reg) + :load-if (not (location= x y)))) + (:note "character untagging") + (:generator 1 + (move y x) + (inst shr y n-widetag-bits))) +#!-sb-unicode (define-vop (move-to-character) (:args (x :scs (any-reg control-stack) :target al)) (:temporary (:sc byte-reg :offset al-offset @@ -27,9 +38,24 @@ (move eax-tn x) (move y ah))) (define-move-vop move-to-character :move - (any-reg control-stack) (character-reg character-stack)) + (any-reg #!-sb-unicode control-stack) + (character-reg #!-sb-unicode character-stack)) ;;; Move an untagged char to a tagged representation. +#!+sb-unicode +(define-vop (move-from-character) + (:args (x :scs (character-reg))) + (:results (y :scs (any-reg descriptor-reg))) + (:note "character tagging") + (:generator 1 + ;; FIXME: is this inefficient? Is there a better way of writing + ;; it? (fixnum tagging is done with LEA). We can't use SHL + ;; because we either scribble over the source register or briefly + ;; have a non-descriptor in a descriptor register, unless we + ;; introduce a temporary. + (inst imul y x (ash 1 n-widetag-bits)) + (inst or y character-widetag))) +#!-sb-unicode (define-vop (move-from-character) (:args (x :scs (character-reg character-stack) :target ah)) (:temporary (:sc byte-reg :offset al-offset :target y @@ -44,7 +70,8 @@ (inst and eax-tn #xffff) ; Remove any junk bits. (move y eax-tn))) (define-move-vop move-from-character :move - (character-reg character-stack) (any-reg descriptor-reg control-stack)) + (character-reg #!-sb-unicode character-stack) + (any-reg descriptor-reg #!-sb-unicode control-stack)) ;;; Move untagged character values. (define-vop (character-move) @@ -74,9 +101,14 @@ (character-reg (move y x)) (character-stack + #!-sb-unicode (inst mov (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) 4))) - x))))) + x) + #!+sb-unicode + (if (= (tn-offset fp) esp-offset) + (storew x fp (tn-offset y)) ; c-call + (storew x fp (- (1+ (tn-offset y))))))))) (define-move-vop move-character-arg :move-arg (any-reg character-reg) (character-reg)) @@ -95,8 +127,22 @@ (:results (res :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 1 - (inst movzx res ch))) + #!-sb-unicode + (inst movzx res ch) + #!+sb-unicode + (inst mov res ch))) +#!+sb-unicode +(define-vop (code-char) + (:translate code-char) + (:policy :fast-safe) + (:args (code :scs (unsigned-reg unsigned-stack))) + (:arg-types positive-fixnum) + (:results (res :scs (character-reg))) + (:result-types character) + (:generator 1 + (inst mov res code))) +#!-sb-unicode (define-vop (code-char) (:translate code-char) (:policy :fast-safe) diff --git a/src/compiler/x86/vm.lisp b/src/compiler/x86/vm.lisp index 48fc7ad..377b196 100644 --- a/src/compiler/x86/vm.lisp +++ b/src/compiler/x86/vm.lisp @@ -229,7 +229,9 @@ ;; non-descriptor characters (character-reg registers - :locations #.*byte-regs* + :locations #!-sb-unicode #.*byte-regs* + #!+sb-unicode #.*dword-regs* + #!-sb-unicode #!-sb-unicode :reserve-locations (#.ah-offset #.al-offset) :constant-scs (immediate) :save-p t @@ -322,11 +324,13 @@ (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) (eval-when (:compile-toplevel :load-toplevel :execute) -(defparameter *byte-sc-names* '(character-reg byte-reg character-stack)) +(defparameter *byte-sc-names* + '(#!-sb-unicode character-reg byte-reg #!-sb-unicode character-stack)) (defparameter *word-sc-names* '(word-reg)) (defparameter *dword-sc-names* '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack - signed-stack unsigned-stack sap-stack single-stack constant)) + signed-stack unsigned-stack sap-stack single-stack + #!+sb-unicode character-reg #!+sb-unicode character-stack constant)) ;;; added by jrd. I guess the right thing to do is to treat floats ;;; as a separate size... ;;; diff --git a/version.lisp-expr b/version.lisp-expr index 9678c2e..b1795c0 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.16.24" +"0.8.16.25"