From cfc3b695e6452907fef6492710777511ac4af979 Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Sun, 17 May 2009 17:30:23 +0000 Subject: [PATCH] 1.0.28.55: transform FILL to a UB*-BASH-FILL when possible The performance boost for all cases which previously used VECTOR-FILL* is quite noticeable. Also delay the FILL transform if the vector element type is not yet known. ...also one leftover #+sb-xc-host from the previous commit. --- NEWS | 3 ++ package-data-list.lisp-expr | 2 + src/code/seq.lisp | 28 +++++----- src/compiler/array-tran.lisp | 7 ++- src/compiler/dump.lisp | 4 -- src/compiler/generic/vm-array.lisp | 28 ++++++++-- src/compiler/seqtran.lisp | 103 +++++++++++++++++++++++++++++------- tests/seq.impure.lisp | 3 +- version.lisp-expr | 2 +- 9 files changed, 133 insertions(+), 47 deletions(-) diff --git a/NEWS b/NEWS index 64cb027..e97f4a4 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,9 @@ the symbol, prohibits both lexical and dynamic binding. This is mainly an efficiency measure for threaded platforms, but also valueable in expressing intent. + * optimization: the compiler uses a specialized version of FILL when the + element type is know in more cases, making eg. (UNSIGNED-BYTE 8) case + almost 90% faster. * optimization: accesses to potentially non-simple arrays where element type is known are 50% faster. * optimization: compiler now generates faster array typechecking code. diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 0ca3618..81e1fd1 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -2530,6 +2530,8 @@ structure representations" "SAETP-N-BITS" "SAETP-TYPECODE" "SAETP-PRIMITIVE-TYPE-NAME" "SAETP-N-PAD-ELEMENTS" "SAETP-SPECIFIER" "SAETP-COMPLEX-TYPECODE" "SAETP-IMPORTANCE" + "SAETP-FIXNUM-P" + "VALID-BIT-BASH-SAETP-P" "*SPECIALIZED-ARRAY-ELEMENT-TYPE-PROPERTIES*" "SANCTIFY-FOR-EXECUTION" "SAP-POINTER-SLOT" "SAP-REG-SC-NUMBER" "SAP-SIZE" diff --git a/src/code/seq.lisp b/src/code/seq.lisp index c76c586..7cc9692 100644 --- a/src/code/seq.lisp +++ b/src/code/seq.lisp @@ -520,22 +520,18 @@ (end end) :force-inline t :check-fill-pointer t) - (macrolet ((frob () - `(locally (declare (optimize (safety 0) (speed 3))) - (do ((i start (1+ i))) - ((= i end) sequence) - (declare (index i)) - (setf (aref data i) item))))) - (etypecase data - #!+sb-unicode - ((simple-array character (*)) - (let ((item (locally (declare (optimize (safety 3))) - (the character item)))) - (frob))) - ((simple-array base-char (*)) - (let ((item (locally (declare (optimize (safety 3))) - (the base-char item)))) - (frob))))))) + ;; DEFTRANSFORM for FILL will turn these into + ;; calls to UB*-BASH-FILL. + (etypecase data + #!+sb-unicode + ((simple-array character (*)) + (let ((item (locally (declare (optimize (safety 3))) + (the character item)))) + (fill data item :start start :end end))) + ((simple-array base-char (*)) + (let ((item (locally (declare (optimize (safety 3))) + (the base-char item)))) + (fill data item :start start :end end)))))) (defun fill (sequence item &key (start 0) end) #!+sb-doc diff --git a/src/compiler/array-tran.lisp b/src/compiler/array-tran.lisp index c7481e2..387f53f 100644 --- a/src/compiler/array-tran.lisp +++ b/src/compiler/array-tran.lisp @@ -343,11 +343,14 @@ (not (eql default-initial-element (lvar-value initial-element))))) (let ((parameters (eliminate-keyword-args call 1 '((:element-type element-type) - (:initial-element initial-element))))) + (:initial-element initial-element)))) + (init (if (constant-lvar-p initial-element) + (lvar-value initial-element) + 'initial-element))) `(lambda (length ,@parameters) (declare (ignorable ,@parameters)) (truly-the ,result-spec - (fill ,alloc-form (the ,elt-spec initial-element)))))) + (fill ,alloc-form (the ,elt-spec ,init)))))) ;; just :ELEMENT-TYPE, or maybe with :INITIAL-ELEMENT EQL to the ;; default (t diff --git a/src/compiler/dump.lisp b/src/compiler/dump.lisp index 04f6be7..5f007a3 100644 --- a/src/compiler/dump.lisp +++ b/src/compiler/dump.lisp @@ -610,10 +610,6 @@ (dump-complex-double-float (realpart x) (imagpart x) file)) #!+long-float ((complex long-float) - ;; (There's no easy way to mix #!+LONG-FLOAT and #-SB-XC-HOST - ;; conditionalization at read time, so we do this SB-XC-HOST - ;; conditional at runtime instead.) - #+sb-xc-host (error "can't dump COMPLEX-LONG-FLOAT in cross-compiler") (dump-fop 'fop-complex-long-float file) (dump-long-float (realpart x) file) (dump-long-float (imagpart x) file)) diff --git a/src/compiler/generic/vm-array.lisp b/src/compiler/generic/vm-array.lisp index 394fbd5..f2a37c6 100644 --- a/src/compiler/generic/vm-array.lisp +++ b/src/compiler/generic/vm-array.lisp @@ -21,7 +21,7 @@ initial-element-default n-bits primitive-type-name - &key (n-pad-elements 0) complex-typecode (importance 0) + &key (n-pad-elements 0) complex-typecode (importance 0) fixnum-p &aux (typecode (symbol-value (symbolicate primitive-type-name "-WIDETAG"))))) (:copier nil)) @@ -30,6 +30,8 @@ ;; the element type, e.g. # or ;; # (ctype nil :type (or ctype null)) + ;; true if the elements are tagged fixnums + (fixnum-p nil :type boolean :read-only t) ;; what we get when the low-level vector-creation logic zeroes all ;; the bits (which also serves as the default value of MAKE-ARRAY's ;; :INITIAL-ELEMENT keyword) @@ -115,14 +117,16 @@ :importance 12) #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) ((unsigned-byte 29) 0 32 simple-array-unsigned-byte-29 - :importance 8) + :importance 8 + :fixnum-p t) ((unsigned-byte 31) 0 32 simple-array-unsigned-byte-31 :importance 11) ((unsigned-byte 32) 0 32 simple-array-unsigned-byte-32 :importance 11) #!+#.(cl:if (cl:= 64 sb!vm:n-word-bits) '(and) '(or)) ((unsigned-byte 60) 0 64 simple-array-unsigned-byte-60 - :importance 8) + :importance 8 + :fixnum-p t) #!+#.(cl:if (cl:= 64 sb!vm:n-word-bits) '(and) '(or)) ((unsigned-byte 63) 0 64 simple-array-unsigned-byte-63 :importance 9) @@ -138,13 +142,15 @@ ;; not (SIGNED-BYTE 30) #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) (fixnum 0 32 simple-array-signed-byte-30 - :importance 8) + :importance 8 + :fixnum-p t) ((signed-byte 32) 0 32 simple-array-signed-byte-32 :importance 7) ;; KLUDGE: see above KLUDGE for the 32-bit case #!+#.(cl:if (cl:= 64 sb!vm:n-word-bits) '(and) '(or)) (fixnum 0 64 simple-array-signed-byte-61 - :importance 8) + :importance 8 + :fixnum-p t) #!+#.(cl:if (cl:= 64 sb!vm:n-word-bits) '(and) '(or)) ((signed-byte 64) 0 64 simple-array-signed-byte-64 :importance 7) @@ -160,6 +166,18 @@ :importance 1) (t 0 #.sb!vm:n-word-bits simple-vector :importance 18)))) +(defun valid-bit-bash-saetp-p (saetp) + ;; BIT-BASHing isn't allowed on simple vectors that contain pointers + (and (not (eq t (sb!vm:saetp-specifier saetp))) + ;; Disallowing (VECTOR NIL) also means that we won't transform + ;; sequence functions into bit-bashing code and we let the + ;; generic sequence functions signal errors if necessary. + (not (zerop (sb!vm:saetp-n-bits saetp))) + ;; Due to limitations with the current BIT-BASHing code, we can't + ;; BIT-BASH reliably on arrays whose element types are larger + ;; than the word size. + (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits))) + (defvar sb!kernel::*specialized-array-element-types* (map 'list #'saetp-specifier diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index 1eb4d18..3c25cc4 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -516,9 +516,90 @@ (vector t &key (:start t) (:end t)) * :node node) - (let ((type (lvar-type seq)) - (element-type (type-specifier (extract-upgraded-element-type seq)))) - (cond ((and (neq '* element-type) (policy node (> speed space))) + (let* ((element-ctype (extract-upgraded-element-type seq)) + (element-type (type-specifier element-ctype)) + (type (lvar-type seq)) + (saetp (unless (eq *wild-type* element-ctype) + (find-saetp-by-ctype element-ctype)))) + (cond ((eq *wild-type* element-ctype) + (delay-ir1-transform node :constraint) + `(vector-fill* seq item start end)) + ((and saetp (sb!vm::valid-bit-bash-saetp-p saetp)) + (let* ((n-bits (sb!vm:saetp-n-bits saetp)) + (basher-name (format nil "UB~D-BASH-FILL" n-bits)) + (basher (or (find-symbol basher-name + (load-time-value (find-package :sb!kernel))) + (abort-ir1-transform + "Unknown fill basher, please report to sbcl-devel: ~A" + basher-name))) + (kind (cond ((sb!vm:saetp-fixnum-p saetp) :tagged) + ((member element-type '(character base-char)) :char) + ((eq element-type 'single-float) :single-float) + ((eq element-type 'double-float) :double-float) + (t :bits))) + ;; BASH-VALUE is a word that we can repeatedly smash + ;; on the array: for less-than-word sized elements it + ;; contains multiple copies of the fill item. + (bash-value + (if (constant-lvar-p item) + (let ((tmp (lvar-value item))) + (unless (ctypep tmp element-ctype) + (abort-ir1-transform "~S is not ~S" tmp element-type)) + (let* ((bits + (ldb (byte n-bits 0) + (ecase kind + (:tagged + (ash tmp sb!vm:n-fixnum-tag-bits)) + (:char + (char-code tmp)) + (:bits + tmp) + (:single-float + (single-float-bits tmp)) + (:double-float + (logior (ash (double-float-high-bits tmp) 32) + (double-float-low-bits tmp)))))) + (res bits)) + (loop for i of-type sb!vm:word from n-bits by n-bits + until (= i sb!vm:n-word-bits) + do (setf res (ldb (byte sb!vm:n-word-bits 0) + (logior res (ash bits i))))) + res)) + `(let* ((bits (ldb (byte ,n-bits 0) + ,(ecase kind + (:tagged + `(ash item ,sb!vm:n-fixnum-tag-bits)) + (:char + `(char-code item)) + (:bits + `item) + (:single-float + `(single-float-bits item)) + (:double-float + `(logior (ash (double-float-high-bits item) 32) + (double-float-low-bits item)))))) + (res bits)) + (declare (type sb!vm:word res)) + ,@(unless (= sb!vm:n-word-bits n-bits) + `((loop for i of-type sb!vm:word from ,n-bits by ,n-bits + until (= i sb!vm:n-word-bits) + do (setf res + (ldb (byte ,sb!vm:n-word-bits 0) + (logior res (ash bits (truly-the (integer 0 ,(- sb!vm:n-word-bits n-bits)) i)))))))) + res)))) + (values + `(with-array-data ((data seq) + (start start) + (end end) + :check-fill-pointer t) + (declare (type (simple-array ,element-type 1) data)) + (declare (type index start end)) + (declare (optimize (safety 0) (speed 3)) + (muffle-conditions compiler-note)) + (,basher ,bash-value data start (- end start)) + seq) + `((declare (type ,element-type item)))))) + ((policy node (> speed space)) (values `(with-array-data ((data seq) (start start) @@ -625,20 +706,6 @@ (def!constant vector-data-bit-offset (* sb!vm:vector-data-offset sb!vm:n-word-bits)) -(eval-when (:compile-toplevel) -(defun valid-bit-bash-saetp-p (saetp) - ;; BIT-BASHing isn't allowed on simple vectors that contain pointers - (and (not (eq t (sb!vm:saetp-specifier saetp))) - ;; Disallowing (VECTOR NIL) also means that we won't transform - ;; sequence functions into bit-bashing code and we let the - ;; generic sequence functions signal errors if necessary. - (not (zerop (sb!vm:saetp-n-bits saetp))) - ;; Due to limitations with the current BIT-BASHing code, we can't - ;; BIT-BASH reliably on arrays whose element types are larger - ;; than the word size. - (<= (sb!vm:saetp-n-bits saetp) sb!vm:n-word-bits))) -) ; EVAL-WHEN - ;;; FIXME: In the copy loops below, we code the loops in a strange ;;; fashion: ;;; @@ -694,7 +761,7 @@ (unless (<= 0 start2 end2 len2) (sequence-bounding-indices-bad-error seq2 start2 end2)))) ,',(cond - ((and saetp (valid-bit-bash-saetp-p saetp)) + ((and saetp (sb!vm:valid-bit-bash-saetp-p saetp)) (let* ((n-element-bits (sb!vm:saetp-n-bits saetp)) (bash-function (intern (format nil "UB~D-BASH-COPY" n-element-bits) diff --git a/tests/seq.impure.lisp b/tests/seq.impure.lisp index a575352..ddc416d 100644 --- a/tests/seq.impure.lisp +++ b/tests/seq.impure.lisp @@ -957,7 +957,8 @@ standard bashed) ;; fill vectors ;; a) the standard slow way - (fill standard c :start offset :end (+ offset n)) + (locally (declare (notinline fill)) + (fill standard c :start offset :end (+ offset n))) ;; b) the blazingly fast way (let ((value (loop for i from 0 by bitsize until (= i sb-vm:n-word-bits) diff --git a/version.lisp-expr b/version.lisp-expr index 3a4c649..d2a8cb7 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.28.54" +"1.0.28.55" -- 1.7.10.4