From 43b1750ede8767928788b158399d3c5d2910855a Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Wed, 11 Apr 2007 16:35:27 +0000 Subject: [PATCH] 1.0.4.63: Don't zeroize dynamic-extent simple-unboxed-arrays on x86 and x86-64 --- NEWS | 2 ++ OPTIMIZATIONS | 11 +++++---- src/compiler/x86-64/alloc.lisp | 43 ++++++++++++++++++++--------------- src/compiler/x86/alloc.lisp | 49 ++++++++++++++++++++++------------------ version.lisp-expr | 2 +- 5 files changed, 62 insertions(+), 45 deletions(-) diff --git a/NEWS b/NEWS index 6f4a064..617c1dd 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,8 @@ changes in sbcl-1.0.5 relative to sbcl-1.0.4: variants no longer cons. * optimization: Direct calls to CHAR-{EQUAL,LESSP,GREATERP} and their NOT- variants no longer cons. + * optimization: Stack allocation of arrays containing unboxed + elements is slightly more efficient on x86 and x86-64. * enhancement: XREF information is now collected to references made to global variables using SYMBOL-VALUE and a constant argument. * enhancement: SIGINT now causes a specific condition diff --git a/OPTIMIZATIONS b/OPTIMIZATIONS index d234ce3..623e433 100644 --- a/OPTIMIZATIONS +++ b/OPTIMIZATIONS @@ -220,10 +220,13 @@ SBCL cannot derive upper bound for I and uses generic arithmetic here: should know the connection between an NLE and its CLEANUP.) -------------------------------------------------------------------------------- #27 -Initialization of stack-allocated arrays is inefficient: we always -fill the vector with zeroes, even when it is not needed (as for -platforms with conservative GC or for arrays of unboxed objectes) and -is performed later explicitely. +(We always zeroize stack-allocated arrays of boxed elements. The +previous note here suggested that we could avoid that step on +platforms with conservative GC; it's not clear to me (NJF) that +doing so is a wise idea.) + +x86 and x86-64 do not zeroize stack-allocated arrays of unboxed +elements; other platforms could copy what they do. -------------------------------------------------------------------------------- #28 a. Accessing raw slots in structure instances is more inefficient than diff --git a/src/compiler/x86-64/alloc.lisp b/src/compiler/x86-64/alloc.lisp index 76bad49..707a459 100644 --- a/src/compiler/x86-64/alloc.lisp +++ b/src/compiler/x86-64/alloc.lisp @@ -96,7 +96,7 @@ (storew length result vector-length-slot other-pointer-lowtag)))) (define-vop (allocate-vector-on-stack) - (:args (type :scs (unsigned-reg)) + (:args (type :scs (unsigned-reg immediate)) (length :scs (any-reg)) (words :scs (any-reg) :target ecx)) (:temporary (:sc any-reg :offset ecx-offset :from (:argument 2)) ecx) @@ -110,23 +110,30 @@ (:policy :fast-safe) (:node-var node) (:generator 100 - (inst lea result (make-ea :byte :base words :disp - (+ (1- (ash 1 n-lowtag-bits)) - (* vector-data-offset n-word-bytes)))) - (inst and result (lognot lowtag-mask)) - ;; FIXME: It would be good to check for stack overflow here. - (move ecx words) - (inst shr ecx n-fixnum-tag-bits) - (allocation result result node t) - (inst cld) - (inst lea res - (make-ea :byte :base result :disp (* vector-data-offset n-word-bytes))) - (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) - (storew type result 0 other-pointer-lowtag) - (storew length result vector-length-slot other-pointer-lowtag) - (zeroize zero) - (inst rep) - (inst stos zero))) + (when (sc-is type immediate) + (aver (typep (tn-value type) '(unsigned-byte 8)))) + (let ((unboxed-elements-p (and (sc-is type immediate) + (/= (tn-value type) + simple-vector-widetag)))) + (inst lea result (make-ea :byte :base words :disp + (+ (1- (ash 1 n-lowtag-bits)) + (* vector-data-offset n-word-bytes)))) + (inst and result (lognot lowtag-mask)) + ;; FIXME: It would be good to check for stack overflow here. + (move ecx words) + (inst shr ecx n-fixnum-tag-bits) + (allocation result result node t) + (unless unboxed-elements-p + (inst cld)) + (inst lea res + (make-ea :byte :base result :disp (* vector-data-offset n-word-bytes))) + (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) + (storew type result 0 other-pointer-lowtag) + (storew length result vector-length-slot other-pointer-lowtag) + (unless unboxed-elements-p + (zeroize zero) + (inst rep) + (inst stos zero))))) (in-package "SB!C") diff --git a/src/compiler/x86/alloc.lisp b/src/compiler/x86/alloc.lisp index 8bc9c86..e7fc391 100644 --- a/src/compiler/x86/alloc.lisp +++ b/src/compiler/x86/alloc.lisp @@ -134,28 +134,33 @@ (:policy :fast-safe) (:node-var node) (:generator 100 - (inst lea result (make-ea :byte :base words :disp - (+ (1- (ash 1 n-lowtag-bits)) - (* vector-data-offset n-word-bytes)))) - (inst and result (lognot lowtag-mask)) - ;; FIXME: It would be good to check for stack overflow here. - (move ecx words) - (inst shr ecx n-fixnum-tag-bits) - (allocation result result node t) - (inst cld) - (inst lea res - (make-ea :byte :base result :disp (* vector-data-offset n-word-bytes))) - (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) - (sc-case type - (immediate - (aver (typep (tn-value type) '(unsigned-byte 8))) - (storeb (tn-value type) result 0 other-pointer-lowtag)) - (t - (storew type result 0 other-pointer-lowtag))) - (storew length result vector-length-slot other-pointer-lowtag) - (inst xor zero zero) - (inst rep) - (inst stos zero))) + (let ((unboxed-elements-p (and (sc-is type immediate) + (/= (tn-value type) + simple-vector-widetag)))) + (inst lea result (make-ea :byte :base words :disp + (+ (1- (ash 1 n-lowtag-bits)) + (* vector-data-offset n-word-bytes)))) + (inst and result (lognot lowtag-mask)) + ;; FIXME: It would be good to check for stack overflow here. + (move ecx words) + (inst shr ecx n-fixnum-tag-bits) + (allocation result result node t) + (unless unboxed-elements-p + (inst cld)) + (inst lea res + (make-ea :byte :base result :disp (* vector-data-offset n-word-bytes))) + (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) + (sc-case type + (immediate + (aver (typep (tn-value type) '(unsigned-byte 8))) + (storeb (tn-value type) result 0 other-pointer-lowtag)) + (t + (storew type result 0 other-pointer-lowtag))) + (storew length result vector-length-slot other-pointer-lowtag) + (unless unboxed-elements-p + (inst xor zero zero) + (inst rep) + (inst stos zero))))) (in-package "SB!C") diff --git a/version.lisp-expr b/version.lisp-expr index 863b227..ff028c1 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.4.62" +"1.0.4.63" -- 1.7.10.4