From: Nikodemus Siivola Date: Sat, 19 Jul 2008 16:07:52 +0000 (+0000) Subject: 1.0.18.25: tweak stack allocation on x86 and x86-64 X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=1fc851cde8352f4f3c1062ee46593e66a6284c60;p=sbcl.git 1.0.18.25: tweak stack allocation on x86 and x86-64 * Use MAYBE-PSEUDO-ATOMIC in the LIST-OR-LIST* VOP: stack allocation doesn't need PA. * When using STACK-ALLOCATE-P parameter with ALLOCATION, also pass in the lowtag. This allows us to generate LEA REG [STACK_REG+LOWTAG] instead of MOV REG STACK_REG LEA REG [REG+LOWTAG] for stack allocation & tagging. On x86-64 can use the same trick in the inline path for heap allocation as well. --- diff --git a/NEWS b/NEWS index a0ee21d..f2a3ae6 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,7 @@ ;;;; -*- coding: utf-8; -*- changes in sbcl-1.0.19 relative to 1.0.18: + * optimization: stack allocation is slightly more efficient on x86 + and x86-64. * bug fix: compiler no longer makes erronous assumptions in the presense of non-foldable SATISFIES types. * bug fix: stack analysis missed cleanups of dynamic-extent diff --git a/src/compiler/x86-64/alloc.lisp b/src/compiler/x86-64/alloc.lisp index 8a3553c..e0c5211 100644 --- a/src/compiler/x86-64/alloc.lisp +++ b/src/compiler/x86-64/alloc.lisp @@ -44,13 +44,12 @@ (move temp ,tn) temp)))) (storew reg ,list ,slot list-pointer-lowtag)))) - (let ((cons-cells (if star (1- num) num))) - (pseudo-atomic + (let ((cons-cells (if star (1- num) num)) + (stack-allocate-p (awhen (sb!c::node-lvar node) + (sb!c::lvar-dynamic-extent it)))) + (maybe-pseudo-atomic stack-allocate-p (allocation res (* (pad-data-block cons-size) cons-cells) node - (awhen (sb!c::node-lvar node) - (sb!c::lvar-dynamic-extent it))) - (inst lea res - (make-ea :byte :base res :disp list-pointer-lowtag)) + stack-allocate-p list-pointer-lowtag) (move ptr res) (dotimes (i (1- cons-cells)) (store-car (tn-ref-tn things) ptr) @@ -119,11 +118,11 @@ ;; FIXME: It would be good to check for stack overflow here. (move ecx words) (inst shr ecx n-fixnum-tag-bits) - (allocation result result node t) + (allocation result result node t other-pointer-lowtag) (inst cld) (inst lea res - (make-ea :byte :base result :disp (* vector-data-offset n-word-bytes))) - (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) + (make-ea :byte :base result :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))) (storew type result 0 other-pointer-lowtag) (storew length result vector-length-slot other-pointer-lowtag) (zeroize zero) @@ -215,9 +214,8 @@ (:generator 10 (maybe-pseudo-atomic stack-allocate-p (let ((size (+ length closure-info-offset))) - (allocation result (pad-data-block size) node stack-allocate-p) - (inst lea result - (make-ea :byte :base result :disp fun-pointer-lowtag)) + (allocation result (pad-data-block size) node stack-allocate-p + fun-pointer-lowtag) (storew (logior (ash (1- size) n-widetag-bits) closure-header-widetag) result 0 fun-pointer-lowtag)) (loadw temp function closure-fun-slot fun-pointer-lowtag) @@ -256,8 +254,7 @@ (:node-var node) (:generator 50 (maybe-pseudo-atomic stack-allocate-p - (allocation result (pad-data-block words) node stack-allocate-p) - (inst lea result (make-ea :byte :base result :disp lowtag)) + (allocation result (pad-data-block words) node stack-allocate-p lowtag) (when type (storew (logior (ash (1- words) n-widetag-bits) type) result diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp index 883be90..8dfada6 100644 --- a/src/compiler/x86-64/call.lisp +++ b/src/compiler/x86-64/call.lisp @@ -1298,8 +1298,7 @@ (inst jrcxz done) (inst lea dst (make-ea :qword :base rcx :index rcx)) (maybe-pseudo-atomic stack-allocate-p - (allocation dst dst node stack-allocate-p) - (inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag)) + (allocation dst dst node stack-allocate-p list-pointer-lowtag) (inst shr rcx (1- n-lowtag-bits)) ;; Set decrement mode (successive args at lower addresses) (inst std) diff --git a/src/compiler/x86-64/macros.lisp b/src/compiler/x86-64/macros.lisp index 9857d15..3323512 100644 --- a/src/compiler/x86-64/macros.lisp +++ b/src/compiler/x86-64/macros.lisp @@ -138,30 +138,31 @@ ;;; node-var then it is used to make an appropriate speed vs size ;;; decision. -(defun allocation-dynamic-extent (alloc-tn size) +(defun allocation-dynamic-extent (alloc-tn size lowtag) (inst sub rsp-tn size) ;; see comment in x86/macros.lisp implementation of this (inst and rsp-tn #.(lognot lowtag-mask)) (aver (not (location= alloc-tn rsp-tn))) - (inst mov alloc-tn rsp-tn) + (inst lea alloc-tn (make-ea :byte :base rsp-tn :disp lowtag)) (values)) ;;; This macro should only be used inside a pseudo-atomic section, ;;; which should also cover subsequent initialization of the ;;; object. -(defun allocation-tramp (alloc-tn size &optional ignored) - (declare (ignore ignored)) +(defun allocation-tramp (alloc-tn size lowtag) (inst push size) (inst lea temp-reg-tn (make-ea :qword :disp (make-fixup "alloc_tramp" :foreign))) (inst call temp-reg-tn) (inst pop alloc-tn) + (when lowtag + (inst lea alloc-tn (make-ea :byte :base alloc-tn :disp lowtag))) (values)) -(defun allocation (alloc-tn size &optional ignored dynamic-extent) +(defun allocation (alloc-tn size &optional ignored dynamic-extent lowtag) (declare (ignore ignored)) (when dynamic-extent - (allocation-dynamic-extent alloc-tn size) + (allocation-dynamic-extent alloc-tn size lowtag) (return-from allocation (values))) (let ((NOT-INLINE (gen-label)) (DONE (gen-label)) @@ -188,7 +189,7 @@ :scale 1 :disp (make-fixup "boxed_region" :foreign 8)))) (cond (in-elsewhere - (allocation-tramp alloc-tn size)) + (allocation-tramp alloc-tn size lowtag)) (t (inst mov temp-reg-tn free-pointer) (if (tn-p size) @@ -201,17 +202,19 @@ (inst cmp end-addr alloc-tn) (inst jmp :be NOT-INLINE) (inst mov free-pointer alloc-tn) - (inst mov alloc-tn temp-reg-tn) + (if lowtag + (inst lea alloc-tn (make-ea :byte :base temp-reg-tn :disp lowtag)) + (inst mov alloc-tn temp-reg-tn)) (emit-label DONE) (assemble (*elsewhere*) (emit-label NOT-INLINE) (cond ((numberp size) - (allocation-tramp alloc-tn size)) + (allocation-tramp alloc-tn size lowtag)) (t (inst sub alloc-tn free-pointer) - (allocation-tramp alloc-tn alloc-tn))) - (inst jmp DONE)) - (values))))) + (allocation-tramp alloc-tn alloc-tn lowtag))) + (inst jmp DONE)))) + (values))) ;;; Allocate an other-pointer object of fixed SIZE with a single word ;;; header having the specified WIDETAG value. The result is placed in @@ -222,11 +225,10 @@ (bug "empty &body in WITH-FIXED-ALLOCATION")) (once-only ((result-tn result-tn) (size size) (stack-allocate-p stack-allocate-p)) `(maybe-pseudo-atomic ,stack-allocate-p - (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p) + (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p + other-pointer-lowtag) (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag) - ,result-tn) - (inst lea ,result-tn - (make-ea :qword :base ,result-tn :disp other-pointer-lowtag)) + ,result-tn 0 other-pointer-lowtag) ,@forms))) ;;;; error code diff --git a/src/compiler/x86/alloc.lisp b/src/compiler/x86/alloc.lisp index 23d30a6..67ea2a2 100644 --- a/src/compiler/x86/alloc.lisp +++ b/src/compiler/x86/alloc.lisp @@ -44,12 +44,12 @@ (move temp ,tn) temp)))) (storew reg ,list ,slot list-pointer-lowtag)))) - (let ((cons-cells (if star (1- num) num))) - (pseudo-atomic + (let ((cons-cells (if star (1- num) num)) + (stack-allocate-p (awhen (sb!c::node-lvar node) + (sb!c::lvar-dynamic-extent it)))) + (maybe-pseudo-atomic stack-allocate-p (allocation res (* (pad-data-block cons-size) cons-cells) node - (awhen (sb!c::node-lvar node) (sb!c::lvar-dynamic-extent it))) - (inst lea res - (make-ea :byte :base res :disp list-pointer-lowtag)) + stack-allocate-p list-pointer-lowtag) (move ptr res) (dotimes (i (1- cons-cells)) (store-car (tn-ref-tn things) ptr) @@ -143,11 +143,11 @@ ;; FIXME: It would be good to check for stack overflow here. (move ecx words) (inst shr ecx n-fixnum-tag-bits) - (allocation result result node t) + (allocation result result node t other-pointer-lowtag) (inst cld) (inst lea res - (make-ea :byte :base result :disp (* vector-data-offset n-word-bytes))) - (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) + (make-ea :byte :base result :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))) (sc-case type (immediate (aver (typep (tn-value type) '(unsigned-byte 8))) @@ -245,9 +245,8 @@ (maybe-pseudo-atomic stack-allocate-p (let ((size (+ length closure-info-offset))) (allocation result (pad-data-block size) node - stack-allocate-p) - (inst lea result - (make-ea :byte :base result :disp fun-pointer-lowtag)) + stack-allocate-p + fun-pointer-lowtag) (storew (logior (ash (1- size) n-widetag-bits) closure-header-widetag) result 0 fun-pointer-lowtag)) (loadw temp function closure-fun-slot fun-pointer-lowtag) @@ -306,8 +305,7 @@ (aver (null type)) (inst call (make-fixup dst :assembly-routine))) (maybe-pseudo-atomic stack-allocate-p - (allocation result (pad-data-block words) node stack-allocate-p) - (inst lea result (make-ea :byte :base result :disp lowtag)) + (allocation result (pad-data-block words) node stack-allocate-p lowtag) (when type (storew (logior (ash (1- words) n-widetag-bits) type) result diff --git a/src/compiler/x86/call.lisp b/src/compiler/x86/call.lisp index 1b7d900..3e4d057 100644 --- a/src/compiler/x86/call.lisp +++ b/src/compiler/x86/call.lisp @@ -1354,8 +1354,7 @@ (inst jecxz done) (inst lea dst (make-ea :dword :base ecx :index ecx)) (maybe-pseudo-atomic stack-allocate-p - (allocation dst dst node stack-allocate-p) - (inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag)) + (allocation dst dst node stack-allocate-p list-pointer-lowtag) (inst shr ecx 2) ;; Set decrement mode (successive args at lower addresses) (inst std) diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp index 5489a36..4c1a916 100644 --- a/src/compiler/x86/macros.lisp +++ b/src/compiler/x86/macros.lisp @@ -177,7 +177,7 @@ ;;; the duration. Now we have pseudoatomic there's no need for that ;;; overhead. -(defun allocation-dynamic-extent (alloc-tn size) +(defun allocation-dynamic-extent (alloc-tn size lowtag) (inst sub esp-tn size) ;; FIXME: SIZE _should_ be double-word aligned (suggested but ;; unfortunately not enforced by PAD-DATA-BLOCK and @@ -187,7 +187,7 @@ ;; 2004-03-30 (inst and esp-tn (lognot lowtag-mask)) (aver (not (location= alloc-tn esp-tn))) - (inst mov alloc-tn esp-tn) + (inst lea alloc-tn (make-ea :byte :base esp-tn :disp lowtag)) (values)) (defun allocation-notinline (alloc-tn size) @@ -269,12 +269,16 @@ ;;; (FIXME: so why aren't we asserting this?) -(defun allocation (alloc-tn size &optional inline dynamic-extent) +(defun allocation (alloc-tn size &optional inline dynamic-extent lowtag) (cond - (dynamic-extent (allocation-dynamic-extent alloc-tn size)) + (dynamic-extent + (allocation-dynamic-extent alloc-tn size lowtag)) ((or (null inline) (policy inline (>= speed space))) (allocation-inline alloc-tn size)) - (t (allocation-notinline alloc-tn size))) + (t + (allocation-notinline alloc-tn size))) + (when (and lowtag (not dynamic-extent)) + (inst lea alloc-tn (make-ea :byte :base alloc-tn :disp lowtag))) (values)) ;;; Allocate an other-pointer object of fixed SIZE with a single word @@ -286,12 +290,11 @@ (bug "empty &body in WITH-FIXED-ALLOCATION")) (once-only ((result-tn result-tn) (size size) (stack-allocate-p stack-allocate-p)) `(maybe-pseudo-atomic ,stack-allocate-p - (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p) - (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag) - ,result-tn) - (inst lea ,result-tn - (make-ea :byte :base ,result-tn :disp other-pointer-lowtag)) - ,@forms))) + (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p + other-pointer-lowtag) + (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag) + ,result-tn 0 other-pointer-lowtag) + ,@forms))) ;;;; error code (defun emit-error-break (vop kind code values) diff --git a/version.lisp-expr b/version.lisp-expr index 0ef7ba1..50025b5 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.18.24" +"1.0.18.25"