From: Nikodemus Siivola Date: Mon, 10 Mar 2008 18:49:17 +0000 (+0000) Subject: 1.0.15.17: better threaded BIND & UNBIND for x86-64 X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=91249484164b74b9df8b65f8ecd1fe228a08276c;p=sbcl.git 1.0.15.17: better threaded BIND & UNBIND for x86-64 * x86-64 port of 1.0.15.7, plus some x86 cleanups: -- Rename ALLOCATE-TLS-INDEX-* to ALLOC-TLS-INDEX-* for more consistent asm routine names. -- Use (FIXNUMIZE 1) instead of magic number 4. Shrinks the threaded x86-64 core by ~200k bytes. --- diff --git a/NEWS b/NEWS index 42a22ac..87652e1 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,8 @@ changes in sbcl-1.0.16 relative to 1.0.15: * minor incompatible change: change PROBE-FILE back to returning NIL whenever we can't get a truename, as was the case before 1.0.14. + * optimization: binding special variables now generates smaller code + on threaded platforms. * optimization: MEMBER and ASSOC are over 50% faster for :TEST #'EQ and cases where no :TEST is given but the compiler can infer that the element to search is of type (OR FIXNUM (NOT NUMBER)). diff --git a/src/assembly/x86-64/alloc.lisp b/src/assembly/x86-64/alloc.lisp index a27364f..aa1fa7e 100644 --- a/src/assembly/x86-64/alloc.lisp +++ b/src/assembly/x86-64/alloc.lisp @@ -70,3 +70,58 @@ (def r13) (def r14) (def r15)) + +#+sb-assembling +(macrolet ((def (reg) + (declare (ignorable reg)) + #!+sb-thread + (let* ((name (intern (format nil "ALLOC-TLS-INDEX-IN-~A" reg))) + (target-offset (intern (format nil "~A-OFFSET" reg))) + (other-offset (if (eql 'rax reg) + 'rcx-offset + 'rax-offset))) + ;; Symbol starts in TARGET, where the TLS-INDEX ends up in. + `(define-assembly-routine ,name + ((:temp other descriptor-reg ,other-offset) + (:temp target descriptor-reg ,target-offset)) + (let ((get-tls-index-lock (gen-label)) + (release-tls-index-lock (gen-label))) + (pseudo-atomic + ;; Save OTHER & push the symbol. RAX is either one of the two. + (inst push other) + (inst push target) + (emit-label get-tls-index-lock) + (inst mov target 1) + (zeroize rax-tn) + (inst lock) + (inst cmpxchg (make-ea-for-symbol-value *tls-index-lock*) target) + (inst jmp :ne get-tls-index-lock) + ;; The symbol is now in OTHER. + (inst pop other) + ;; Now with the lock held, see if the symbol's tls index has been + ;; set in the meantime. + (loadw target other symbol-tls-index-slot other-pointer-lowtag) + (inst or target target) + (inst jmp :ne release-tls-index-lock) + ;; Allocate a new tls-index. + (load-symbol-value target *free-tls-index*) + (inst add (make-ea-for-symbol-value *free-tls-index*) (fixnumize 1)) + (storew target other symbol-tls-index-slot other-pointer-lowtag) + (emit-label release-tls-index-lock) + (store-symbol-value 0 *tls-index-lock*) + ;; Restore OTHER. + (inst pop other)) + (inst ret)))))) + (def rax) + (def rcx) + (def rdx) + (def rbx) + (def rsi) + (def rdi) + (def r8) + (def r9) + (def r10) + (def r12) + (def r13) + (def r14) + (def r15)) diff --git a/src/assembly/x86/alloc.lisp b/src/assembly/x86/alloc.lisp index d011d5d..e52052d 100644 --- a/src/assembly/x86/alloc.lisp +++ b/src/assembly/x86/alloc.lisp @@ -83,7 +83,7 @@ (macrolet ((def (reg) (declare (ignorable reg)) #!+sb-thread - (let* ((name (intern (format nil "ALLOCATE-TLS-INDEX-IN-~A" reg))) + (let* ((name (intern (format nil "ALLOC-TLS-INDEX-IN-~A" reg))) (target-offset (intern (format nil "~A-OFFSET" reg))) (other-offset (if (eql 'eax reg) 'ecx-offset @@ -113,7 +113,7 @@ (inst jmp :ne release-tls-index-lock) ;; Allocate a new tls-index. (load-symbol-value target *free-tls-index*) - (inst add (make-ea-for-symbol-value *free-tls-index*) 4) ; fixnum + 1 + (inst add (make-ea-for-symbol-value *free-tls-index*) (fixnumize 1)) (storew target other symbol-tls-index-slot other-pointer-lowtag) (emit-label release-tls-index-lock) (store-symbol-value 0 *tls-index-lock*) diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp index 3d85b6b..2f546a6 100644 --- a/src/compiler/x86-64/cell.lisp +++ b/src/compiler/x86-64/cell.lisp @@ -327,46 +327,37 @@ (define-vop (bind) (:args (val :scs (any-reg descriptor-reg)) (symbol :scs (descriptor-reg))) - (:temporary (:sc descriptor-reg :offset rax-offset) rax) (:temporary (:sc unsigned-reg) tls-index bsp) (:generator 10 - (let ((tls-index-valid (gen-label)) - (get-tls-index-lock (gen-label)) - (release-tls-index-lock (gen-label))) + (let ((tls-index-valid (gen-label))) (load-binding-stack-pointer bsp) (loadw tls-index symbol symbol-tls-index-slot other-pointer-lowtag) (inst add bsp (* binding-size n-word-bytes)) (store-binding-stack-pointer bsp) (inst or tls-index tls-index) (inst jmp :ne tls-index-valid) - - ;; FIXME: - ;; * We should ensure the existence of TLS index for LET-bound specials - ;; at compile/load time, and use write a FAST-BIND for use with those. - ;; * PROGV will need to do this, but even there this should not be inline. - ;; This is probably best moved to C, since dynbind.c also needs to do this. - (pseudo-atomic - (emit-label get-tls-index-lock) - (inst mov tls-index 1) - (zeroize rax) - (inst lock) - (inst cmpxchg (make-ea-for-symbol-value *tls-index-lock*) tls-index) - (inst jmp :ne get-tls-index-lock) - ;; now with the lock held, see if the symbol's tls index has - ;; been set in the meantime - (loadw tls-index symbol symbol-tls-index-slot other-pointer-lowtag) - (inst or tls-index tls-index) - (inst jmp :ne release-tls-index-lock) - ;; allocate a new tls-index - (load-symbol-value tls-index *free-tls-index*) - (inst add (make-ea-for-symbol-value *free-tls-index*) 8) ; fixnum + 1 - (storew tls-index symbol symbol-tls-index-slot other-pointer-lowtag) - (emit-label release-tls-index-lock) - (store-symbol-value 0 *tls-index-lock*)) - + (inst mov tls-index symbol) + (inst lea temp-reg-tn + (make-ea :qword :disp + (make-fixup (ecase (tn-offset tls-index) + (#.rax-offset 'alloc-tls-index-in-rax) + (#.rcx-offset 'alloc-tls-index-in-rcx) + (#.rdx-offset 'alloc-tls-index-in-rdx) + (#.rbx-offset 'alloc-tls-index-in-rbx) + (#.rsi-offset 'alloc-tls-index-in-rsi) + (#.rdi-offset 'alloc-tls-index-in-rdi) + (#.r8-offset 'alloc-tls-index-in-r8) + (#.r9-offset 'alloc-tls-index-in-r9) + (#.r10-offset 'alloc-tls-index-in-r10) + (#.r12-offset 'alloc-tls-index-in-r12) + (#.r13-offset 'alloc-tls-index-in-r13) + (#.r14-offset 'alloc-tls-index-in-r14) + (#.r15-offset 'alloc-tls-index-in-r15)) + :assembly-routine))) + (inst call temp-reg-tn) (emit-label tls-index-valid) - (inst mov rax (make-ea :qword :base thread-base-tn :scale 1 :index tls-index)) - (storew rax bsp (- binding-value-slot binding-size)) + (inst push (make-ea :qword :base thread-base-tn :scale 1 :index tls-index)) + (popw bsp (- binding-value-slot binding-size)) (storew symbol bsp (- binding-symbol-slot binding-size)) (inst mov (make-ea :qword :base thread-base-tn :scale 1 :index tls-index) val)))) @@ -385,20 +376,19 @@ (storew symbol bsp (- binding-symbol-slot binding-size)) (storew val symbol symbol-value-slot other-pointer-lowtag))) - #!+sb-thread (define-vop (unbind) - ;; four temporaries? - (:temporary (:sc unsigned-reg) symbol value bsp tls-index) + (:temporary (:sc unsigned-reg) temp bsp tls-index) (:generator 0 (load-binding-stack-pointer bsp) - (loadw symbol bsp (- binding-symbol-slot binding-size)) - (loadw value bsp (- binding-value-slot binding-size)) - - (loadw tls-index symbol symbol-tls-index-slot other-pointer-lowtag) + ;; Load SYMBOL from stack, and get the TLS-INDEX + (loadw temp bsp (- binding-symbol-slot binding-size)) + (loadw tls-index temp symbol-tls-index-slot other-pointer-lowtag) + ;; Load VALUE from stack, the restore it to the TLS area. + (loadw temp bsp (- binding-value-slot binding-size)) (inst mov (make-ea :qword :base thread-base-tn :scale 1 :index tls-index) - value) - + temp) + ;; Zero out the stack. (storew 0 bsp (- binding-symbol-slot binding-size)) (storew 0 bsp (- binding-value-slot binding-size)) (inst sub bsp (* binding-size n-word-bytes)) @@ -417,7 +407,6 @@ (inst sub bsp (* binding-size n-word-bytes)) (store-symbol-value bsp *binding-stack-pointer*))) - (define-vop (unbind-to-here) (:args (where :scs (descriptor-reg any-reg))) (:temporary (:sc unsigned-reg) symbol value bsp #!+sb-thread tls-index) diff --git a/src/compiler/x86/cell.lisp b/src/compiler/x86/cell.lisp index bf2a1ac..08862bf 100644 --- a/src/compiler/x86/cell.lisp +++ b/src/compiler/x86/cell.lisp @@ -323,12 +323,12 @@ (inst mov tls-index symbol) (inst call (make-fixup (ecase (tn-offset tls-index) - (#.eax-offset 'allocate-tls-index-in-eax) - (#.ebx-offset 'allocate-tls-index-in-ebx) - (#.ecx-offset 'allocate-tls-index-in-ecx) - (#.edx-offset 'allocate-tls-index-in-edx) - (#.edi-offset 'allocate-tls-index-in-edi) - (#.esi-offset 'allocate-tls-index-in-esi)) + (#.eax-offset 'alloc-tls-index-in-eax) + (#.ebx-offset 'alloc-tls-index-in-ebx) + (#.ecx-offset 'alloc-tls-index-in-ecx) + (#.edx-offset 'alloc-tls-index-in-edx) + (#.edi-offset 'alloc-tls-index-in-edi) + (#.esi-offset 'alloc-tls-index-in-esi)) :assembly-routine)) (emit-label tls-index-valid) (inst fs-segment-prefix) diff --git a/version.lisp-expr b/version.lisp-expr index 6d205eb..35ce4cf 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.15.16" +"1.0.15.17"