X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Fmacros.lisp;h=48224fbe3dfe1bd0b6199e523d19b585a5b24817;hb=942e45e3bb73fd55786e4a0ab4590324063c0c89;hp=b0f685f87a70e9ce73d1245b5c143ce08a94f1a8;hpb=939275c1bc2f18ef93cd1dd4ab35a18f6008cfd9;p=sbcl.git diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp index b0f685f..48224fb 100644 --- a/src/compiler/x86/macros.lisp +++ b/src/compiler/x86/macros.lisp @@ -108,6 +108,25 @@ (declare (ignore temp)) `(store-symbol-value ,reg ,symbol)) +(defmacro load-binding-stack-pointer (reg) + #!+sb-thread + `(progn + (inst fs-segment-prefix) + (inst mov ,reg (make-ea :dword + :disp (* 4 thread-binding-stack-pointer-slot)))) + #!-sb-thread + `(load-symbol-value ,reg *binding-stack-pointer*)) + +(defmacro store-binding-stack-pointer (reg) + #!+sb-thread + `(progn + (inst fs-segment-prefix) + (inst mov (make-ea :dword + :disp (* 4 thread-binding-stack-pointer-slot)) + ,reg)) + #!-sb-thread + `(store-symbol-value ,reg *binding-stack-pointer*)) + (defmacro load-type (target source &optional (offset 0)) #!+sb-doc "Loads the type bits of a pointer into target independent of @@ -125,19 +144,17 @@ ;;;; allocation helpers -;;; All allocation is done by calls to assembler routines that -;;; eventually invoke the C alloc() function. Once upon a time -;;; (before threads) allocation within an alloc_region could also be -;;; done inline, with the aid of two C symbols storing the current -;;; allocation region boundaries; however, C symbols are global. +;;; Allocation within alloc_region (which is thread local) can be done +;;; inline. If the alloc_region is overflown allocation is done by +;;; calling the C alloc() function. ;;; C calls for allocation don't /seem/ to make an awful lot of -;;; difference to speed. Guessing from historical context, it looks -;;; like inline allocation was introduced before pseudo-atomic, at -;;; which time all calls to alloc() would have needed a syscall to -;;; mask signals for the duration. Now we have pseudoatomic there's -;;; no need for that overhead. Still, inline alloc would be a neat -;;; addition someday (except see below). +;;; difference to speed. On pure consing it's about a 25% +;;; gain. Guessing from historical context, it looks like inline +;;; allocation was introduced before pseudo-atomic, at which time all +;;; calls to alloc() would have needed a syscall to mask signals for +;;; the duration. Now we have pseudoatomic there's no need for that +;;; overhead. (defun allocation-dynamic-extent (alloc-tn size) (inst sub esp-tn size) @@ -175,6 +192,7 @@ (defun allocation-inline (alloc-tn size) (let ((ok (gen-label)) + (done (gen-label)) (free-pointer (make-ea :dword :disp #!+sb-thread (* n-word-bytes thread-alloc-region-slot) @@ -191,7 +209,7 @@ (inst add alloc-tn free-pointer) #!+sb-thread (inst fs-segment-prefix) (inst cmp alloc-tn end-addr) - (inst jmp :be OK) + (inst jmp :be ok) (let ((dst (ecase (tn-offset alloc-tn) (#.eax-offset "alloc_overflow_eax") (#.ecx-offset "alloc_overflow_ecx") @@ -200,9 +218,23 @@ (#.esi-offset "alloc_overflow_esi") (#.edi-offset "alloc_overflow_edi")))) (inst call (make-fixup dst :foreign))) + (inst jmp-short done) (emit-label ok) - #!+sb-thread (inst fs-segment-prefix) - (inst xchg free-pointer alloc-tn)) + ;; Swap ALLOC-TN and FREE-POINTER + (cond ((and (tn-p size) (location= alloc-tn size)) + ;; XCHG is extremely slow, use the xor swap trick + #!+sb-thread (inst fs-segment-prefix) + (inst xor alloc-tn free-pointer) + #!+sb-thread (inst fs-segment-prefix) + (inst xor free-pointer alloc-tn) + #!+sb-thread (inst fs-segment-prefix) + (inst xor alloc-tn free-pointer)) + (t + ;; It's easier if SIZE is still available. + #!+sb-thread (inst fs-segment-prefix) + (inst mov free-pointer alloc-tn) + (inst sub alloc-tn size))) + (emit-label done)) (values)) @@ -219,13 +251,7 @@ (defun allocation (alloc-tn size &optional inline dynamic-extent) (cond (dynamic-extent (allocation-dynamic-extent alloc-tn size)) - ;; FIXME: for reasons unknown, inline allocation is a speed win on - ;; non-P4s, and a speed loss on P4s (and probably other such - ;; high-spec high-cache machines). :INLINE-ALLOCATION-IS-GOOD is - ;; a bit of a KLUDGE, really. -- CSR, 2004-08-05 (following - ;; observations made by ASF and Juho Snellman) - ((and (member :inline-allocation-is-good *backend-subfeatures*) - (or (null inline) (policy inline (>= speed space)))) + ((or (null inline) (policy inline (>= speed space))) (allocation-inline alloc-tn size)) (t (allocation-notinline alloc-tn size))) (values)) @@ -317,9 +343,6 @@ (with-unique-names (label) `(let ((,label (gen-label))) (inst fs-segment-prefix) - (inst mov (make-ea :byte - :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0) - (inst fs-segment-prefix) (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) (fixnumize 1)) ,@forms @@ -343,14 +366,6 @@ ;; byte) (inst mov (make-ea :byte :disp (+ nil-value (static-symbol-offset - '*pseudo-atomic-interrupted*) - (ash symbol-value-slot word-shift) - ;; FIXME: Use mask, not minus, to - ;; take out type bits. - (- other-pointer-lowtag))) - 0) - (inst mov (make-ea :byte :disp (+ nil-value - (static-symbol-offset '*pseudo-atomic-atomic*) (ash symbol-value-slot word-shift) (- other-pointer-lowtag))) @@ -362,12 +377,6 @@ (ash symbol-value-slot word-shift) (- other-pointer-lowtag))) 0) - ;; KLUDGE: Is there any requirement for interrupts to be - ;; handled in order? It seems as though an interrupt coming - ;; in at this point will be executed before any pending - ;; interrupts. Or do incoming interrupts check to see - ;; whether any interrupts are pending? I wish I could find - ;; the documentation for pseudo-atomics.. -- WHN 19991130 (inst cmp (make-ea :byte :disp (+ nil-value (static-symbol-offset @@ -455,8 +464,12 @@ garbage collection" `(multiple-value-prog1 (progn ,@(loop for p in objects - collect `(push-word-on-c-stack - (int-sap (sb!kernel:get-lisp-obj-address ,p)))) + collect + ;; There is no race here wrt to gc, because at every + ;; point during the execution there is a reference to + ;; P on the stack or in a register. + `(push-word-on-c-stack + (int-sap (sb!kernel:get-lisp-obj-address ,p)))) ,@body) ;; If the body returned normally, we should restore the stack pointer ;; for the benefit of any following code in the same function. If