X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Fmacros.lisp;h=8a1dc871a366f93391798ea58acf149e78651b5b;hb=5de74c72e5a9522c7fdd3dbb31a39641e9de8877;hp=e61732b3abad7966f9030a2287de7ccd6262aee0;hpb=495f7dfb9c4ce0ba965f3297a4c94f6c75691b70;p=sbcl.git diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp index e61732b..8a1dc87 100644 --- a/src/compiler/x86/macros.lisp +++ b/src/compiler/x86/macros.lisp @@ -108,6 +108,25 @@ (declare (ignore temp)) `(store-symbol-value ,reg ,symbol)) +(defmacro load-binding-stack-pointer (reg) + #!+sb-thread + `(progn + (inst fs-segment-prefix) + (inst mov ,reg (make-ea :dword + :disp (* 4 thread-binding-stack-pointer-slot)))) + #!-sb-thread + `(load-symbol-value ,reg *binding-stack-pointer*)) + +(defmacro store-binding-stack-pointer (reg) + #!+sb-thread + `(progn + (inst fs-segment-prefix) + (inst mov (make-ea :dword + :disp (* 4 thread-binding-stack-pointer-slot)) + ,reg)) + #!-sb-thread + `(store-symbol-value ,reg *binding-stack-pointer*)) + (defmacro load-type (target source &optional (offset 0)) #!+sb-doc "Loads the type bits of a pointer into target independent of @@ -125,19 +144,17 @@ ;;;; allocation helpers -;;; All allocation is done by calls to assembler routines that -;;; eventually invoke the C alloc() function. Once upon a time -;;; (before threads) allocation within an alloc_region could also be -;;; done inline, with the aid of two C symbols storing the current -;;; allocation region boundaries; however, C symbols are global. +;;; Allocation within alloc_region (which is thread local) can be done +;;; inline. If the alloc_region is overflown allocation is done by +;;; calling the C alloc() function. ;;; C calls for allocation don't /seem/ to make an awful lot of -;;; difference to speed. Guessing from historical context, it looks -;;; like inline allocation was introduced before pseudo-atomic, at -;;; which time all calls to alloc() would have needed a syscall to -;;; mask signals for the duration. Now we have pseudoatomic there's -;;; no need for that overhead. Still, inline alloc would be a neat -;;; addition someday (except see below). +;;; difference to speed. On pure consing it's about a 25% +;;; gain. Guessing from historical context, it looks like inline +;;; allocation was introduced before pseudo-atomic, at which time all +;;; calls to alloc() would have needed a syscall to mask signals for +;;; the duration. Now we have pseudoatomic there's no need for that +;;; overhead. (defun allocation-dynamic-extent (alloc-tn size) (inst sub esp-tn size) @@ -175,6 +192,7 @@ (defun allocation-inline (alloc-tn size) (let ((ok (gen-label)) + (done (gen-label)) (free-pointer (make-ea :dword :disp #!+sb-thread (* n-word-bytes thread-alloc-region-slot) @@ -191,7 +209,7 @@ (inst add alloc-tn free-pointer) #!+sb-thread (inst fs-segment-prefix) (inst cmp alloc-tn end-addr) - (inst jmp :be OK) + (inst jmp :be ok) (let ((dst (ecase (tn-offset alloc-tn) (#.eax-offset "alloc_overflow_eax") (#.ecx-offset "alloc_overflow_ecx") @@ -200,9 +218,23 @@ (#.esi-offset "alloc_overflow_esi") (#.edi-offset "alloc_overflow_edi")))) (inst call (make-fixup dst :foreign))) + (inst jmp-short done) (emit-label ok) - #!+sb-thread (inst fs-segment-prefix) - (inst xchg free-pointer alloc-tn)) + ;; Swap ALLOC-TN and FREE-POINTER + (cond ((and (tn-p size) (location= alloc-tn size)) + ;; XCHG is extremely slow, use the xor swap trick + #!+sb-thread (inst fs-segment-prefix) + (inst xor alloc-tn free-pointer) + #!+sb-thread (inst fs-segment-prefix) + (inst xor free-pointer alloc-tn) + #!+sb-thread (inst fs-segment-prefix) + (inst xor alloc-tn free-pointer)) + (t + ;; It's easier if SIZE is still available. + #!+sb-thread (inst fs-segment-prefix) + (inst mov free-pointer alloc-tn) + (inst sub alloc-tn size))) + (emit-label done)) (values)) @@ -219,13 +251,7 @@ (defun allocation (alloc-tn size &optional inline dynamic-extent) (cond (dynamic-extent (allocation-dynamic-extent alloc-tn size)) - ;; FIXME: for reasons unknown, inline allocation is a speed win on - ;; non-P4s, and a speed loss on P4s (and probably other such - ;; high-spec high-cache machines). :INLINE-ALLOCATION-IS-GOOD is - ;; a bit of a KLUDGE, really. -- CSR, 2004-08-05 (following - ;; observations made by ASF and Juho Snellman) - ((and (member :inline-allocation-is-good *backend-subfeatures*) - (or (null inline) (policy inline (>= speed space)))) + ((or (null inline) (policy inline (>= speed space))) (allocation-inline alloc-tn size)) (t (allocation-notinline alloc-tn size))) (values)) @@ -250,7 +276,15 @@ (eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) (defun emit-error-break (vop kind code values) (let ((vector (gensym))) - `((inst int 3) ; i386 breakpoint instruction + `((progn + #-darwin (inst int 3) ; i386 breakpoint instruction + ;; CLH 20060314 + ;; On Darwin, we need to use #x0b0f instead of int3 in order + ;; to generate a SIGILL instead of a SIGTRAP as darwin/x86 + ;; doesn't seem to be reliably firing SIGTRAP + ;; handlers. Hopefully this will be fixed by Apple at a + ;; later date. + #+darwin (inst word #x0b0f)) ;; The return PC points here; note the location for the debugger. (let ((vop ,vop)) (when vop @@ -317,9 +351,6 @@ (with-unique-names (label) `(let ((,label (gen-label))) (inst fs-segment-prefix) - (inst mov (make-ea :byte - :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0) - (inst fs-segment-prefix) (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) (fixnumize 1)) ,@forms @@ -343,14 +374,6 @@ ;; byte) (inst mov (make-ea :byte :disp (+ nil-value (static-symbol-offset - '*pseudo-atomic-interrupted*) - (ash symbol-value-slot word-shift) - ;; FIXME: Use mask, not minus, to - ;; take out type bits. - (- other-pointer-lowtag))) - 0) - (inst mov (make-ea :byte :disp (+ nil-value - (static-symbol-offset '*pseudo-atomic-atomic*) (ash symbol-value-slot word-shift) (- other-pointer-lowtag))) @@ -362,12 +385,6 @@ (ash symbol-value-slot word-shift) (- other-pointer-lowtag))) 0) - ;; KLUDGE: Is there any requirement for interrupts to be - ;; handled in order? It seems as though an interrupt coming - ;; in at this point will be executed before any pending - ;; interrupts. Or do incoming interrupts check to see - ;; whether any interrupts are pending? I wish I could find - ;; the documentation for pseudo-atomics.. -- WHN 19991130 (inst cmp (make-ea :byte :disp (+ nil-value (static-symbol-offset @@ -390,27 +407,21 @@ `((:translate ,translate))) (:policy :fast-safe) (:args (object :scs (descriptor-reg)) - (index :scs (any-reg))) + (index :scs (any-reg immediate))) (:arg-types ,type tagged-num) (:results (value :scs ,scs)) (:result-types ,el-type) (:generator 3 ; pw was 5 - (inst mov value (make-ea :dword :base object :index index - :disp (- (* ,offset n-word-bytes) - ,lowtag))))) - (define-vop (,(symbolicate name "-C")) - ,@(when translate - `((:translate ,translate))) - (:policy :fast-safe) - (:args (object :scs (descriptor-reg))) - (:info index) - (:arg-types ,type (:constant (signed-byte 30))) - (:results (value :scs ,scs)) - (:result-types ,el-type) - (:generator 2 ; pw was 5 - (inst mov value (make-ea :dword :base object - :disp (- (* (+ ,offset index) n-word-bytes) - ,lowtag))))))) + (sc-case index + (immediate + (inst mov value (make-ea :dword :base object + :disp (- (* (+ ,offset (tn-value index)) + n-word-bytes) + ,lowtag)))) + (t + (inst mov value (make-ea :dword :base object :index index + :disp (- (* ,offset n-word-bytes) + ,lowtag))))))))) (defmacro define-full-setter (name type offset lowtag scs el-type &optional translate) `(progn @@ -419,32 +430,24 @@ `((:translate ,translate))) (:policy :fast-safe) (:args (object :scs (descriptor-reg)) - (index :scs (any-reg)) + (index :scs (any-reg immediate)) (value :scs ,scs :target result)) (:arg-types ,type tagged-num ,el-type) (:results (result :scs ,scs)) (:result-types ,el-type) (:generator 4 ; was 5 - (inst mov (make-ea :dword :base object :index index - :disp (- (* ,offset n-word-bytes) ,lowtag)) - value) - (move result value))) - (define-vop (,(symbolicate name "-C")) - ,@(when translate - `((:translate ,translate))) - (:policy :fast-safe) - (:args (object :scs (descriptor-reg)) - (value :scs ,scs :target result)) - (:info index) - (:arg-types ,type (:constant (signed-byte 30)) ,el-type) - (:results (result :scs ,scs)) - (:result-types ,el-type) - (:generator 3 ; was 5 - (inst mov (make-ea :dword :base object - :disp (- (* (+ ,offset index) n-word-bytes) - ,lowtag)) - value) - (move result value))))) + (sc-case index + (immediate + (inst mov (make-ea :dword :base object + :disp (- (* (+ ,offset (tn-value index)) + n-word-bytes) + ,lowtag)) + value)) + (t + (inst mov (make-ea :dword :base object :index index + :disp (- (* ,offset n-word-bytes) ,lowtag)) + value))) + (move result value))))) ;;; helper for alien stuff. (defmacro with-pinned-objects ((&rest objects) &body body)