X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Fmacros.lisp;h=8a1dc871a366f93391798ea58acf149e78651b5b;hb=5de74c72e5a9522c7fdd3dbb31a39641e9de8877;hp=028d6b71f7c4d0271c2a11d23721526e5c17e233;hpb=0f534f7e3e8298e8acdd2abb8316e50bcfbc1603;p=sbcl.git diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp index 028d6b7..8a1dc87 100644 --- a/src/compiler/x86/macros.lisp +++ b/src/compiler/x86/macros.lisp @@ -34,7 +34,7 @@ (inst fstp ,tn) ,@body (unless (zerop (tn-offset ,tn)) - (inst fxch ,tn)))) ; save into new dest and restore st(0) + (inst fxch ,tn)))) ; save into new dest and restore st(0) ;;;; instruction-like macros @@ -42,7 +42,7 @@ #!+sb-doc "Move SRC into DST unless they are location=." (once-only ((n-dst dst) - (n-src src)) + (n-src src)) `(unless (location= ,n-dst ,n-src) (inst mov ,n-dst ,n-src)))) @@ -67,32 +67,31 @@ (defmacro load-symbol (reg symbol) `(inst mov ,reg (+ nil-value (static-symbol-offset ,symbol)))) +(defmacro make-ea-for-symbol-value (symbol) + `(make-ea :dword + :disp (+ nil-value + (static-symbol-offset ',symbol) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag)))) + (defmacro load-symbol-value (reg symbol) - `(inst mov ,reg - (make-ea :dword - :disp (+ nil-value - (static-symbol-offset ',symbol) - (ash symbol-value-slot word-shift) - (- other-pointer-lowtag))))) + `(inst mov ,reg (make-ea-for-symbol-value ,symbol))) (defmacro store-symbol-value (reg symbol) - `(inst mov - (make-ea :dword - :disp (+ nil-value - (static-symbol-offset ',symbol) - (ash symbol-value-slot word-shift) - (- other-pointer-lowtag))) - ,reg)) + `(inst mov (make-ea-for-symbol-value ,symbol) ,reg)) + +#!+sb-thread +(defmacro make-ea-for-symbol-tls-index (symbol) + `(make-ea :dword + :disp (+ nil-value + (static-symbol-offset ',symbol) + (ash symbol-tls-index-slot word-shift) + (- other-pointer-lowtag)))) #!+sb-thread (defmacro load-tl-symbol-value (reg symbol) `(progn - (inst mov ,reg - (make-ea :dword - :disp (+ nil-value - (static-symbol-offset ',symbol) - (ash symbol-tls-index-slot word-shift) - (- other-pointer-lowtag)))) + (inst mov ,reg (make-ea-for-symbol-tls-index ,symbol)) (inst fs-segment-prefix) (inst mov ,reg (make-ea :dword :scale 1 :index ,reg)))) #!-sb-thread @@ -101,49 +100,61 @@ #!+sb-thread (defmacro store-tl-symbol-value (reg symbol temp) `(progn - (inst mov ,temp - (make-ea :dword - :disp (+ nil-value - (static-symbol-offset ',symbol) - (ash symbol-tls-index-slot word-shift) - (- other-pointer-lowtag)))) + (inst mov ,temp (make-ea-for-symbol-tls-index ,symbol)) (inst fs-segment-prefix) (inst mov (make-ea :dword :scale 1 :index ,temp) ,reg))) #!-sb-thread (defmacro store-tl-symbol-value (reg symbol temp) (declare (ignore temp)) `(store-symbol-value ,reg ,symbol)) - + +(defmacro load-binding-stack-pointer (reg) + #!+sb-thread + `(progn + (inst fs-segment-prefix) + (inst mov ,reg (make-ea :dword + :disp (* 4 thread-binding-stack-pointer-slot)))) + #!-sb-thread + `(load-symbol-value ,reg *binding-stack-pointer*)) + +(defmacro store-binding-stack-pointer (reg) + #!+sb-thread + `(progn + (inst fs-segment-prefix) + (inst mov (make-ea :dword + :disp (* 4 thread-binding-stack-pointer-slot)) + ,reg)) + #!-sb-thread + `(store-symbol-value ,reg *binding-stack-pointer*)) + (defmacro load-type (target source &optional (offset 0)) #!+sb-doc "Loads the type bits of a pointer into target independent of byte-ordering issues." (once-only ((n-target target) - (n-source source) - (n-offset offset)) + (n-source source) + (n-offset offset)) (ecase *backend-byte-order* (:little-endian `(inst mov ,n-target - (make-ea :byte :base ,n-source :disp ,n-offset))) + (make-ea :byte :base ,n-source :disp ,n-offset))) (:big-endian `(inst mov ,n-target - (make-ea :byte :base ,n-source :disp (+ ,n-offset 3))))))) + (make-ea :byte :base ,n-source :disp (+ ,n-offset 3))))))) ;;;; allocation helpers -;;; All allocation is done by calls to assembler routines that -;;; eventually invoke the C alloc() function. Once upon a time -;;; (before threads) allocation within an alloc_region could also be -;;; done inline, with the aid of two C symbols storing the current -;;; allocation region boundaries; however, C symbols are global. +;;; Allocation within alloc_region (which is thread local) can be done +;;; inline. If the alloc_region is overflown allocation is done by +;;; calling the C alloc() function. ;;; C calls for allocation don't /seem/ to make an awful lot of -;;; difference to speed. Guessing from historical context, it looks -;;; like inline allocation was introduced before pseudo-atomic, at -;;; which time all calls to alloc() would have needed a syscall to -;;; mask signals for the duration. Now we have pseudoatomic there's -;;; no need for that overhead. Still, inline alloc would be a neat -;;; addition someday (except see below). +;;; difference to speed. On pure consing it's about a 25% +;;; gain. Guessing from historical context, it looks like inline +;;; allocation was introduced before pseudo-atomic, at which time all +;;; calls to alloc() would have needed a syscall to mask signals for +;;; the duration. Now we have pseudoatomic there's no need for that +;;; overhead. (defun allocation-dynamic-extent (alloc-tn size) (inst sub esp-tn size) @@ -160,58 +171,70 @@ (defun allocation-notinline (alloc-tn size) (let* ((alloc-tn-offset (tn-offset alloc-tn)) - ;; C call to allocate via dispatch routines. Each - ;; destination has a special entry point. The size may be a - ;; register or a constant. - (tn-text (ecase alloc-tn-offset - (#.eax-offset "eax") - (#.ecx-offset "ecx") - (#.edx-offset "edx") - (#.ebx-offset "ebx") - (#.esi-offset "esi") - (#.edi-offset "edi"))) - (size-text (case size (8 "8_") (16 "16_") (t "")))) + ;; C call to allocate via dispatch routines. Each + ;; destination has a special entry point. The size may be a + ;; register or a constant. + (tn-text (ecase alloc-tn-offset + (#.eax-offset "eax") + (#.ecx-offset "ecx") + (#.edx-offset "edx") + (#.ebx-offset "ebx") + (#.esi-offset "esi") + (#.edi-offset "edi"))) + (size-text (case size (8 "8_") (16 "16_") (t "")))) (unless (or (eql size 8) (eql size 16)) (unless (and (tn-p size) (location= alloc-tn size)) - (inst mov alloc-tn size))) - (inst call (make-fixup (extern-alien-name - (concatenate 'string - "alloc_" size-text - "to_" tn-text)) - :foreign)))) + (inst mov alloc-tn size))) + (inst call (make-fixup (concatenate 'string + "alloc_" size-text + "to_" tn-text) + :foreign)))) (defun allocation-inline (alloc-tn size) (let ((ok (gen-label)) - (free-pointer - (make-ea :dword :disp - #!+sb-thread (* n-word-bytes thread-alloc-region-slot) - #!-sb-thread (make-fixup (extern-alien-name "boxed_region") - :foreign) - :scale 1)) ; thread->alloc_region.free_pointer - (end-addr - (make-ea :dword :disp - #!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot)) - #!-sb-thread (make-fixup (extern-alien-name "boxed_region") - :foreign 4) - :scale 1))) ; thread->alloc_region.end_addr + (done (gen-label)) + (free-pointer + (make-ea :dword :disp + #!+sb-thread (* n-word-bytes thread-alloc-region-slot) + #!-sb-thread (make-fixup "boxed_region" :foreign) + :scale 1)) ; thread->alloc_region.free_pointer + (end-addr + (make-ea :dword :disp + #!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot)) + #!-sb-thread (make-fixup "boxed_region" :foreign 4) + :scale 1))) ; thread->alloc_region.end_addr (unless (and (tn-p size) (location= alloc-tn size)) (inst mov alloc-tn size)) #!+sb-thread (inst fs-segment-prefix) (inst add alloc-tn free-pointer) #!+sb-thread (inst fs-segment-prefix) (inst cmp alloc-tn end-addr) - (inst jmp :be OK) + (inst jmp :be ok) (let ((dst (ecase (tn-offset alloc-tn) - (#.eax-offset "alloc_overflow_eax") - (#.ecx-offset "alloc_overflow_ecx") - (#.edx-offset "alloc_overflow_edx") - (#.ebx-offset "alloc_overflow_ebx") - (#.esi-offset "alloc_overflow_esi") - (#.edi-offset "alloc_overflow_edi")))) - (inst call (make-fixup (extern-alien-name dst) :foreign))) + (#.eax-offset "alloc_overflow_eax") + (#.ecx-offset "alloc_overflow_ecx") + (#.edx-offset "alloc_overflow_edx") + (#.ebx-offset "alloc_overflow_ebx") + (#.esi-offset "alloc_overflow_esi") + (#.edi-offset "alloc_overflow_edi")))) + (inst call (make-fixup dst :foreign))) + (inst jmp-short done) (emit-label ok) - #!+sb-thread (inst fs-segment-prefix) - (inst xchg free-pointer alloc-tn)) + ;; Swap ALLOC-TN and FREE-POINTER + (cond ((and (tn-p size) (location= alloc-tn size)) + ;; XCHG is extremely slow, use the xor swap trick + #!+sb-thread (inst fs-segment-prefix) + (inst xor alloc-tn free-pointer) + #!+sb-thread (inst fs-segment-prefix) + (inst xor free-pointer alloc-tn) + #!+sb-thread (inst fs-segment-prefix) + (inst xor alloc-tn free-pointer)) + (t + ;; It's easier if SIZE is still available. + #!+sb-thread (inst fs-segment-prefix) + (inst mov free-pointer alloc-tn) + (inst sub alloc-tn size))) + (emit-label done)) (values)) @@ -228,13 +251,7 @@ (defun allocation (alloc-tn size &optional inline dynamic-extent) (cond (dynamic-extent (allocation-dynamic-extent alloc-tn size)) - ;; FIXME: for reasons unknown, inline allocation is a speed win on - ;; non-P4s, and a speed loss on P4s (and probably other such - ;; high-spec high-cache machines). :INLINE-ALLOCATION-IS-GOOD is - ;; a bit of a KLUDGE, really. -- CSR, 2004-08-05 (following - ;; observations made by ASF and Juho Snellman) - ((and (member :inline-allocation-is-good *backend-subfeatures*) - (or (null inline) (policy inline (>= speed space)))) + ((or (null inline) (policy inline (>= speed space))) (allocation-inline alloc-tn size)) (t (allocation-notinline alloc-tn size))) (values)) @@ -243,47 +260,58 @@ ;;; header having the specified WIDETAG value. The result is placed in ;;; RESULT-TN. (defmacro with-fixed-allocation ((result-tn widetag size &optional inline) - &rest forms) - `(pseudo-atomic - (allocation ,result-tn (pad-data-block ,size) ,inline) - (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag) - ,result-tn) - (inst lea ,result-tn - (make-ea :byte :base ,result-tn :disp other-pointer-lowtag)) - ,@forms)) + &body forms) + (unless forms + (bug "empty &body in WITH-FIXED-ALLOCATION")) + (once-only ((result-tn result-tn) (size size)) + `(pseudo-atomic + (allocation ,result-tn (pad-data-block ,size) ,inline) + (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag) + ,result-tn) + (inst lea ,result-tn + (make-ea :byte :base ,result-tn :disp other-pointer-lowtag)) + ,@forms))) ;;;; error code (eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) (defun emit-error-break (vop kind code values) (let ((vector (gensym))) - `((inst int 3) ; i386 breakpoint instruction - ;; The return PC points here; note the location for the debugger. - (let ((vop ,vop)) - (when vop - (note-this-location vop :internal-error))) - (inst byte ,kind) ; eg trap_Xyyy - (with-adjustable-vector (,vector) ; interr arguments - (write-var-integer (error-number-or-lose ',code) ,vector) - ,@(mapcar (lambda (tn) - `(let ((tn ,tn)) - ;; classic CMU CL comment: - ;; zzzzz jrd here. tn-offset is zero for constant - ;; tns. - (write-var-integer (make-sc-offset (sc-number - (tn-sc tn)) - (or (tn-offset tn) - 0)) - ,vector))) - values) - (inst byte (length ,vector)) - (dotimes (i (length ,vector)) - (inst byte (aref ,vector i)))))))) + `((progn + #-darwin (inst int 3) ; i386 breakpoint instruction + ;; CLH 20060314 + ;; On Darwin, we need to use #x0b0f instead of int3 in order + ;; to generate a SIGILL instead of a SIGTRAP as darwin/x86 + ;; doesn't seem to be reliably firing SIGTRAP + ;; handlers. Hopefully this will be fixed by Apple at a + ;; later date. + #+darwin (inst word #x0b0f)) + ;; The return PC points here; note the location for the debugger. + (let ((vop ,vop)) + (when vop + (note-this-location vop :internal-error))) + (inst byte ,kind) ; eg trap_Xyyy + (with-adjustable-vector (,vector) ; interr arguments + (write-var-integer (error-number-or-lose ',code) ,vector) + ,@(mapcar (lambda (tn) + `(let ((tn ,tn)) + ;; classic CMU CL comment: + ;; zzzzz jrd here. tn-offset is zero for constant + ;; tns. + (write-var-integer (make-sc-offset (sc-number + (tn-sc tn)) + (or (tn-offset tn) + 0)) + ,vector))) + values) + (inst byte (length ,vector)) + (dotimes (i (length ,vector)) + (inst byte (aref ,vector i)))))))) (defmacro error-call (vop error-code &rest values) #!+sb-doc "Cause an error. ERROR-CODE is the error to cause." (cons 'progn - (emit-error-break vop error-trap error-code values))) + (emit-error-break vop error-trap error-code values))) (defmacro generate-error-code (vop error-code &rest values) #!+sb-doc @@ -302,7 +330,7 @@ ;;; around. It's an operation which the AOP weenies would describe as ;;; having "cross-cutting concerns", meaning it appears all over the ;;; place and there's no logical single place to attach documentation. -;;; grep (mostly in src/runtime) is your friend +;;; grep (mostly in src/runtime) is your friend ;;; FIXME: *PSEUDO-ATOMIC-FOO* could be made into *PSEUDO-ATOMIC-BITS*, ;;; set with a single operation and cleared with SHR *PSEUDO-ATOMIC-BITS*,-2; @@ -312,80 +340,63 @@ ;;; KLUDGE: since the stack on the x86 is treated conservatively, it ;;; does not matter whether a signal occurs during construction of a ;;; dynamic-extent object, as the half-finished construction of the -;;; object will not cause any difficulty. We can therefore elide -(defvar *dynamic-extent* nil) +;;; object will not cause any difficulty. We can therefore elide +(defmacro maybe-pseudo-atomic (really-p &body forms) + `(if ,really-p + (progn ,@forms) + (pseudo-atomic ,@forms))) #!+sb-thread (defmacro pseudo-atomic (&rest forms) (with-unique-names (label) - `(if *dynamic-extent* ; I will burn in hell - (progn ,@forms) - (let ((,label (gen-label))) - (inst fs-segment-prefix) - (inst mov (make-ea :byte - :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0) - (inst fs-segment-prefix) - (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 1) - ,@forms - (inst fs-segment-prefix) - (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 0) - (inst fs-segment-prefix) - (inst cmp (make-ea :byte - :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0) - (inst jmp :eq ,label) - ;; if PAI was set, interrupts were disabled at the same - ;; time using the process signal mask. - (inst break pending-interrupt-trap) - (emit-label ,label))))) + `(let ((,label (gen-label))) + (inst fs-segment-prefix) + (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) + (fixnumize 1)) + ,@forms + (inst fs-segment-prefix) + (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 0) + (inst fs-segment-prefix) + (inst cmp (make-ea :byte + :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0) + (inst jmp :eq ,label) + ;; if PAI was set, interrupts were disabled at the same + ;; time using the process signal mask. + (inst break pending-interrupt-trap) + (emit-label ,label)))) #!-sb-thread (defmacro pseudo-atomic (&rest forms) (with-unique-names (label) - `(if *dynamic-extent* - (progn ,@forms) - (let ((,label (gen-label))) - ;; FIXME: The MAKE-EA noise should become a MACROLET macro - ;; or something. (perhaps SVLB, for static variable low - ;; byte) - (inst mov (make-ea :byte :disp (+ nil-value - (static-symbol-offset - '*pseudo-atomic-interrupted*) - (ash symbol-value-slot word-shift) - ;; FIXME: Use mask, not minus, to - ;; take out type bits. - (- other-pointer-lowtag))) - 0) - (inst mov (make-ea :byte :disp (+ nil-value - (static-symbol-offset - '*pseudo-atomic-atomic*) - (ash symbol-value-slot word-shift) - (- other-pointer-lowtag))) - (fixnumize 1)) - ,@forms - (inst mov (make-ea :byte :disp (+ nil-value - (static-symbol-offset - '*pseudo-atomic-atomic*) - (ash symbol-value-slot word-shift) - (- other-pointer-lowtag))) - 0) - ;; KLUDGE: Is there any requirement for interrupts to be - ;; handled in order? It seems as though an interrupt coming - ;; in at this point will be executed before any pending - ;; interrupts. Or do incoming interrupts check to see - ;; whether any interrupts are pending? I wish I could find - ;; the documentation for pseudo-atomics.. -- WHN 19991130 - (inst cmp (make-ea :byte - :disp (+ nil-value - (static-symbol-offset - '*pseudo-atomic-interrupted*) - (ash symbol-value-slot word-shift) - (- other-pointer-lowtag))) - 0) - (inst jmp :eq ,label) - ;; if PAI was set, interrupts were disabled at the same - ;; time using the process signal mask. - (inst break pending-interrupt-trap) - (emit-label ,label))))) + `(let ((,label (gen-label))) + ;; FIXME: The MAKE-EA noise should become a MACROLET macro + ;; or something. (perhaps SVLB, for static variable low + ;; byte) + (inst mov (make-ea :byte :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-atomic*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + (fixnumize 1)) + ,@forms + (inst mov (make-ea :byte :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-atomic*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + 0) + (inst cmp (make-ea :byte + :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-interrupted*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + 0) + (inst jmp :eq ,label) + ;; if PAI was set, interrupts were disabled at the same + ;; time using the process signal mask. + (inst break pending-interrupt-trap) + (emit-label ,label)))) ;;;; indexed references @@ -393,64 +404,50 @@ `(progn (define-vop (,name) ,@(when translate - `((:translate ,translate))) + `((:translate ,translate))) (:policy :fast-safe) (:args (object :scs (descriptor-reg)) - (index :scs (any-reg))) + (index :scs (any-reg immediate))) (:arg-types ,type tagged-num) (:results (value :scs ,scs)) (:result-types ,el-type) - (:generator 3 ; pw was 5 - (inst mov value (make-ea :dword :base object :index index - :disp (- (* ,offset n-word-bytes) - ,lowtag))))) - (define-vop (,(symbolicate name "-C")) - ,@(when translate - `((:translate ,translate))) - (:policy :fast-safe) - (:args (object :scs (descriptor-reg))) - (:info index) - (:arg-types ,type (:constant (signed-byte 30))) - (:results (value :scs ,scs)) - (:result-types ,el-type) - (:generator 2 ; pw was 5 - (inst mov value (make-ea :dword :base object - :disp (- (* (+ ,offset index) n-word-bytes) - ,lowtag))))))) + (:generator 3 ; pw was 5 + (sc-case index + (immediate + (inst mov value (make-ea :dword :base object + :disp (- (* (+ ,offset (tn-value index)) + n-word-bytes) + ,lowtag)))) + (t + (inst mov value (make-ea :dword :base object :index index + :disp (- (* ,offset n-word-bytes) + ,lowtag))))))))) (defmacro define-full-setter (name type offset lowtag scs el-type &optional translate) `(progn (define-vop (,name) ,@(when translate - `((:translate ,translate))) + `((:translate ,translate))) (:policy :fast-safe) (:args (object :scs (descriptor-reg)) - (index :scs (any-reg)) - (value :scs ,scs :target result)) + (index :scs (any-reg immediate)) + (value :scs ,scs :target result)) (:arg-types ,type tagged-num ,el-type) (:results (result :scs ,scs)) (:result-types ,el-type) - (:generator 4 ; was 5 - (inst mov (make-ea :dword :base object :index index - :disp (- (* ,offset n-word-bytes) ,lowtag)) - value) - (move result value))) - (define-vop (,(symbolicate name "-C")) - ,@(when translate - `((:translate ,translate))) - (:policy :fast-safe) - (:args (object :scs (descriptor-reg)) - (value :scs ,scs :target result)) - (:info index) - (:arg-types ,type (:constant (signed-byte 30)) ,el-type) - (:results (result :scs ,scs)) - (:result-types ,el-type) - (:generator 3 ; was 5 - (inst mov (make-ea :dword :base object - :disp (- (* (+ ,offset index) n-word-bytes) - ,lowtag)) - value) - (move result value))))) + (:generator 4 ; was 5 + (sc-case index + (immediate + (inst mov (make-ea :dword :base object + :disp (- (* (+ ,offset (tn-value index)) + n-word-bytes) + ,lowtag)) + value)) + (t + (inst mov (make-ea :dword :base object :index index + :disp (- (* ,offset n-word-bytes) ,lowtag)) + value))) + (move result value))))) ;;; helper for alien stuff. (defmacro with-pinned-objects ((&rest objects) &body body) @@ -460,10 +457,14 @@ Useful for e.g. foreign calls where another thread may trigger garbage collection" `(multiple-value-prog1 (progn - ,@(loop for p in objects - collect `(push-word-on-c-stack - (int-sap (sb!kernel:get-lisp-obj-address ,p)))) - ,@body) + ,@(loop for p in objects + collect + ;; There is no race here wrt to gc, because at every + ;; point during the execution there is a reference to + ;; P on the stack or in a register. + `(push-word-on-c-stack + (int-sap (sb!kernel:get-lisp-obj-address ,p)))) + ,@body) ;; If the body returned normally, we should restore the stack pointer ;; for the benefit of any following code in the same function. If ;; there's a non-local exit in the body, sp is garbage anyway and