;;; will probably be loading the wrong register!
(defmacro with-empty-tn@fp-top((tn) &body body)
`(progn
- (inst fstp ,tn)
- ,@body
- (unless (zerop (tn-offset ,tn))
- (inst fxch ,tn)))) ; save into new dest and restore st(0)
+ (inst fstp ,tn)
+ ,@body
+ (unless (zerop (tn-offset ,tn))
+ (inst fxch ,tn)))) ; save into new dest and restore st(0)
\f
;;;; instruction-like macros
#!+sb-thread
(defmacro load-tl-symbol-value (reg symbol)
- `(progn
- (inst mov ,reg (make-ea-for-symbol-tls-index ,symbol))
- (inst fs-segment-prefix)
- (inst mov ,reg (make-ea :dword :base ,reg))))
+ `(with-tls-ea (EA :base ,reg
+ :disp-type :index
+ :disp (make-ea-for-symbol-tls-index ,symbol))
+ (inst mov ,reg (make-ea :dword :base ,reg) :maybe-fs)))
#!-sb-thread
(defmacro load-tl-symbol-value (reg symbol) `(load-symbol-value ,reg ,symbol))
#!+sb-thread
(defmacro store-tl-symbol-value (reg symbol temp)
- `(progn
- (inst mov ,temp (make-ea-for-symbol-tls-index ,symbol))
- (inst fs-segment-prefix)
- (inst mov (make-ea :dword :base ,temp) ,reg)))
+ `(with-tls-ea (EA :base ,temp
+ :disp-type :index
+ :disp (make-ea-for-symbol-tls-index ,symbol))
+ (inst mov EA ,reg :maybe-fs)))
#!-sb-thread
(defmacro store-tl-symbol-value (reg symbol temp)
(declare (ignore temp))
(defmacro load-binding-stack-pointer (reg)
#!+sb-thread
- `(progn
- (inst fs-segment-prefix)
- (inst mov ,reg (make-ea :dword
- :disp (* 4 thread-binding-stack-pointer-slot))))
+ `(with-tls-ea (EA :base ,reg
+ :disp-type :constant
+ :disp (* 4 thread-binding-stack-pointer-slot))
+ (inst mov ,reg EA :maybe-fs))
#!-sb-thread
`(load-symbol-value ,reg *binding-stack-pointer*))
(defmacro store-binding-stack-pointer (reg)
#!+sb-thread
`(progn
- (inst fs-segment-prefix)
- (inst mov (make-ea :dword
+ #!+win32
+ (progn
+ (inst push eax-tn)
+ (inst push ,reg)
+ (with-tls-ea (EA :base eax-tn
+ :disp-type :constant
:disp (* 4 thread-binding-stack-pointer-slot))
- ,reg))
+ (inst pop EA))
+ (inst pop eax-tn))
+ #!-win32
+ (with-tls-ea (EA :disp-type :constant
+ :disp (* 4 thread-binding-stack-pointer-slot))
+ (inst mov EA ,reg :maybe-fs)))
#!-sb-thread
`(store-symbol-value ,reg *binding-stack-pointer*))
;;; the duration. Now we have pseudoatomic there's no need for that
;;; overhead.
-(defun allocation-dynamic-extent (alloc-tn size)
+(defun allocation-dynamic-extent (alloc-tn size lowtag)
(inst sub esp-tn size)
;; FIXME: SIZE _should_ be double-word aligned (suggested but
;; unfortunately not enforced by PAD-DATA-BLOCK and
;; 2004-03-30
(inst and esp-tn (lognot lowtag-mask))
(aver (not (location= alloc-tn esp-tn)))
- (inst mov alloc-tn esp-tn)
+ (inst lea alloc-tn (make-ea :byte :base esp-tn :disp lowtag))
(values))
(defun allocation-notinline (alloc-tn size)
:foreign))))
(defun allocation-inline (alloc-tn size)
- (let ((ok (gen-label))
+ (let* ((ok (gen-label)) ;reindent after merging
(done (gen-label))
+ #!+(and sb-thread win32)
+ (scratch-tns (loop for my-tn in `(,eax-tn ,ebx-tn ,edx-tn ,ecx-tn)
+ when (and (not (location= alloc-tn my-tn))
+ (or (not (tn-p size))
+ (not (location= size my-tn))))
+ collect my-tn))
+ (tls-prefix #!+sb-thread :fs #!-sb-thread nil)
(free-pointer
(make-ea :dword :disp
#!+sb-thread (* n-word-bytes thread-alloc-region-slot)
(make-ea :dword :disp
#!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot))
#!-sb-thread (make-fixup "boxed_region" :foreign 4)
- :scale 1))) ; thread->alloc_region.end_addr
+ :scale 1)) ; thread->alloc_region.end_addr
+ #!+(and sb-thread win32) (scratch-tn (pop scratch-tns))
+ #!+(and sb-thread win32) (swap-tn (pop scratch-tns)))
(unless (and (tn-p size) (location= alloc-tn size))
(inst mov alloc-tn size))
- #!+sb-thread (inst fs-segment-prefix)
- (inst add alloc-tn free-pointer)
- #!+sb-thread (inst fs-segment-prefix)
- (inst cmp alloc-tn end-addr)
+ #!+(and sb-thread win32)
+ (progn
+ (inst push scratch-tn)
+ (inst push swap-tn)
+ (inst mov scratch-tn
+ (make-ea :dword :disp
+ +win32-tib-arbitrary-field-offset+) tls-prefix)
+ (setf (ea-base free-pointer) scratch-tn
+ (ea-base end-addr) scratch-tn
+ tls-prefix nil))
+ (inst add alloc-tn free-pointer tls-prefix)
+ (inst cmp alloc-tn end-addr tls-prefix)
(inst jmp :be ok)
(let ((dst (ecase (tn-offset alloc-tn)
(#.eax-offset "alloc_overflow_eax")
;; Swap ALLOC-TN and FREE-POINTER
(cond ((and (tn-p size) (location= alloc-tn size))
;; XCHG is extremely slow, use the xor swap trick
- #!+sb-thread (inst fs-segment-prefix)
- (inst xor alloc-tn free-pointer)
- #!+sb-thread (inst fs-segment-prefix)
- (inst xor free-pointer alloc-tn)
- #!+sb-thread (inst fs-segment-prefix)
- (inst xor alloc-tn free-pointer))
+ #!-(and sb-thread win32)
+ (progn
+ (inst xor alloc-tn free-pointer tls-prefix)
+ (inst xor free-pointer alloc-tn tls-prefix)
+ (inst xor alloc-tn free-pointer tls-prefix))
+ #!+(and sb-thread win32)
+ (progn
+ (inst mov swap-tn free-pointer tls-prefix)
+ (inst mov free-pointer alloc-tn tls-prefix)
+ (inst mov alloc-tn swap-tn)))
(t
;; It's easier if SIZE is still available.
- #!+sb-thread (inst fs-segment-prefix)
- (inst mov free-pointer alloc-tn)
+ (inst mov free-pointer alloc-tn tls-prefix)
(inst sub alloc-tn size)))
- (emit-label done))
- (values))
+ (emit-label done)
+ #!+(and sb-thread win32)
+ (progn
+ (inst pop swap-tn)
+ (inst pop scratch-tn))
+ (values)))
;;; Emit code to allocate an object with a size in bytes given by
;;; (FIXME: so why aren't we asserting this?)
-(defun allocation (alloc-tn size &optional inline dynamic-extent)
+(defun allocation (alloc-tn size &optional inline dynamic-extent lowtag)
+ (declare (ignorable inline))
(cond
- (dynamic-extent (allocation-dynamic-extent alloc-tn size))
+ (dynamic-extent
+ (allocation-dynamic-extent alloc-tn size lowtag))
((or (null inline) (policy inline (>= speed space)))
(allocation-inline alloc-tn size))
- (t (allocation-notinline alloc-tn size)))
+ (t
+ (allocation-notinline alloc-tn size)))
+ (when (and lowtag (not dynamic-extent))
+ (inst lea alloc-tn (make-ea :byte :base alloc-tn :disp lowtag)))
(values))
;;; Allocate an other-pointer object of fixed SIZE with a single word
(bug "empty &body in WITH-FIXED-ALLOCATION"))
(once-only ((result-tn result-tn) (size size) (stack-allocate-p stack-allocate-p))
`(maybe-pseudo-atomic ,stack-allocate-p
- (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p)
- (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
- ,result-tn)
- (inst lea ,result-tn
- (make-ea :byte :base ,result-tn :disp other-pointer-lowtag))
- ,@forms)))
+ (allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p
+ other-pointer-lowtag)
+ (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
+ ,result-tn 0 other-pointer-lowtag)
+ ,@forms)))
\f
;;;; error code
-(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute)
- (defun emit-error-break (vop kind code values)
- (let ((vector (gensym)))
- `((progn
- #-darwin (inst int 3) ; i386 breakpoint instruction
- ;; CLH 20060314
- ;; On Darwin, we need to use #x0b0f instead of int3 in order
- ;; to generate a SIGILL instead of a SIGTRAP as darwin/x86
- ;; doesn't seem to be reliably firing SIGTRAP
- ;; handlers. Hopefully this will be fixed by Apple at a
- ;; later date.
- #+darwin (inst word #x0b0f))
- ;; The return PC points here; note the location for the debugger.
- (let ((vop ,vop))
- (when vop
- (note-this-location vop :internal-error)))
- (inst byte ,kind) ; eg trap_Xyyy
- (with-adjustable-vector (,vector) ; interr arguments
- (write-var-integer (error-number-or-lose ',code) ,vector)
- ,@(mapcar (lambda (tn)
- `(let ((tn ,tn))
- ;; classic CMU CL comment:
- ;; zzzzz jrd here. tn-offset is zero for constant
- ;; tns.
- (write-var-integer (make-sc-offset (sc-number
- (tn-sc tn))
- (or (tn-offset tn)
- 0))
- ,vector)))
- values)
- (inst byte (length ,vector))
- (dotimes (i (length ,vector))
- (inst byte (aref ,vector i))))))))
-
-(defmacro error-call (vop error-code &rest values)
+(defun emit-error-break (vop kind code values)
+ (assemble ()
+ #!-ud2-breakpoints
+ (inst int 3) ; i386 breakpoint instruction
+ ;; CLH 20060314
+ ;; On Darwin, we need to use #x0b0f instead of int3 in order
+ ;; to generate a SIGILL instead of a SIGTRAP as darwin/x86
+ ;; doesn't seem to be reliably firing SIGTRAP
+ ;; handlers. Hopefully this will be fixed by Apple at a
+ ;; later date.
+ #!+ud2-breakpoints
+ (inst word #x0b0f)
+ ;; The return PC points here; note the location for the debugger.
+ (when vop
+ (note-this-location vop :internal-error))
+ (inst byte kind) ; e.g. trap_xyyy
+ (with-adjustable-vector (vector) ; interr arguments
+ (write-var-integer code vector)
+ (dolist (tn values)
+ ;; classic CMU CL comment:
+ ;; zzzzz jrd here. tn-offset is zero for constant
+ ;; tns.
+ (write-var-integer (make-sc-offset (sc-number (tn-sc tn))
+ (or (tn-offset tn) 0))
+ vector))
+ (inst byte (length vector))
+ (dotimes (i (length vector))
+ (inst byte (aref vector i))))))
+
+(defun error-call (vop error-code &rest values)
#!+sb-doc
"Cause an error. ERROR-CODE is the error to cause."
- (cons 'progn
- (emit-error-break vop error-trap error-code values)))
+ (emit-error-break vop error-trap (error-number-or-lose error-code) values))
-(defmacro generate-error-code (vop error-code &rest values)
+(defun generate-error-code (vop error-code &rest values)
#!+sb-doc
"Generate-Error-Code Error-code Value*
Emit code for an error with the specified Error-Code and context Values."
- `(assemble (*elsewhere*)
- (let ((start-lab (gen-label)))
- (emit-label start-lab)
- (error-call ,vop ,error-code ,@values)
- start-lab)))
+ (assemble (*elsewhere*)
+ (let ((start-lab (gen-label)))
+ (emit-label start-lab)
+ (emit-error-break vop error-trap (error-number-or-lose error-code) values)
+ start-lab)))
\f
;;;; PSEUDO-ATOMIC
;;; does not matter whether a signal occurs during construction of a
;;; dynamic-extent object, as the half-finished construction of the
;;; object will not cause any difficulty. We can therefore elide
-(defmacro maybe-pseudo-atomic (really-p &body forms)
- `(if ,really-p
+(defmacro maybe-pseudo-atomic (not-really-p &body forms)
+ `(if ,not-really-p
(progn ,@forms)
(pseudo-atomic ,@forms)))
+;;; Unsafely clear pa flags so that the image can properly lose in a
+;;; pa section.
+#!+sb-thread
+(defmacro %clear-pseudo-atomic ()
+ #!+win32
+ `(progn)
+ #!-win32
+ '(inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot)) 0 :fs))
+
+#!+sb-safepoint
+(defun emit-safepoint ()
+ (inst test eax-tn (make-ea :dword :disp sb!vm::gc-safepoint-page-addr)))
+
#!+sb-thread
(defmacro pseudo-atomic (&rest forms)
+ #!+sb-safepoint-strictly
+ `(progn ,@forms (emit-safepoint))
+ #!-sb-safepoint-strictly
(with-unique-names (label)
`(let ((,label (gen-label)))
- (inst fs-segment-prefix)
- (inst or (make-ea :byte :disp (* 4 thread-pseudo-atomic-bits-slot))
- (fixnumize 1))
+ (inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot))
+ ebp-tn :fs)
,@forms
- (inst fs-segment-prefix)
- (inst xor (make-ea :byte :disp (* 4 thread-pseudo-atomic-bits-slot))
- (fixnumize 1))
+ (inst xor (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot))
+ ebp-tn :fs)
(inst jmp :z ,label)
- ;; if PAI was set, interrupts were disabled at the same
- ;; time using the process signal mask.
+ ;; if PAI was set, interrupts were disabled at the same time
+ ;; using the process signal mask.
(inst break pending-interrupt-trap)
- (emit-label ,label))))
+ (emit-label ,label)
+ #!+sb-safepoint
+ ;; In this case, when allocation thinks a GC should be done, it
+ ;; does not mark PA as interrupted, but schedules a safepoint
+ ;; trap instead. Let's take the opportunity to trigger that
+ ;; safepoint right now.
+ (emit-safepoint))))
#!-sb-thread
(defmacro pseudo-atomic (&rest forms)
(with-unique-names (label)
`(let ((,label (gen-label)))
- (inst or (make-ea-for-symbol-value *pseudo-atomic-bits* :byte)
- (fixnumize 1))
+ (inst mov (make-ea-for-symbol-value *pseudo-atomic-bits* :dword)
+ ebp-tn)
,@forms
- (inst xor (make-ea-for-symbol-value *pseudo-atomic-bits* :byte)
- (fixnumize 1))
+ (inst xor (make-ea-for-symbol-value *pseudo-atomic-bits* :dword)
+ ebp-tn)
(inst jmp :z ,label)
- ;; if PAI was set, interrupts were disabled at the same
- ;; time using the process signal mask.
+ ;; if PAI was set, interrupts were disabled at the same time
+ ;; using the process signal mask.
(inst break pending-interrupt-trap)
(emit-label ,label))))
\f
(:result-types ,el-type)
(:generator 5
(move eax old-value)
- #!+sb-thread
- (inst lock)
(let ((ea (sc-case index
(immediate
(make-ea :dword :base object
(make-ea :dword :base object :index index
:disp (- (* ,offset n-word-bytes)
,lowtag))))))
- (inst cmpxchg ea new-value))
+ (inst cmpxchg ea new-value :lock))
(move value eax)))))
(defmacro define-full-reffer (name type offset lowtag scs el-type &optional translate)
collection."
(if objects
(let ((pins (make-gensym-list (length objects)))
- (wpo (block-gensym "WPO")))
+ (wpo (gensym "WITH-PINNED-OBJECTS-THUNK")))
;; BODY is stuffed in a function to preserve the lexical
;; environment.
`(flet ((,wpo () (progn ,@body)))
+ (declare (muffle-conditions compiler-note))
;; PINS are dx-allocated in case the compiler for some
;; unfathomable reason decides to allocate value-cells
;; for them -- since we have DX value-cells on x86oid
`(touch-object ,pin))
pins)))))
`(progn ,@body)))
+
+;;; Helper to hide the fact that thread access on Windows needs one more
+;;; instruction, needs the FS prefix in that instruction _instead_ of
+;;; the actual load/store, and partially hide the resulting need for a
+;;; temporary TN when the non-windows might have have dereferenced an EA
+;;; without a TN as a base.
+
+(defmacro with-tls-ea ((ea-var &key base
+ base-already-live-p
+ (disp-type :constant)
+ (disp 0))
+ &body body)
+ "Execute BODY with various magic. BODY is expected to emit instructions.
+
+ In the body, EA-VAR will be an alias for an EA which BODY can use to
+ perform a thread-local load or store.
+
+ Within the body, :MAYBE-FS will be replaced with :FS or NIL,
+ depending on the target, and needs to be included in any instruction
+ performing an access through the EA.
+
+ DISP-TYPE must be :INDEX, or :CONSTANT, and DISP must be an EA/TN,
+ or an expression returning an integer, respectively.
+
+ BASE must be a temporary TN, except in the following situation: BASE
+ will be unused when DISP-TYPE is constant, BASE-ALREADY-LIVE-P is
+ true, _and_ we're on POSIX. This is an intentional optimization, and
+ the caller needs to take care to ignore the TN in this case, or can
+ omit this parameter.
+
+ BASE-ALREADY-LIVE-P means that at run-time, the BASE register already
+ holds an offset that we should add to instead of overwriting it.
+ The value of the BASE register is undefined following the macro invocation."
+ (check-type base-already-live-p boolean)
+ (check-type disp-type (member :index :constant))
+ #!-(and win32 sb-thread)
+ (let ((body (subst :fs :maybe-fs body)))
+ (ecase disp-type
+ (:constant
+ `(progn
+ ,@(subst (if base-already-live-p
+ ;; use BASE and DISP
+ `(make-ea :dword :base ,base :disp ,disp)
+ ;; BASE not live and not needed, just use DISP
+ `(make-ea :dword :disp ,disp))
+ ea-var
+ body)))
+ (:index
+ ;; need to use BASE in any case; and DISP is an EA
+ `(progn
+ (inst ,(if base-already-live-p 'add 'mov) ,base ,disp)
+ ,@(subst `(make-ea :dword :base ,base)
+ ea-var
+ body)))))
+ #!+(and win32 sb-thread)
+ ;; goes through a temporary register to add the thread address into it
+ (multiple-value-bind (constant-disp ea-disp)
+ (ecase disp-type
+ (:constant (values disp nil))
+ (:index (values 0 disp)))
+ `(progn
+ ,@(when ea-disp
+ `((inst ,(if base-already-live-p 'add 'mov) ,base ,ea-disp)))
+ (inst ,(if (or base-already-live-p ea-disp) 'add 'mov)
+ ,base
+ (make-ea :dword :disp +win32-tib-arbitrary-field-offset+)
+ :fs)
+ ,@(subst `(make-ea :dword :base ,base :disp ,constant-disp)
+ ea-var
+ (subst nil :maybe-fs body)))))