From 1dd3616e9eadaba9f1ca86b72d64551fbd75f399 Mon Sep 17 00:00:00 2001 From: David Lichteblau Date: Wed, 13 Jul 2011 18:01:45 +0200 Subject: [PATCH] Port WITH-TLS-EA and other remaining FS prefix uses to Windows. - current-thread-offset-sap - emit-single-step-test - Allocation routines - Disable PSEUDO-ATOMIC on threaded Windows entirely instead of changing TLS uses of pseudo-atomic-bits. We would need a temporary register for those changes, and Windows threading is all safepoint-based, without the need to support asynchronous signals. Thanks to Dmitry Kalyanov and Anton Kovalenko. --- src/compiler/x86/call.lisp | 6 ++-- src/compiler/x86/macros.lisp | 76 ++++++++++++++++++++++++++++++++++++------ src/compiler/x86/parms.lisp | 3 ++ src/compiler/x86/system.lisp | 11 +++++- src/runtime/gencgc.c | 6 ++++ src/runtime/x86-assem.S | 28 ++++++++++------ 6 files changed, 106 insertions(+), 24 deletions(-) diff --git a/src/compiler/x86/call.lisp b/src/compiler/x86/call.lisp index d9fdc1a..a8e09c6 100644 --- a/src/compiler/x86/call.lisp +++ b/src/compiler/x86/call.lisp @@ -1465,10 +1465,12 @@ ;; register on -SB-THREAD. #!+sb-thread (progn - (with-tls-ea (EA :base :unused + #!+win32 (inst push eax-tn) + (with-tls-ea (EA :base #!+win32 eax-tn #!-win32 :unused :disp-type :constant :disp (* thread-stepping-slot n-word-bytes)) - (inst cmp EA nil-value :maybe-fs))) + (inst cmp EA nil-value :maybe-fs)) + #!+win32 (inst pop eax-tn)) #!-sb-thread (inst cmp (make-ea-for-symbol-value sb!impl::*stepping*) nil-value)) diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp index b218fa9..f0ed91e 100644 --- a/src/compiler/x86/macros.lisp +++ b/src/compiler/x86/macros.lisp @@ -221,8 +221,15 @@ :foreign)))) (defun allocation-inline (alloc-tn size) - (let ((ok (gen-label)) + (let* ((ok (gen-label)) ;reindent after merging (done (gen-label)) + #!+(and sb-thread win32) + (scratch-tns (loop for my-tn in `(,eax-tn ,ebx-tn ,edx-tn ,ecx-tn) + when (and (not (location= alloc-tn my-tn)) + (or (not (tn-p size)) + (not (location= size my-tn)))) + collect my-tn)) + (tls-prefix #!+sb-thread :fs #!-sb-thread nil) (free-pointer (make-ea :dword :disp #!+sb-thread (* n-word-bytes thread-alloc-region-slot) @@ -232,11 +239,23 @@ (make-ea :dword :disp #!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot)) #!-sb-thread (make-fixup "boxed_region" :foreign 4) - :scale 1))) ; thread->alloc_region.end_addr + :scale 1)) ; thread->alloc_region.end_addr + #!+(and sb-thread win32) (scratch-tn (pop scratch-tns)) + #!+(and sb-thread win32) (swap-tn (pop scratch-tns))) (unless (and (tn-p size) (location= alloc-tn size)) (inst mov alloc-tn size)) - (inst add alloc-tn free-pointer #!+sb-thread :fs) - (inst cmp alloc-tn end-addr #!+sb-thread :fs) + #!+(and sb-thread win32) + (progn + (inst push scratch-tn) + (inst push swap-tn) + (inst mov scratch-tn + (make-ea :dword :disp + +win32-tib-arbitrary-field-offset+) tls-prefix) + (setf (ea-base free-pointer) scratch-tn + (ea-base end-addr) scratch-tn + tls-prefix nil)) + (inst add alloc-tn free-pointer tls-prefix) + (inst cmp alloc-tn end-addr tls-prefix) (inst jmp :be ok) (let ((dst (ecase (tn-offset alloc-tn) (#.eax-offset "alloc_overflow_eax") @@ -251,15 +270,26 @@ ;; Swap ALLOC-TN and FREE-POINTER (cond ((and (tn-p size) (location= alloc-tn size)) ;; XCHG is extremely slow, use the xor swap trick - (inst xor alloc-tn free-pointer #!+sb-thread :fs) - (inst xor free-pointer alloc-tn #!+sb-thread :fs) - (inst xor alloc-tn free-pointer #!+sb-thread :fs)) + #!-(and sb-thread win32) + (progn + (inst xor alloc-tn free-pointer tls-prefix) + (inst xor free-pointer alloc-tn tls-prefix) + (inst xor alloc-tn free-pointer tls-prefix)) + #!+(and sb-thread win32) + (progn + (inst mov swap-tn free-pointer tls-prefix) + (inst mov free-pointer alloc-tn tls-prefix) + (inst mov alloc-tn swap-tn))) (t ;; It's easier if SIZE is still available. - (inst mov free-pointer alloc-tn #!+sb-thread :fs) + (inst mov free-pointer alloc-tn tls-prefix) (inst sub alloc-tn size))) - (emit-label done)) - (values)) + (emit-label done) + #!+(and sb-thread win32) + (progn + (inst pop swap-tn) + (inst pop scratch-tn)) + (values))) ;;; Emit code to allocate an object with a size in bytes given by @@ -273,6 +303,7 @@ ;;; (FIXME: so why aren't we asserting this?) (defun allocation (alloc-tn size &optional inline dynamic-extent lowtag) + (declare (ignorable inline)) (cond (dynamic-extent (allocation-dynamic-extent alloc-tn size lowtag)) @@ -366,6 +397,9 @@ ;;; pa section. #!+sb-thread (defmacro %clear-pseudo-atomic () + #!+win32 + `(progn) + #!-win32 '(inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot)) 0 :fs)) #!+sb-safepoint @@ -375,6 +409,9 @@ #!+sb-thread (defmacro pseudo-atomic (&rest forms) + #!+win32 + `(progn ,@forms (emit-safepoint)) + #!-win32 (with-unique-names (label) `(let ((,label (gen-label))) (inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot)) @@ -624,6 +661,7 @@ collection." The value of the BASE register is undefined following the macro invocation." (check-type base-already-live-p boolean) (check-type disp-type (member :index :constant)) + #!-(and win32 sb-thread) (let ((body (subst :fs :maybe-fs body))) (ecase disp-type (:constant @@ -641,4 +679,20 @@ collection." (inst ,(if base-already-live-p 'add 'mov) ,base ,disp) ,@(subst `(make-ea :dword :base ,base) ea-var - body)))))) + body))))) + #!+(and win32 sb-thread) + ;; goes through a temporary register to add the thread address into it + (multiple-value-bind (constant-disp ea-disp) + (ecase disp-type + (:constant (values disp nil)) + (:index (values 0 disp))) + `(progn + ,@(when ea-disp + `((inst ,(if base-already-live-p 'add 'mov) ,base ,ea-disp))) + (inst ,(if (or base-already-live-p ea-disp) 'add 'mov) + ,base + (make-ea :dword :disp +win32-tib-arbitrary-field-offset+) + :fs) + ,@(subst `(make-ea :dword :base ,base :disp ,constant-disp) + ea-var + (subst nil :maybe-fs body))))) diff --git a/src/compiler/x86/parms.lisp b/src/compiler/x86/parms.lisp index fc51de6..0fd0dca 100644 --- a/src/compiler/x86/parms.lisp +++ b/src/compiler/x86/parms.lisp @@ -383,3 +383,6 @@ ;;; FIXME: Is this used? Delete it or document it. ;;; cf the sparc PARMS.LISP (defparameter *assembly-unit-length* 8) + +#!+win32 +(defconstant +win32-tib-arbitrary-field-offset+ #.(+ #xE10 (* 4 63))) diff --git a/src/compiler/x86/system.lisp b/src/compiler/x86/system.lisp index 0cd90a3..6ee2c63 100644 --- a/src/compiler/x86/system.lisp +++ b/src/compiler/x86/system.lisp @@ -270,10 +270,19 @@ (:results (sap :scs (sap-reg))) (:result-types system-area-pointer) (:translate current-thread-offset-sap) - (:args (n :scs (unsigned-reg) :target sap)) + (:args (n :scs (unsigned-reg) + #!+win32 #!+win32 :to :save + #!-win32 #!-win32 :target sap)) (:arg-types unsigned-num) (:policy :fast-safe) (:generator 2 + #!+win32 + (progn + ;; Note that SAP conflicts with N in this case, hence the reader + ;; conditionals above. + (inst mov sap (make-ea :dword :disp +win32-tib-arbitrary-field-offset+) :fs) + (inst mov sap (make-ea :dword :base sap :disp 0 :index n :scale 4))) + #!-win32 (inst mov sap (make-ea :dword :disp 0 :index n :scale 4) :fs))) (define-vop (halt) diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index c53d574..823410c 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -1255,10 +1255,12 @@ gc_heap_exhausted_error_or_lose (long available, long requested) else { /* FIXME: assert free_pages_lock held */ (void)thread_mutex_unlock(&free_pages_lock); +#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD)) gc_assert(get_pseudo_atomic_atomic(thread)); clear_pseudo_atomic_atomic(thread); if (get_pseudo_atomic_interrupted(thread)) do_pending_interrupt(); +#endif /* Another issue is that signalling HEAP-EXHAUSTED error leads * to running user code at arbitrary places, even in a * WITHOUT-INTERRUPTS which may lead to a deadlock without @@ -4181,8 +4183,10 @@ general_alloc_internal(long nbytes, int page_type_flag, struct alloc_region *reg gc_assert((((unsigned long)region->free_pointer & LOWTAG_MASK) == 0) && ((nbytes & LOWTAG_MASK) == 0)); +#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD)) /* Must be inside a PA section. */ gc_assert(get_pseudo_atomic_atomic(thread)); +#endif if (nbytes > large_allocation) large_allocation = nbytes; @@ -4284,7 +4288,9 @@ general_alloc(long nbytes, int page_type_flag) lispobj * alloc(long nbytes) { +#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD)) gc_assert(get_pseudo_atomic_atomic(arch_os_get_current_thread())); +#endif return general_alloc(nbytes, BOXED_PAGE_FLAG); } diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index 6723b67..4987db6 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -712,10 +712,18 @@ DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16) #define START_REGION GNAME(boxed_region) #endif -#define ALLOC_OVERFLOW(size) \ - /* Calculate the size for the allocation. */ \ - subl START_REGION,size; \ - ALLOC(size) +#if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32) +#define ALLOC_OVERFLOW(size,scratch) \ + movl SBCL_THREAD_BASE_EA, scratch; \ + /* Calculate the size for the allocation. */ \ + subl THREAD_ALLOC_REGION_OFFSET(scratch),size; \ + ALLOC(size) +#else +#define ALLOC_OVERFLOW(size,scratch) \ + /* Calculate the size for the allocation. */ \ + subl START_REGION,size; \ + ALLOC(size) +#endif /* This routine handles an overflow with eax=crfp+size. So the size=eax-crfp. */ @@ -725,7 +733,7 @@ DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16) GNAME(alloc_overflow_eax): pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%eax) + ALLOC_OVERFLOW(%eax,%edx) popl %edx # Restore edx. popl %ecx # Restore ecx. ret @@ -737,7 +745,7 @@ GNAME(alloc_overflow_eax): GNAME(alloc_overflow_ecx): pushl %eax # Save eax pushl %edx # Save edx - ALLOC_OVERFLOW(%ecx) + ALLOC_OVERFLOW(%ecx,%edx) movl %eax,%ecx # setup the destination. popl %edx # Restore edx. popl %eax # Restore eax. @@ -750,7 +758,7 @@ GNAME(alloc_overflow_ecx): GNAME(alloc_overflow_edx): pushl %eax # Save eax pushl %ecx # Save ecx - ALLOC_OVERFLOW(%edx) + ALLOC_OVERFLOW(%edx,%ecx) movl %eax,%edx # setup the destination. popl %ecx # Restore ecx. popl %eax # Restore eax. @@ -766,7 +774,7 @@ GNAME(alloc_overflow_ebx): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%ebx) + ALLOC_OVERFLOW(%ebx,%edx) movl %eax,%ebx # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -783,7 +791,7 @@ GNAME(alloc_overflow_esi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%esi) + ALLOC_OVERFLOW(%esi,%edx) movl %eax,%esi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. @@ -798,7 +806,7 @@ GNAME(alloc_overflow_edi): pushl %eax # Save eax pushl %ecx # Save ecx pushl %edx # Save edx - ALLOC_OVERFLOW(%edi) + ALLOC_OVERFLOW(%edi,%edx) movl %eax,%edi # setup the destination. popl %edx # Restore edx. popl %ecx # Restore ecx. -- 1.7.10.4