From 0b5119848b6b8713e473fa669356645747e11dbd Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Tue, 4 Oct 2005 17:31:26 +0000 Subject: [PATCH] 0.9.5.20: * thread stacks are freed by the thread that exists next. There is at most one freeable stack at any time. Details: * protect_control_stack_*_page always operates on the current thread * the lisp thread object does not keep a pointer to the C thread struct because it can now be freed at any time * the interruption queue is moved to the lisp thread object * much simpler thread start/stop and gc interaction and locking * STATE_STARTING is removed --- NEWS | 4 +- package-data-list.lisp-expr | 2 +- src/code/exhaust.lisp | 4 +- src/code/target-thread.lisp | 218 ++++++++++++++++------------- src/compiler/generic/genesis.lisp | 3 +- src/compiler/generic/objdef.lisp | 2 - src/compiler/x86-64/parms.lisp | 1 + src/compiler/x86/parms.lisp | 1 + src/runtime/gencgc.c | 4 - src/runtime/interrupt.c | 33 ++--- src/runtime/thread.c | 277 ++++++++++++++----------------------- src/runtime/thread.h | 1 - src/runtime/validate.c | 6 +- src/runtime/validate.h | 5 +- src/runtime/x86-64-arch.h | 11 ++ src/runtime/x86-arch.h | 13 ++ tests/threads.impure.lisp | 21 ++- version.lisp-expr | 2 +- 18 files changed, 282 insertions(+), 326 deletions(-) diff --git a/NEWS b/NEWS index 00ef33b..3d0e521 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ ;;;; -*- coding: utf-8; -*- changes in sbcl-0.9.6 relative to sbcl-0.9.5: - * bug fix: add a workaround to SBCL looping infinitely at startup on + * bug fix: add a workaround to SBCL looping infinitely at startup on Linux kernels with apparently buggy implementations of personality(). (thanks to Svein Ove Aas) * bug fix: Unicode symbols are correctly printed in LDB backtraces @@ -10,6 +10,8 @@ changes in sbcl-0.9.6 relative to sbcl-0.9.5: on platforms supporting dynamic-extent allocation. * enhancement: saving cores with foreign code loaded is now supported on MIPS/Linux in addition to the previously supported platforms. + * bug fix: threads stacks belonging to dead threads are freed by the + next exiting thread, no need to gc to collect thread stacks anymore changes in sbcl-0.9.5 relative to sbcl-0.9.4: * new feature: timers based on Zach Beane's excellent timer package diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 0e993bd..2bae841 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1143,7 +1143,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%WITH-ARRAY-DATA" "%WITH-ARRAY-DATA-MACRO" "*CURRENT-LEVEL-IN-PRINT*" "*EMPTY-TYPE*" "*GC-INHIBIT*" "*GC-PENDING*" - #!+sb-thread"*STOP-FOR-GC-PENDING*" + #!+sb-thread "*STOP-FOR-GC-PENDING*" "*CONTROL-STACK-EXHAUSTION-SAP*" "*UNIVERSAL-TYPE*" "*UNIVERSAL-FUN-TYPE*" "*UNPARSE-FUN-TYPE-SIMPLIFY*" "*WILD-TYPE*" "WORD-LOGICAL-AND" "WORD-LOGICAL-ANDC1" diff --git a/src/code/exhaust.lisp b/src/code/exhaust.lisp index b1cf791..a96112b 100644 --- a/src/code/exhaust.lisp +++ b/src/code/exhaust.lisp @@ -14,8 +14,6 @@ (define-alien-routine ("protect_control_stack_guard_page" %protect-control-stack-guard-page) sb!alien:void - (thread-sap system-area-pointer) (protect-p sb!alien:int)) (defun protect-control-stack-guard-page (n) - (%protect-control-stack-guard-page - (sb!thread::thread-%sap sb!thread:*current-thread*) (if n 1 0))) + (%protect-control-stack-guard-page (if n 1 0))) diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index 19e293a..8920729 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -22,7 +22,10 @@ "Thread type. Do not rely on threads being structs as it may change in future versions." name - %sap) + %alive-p + os-thread + interruptions + (interruptions-lock (make-mutex :name "thread interruptions lock"))) #!+sb-doc (setf (sb!kernel:fdocumentation 'thread-name 'function) @@ -38,22 +41,10 @@ in future versions." )) thread) -(defun thread-state (thread) - (let ((state - (sb!sys:sap-int - (sb!sys:sap-ref-sap (thread-%sap thread) - (* sb!vm::thread-state-slot - sb!vm::n-word-bytes))))) - (ecase state - (#.(sb!vm:fixnumize 0) :starting) - (#.(sb!vm:fixnumize 1) :running) - (#.(sb!vm:fixnumize 2) :suspended) - (#.(sb!vm:fixnumize 3) :dead)))) - (defun thread-alive-p (thread) #!+sb-doc "Check if THREAD is running." - (not (eq :dead (thread-state thread)))) + (thread-%alive-p thread)) ;; A thread is eligible for gc iff it has finished and there are no ;; more references to it. This list is supposed to keep a reference to @@ -78,7 +69,8 @@ in future versions." (defun init-initial-thread () (let ((initial-thread (%make-thread :name "initial thread" - :%sap (current-thread-sap)))) + :%alive-p t + :os-thread (current-thread-sap-id)))) (setq *current-thread* initial-thread) ;; Either *all-threads* is empty or it contains exactly one thread ;; in case we are in reinit since saving core with multiple @@ -89,15 +81,18 @@ in future versions." #!+sb-thread (progn + ;; FIXME it would be good to define what a thread id is or isn't + ;; (our current assumption is that it's a fixnum). It so happens + ;; that on Linux it's a pid, but it might not be on posix thread + ;; implementations. (define-alien-routine ("create_thread" %create-thread) - system-area-pointer - (lisp-fun-address unsigned-long)) + unsigned-long (lisp-fun-address unsigned-long)) - (define-alien-routine "block_blockable_signals" - void) + (define-alien-routine "signal_interrupt_thread" + integer (os-thread unsigned-long)) - (define-alien-routine reap-dead-thread void - (thread-sap system-area-pointer)) + (define-alien-routine "block_blockable_signals" + void) (declaim (inline futex-wait futex-wake)) @@ -501,57 +496,60 @@ returns the thread exits." (let* ((thread (%make-thread :name name)) (setup-sem (make-semaphore :name "Thread setup semaphore")) (real-function (coerce function 'function)) - (thread-sap - ;; don't let the child inherit *CURRENT-THREAD* because that - ;; can prevent gc'ing this thread while the child runs - (let ((*current-thread* nil)) - (%create-thread - (sb!kernel:get-lisp-obj-address - (lambda () - ;; in time we'll move some of the binding presently done in C - ;; here too - (let ((*current-thread* thread) - (sb!kernel::*restart-clusters* nil) - (sb!kernel::*handler-clusters* nil) - (sb!kernel::*condition-restarts* nil) - (sb!impl::*descriptor-handlers* nil)) ; serve-event - (wait-on-semaphore setup-sem) - ;; can't use handling-end-of-the-world, because that flushes - ;; output streams, and we don't necessarily have any (or we - ;; could be sharing them) - (unwind-protect - (catch 'sb!impl::toplevel-catcher - (catch 'sb!impl::%end-of-the-world - (with-simple-restart - (terminate-thread - (format nil - "~~@" - *current-thread*)) - ;; now that most things have a chance to - ;; work properly without messing up other - ;; threads, it's time to enable signals - (sb!unix::reset-signal-mask) - (unwind-protect - (funcall real-function) - ;; we're going down, can't handle - ;; interrupts sanely anymore - (let ((sb!impl::*gc-inhibit* t)) - (block-blockable-signals) - ;; and remove what can be the last - ;; reference to this thread - (handle-thread-exit thread)))))) - 0)) - (values))))))) - (when (sb!sys:sap= thread-sap (sb!sys:int-sap 0)) - (error "Can't create a new thread")) - (setf (thread-%sap thread) thread-sap) - (with-mutex (*all-threads-lock*) - (push thread *all-threads*)) - (with-session-lock (*session*) - (push thread (session-threads *session*))) - (signal-semaphore setup-sem) - (sb!impl::finalize thread (lambda () (reap-dead-thread thread-sap))) - thread)) + (initial-function + (lambda () + ;; in time we'll move some of the binding presently done in C + ;; here too + (let ((*current-thread* thread) + (sb!kernel::*restart-clusters* nil) + (sb!kernel::*handler-clusters* nil) + (sb!kernel::*condition-restarts* nil) + (sb!impl::*descriptor-handlers* nil)) ; serve-event + (setf (thread-os-thread thread) (current-thread-sap-id)) + (with-mutex (*all-threads-lock*) + (push thread *all-threads*)) + (with-session-lock (*session*) + (push thread (session-threads *session*))) + (setf (thread-%alive-p thread) t) + (signal-semaphore setup-sem) + ;; can't use handling-end-of-the-world, because that flushes + ;; output streams, and we don't necessarily have any (or we + ;; could be sharing them) + (catch 'sb!impl::toplevel-catcher + (catch 'sb!impl::%end-of-the-world + (with-simple-restart + (terminate-thread + (format nil + "~~@" + *current-thread*)) + (unwind-protect + (progn + ;; now that most things have a chance to + ;; work properly without messing up other + ;; threads, it's time to enable signals + (sb!unix::reset-signal-mask) + (funcall real-function)) + ;; we're going down, can't handle + ;; interrupts sanely anymore + (let ((sb!impl::*gc-inhibit* t)) + (block-blockable-signals) + (setf (thread-%alive-p thread) nil) + (setf (thread-os-thread thread) nil) + ;; and remove what can be the last + ;; reference to this thread + (handle-thread-exit thread))))))) + (values)))) + (sb!sys:with-pinned-objects (initial-function) + (let ((os-thread + ;; don't let the child inherit *CURRENT-THREAD* because that + ;; can prevent gc'ing this thread while the child runs + (let ((*current-thread* nil)) + (%create-thread + (sb!kernel:get-lisp-obj-address initial-function))))) + (when (zerop os-thread) + (error "Can't create a new thread")) + (wait-on-semaphore setup-sem) + thread)))) (defun destroy-thread (thread) #!+sb-doc @@ -559,15 +557,12 @@ returns the thread exits." (terminate-thread thread)) (define-condition interrupt-thread-error (error) - ((thread :reader interrupt-thread-error-thread :initarg :thread) - (errno :reader interrupt-thread-error-errno :initarg :errno)) + ((thread :reader interrupt-thread-error-thread :initarg :thread)) #!+sb-doc (:documentation "Interrupting thread failed.") (:report (lambda (c s) - (format s "interrupt thread ~A failed (~A: ~A)" - (interrupt-thread-error-thread c) - (interrupt-thread-error-errno c) - (strerror (interrupt-thread-error-errno c)))))) + (format s "Interrupt thread failed: thread ~A has exited." + (interrupt-thread-error-thread c))))) #!+sb-doc (setf (sb!kernel:fdocumentation 'interrupt-thread-error-thread 'function) @@ -575,6 +570,24 @@ returns the thread exits." (sb!kernel:fdocumentation 'interrupt-thread-error-errno 'function) "The reason why the interruption failed.") +(defmacro with-interruptions-lock ((thread) &body body) + `(sb!sys:without-interrupts + (with-mutex ((thread-interruptions-lock ,thread)) + ,@body))) + +;; Called from the signal handler. +(defun run-interruption () + (let ((interruption (with-interruptions-lock (*current-thread*) + (pop (thread-interruptions *current-thread*))))) + (funcall interruption))) + +;; The order of interrupt execution is peculiar. If thread A +;; interrupts thread B with I1, I2 and B for some reason receives I1 +;; when FUN2 is already on the list, then it is FUN2 that gets to run +;; first. But when FUN2 is run SIG_INTERRUPT_THREAD is enabled again +;; and I2 hits pretty soon in FUN2 and run FUN1. This is of course +;; just one scenario, and the order of thread interrupt execution is +;; undefined. (defun interrupt-thread (thread function) #!+sb-doc "Interrupt the live THREAD and make it run FUNCTION. A moderate @@ -590,18 +603,14 @@ won't like the effect." #!+sb-thread (if (eq thread *current-thread*) (funcall function) - (let ((function (coerce function 'function))) - (multiple-value-bind (res err) - ;; protect against gcing just when the ub32 address is - ;; just ready to be passed to C - (sb!sys::with-pinned-objects (function) - (sb!unix::syscall ("interrupt_thread" - system-area-pointer sb!alien:unsigned-long) - thread - (thread-%sap thread) - (sb!kernel:get-lisp-obj-address function))) - (unless res - (error 'interrupt-thread-error :thread thread :errno err)))))) + (let ((os-thread (thread-os-thread thread))) + (cond ((not os-thread) + (error 'interrupt-thread-error :thread thread)) + (t + (with-interruptions-lock (thread) + (push function (thread-interruptions thread))) + (when (minusp (signal-interrupt-thread os-thread)) + (error 'interrupt-thread-error :thread thread))))))) (defun terminate-thread (thread) #!+sb-doc @@ -614,11 +623,24 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" ;;; with an SBCL developer first, or are doing something that you ;;; should probably discuss with a professional psychiatrist first #!+sb-thread -(defun symbol-value-in-thread (symbol thread) - (let ((thread-sap (thread-%sap thread))) - (let* ((index (sb!vm::symbol-tls-index symbol)) - (tl-val (sb!sys:sap-ref-word thread-sap - (* sb!vm:n-word-bytes index)))) - (if (eql tl-val sb!vm::no-tls-value-marker-widetag) - (sb!vm::symbol-global-value symbol) - (sb!kernel:make-lisp-obj tl-val))))) +(defun thread-sap-for-id (id) + (let ((thread-sap (alien-sap (extern-alien "all_threads" (* t))))) + (loop + (when (sb!sys:sap= thread-sap (sb!sys:int-sap 0)) (return nil)) + (let ((os-thread (sb!sys:sap-ref-word thread-sap + (* sb!vm:n-word-bytes + sb!vm::thread-os-thread-slot)))) + (print os-thread) + (when (= os-thread id) (return thread-sap)) + (setf thread-sap + (sb!sys:sap-ref-sap thread-sap (* sb!vm:n-word-bytes + sb!vm::thread-next-slot))))))) + +#!+sb-thread +(defun symbol-value-in-thread (symbol thread-sap) + (let* ((index (sb!vm::symbol-tls-index symbol)) + (tl-val (sb!sys:sap-ref-word thread-sap + (* sb!vm:n-word-bytes index)))) + (if (eql tl-val sb!vm::no-tls-value-marker-widetag) + (sb!vm::symbol-global-value symbol) + (sb!kernel:make-lisp-obj tl-val)))) diff --git a/src/compiler/generic/genesis.lisp b/src/compiler/generic/genesis.lisp index bb05ce1..1d6941a 100644 --- a/src/compiler/generic/genesis.lisp +++ b/src/compiler/generic/genesis.lisp @@ -1269,7 +1269,8 @@ core and return a descriptor to it." (frob sb!kernel::undefined-alien-function-error) (frob sb!kernel::memory-fault-error) (frob sb!di::handle-breakpoint) - (frob sb!di::handle-fun-end-breakpoint)) + (frob sb!di::handle-fun-end-breakpoint) + #!+sb-thread (frob sb!thread::run-interruption)) (cold-set 'sb!vm::*current-catch-block* (make-fixnum-descriptor 0)) (cold-set 'sb!vm::*current-unwind-protect-block* (make-fixnum-descriptor 0)) diff --git a/src/compiler/generic/objdef.lisp b/src/compiler/generic/objdef.lisp index cae8f6f..5ac22cb 100644 --- a/src/compiler/generic/objdef.lisp +++ b/src/compiler/generic/objdef.lisp @@ -423,8 +423,6 @@ (tls-cookie) ; on x86, the LDT index #!+(or x86 x86-64) (pseudo-atomic-atomic) #!+(or x86 x86-64) (pseudo-atomic-interrupted) - (interrupt-fun) - (interrupt-fun-lock :c-type "volatile lispobj") (interrupt-data :c-type "struct interrupt_data *" :length #!+alpha 2 #!-alpha 1) (interrupt-contexts :c-type "os_context_t *" :rest-p t)) diff --git a/src/compiler/x86-64/parms.lisp b/src/compiler/x86-64/parms.lisp index f80bbf2..a6e20fe 100644 --- a/src/compiler/x86-64/parms.lisp +++ b/src/compiler/x86-64/parms.lisp @@ -193,6 +193,7 @@ *gc-inhibit* #!+sb-thread *stop-for-gc-pending* *gc-pending* + #!+sb-thread sb!thread::run-interruption *free-tls-index* *tls-index-lock* diff --git a/src/compiler/x86/parms.lisp b/src/compiler/x86/parms.lisp index 0107ecd..7368aed 100644 --- a/src/compiler/x86/parms.lisp +++ b/src/compiler/x86/parms.lisp @@ -313,6 +313,7 @@ *gc-inhibit* #!+sb-thread *stop-for-gc-pending* *gc-pending* + #!+sb-thread sb!thread::run-interruption *free-tls-index* *tls-index-lock* diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index 6b2f6f7..0584403 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -3629,10 +3629,6 @@ garbage_collect_generation(int generation, int raise) scavenge((lispobj *)(interrupt_handlers + i), 1); } } - /* Scavenge the function list for INTERRUPT-THREAD. */ - for_each_thread(th) { - scavenge(&th->interrupt_fun,1); - } /* Scavenge the binding stacks. */ { struct thread *th; diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index 73eeed8..4497428 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -874,30 +874,13 @@ void arrange_return_to_lisp_function(os_context_t *context, lispobj function) } #ifdef LISP_FEATURE_SB_THREAD + +/* FIXME: this function can go away when all lisp handlers are invoked + * via arrange_return_to_lisp_function. */ void interrupt_thread_handler(int num, siginfo_t *info, void *v_context) { os_context_t *context = (os_context_t*)arch_os_get_context(&v_context); - /* The order of interrupt execution is peculiar. If thread A - * interrupts thread B with I1, I2 and B for some reason receives - * I1 when FUN2 is already on the list, then it is FUN2 that gets - * to run first. But when FUN2 is run SIG_INTERRUPT_THREAD is - * enabled again and I2 hits pretty soon in FUN2 and run - * FUN1. This is of course just one scenario, and the order of - * thread interrupt execution is undefined. */ - struct thread *th=arch_os_get_current_thread(); - struct cons *c; - lispobj function; - if (th->state != STATE_RUNNING) - lose("interrupt_thread_handler: thread %lu in wrong state: %d\n", - th->os_thread,fixnum_value(th->state)); - get_spinlock(&th->interrupt_fun_lock,(long)th); - c=((struct cons *)native_pointer(th->interrupt_fun)); - function=c->car; - th->interrupt_fun=c->cdr; - release_spinlock(&th->interrupt_fun_lock); - if (function==NIL) - lose("interrupt_thread_handler: NIL function\n"); - arrange_return_to_lisp_function(context,function); + arrange_return_to_lisp_function(context, SymbolFunction(RUN_INTERRUPTION)); } #endif @@ -924,8 +907,8 @@ boolean handle_guard_page_triggered(os_context_t *context,os_vm_address_t addr) * protection so the error handler has some headroom, protect the * previous page so that we can catch returns from the guard page * and restore it. */ - protect_control_stack_guard_page(th,0); - protect_control_stack_return_guard_page(th,1); + protect_control_stack_guard_page(0); + protect_control_stack_return_guard_page(1); arrange_return_to_lisp_function (context, SymbolFunction(CONTROL_STACK_EXHAUSTED_ERROR)); @@ -937,8 +920,8 @@ boolean handle_guard_page_triggered(os_context_t *context,os_vm_address_t addr) * unprotect this one. This works even if we somehow missed * the return-guard-page, and hit it on our way to new * exhaustion instead. */ - protect_control_stack_guard_page(th,1); - protect_control_stack_return_guard_page(th,0); + protect_control_stack_guard_page(1); + protect_control_stack_return_guard_page(0); return 1; } else if (addr >= undefined_alien_address && diff --git a/src/runtime/thread.c b/src/runtime/thread.c index ab0db08..3614c80 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -25,6 +25,13 @@ #define ALIEN_STACK_SIZE (1*1024*1024) /* 1Mb size chosen at random */ +struct freeable_stack { + os_thread_t os_thread; + os_vm_address_t stack; +}; + +static struct freeable_stack * volatile freeable_stack = 0; + int dynamic_values_bytes=4096*sizeof(lispobj); /* same for all threads */ struct thread * volatile all_threads; extern struct interrupt_data * global_interrupt_data; @@ -34,47 +41,34 @@ extern int linux_no_threads_p; pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER; -/* When trying to get all_threads_lock one should make sure that - * sig_stop_for_gc is not blocked. Else there would be a possible - * deadlock: gc locks it, other thread blocks signals, gc sends stop - * request to other thread and waits, other thread blocks on lock. */ -void check_sig_stop_for_gc_can_arrive_or_lose() -{ - /* Get the current sigmask, by blocking the empty set. */ - sigset_t empty,current; - sigemptyset(&empty); - thread_sigmask(SIG_BLOCK, &empty, ¤t); - if (sigismember(¤t,SIG_STOP_FOR_GC)) - lose("SIG_STOP_FOR_GC cannot arrive: it is blocked\n"); - if (SymbolValue(GC_INHIBIT,arch_os_get_current_thread()) != NIL) - lose("SIG_STOP_FOR_GC cannot arrive: gc is inhibited\n"); - if (arch_pseudo_atomic_atomic(NULL)) - lose("SIG_STOP_FOR_GC cannot arrive: in pseudo atomic\n"); -} +#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64) +extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs); +#endif -#define GET_ALL_THREADS_LOCK(name) \ - { \ - sigset_t _newset,_oldset; \ - sigemptyset(&_newset); \ - sigaddset_deferrable(&_newset); \ - thread_sigmask(SIG_BLOCK, &_newset, &_oldset); \ - check_sig_stop_for_gc_can_arrive_or_lose(); \ - FSHOW_SIGNAL((stderr,"/%s:waiting on lock=%ld, thread=%lu\n",name, \ - all_threads_lock,arch_os_get_current_thread()->os_thread)); \ - pthread_mutex_lock(&all_threads_lock); \ - FSHOW_SIGNAL((stderr,"/%s:got lock, thread=%lu\n", \ - name,arch_os_get_current_thread()->os_thread)); - -#define RELEASE_ALL_THREADS_LOCK(name) \ - FSHOW_SIGNAL((stderr,"/%s:released lock\n",name)); \ - pthread_mutex_unlock(&all_threads_lock); \ - thread_sigmask(SIG_SETMASK,&_oldset,0); \ - } #endif +static void +link_thread(struct thread *th) +{ + th->os_thread=thread_self(); + if (all_threads) all_threads->prev=th; + th->next=all_threads; + th->prev=0; + all_threads=th; + protect_control_stack_guard_page(1); +} -#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64) -extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs); +#ifdef LISP_FEATURE_SB_THREAD +static void +unlink_thread(struct thread *th) +{ + if (th->prev) + th->prev->next = th->next; + else + all_threads = th->next; + if (th->next) + th->next->prev = th->prev; +} #endif static int @@ -87,9 +81,8 @@ initial_thread_trampoline(struct thread *th) function = th->no_tls_value_marker; th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG; if(arch_os_thread_init(th)==0) return 1; + link_thread(th); - if(th->os_thread < 1) lose("th->os_thread not set up right"); - th->state=STATE_RUNNING; #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64) return call_into_lisp_first_time(function,args,0); #else @@ -97,8 +90,38 @@ initial_thread_trampoline(struct thread *th) #endif } +#define THREAD_STRUCT_SIZE (THREAD_CONTROL_STACK_SIZE + BINDING_STACK_SIZE + \ + ALIEN_STACK_SIZE + dynamic_values_bytes + \ + 32 * SIGSTKSZ) + #ifdef LISP_FEATURE_SB_THREAD +static void +free_thread_stack_later(struct thread *thread_to_be_cleaned_up) +{ + struct freeable_stack *new_freeable_stack = 0; + if (thread_to_be_cleaned_up) { + new_freeable_stack = (struct freeable_stack *) + os_validate(0, sizeof(struct freeable_stack)); + new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread; + new_freeable_stack->stack = (os_vm_address_t) + thread_to_be_cleaned_up->control_stack_start; + } + new_freeable_stack = (struct freeable_stack *) + swap_lispobjs((lispobj *)(void *)&freeable_stack, + (lispobj)new_freeable_stack); + if (new_freeable_stack) { + FSHOW((stderr,"/reaping %lu\n", new_freeable_stack->os_thread)); + /* Under NPTL pthread_join really waits until the thread + * exists and the stack can be safely freed. This is sadly not + * mandated by the pthread spec. */ + gc_assert(pthread_join(new_freeable_stack->os_thread, NULL) == 0); + os_invalidate(new_freeable_stack->stack, THREAD_STRUCT_SIZE); + os_invalidate((os_vm_address_t) new_freeable_stack, + sizeof(struct freeable_stack)); + } +} + /* this is the first thing that runs in the child (which is why the * silly calling convention). Basically it calls the user's requested * lisp function after doing arch_os_thread_init and whatever other @@ -109,6 +132,7 @@ new_thread_trampoline(struct thread *th) { lispobj function; int result; + FSHOW((stderr,"/creating thread %lu\n", thread_self())); function = th->no_tls_value_marker; th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG; if(arch_os_thread_init(th)==0) { @@ -116,22 +140,32 @@ new_thread_trampoline(struct thread *th) lose("arch_os_thread_init failed\n"); } - /* wait here until our thread is linked into all_threads: see below */ - { - volatile os_thread_t *tid=&th->os_thread; - while(*tid<1) sched_yield(); - } + /* Since GC can only know about this thread from the all_threads + * list and we're just adding this thread to it there is no danger + * of deadlocking even with SIG_STOP_FOR_GC blocked. */ + pthread_mutex_lock(&all_threads_lock); + link_thread(th); + pthread_mutex_unlock(&all_threads_lock); - th->state=STATE_RUNNING; result = funcall0(function); th->state=STATE_DEAD; + + /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this + * thread, but since we are already dead it won't wait long. */ + pthread_mutex_lock(&all_threads_lock); + unlink_thread(th); + pthread_mutex_unlock(&all_threads_lock); + + gc_alloc_update_page_tables(0, &th->alloc_region); + if(th->tls_cookie>=0) arch_os_thread_cleanup(th); + os_invalidate((os_vm_address_t)th->interrupt_data, + (sizeof (struct interrupt_data))); + free_thread_stack_later(th); + FSHOW((stderr,"/exiting thread %lu\n", thread_self())); return result; } -#endif /* LISP_FEATURE_SB_THREAD */ -#define THREAD_STRUCT_SIZE (THREAD_CONTROL_STACK_SIZE + BINDING_STACK_SIZE + \ - ALIEN_STACK_SIZE + dynamic_values_bytes + \ - 32 * SIGSTKSZ) +#endif /* LISP_FEATURE_SB_THREAD */ static void free_thread_struct(struct thread *th) @@ -209,9 +243,7 @@ create_thread_struct(lispobj initial_function) { th->binding_stack_pointer=th->binding_stack_start; th->this=th; th->os_thread=0; - th->interrupt_fun=NIL; - th->interrupt_fun_lock=0; - th->state=STATE_STARTING; + th->state=STATE_RUNNING; #ifdef LISP_FEATURE_STACK_GROWS_DOWNWARD_NOT_UPWARD th->alien_stack_pointer=((void *)th->alien_stack_start + ALIEN_STACK_SIZE-N_WORD_BYTES); @@ -267,29 +299,10 @@ create_thread_struct(lispobj initial_function) { return th; } -static void -link_thread(struct thread *th,os_thread_t kid_tid) -{ - if (all_threads) all_threads->prev=th; - th->next=all_threads; - th->prev=0; - all_threads=th; - /* note that th->os_thread is 0 at this time. We rely on - * all_threads_lock to ensure that we don't have >1 thread with - * os_thread=0 on the list at once - */ - protect_control_stack_guard_page(th,1); - /* child will not start until this is set */ - th->os_thread=kid_tid; - FSHOW((stderr,"/created thread %lu\n",kid_tid)); -} - void create_initial_thread(lispobj initial_function) { struct thread *th=create_thread_struct(initial_function); - os_thread_t kid_tid=thread_self(); - if(th && kid_tid>0) { - link_thread(th,kid_tid); - initial_thread_trampoline(all_threads); /* no return */ + if(th) { + initial_thread_trampoline(th); /* no return */ } else lose("can't create initial thread"); } @@ -308,9 +321,9 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) sigset_t newset,oldset; boolean r=1; sigemptyset(&newset); - /* Blocking deferrable signals is enough, since gc_stop_the_world - * waits until the child leaves STATE_STARTING. And why not let gc - * proceed as soon as possible? */ + /* Blocking deferrable signals is enough, no need to block + * SIG_STOP_FOR_GC because the child process is not linked onto + * all_threads until it's ready. */ sigaddset_deferrable(&newset); thread_sigmask(SIG_BLOCK, &newset, &oldset); @@ -324,61 +337,21 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) return r; } -struct thread *create_thread(lispobj initial_function) { +os_thread_t create_thread(lispobj initial_function) { struct thread *th; - os_thread_t kid_tid=0; - boolean success; + os_thread_t kid_tid; if(linux_no_threads_p) return 0; th=create_thread_struct(initial_function); if(th==0) return 0; - /* we must not be interrupted here after a successful - * create_os_thread, because the kid will be waiting for its - * thread struct to be linked */ - GET_ALL_THREADS_LOCK("create_thread") - - success=create_os_thread(th,&kid_tid); - if (success) - link_thread(th,kid_tid); - else + if (create_os_thread(th,&kid_tid)) { + return kid_tid; + } else { free_thread_struct(th); - - RELEASE_ALL_THREADS_LOCK("create_thread") - - if (success) - return th; - else return 0; -} - -/* called from lisp from the thread object finalizer */ -void reap_dead_thread(struct thread *th) -{ - if(th->state!=STATE_DEAD) - lose("thread %p is not joinable, state=%d\n",th,th->state); -#ifdef LISP_FEATURE_GENCGC - { - sigset_t newset,oldset; - sigemptyset(&newset); - sigaddset_blockable(&newset); - thread_sigmask(SIG_BLOCK, &newset, &oldset); - gc_alloc_update_page_tables(0, &th->alloc_region); - thread_sigmask(SIG_SETMASK,&oldset,0); } -#endif - GET_ALL_THREADS_LOCK("reap_dead_thread") - FSHOW((stderr,"/reap_dead_thread: reaping %lu\n",th->os_thread)); - if(th->prev) - th->prev->next=th->next; - else all_threads=th->next; - if(th->next) - th->next->prev=th->prev; - RELEASE_ALL_THREADS_LOCK("reap_dead_thread") - if(th->tls_cookie>=0) arch_os_thread_cleanup(th); - gc_assert(pthread_join(th->os_thread,NULL)==0); - free_thread_struct(th); } /* Send the signo to os_thread, retry if the rt signal queue is @@ -402,56 +375,16 @@ static int kill_thread_safely(os_thread_t os_thread, int signo) return r; } -int interrupt_thread(struct thread *th, lispobj function) +int signal_interrupt_thread(os_thread_t os_thread) { - /* In clone_threads, if A and B both interrupt C at approximately - * the same time, it does not matter: the second signal will be - * masked until the handler has returned from the first one. In - * pthreads though, we can't put the knowledge of what function to - * call into the siginfo, so we have to store it in the - * destination thread, and do it in such a way that A won't - * clobber B's interrupt. Hence, this stupid linked list. - * - * This does depend on SIG_INTERRUPT_THREAD being queued (as POSIX - * RT signals are): we need to keep interrupt_fun data for exactly - * as many signals as are going to be received by the destination - * thread. - */ - lispobj c=alloc_cons(function,NIL); - sigset_t newset,oldset; - sigemptyset(&newset); - /* interrupt_thread_handler locks this spinlock with blockables - * blocked (it does so for the sake of - * arrange_return_to_lisp_function), so we must also block them or - * else SIG_STOP_FOR_GC and all_threads_lock will find a way to - * deadlock. */ - sigaddset_blockable(&newset); - thread_sigmask(SIG_BLOCK, &newset, &oldset); - if (th == arch_os_get_current_thread()) - lose("cannot interrupt current thread"); - get_spinlock(&th->interrupt_fun_lock, - (long)arch_os_get_current_thread()); - ((struct cons *)native_pointer(c))->cdr=th->interrupt_fun; - th->interrupt_fun=c; - release_spinlock(&th->interrupt_fun_lock); - thread_sigmask(SIG_SETMASK,&oldset,0); - /* Called from lisp with the thread object as a parameter. Thus, - * the object cannot be garbage collected and consequently reaped - * and joined. Because it's not joined, kill should work (even if - * the thread has died/exited). */ - { - int status=kill_thread_safely(th->os_thread,SIG_INTERRUPT_THREAD); - if (status==0) { - return 0; - } else if (status==ESRCH) { - /* This thread has exited. */ - th->interrupt_fun=NIL; - errno=ESRCH; - return -1; - } else { - lose("cannot send SIG_INTERRUPT_THREAD to thread=%lu: %d, %s", - th->os_thread,status,strerror(status)); - } + int status = kill_thread_safely(os_thread, SIG_INTERRUPT_THREAD); + if (status == 0) { + return 0; + } else if (status == ESRCH) { + return -1; + } else { + lose("cannot send SIG_INTERRUPT_THREAD to thread=%lu: %d, %s", + os_thread, status, strerror(status)); } } @@ -476,8 +409,7 @@ void gc_stop_the_world() th->os_thread)); /* stop all other threads by sending them SIG_STOP_FOR_GC */ for(p=all_threads; p; p=p->next) { - while(p->state==STATE_STARTING) sched_yield(); - if((p!=th) && (p->state==STATE_RUNNING)) { + if((p!=th) && ((p->state==STATE_RUNNING))) { FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending %lu\n", p->os_thread)); status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC); @@ -494,7 +426,6 @@ void gc_stop_the_world() /* wait for the running threads to stop or finish */ for(p=all_threads;p;) { gc_assert(p->os_thread!=0); - gc_assert(p->state!=STATE_STARTING); if((p==th) || (p->state==STATE_SUSPENDED) || (p->state==STATE_DEAD)) { p=p->next; diff --git a/src/runtime/thread.h b/src/runtime/thread.h index 3f823cc..37325b8 100644 --- a/src/runtime/thread.h +++ b/src/runtime/thread.h @@ -18,7 +18,6 @@ struct alloc_region { }; #include "genesis/static-symbols.h" #include "genesis/thread.h" -#define STATE_STARTING (make_fixnum(0)) #define STATE_RUNNING (make_fixnum(1)) #define STATE_SUSPENDED (make_fixnum(2)) #define STATE_DEAD (make_fixnum(3)) diff --git a/src/runtime/validate.c b/src/runtime/validate.c index e11b0bf..11b970f 100644 --- a/src/runtime/validate.c +++ b/src/runtime/validate.c @@ -81,14 +81,16 @@ validate(void) } void -protect_control_stack_guard_page(struct thread *th, int protect_p) { +protect_control_stack_guard_page(int protect_p) { + struct thread *th = arch_os_get_current_thread(); os_protect(CONTROL_STACK_GUARD_PAGE(th), os_vm_page_size,protect_p ? (OS_VM_PROT_READ|OS_VM_PROT_EXECUTE) : OS_VM_PROT_ALL); } void -protect_control_stack_return_guard_page(struct thread *th, int protect_p) { +protect_control_stack_return_guard_page(int protect_p) { + struct thread *th = arch_os_get_current_thread(); os_protect(CONTROL_STACK_RETURN_GUARD_PAGE(th), os_vm_page_size,protect_p ? (OS_VM_PROT_READ|OS_VM_PROT_EXECUTE) : OS_VM_PROT_ALL); diff --git a/src/runtime/validate.h b/src/runtime/validate.h index ed09ac3..05f58e8 100644 --- a/src/runtime/validate.h +++ b/src/runtime/validate.h @@ -49,9 +49,8 @@ #endif extern void validate(void); -extern void protect_control_stack_guard_page(struct thread *th, int protect_p); -extern void protect_control_stack_return_guard_page(struct thread *th, - int protect_p); +extern void protect_control_stack_guard_page(int protect_p); +extern void protect_control_stack_return_guard_page(int protect_p); extern os_vm_address_t undefined_alien_address; #endif diff --git a/src/runtime/x86-64-arch.h b/src/runtime/x86-64-arch.h index 90b5150..2085503 100644 --- a/src/runtime/x86-64-arch.h +++ b/src/runtime/x86-64-arch.h @@ -43,4 +43,15 @@ release_spinlock(volatile lispobj *word) *word=0; } +static inline lispobj +swap_lispobjs(volatile lispobj *dest, lispobj value) +{ + lispobj old_value; + asm ("lock xchg %0,(%1)" + : "=r" (old_value) + : "r" (dest), "0" (value) + : "memory"); + return old_value; +} + #endif /* _X86_64_ARCH_H */ diff --git a/src/runtime/x86-arch.h b/src/runtime/x86-arch.h index b4ce701..f8174c2 100644 --- a/src/runtime/x86-arch.h +++ b/src/runtime/x86-arch.h @@ -43,4 +43,17 @@ release_spinlock(volatile lispobj *word) *word=0; } +#include + +static inline lispobj +swap_lispobjs(volatile lispobj *dest, lispobj value) +{ + lispobj old_value; + asm ("lock xchg %0,(%1)" + : "=r" (old_value) + : "r" (dest), "0" (value) + : "memory"); + return old_value; +} + #endif /* _X86_ARCH_H */ diff --git a/tests/threads.impure.lisp b/tests/threads.impure.lisp index f9bec10..b4e5bba 100644 --- a/tests/threads.impure.lisp +++ b/tests/threads.impure.lisp @@ -313,7 +313,7 @@ (interrupt-thread c (lambda () (princ ".") (force-output) - (assert (eq (thread-state *current-thread*) :running)) + (assert (thread-alive-p *current-thread*)) (assert (zerop SB-KERNEL:*PSEUDO-ATOMIC-ATOMIC*))))) (terminate-thread c) (wait-for-threads (list c))) @@ -460,22 +460,21 @@ (format t "~&session lock test done~%") -(sb-ext:gc :full t) -(loop repeat 20 do - (wait-for-threads - (loop for i below 100 collect - (sb-thread:make-thread (lambda ())))) - (sb-ext:gc :full t) - (princ "+") - (force-output)) +(wait-for-threads + (loop for i below 2000 collect + (sb-thread:make-thread (lambda ())))) (format t "~&creation test done~%") ;; watch out for *current-thread* being the parent thread after exit -(let ((thread (sb-thread:make-thread (lambda ())))) +(let* (sap + (thread (sb-thread:make-thread + (lambda () + (setq sap (thread-sap-for-id + (thread-os-thread *current-thread*))))))) (wait-for-threads (list thread)) (assert (null (symbol-value-in-thread 'sb-thread:*current-thread* - thread)))) + sap)))) ;; interrupt handlers are per-thread with pthreads, make sure the ;; handler installed in one thread is global diff --git a/version.lisp-expr b/version.lisp-expr index 24a86dd..da33990 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.5.19" +"0.9.5.20" -- 1.7.10.4