From 77090f53cbf9c0df39e8b052891b84b2c6812676 Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Mon, 16 Feb 2009 22:04:26 +0000 Subject: [PATCH] 1.0.25.39: thread start/stop fixes - disable interrupts during create_thread ... to protect against signals (async unwinds, reentrancy, ...) during malloc, pthread code and to make the pinning of INITIAL-FUNCTION effective. Also add checks to new_thread_trampoline. - make-thread: assert gc enabled (to prevent deadlocks) - block blockables while holding LOCK_CREATE_THREAD lock - make dying threads safe from interrupts --- NEWS | 1 + src/code/target-thread.lisp | 65 ++++++++++++++++++++++++++++--------------- src/runtime/thread.c | 40 +++++++++++++------------- version.lisp-expr | 2 +- 4 files changed, 65 insertions(+), 43 deletions(-) diff --git a/NEWS b/NEWS index 0226ff5..5f1f129 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,7 @@ changes in sbcl-1.0.26 relative to 1.0.25: * bug fix: real-time signals are not used anymore, so no more hanging when the system wide real-time signal queue gets full. * bug fix: finalizers, gc hooks never run in a WITHOUT-INTERRUPTS + * bug fix: fix deadlocks related to starting threads changes in sbcl-1.0.25 relative to 1.0.24: * incompatible change: SB-INTROSPECT:FUNCTION-ARGLIST is deprecated, to be diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index a0cf2e0..58d6ffa 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -638,9 +638,6 @@ on this semaphore, then N of them is woken up." #!+sb-thread (defun handle-thread-exit (thread) (/show0 "HANDLING THREAD EXIT") - ;; We're going down, can't handle interrupts sanely anymore. GC - ;; remains enabled. - (block-deferrable-signals) ;; Lisp-side cleanup (with-all-threads-lock (setf (thread-%alive-p thread) nil) @@ -818,28 +815,50 @@ around and can be retrieved by JOIN-THREAD." (format nil "~~@" *current-thread*)) - (unwind-protect - (progn - ;; now that most things have a chance to - ;; work properly without messing up other - ;; threads, it's time to enable signals - (sb!unix::unblock-deferrable-signals) - (setf (thread-result thread) - (cons t - (multiple-value-list - (funcall real-function))))) - (handle-thread-exit thread))))))) + (without-interrupts + (unwind-protect + (with-local-interrupts + ;; Now that most things have a chance + ;; to work properly without messing up + ;; other threads, it's time to enable + ;; signals. + (sb!unix::unblock-deferrable-signals) + (setf (thread-result thread) + (cons t + (multiple-value-list + (funcall real-function)))) + ;; Try to block deferrables. An + ;; interrupt may unwind it, but for a + ;; normal exit it prevents interrupt + ;; loss. + (block-deferrable-signals)) + ;; We're going down, can't handle interrupts + ;; sanely anymore. GC remains enabled. + (block-deferrable-signals) + ;; We don't want to run interrupts in a dead + ;; thread when we leave WITHOUT-INTERRUPTS. + ;; This potentially causes important + ;; interupts to be lost: SIGINT comes to + ;; mind. + (setq *interrupt-pending* nil) + (handle-thread-exit thread)))))))) (values)))) + ;; If the starting thread is stopped for gc before it signals the + ;; semaphore then we'd be stuck. + (assert (not *gc-inhibit*)) ;; Keep INITIAL-FUNCTION pinned until the child thread is - ;; initialized properly. - (with-pinned-objects (initial-function) - (let ((os-thread - (%create-thread - (get-lisp-obj-address initial-function)))) - (when (zerop os-thread) - (error "Can't create a new thread")) - (wait-on-semaphore setup-sem) - thread)))) + ;; initialized properly. Wrap the whole thing in + ;; WITHOUT-INTERRUPTS because we pass INITIAL-FUNCTION to another + ;; thread. + (without-interrupts + (with-pinned-objects (initial-function) + (let ((os-thread + (%create-thread + (get-lisp-obj-address initial-function)))) + (when (zerop os-thread) + (error "Can't create a new thread")) + (wait-on-semaphore setup-sem) + thread))))) (define-condition join-thread-error (error) ((thread :reader join-thread-error-thread :initarg :thread)) diff --git a/src/runtime/thread.c b/src/runtime/thread.c index 8e6c3c2..5a2fa87 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -258,6 +258,8 @@ new_thread_trampoline(struct thread *th) int result, lock_ret; FSHOW((stderr,"/creating thread %lu\n", thread_self())); + check_deferrables_blocked_or_lose(); + check_gc_signals_unblocked_or_lose(); function = th->no_tls_value_marker; th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG; if(arch_os_thread_init(th)==0) { @@ -268,9 +270,9 @@ new_thread_trampoline(struct thread *th) th->os_thread=thread_self(); protect_control_stack_guard_page(1); /* Since GC can only know about this thread from the all_threads - * list and we're just adding this thread to it there is no danger - * of deadlocking even with SIG_STOP_FOR_GC blocked (which it is - * not). */ + * list and we're just adding this thread to it, there is no + * danger of deadlocking even with SIG_STOP_FOR_GC blocked (which + * it is not). */ lock_ret = pthread_mutex_lock(&all_threads_lock); gc_assert(lock_ret == 0); link_thread(th); @@ -501,23 +503,22 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) { /* The new thread inherits the restrictive signal mask set here, * and enables signals again when it is set up properly. */ - sigset_t newset,oldset; + sigset_t oldset; boolean r=1; int retcode = 0, initcode; FSHOW_SIGNAL((stderr,"/create_os_thread: creating new thread\n")); + /* Blocking deferrable signals is enough, no need to block + * SIG_STOP_FOR_GC because the child process is not linked onto + * all_threads until it's ready. */ + thread_sigmask(SIG_BLOCK, &deferrable_sigset, &oldset); + #ifdef LOCK_CREATE_THREAD retcode = pthread_mutex_lock(&create_thread_lock); gc_assert(retcode == 0); FSHOW_SIGNAL((stderr,"/create_os_thread: got lock\n")); #endif - sigemptyset(&newset); - /* Blocking deferrable signals is enough, no need to block - * SIG_STOP_FOR_GC because the child process is not linked onto - * all_threads until it's ready. */ - sigaddset_deferrable(&newset); - thread_sigmask(SIG_BLOCK, &newset, &oldset); if((initcode = pthread_attr_init(th->os_attr)) || /* call_into_lisp_first_time switches the stack for the initial thread. For the @@ -533,32 +534,33 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) r=0; } - thread_sigmask(SIG_SETMASK,&oldset,0); #ifdef LOCK_CREATE_THREAD retcode = pthread_mutex_unlock(&create_thread_lock); gc_assert(retcode == 0); FSHOW_SIGNAL((stderr,"/create_os_thread: released lock\n")); #endif + thread_sigmask(SIG_SETMASK,&oldset,0); return r; } os_thread_t create_thread(lispobj initial_function) { - struct thread *th; - os_thread_t kid_tid; + struct thread *th, *thread = arch_os_get_current_thread(); + os_thread_t kid_tid = 0; + /* Must defend against async unwinds. */ + if (SymbolValue(INTERRUPTS_ENABLED, thread) != NIL) + lose("create_thread is not safe when interrupts are enabled.\n"); + /* Assuming that a fresh thread struct has no lisp objects in it, * linking it to all_threads can be left to the thread itself * without fear of gc lossage. initial_function violates this * assumption and must stay pinned until the child starts up. */ th = create_thread_struct(initial_function); - if(th==0) return 0; - - if (create_os_thread(th,&kid_tid)) { - return kid_tid; - } else { + if (th && !create_os_thread(th,&kid_tid)) { free_thread_struct(th); - return 0; + kid_tid = 0; } + return kid_tid; } int signal_interrupt_thread(os_thread_t os_thread) diff --git a/version.lisp-expr b/version.lisp-expr index 4ece006..1ef8e95 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.25.38" +"1.0.25.39" -- 1.7.10.4