1.0.25.39: thread start/stop fixes
authorGabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 22:04:26 +0000 (22:04 +0000)
committerGabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 22:04:26 +0000 (22:04 +0000)
- disable interrupts during create_thread

  ... to protect against signals (async unwinds, reentrancy, ...)
  during malloc, pthread code and to make the pinning of
  INITIAL-FUNCTION effective. Also add checks to
  new_thread_trampoline.

- make-thread: assert gc enabled (to prevent deadlocks)

- block blockables while holding LOCK_CREATE_THREAD lock

- make dying threads safe from interrupts

NEWS
src/code/target-thread.lisp
src/runtime/thread.c
version.lisp-expr

diff --git a/NEWS b/NEWS
index 0226ff5..5f1f129 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ changes in sbcl-1.0.26 relative to 1.0.25:
   * bug fix: real-time signals are not used anymore, so no more
     hanging when the system wide real-time signal queue gets full.
   * bug fix: finalizers, gc hooks never run in a WITHOUT-INTERRUPTS
+  * bug fix: fix deadlocks related to starting threads
 
 changes in sbcl-1.0.25 relative to 1.0.24:
   * incompatible change: SB-INTROSPECT:FUNCTION-ARGLIST is deprecated, to be
index a0cf2e0..58d6ffa 100644 (file)
@@ -638,9 +638,6 @@ on this semaphore, then N of them is woken up."
 #!+sb-thread
 (defun handle-thread-exit (thread)
   (/show0 "HANDLING THREAD EXIT")
-  ;; We're going down, can't handle interrupts sanely anymore. GC
-  ;; remains enabled.
-  (block-deferrable-signals)
   ;; Lisp-side cleanup
   (with-all-threads-lock
     (setf (thread-%alive-p thread) nil)
@@ -818,28 +815,50 @@ around and can be retrieved by JOIN-THREAD."
                          (format nil
                                  "~~@<Terminate this thread (~A)~~@:>"
                                  *current-thread*))
-                      (unwind-protect
-                           (progn
-                             ;; now that most things have a chance to
-                             ;; work properly without messing up other
-                             ;; threads, it's time to enable signals
-                             (sb!unix::unblock-deferrable-signals)
-                             (setf (thread-result thread)
-                                   (cons t
-                                         (multiple-value-list
-                                          (funcall real-function)))))
-                        (handle-thread-exit thread)))))))
+                      (without-interrupts
+                        (unwind-protect
+                             (with-local-interrupts
+                               ;; Now that most things have a chance
+                               ;; to work properly without messing up
+                               ;; other threads, it's time to enable
+                               ;; signals.
+                               (sb!unix::unblock-deferrable-signals)
+                               (setf (thread-result thread)
+                                     (cons t
+                                           (multiple-value-list
+                                            (funcall real-function))))
+                               ;; Try to block deferrables. An
+                               ;; interrupt may unwind it, but for a
+                               ;; normal exit it prevents interrupt
+                               ;; loss.
+                               (block-deferrable-signals))
+                          ;; We're going down, can't handle interrupts
+                          ;; sanely anymore. GC remains enabled.
+                          (block-deferrable-signals)
+                          ;; We don't want to run interrupts in a dead
+                          ;; thread when we leave WITHOUT-INTERRUPTS.
+                          ;; This potentially causes important
+                          ;; interupts to be lost: SIGINT comes to
+                          ;; mind.
+                          (setq *interrupt-pending* nil)
+                          (handle-thread-exit thread))))))))
             (values))))
+    ;; If the starting thread is stopped for gc before it signals the
+    ;; semaphore then we'd be stuck.
+    (assert (not *gc-inhibit*))
     ;; Keep INITIAL-FUNCTION pinned until the child thread is
-    ;; initialized properly.
-    (with-pinned-objects (initial-function)
-      (let ((os-thread
-             (%create-thread
-              (get-lisp-obj-address initial-function))))
-        (when (zerop os-thread)
-          (error "Can't create a new thread"))
-        (wait-on-semaphore setup-sem)
-        thread))))
+    ;; initialized properly. Wrap the whole thing in
+    ;; WITHOUT-INTERRUPTS because we pass INITIAL-FUNCTION to another
+    ;; thread.
+    (without-interrupts
+      (with-pinned-objects (initial-function)
+        (let ((os-thread
+               (%create-thread
+                (get-lisp-obj-address initial-function))))
+          (when (zerop os-thread)
+            (error "Can't create a new thread"))
+          (wait-on-semaphore setup-sem)
+          thread)))))
 
 (define-condition join-thread-error (error)
   ((thread :reader join-thread-error-thread :initarg :thread))
index 8e6c3c2..5a2fa87 100644 (file)
@@ -258,6 +258,8 @@ new_thread_trampoline(struct thread *th)
     int result, lock_ret;
 
     FSHOW((stderr,"/creating thread %lu\n", thread_self()));
+    check_deferrables_blocked_or_lose();
+    check_gc_signals_unblocked_or_lose();
     function = th->no_tls_value_marker;
     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
     if(arch_os_thread_init(th)==0) {
@@ -268,9 +270,9 @@ new_thread_trampoline(struct thread *th)
     th->os_thread=thread_self();
     protect_control_stack_guard_page(1);
     /* Since GC can only know about this thread from the all_threads
-     * list and we're just adding this thread to it there is no danger
-     * of deadlocking even with SIG_STOP_FOR_GC blocked (which it is
-     * not). */
+     * list and we're just adding this thread to it, there is no
+     * danger of deadlocking even with SIG_STOP_FOR_GC blocked (which
+     * it is not). */
     lock_ret = pthread_mutex_lock(&all_threads_lock);
     gc_assert(lock_ret == 0);
     link_thread(th);
@@ -501,23 +503,22 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid)
 {
     /* The new thread inherits the restrictive signal mask set here,
      * and enables signals again when it is set up properly. */
-    sigset_t newset,oldset;
+    sigset_t oldset;
     boolean r=1;
     int retcode = 0, initcode;
 
     FSHOW_SIGNAL((stderr,"/create_os_thread: creating new thread\n"));
 
+    /* Blocking deferrable signals is enough, no need to block
+     * SIG_STOP_FOR_GC because the child process is not linked onto
+     * all_threads until it's ready. */
+    thread_sigmask(SIG_BLOCK, &deferrable_sigset, &oldset);
+
 #ifdef LOCK_CREATE_THREAD
     retcode = pthread_mutex_lock(&create_thread_lock);
     gc_assert(retcode == 0);
     FSHOW_SIGNAL((stderr,"/create_os_thread: got lock\n"));
 #endif
-    sigemptyset(&newset);
-    /* Blocking deferrable signals is enough, no need to block
-     * SIG_STOP_FOR_GC because the child process is not linked onto
-     * all_threads until it's ready. */
-    sigaddset_deferrable(&newset);
-    thread_sigmask(SIG_BLOCK, &newset, &oldset);
 
     if((initcode = pthread_attr_init(th->os_attr)) ||
        /* call_into_lisp_first_time switches the stack for the initial thread. For the
@@ -533,32 +534,33 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid)
         r=0;
     }
 
-    thread_sigmask(SIG_SETMASK,&oldset,0);
 #ifdef LOCK_CREATE_THREAD
     retcode = pthread_mutex_unlock(&create_thread_lock);
     gc_assert(retcode == 0);
     FSHOW_SIGNAL((stderr,"/create_os_thread: released lock\n"));
 #endif
+    thread_sigmask(SIG_SETMASK,&oldset,0);
     return r;
 }
 
 os_thread_t create_thread(lispobj initial_function) {
-    struct thread *th;
-    os_thread_t kid_tid;
+    struct thread *th, *thread = arch_os_get_current_thread();
+    os_thread_t kid_tid = 0;
 
+    /* Must defend against async unwinds. */
+    if (SymbolValue(INTERRUPTS_ENABLED, thread) != NIL)
+        lose("create_thread is not safe when interrupts are enabled.\n");
+    
     /* Assuming that a fresh thread struct has no lisp objects in it,
      * linking it to all_threads can be left to the thread itself
      * without fear of gc lossage. initial_function violates this
      * assumption and must stay pinned until the child starts up. */
     th = create_thread_struct(initial_function);
-    if(th==0) return 0;
-
-    if (create_os_thread(th,&kid_tid)) {
-        return kid_tid;
-    } else {
+    if (th && !create_os_thread(th,&kid_tid)) {
         free_thread_struct(th);
-        return 0;
+        kid_tid = 0;
     }
+    return kid_tid;
 }
 
 int signal_interrupt_thread(os_thread_t os_thread)
index 4ece006..1ef8e95 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.25.38"
+"1.0.25.39"