1.0.25.29: thread state visibility and synchronization
authorGabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 21:44:11 +0000 (21:44 +0000)
committerGabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 21:44:11 +0000 (21:44 +0000)
C does not guarantee that changes made to a volatile variable in one
thread are visibile to other threads. Use locking primitives (that
have memory barrier semantics) for that purpose.

Thus, all_threads need not be volatile: it's always accessed while
holding all_threads_lock. Thread state does not need volatile either,
as signal a handlers don't change it (save for sig_stop_for_gc_handler
but that sets it restores its value). But visibility issues can arise
and potentially deadlock in stop_the_world, so the thread state is
given a lock. And to reduce busy looping while waiting for the state
to change to STATE_SUSPENDED a condition variable is added as well.

Also convert sig_stop_for_gc_handler to use
wait_for_thread_state_change which frees up SIG_RESUME_FROM_GC. I
think this also guarantees that the changes made by the gc are visible
in other threads on non x86 platforms (on x86 it's already the case).

With these changes threads.impure.lisp runs 10% a faster in real time.

15 files changed:
NEWS
src/compiler/generic/objdef.lisp
src/runtime/bsd-os.c
src/runtime/bsd-os.h
src/runtime/darwin-os.h
src/runtime/interrupt.c
src/runtime/interrupt.h
src/runtime/linux-os.c
src/runtime/linux-os.h
src/runtime/sunos-os.c
src/runtime/sunos-os.h
src/runtime/thread.c
src/runtime/thread.h
src/runtime/win32-os.h
version.lisp-expr

diff --git a/NEWS b/NEWS
index a35e241..e175a0f 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@ changes in sbcl-1.0.26 relative to 1.0.25:
     memory, stack, alien stack, binding stack, encountering a memory
     fault, etc. In the absence of --lose-on-corruption a warning is
     printed to stderr.
+  * optimization: slightly faster gc on multithreaded builds
 
 changes in sbcl-1.0.25 relative to 1.0.24:
   * incompatible change: SB-INTROSPECT:FUNCTION-ARGLIST is deprecated, to be
index 059cc32..e2d8396 100644 (file)
   ;; tls[0] = NO_TLS_VALUE_MARKER_WIDETAG because a the tls index slot
   ;; of a symbol is initialized to zero
   (no-tls-value-marker)
-  (os-thread :c-type "volatile os_thread_t")
+  (os-thread :c-type "os_thread_t")
   ;; This is the original address at which the memory was allocated,
   ;; which may have different alignment then what we prefer to use.
-  ;; Kept here so that when the thread dies we can releast the whole
+  ;; Kept here so that when the thread dies we can release the whole
   ;; memory we reserved.
   (os-address :c-type "void *" :length #!+alpha 2 #!-alpha 1)
   #!+sb-thread
   (os-attr :c-type "pthread_attr_t *" :length #!+alpha 2 #!-alpha 1)
+  #!+sb-thread
+  (state-lock :c-type "pthread_mutex_t *" :length #!+alpha 2 #!-alpha 1)
+  #!+sb-thread
+  (state-cond :c-type "pthread_cond_t *" :length #!+alpha 2 #!-alpha 1)
   (binding-stack-start :c-type "lispobj *" :length #!+alpha 2 #!-alpha 1)
   (binding-stack-pointer :c-type "lispobj *" :length #!+alpha 2 #!-alpha 1)
   (control-stack-start :c-type "lispobj *" :length #!+alpha 2 #!-alpha 1)
   (prev :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1)
   (next :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1)
   ;; starting, running, suspended, dead
-  (state :c-type "volatile lispobj")
+  (state :c-type "lispobj")
   (tls-cookie)                          ;  on x86, the LDT index
   #!+(or x86 x86-64) (pseudo-atomic-bits)
   (interrupt-data :c-type "struct interrupt_data *"
index c5e3e4e..bf71ab9 100644 (file)
@@ -257,8 +257,6 @@ os_install_interrupt_handlers(void)
                                                  interrupt_thread_handler);
     undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
                                                  sig_stop_for_gc_handler);
-    undoably_install_low_level_interrupt_handler(SIG_RESUME_FROM_GC,
-                                                 sig_resume_from_gc_handler);
 #endif
     SHOW("leaving os_install_interrupt_handlers()");
 }
index 23ab40f..eb6037b 100644 (file)
@@ -61,7 +61,6 @@ extern int sig_memory_fault;
 
 #define SIG_INTERRUPT_THREAD (SIGINFO)
 #define SIG_STOP_FOR_GC (SIGUSR1)
-#define SIG_RESUME_FROM_GC (SIGUSR2)
 
 #elif defined __OpenBSD__
 
index f0923d7..86d3e0e 100644 (file)
@@ -35,6 +35,5 @@ typedef ucontext_t os_context_t;
 
 #define SIG_INTERRUPT_THREAD (SIGINFO)
 #define SIG_STOP_FOR_GC (SIGUSR1)
-#define SIG_RESUME_FROM_GC (SIGUSR2)
 
 #endif /* _DARWIN_OS_H */
index cdd7b24..2770d4d 100644 (file)
@@ -115,7 +115,6 @@ sigaddset_blockable(sigset_t *sigset)
 {
     sigaddset_deferrable(sigset);
 #ifdef LISP_FEATURE_SB_THREAD
-    sigaddset(sigset,SIG_RESUME_FROM_GC);
     sigaddset(sigset,SIG_STOP_FOR_GC);
 #endif
 }
@@ -860,40 +859,25 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context)
     SetSymbolValue(GC_PENDING,NIL,thread);
     SetSymbolValue(STOP_FOR_GC_PENDING,NIL,thread);
 
-    if(thread->state!=STATE_RUNNING) {
+    if(thread_state(thread)!=STATE_RUNNING) {
         lose("sig_stop_for_gc_handler: wrong thread state: %ld\n",
              fixnum_value(thread->state));
     }
 
-    thread->state=STATE_SUSPENDED;
+    set_thread_state(thread,STATE_SUSPENDED);
     FSHOW_SIGNAL((stderr,"suspended\n"));
 
-    sigemptyset(&ss);
-    sigaddset(&ss,SIG_RESUME_FROM_GC);
-
-    /* It is possible to get SIGCONT (and probably other non-blockable
-     * signals) here. */
-    {
-        int sigret;
-        do { sigwait(&ss, &sigret); }
-        while (sigret != SIG_RESUME_FROM_GC);
-    }
-
+    wait_for_thread_state_change(thread, STATE_SUSPENDED);
     FSHOW_SIGNAL((stderr,"resumed\n"));
-    if(thread->state!=STATE_RUNNING) {
+
+    if(thread_state(thread)!=STATE_RUNNING) {
         lose("sig_stop_for_gc_handler: wrong thread state on wakeup: %ld\n",
-             fixnum_value(thread->state));
+             fixnum_value(thread_state(thread)));
     }
 
     undo_fake_foreign_function_call(context);
 }
 
-void
-sig_resume_from_gc_handler(int signal, siginfo_t *info, void *void_context)
-{
-    lose("SIG_RESUME_FROM_GC handler called.");
-}
-
 #endif
 
 void
index 36b9720..210579b 100644 (file)
@@ -103,7 +103,6 @@ extern void do_pending_interrupt(void);
 #ifdef LISP_FEATURE_SB_THREAD
 extern void interrupt_thread_handler(int, siginfo_t*, void*);
 extern void sig_stop_for_gc_handler(int, siginfo_t*, void*);
-extern void sig_resume_from_gc_handler(int, siginfo_t*, void*);
 #endif
 typedef void (*interrupt_handler_t)(int, siginfo_t *, void *);
 extern void undoably_install_low_level_interrupt_handler (
index 8bd86f1..4a3f994 100644 (file)
@@ -422,8 +422,6 @@ os_install_interrupt_handlers(void)
                                                  interrupt_thread_handler);
     undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
                                                  sig_stop_for_gc_handler);
-    undoably_install_low_level_interrupt_handler(SIG_RESUME_FROM_GC,
-                                                 sig_resume_from_gc_handler);
 #endif
 }
 
index 8b74848..02851a3 100644 (file)
@@ -41,4 +41,3 @@ typedef int os_vm_prot_t;
 
 #define SIG_INTERRUPT_THREAD (SIGRTMIN)
 #define SIG_STOP_FOR_GC (SIGUSR1)
-#define SIG_RESUME_FROM_GC (SIGUSR2)
index bf8e4ec..8d8bb28 100644 (file)
@@ -238,8 +238,6 @@ os_install_interrupt_handlers()
                                                  interrupt_thread_handler);
     undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
                                                  sig_stop_for_gc_handler);
-    undoably_install_low_level_interrupt_handler(SIG_RESUME_FROM_GC,
-                                                 sig_resume_from_gc_handler);
 #endif
 }
 
index 6720c31..b8560c7 100644 (file)
@@ -34,7 +34,6 @@ typedef int os_vm_prot_t;
 
 #define SIG_INTERRUPT_THREAD (SIGRTMIN)
 #define SIG_STOP_FOR_GC (SIGUSR1)
-#define SIG_RESUME_FROM_GC (SIGUSR2)
 
 /* Yaargh?! */
 typedef int os_context_register_t ;
index ae2f990..24daaae 100644 (file)
@@ -83,7 +83,7 @@ static struct thread_post_mortem * volatile pending_thread_post_mortem = 0;
 #endif
 
 int dynamic_values_bytes=TLS_SIZE*sizeof(lispobj);  /* same for all threads */
-struct thread * volatile all_threads;
+struct thread *all_threads;
 extern struct interrupt_data * global_interrupt_data;
 
 #ifdef LISP_FEATURE_SB_THREAD
@@ -145,8 +145,17 @@ initial_thread_trampoline(struct thread *th)
 #endif
 }
 
+#ifdef LISP_FEATURE_SB_THREAD
+#define THREAD_STATE_LOCK_SIZE \
+    (sizeof(pthread_mutex_t))+(sizeof(pthread_cond_t))
+#else
+#define THREAD_STATE_LOCK_SIZE 0
+#endif
+
 #define THREAD_STRUCT_SIZE (thread_control_stack_size + BINDING_STACK_SIZE + \
-                            ALIEN_STACK_SIZE + dynamic_values_bytes +        \
+                            ALIEN_STACK_SIZE +                               \
+                            THREAD_STATE_LOCK_SIZE +                         \
+                            dynamic_values_bytes +                           \
                             32 * SIGSTKSZ +                                  \
                             THREAD_ALIGNMENT_BYTES)
 
@@ -272,7 +281,7 @@ new_thread_trampoline(struct thread *th)
 
     /* Block GC */
     block_blockable_signals();
-    th->state=STATE_DEAD;
+    set_thread_state(th, STATE_DEAD);
 
     /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
      * thread, but since we are already dead it won't wait long. */
@@ -285,6 +294,9 @@ new_thread_trampoline(struct thread *th)
     gc_assert(lock_ret == 0);
 
     if(th->tls_cookie>=0) arch_os_thread_cleanup(th);
+    pthread_mutex_destroy(th->state_lock);
+    pthread_cond_destroy(th->state_cond);
+
     os_invalidate((os_vm_address_t)th->interrupt_data,
                   (sizeof (struct interrupt_data)));
 
@@ -351,7 +363,8 @@ create_thread_struct(lispobj initial_function) {
         (aligned_spaces+
          thread_control_stack_size+
          BINDING_STACK_SIZE+
-         ALIEN_STACK_SIZE);
+         ALIEN_STACK_SIZE +
+         THREAD_STATE_LOCK_SIZE);
 
 #ifdef LISP_FEATURE_SB_THREAD
     for(i = 0; i < (dynamic_values_bytes / sizeof(lispobj)); i++)
@@ -395,6 +408,12 @@ create_thread_struct(lispobj initial_function) {
     th->os_thread=0;
 #ifdef LISP_FEATURE_SB_THREAD
     th->os_attr=malloc(sizeof(pthread_attr_t));
+    th->state_lock=(pthread_mutex_t *)((void *)th->alien_stack_start +
+                                       ALIEN_STACK_SIZE);
+    pthread_mutex_init(th->state_lock, NULL);
+    th->state_cond=(pthread_cond_t *)((void *)th->state_lock +
+                                      (sizeof(pthread_mutex_t)));
+    pthread_cond_init(th->state_cond, NULL);
 #endif
     th->state=STATE_RUNNING;
 #ifdef LISP_FEATURE_STACK_GROWS_DOWNWARD_NOT_UPWARD
@@ -608,14 +627,14 @@ void gc_stop_the_world()
     for(p=all_threads; p; p=p->next) {
         gc_assert(p->os_thread != 0);
         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: thread=%lu, state=%x\n",
-                      p->os_thread, p->state));
-        if((p!=th) && ((p->state==STATE_RUNNING))) {
+                      p->os_thread, thread_state(p)));
+        if((p!=th) && ((thread_state(p)==STATE_RUNNING))) {
             FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending thread %lu\n",
                           p->os_thread));
-            status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
+            status=pthread_kill(p->os_thread,SIG_STOP_FOR_GC);
             if (status==ESRCH) {
                 /* This thread has exited. */
-                gc_assert(p->state==STATE_DEAD);
+                gc_assert(thread_state(p)==STATE_DEAD);
             } else if (status) {
                 lose("cannot send suspend thread=%lu: %d, %s\n",
                      p->os_thread,status,strerror(status));
@@ -623,26 +642,15 @@ void gc_stop_the_world()
         }
     }
     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:signals sent\n"));
-    /* wait for the running threads to stop or finish */
-    {
-#if QSHOW_SIGNALS
-        struct thread *prev = 0;
-#endif
-        for(p=all_threads;p;) {
-#if QSHOW_SIGNALS
-            if ((p!=th)&&(p!=prev)&&(p->state==STATE_RUNNING)) {
-                FSHOW_SIGNAL
-                    ((stderr,
-                      "/gc_stop_the_world: waiting for thread=%lu: state=%x\n",
-                      p->os_thread, p->state));
-                prev=p;
-            }
-#endif
-            if((p!=th) && (p->state==STATE_RUNNING)) {
-                sched_yield();
-            } else {
-                p=p->next;
-            }
+    for(p=all_threads;p;p=p->next) {
+        if (p!=th) {
+            FSHOW_SIGNAL
+                ((stderr,
+                  "/gc_stop_the_world: waiting for thread=%lu: state=%x\n",
+                  p->os_thread, thread_state(p)));
+            wait_for_thread_state_change(p, STATE_RUNNING);
+            if (p->state == STATE_RUNNING)
+                lose("/gc_stop_the_world: unexpected state");
         }
     }
     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:end\n"));
@@ -651,7 +659,7 @@ void gc_stop_the_world()
 void gc_start_the_world()
 {
     struct thread *p,*th=arch_os_get_current_thread();
-    int status, lock_ret;
+    int lock_ret;
     /* if a resumed thread creates a new thread before we're done with
      * this loop, the new thread will get consed on the front of
      * all_threads, but it won't have been stopped so won't need
@@ -659,19 +667,16 @@ void gc_start_the_world()
     FSHOW_SIGNAL((stderr,"/gc_start_the_world:begin\n"));
     for(p=all_threads;p;p=p->next) {
         gc_assert(p->os_thread!=0);
-        if((p!=th) && (p->state!=STATE_DEAD)) {
-            if(p->state!=STATE_SUSPENDED) {
-                lose("gc_start_the_world: wrong thread state is %d\n",
-                     fixnum_value(p->state));
-            }
-            FSHOW_SIGNAL((stderr, "/gc_start_the_world: resuming %lu\n",
-                          p->os_thread));
-            p->state=STATE_RUNNING;
-
-            status=kill_thread_safely(p->os_thread,SIG_RESUME_FROM_GC);
-            if (status) {
-                lose("cannot resume thread=%lu: %d, %s\n",
-                     p->os_thread,status,strerror(status));
+        if (p!=th) {
+            lispobj state = thread_state(p);
+            if (state != STATE_DEAD) {
+                if(state != STATE_SUSPENDED) {
+                    lose("gc_start_the_world: wrong thread state is %d\n",
+                         fixnum_value(state));
+                }
+                FSHOW_SIGNAL((stderr, "/gc_start_the_world: resuming %lu\n",
+                              p->os_thread));
+                set_thread_state(p, STATE_RUNNING);
             }
         }
     }
index 074edec..f8ec4e7 100644 (file)
@@ -23,6 +23,39 @@ struct alloc_region { };
 #define STATE_SUSPENDED (make_fixnum(2))
 #define STATE_DEAD (make_fixnum(3))
 
+#ifdef LISP_FEATURE_SB_THREAD
+
+/* Only access thread state with blockables blocked. */
+static inline lispobj
+thread_state(struct thread *thread)
+{
+    lispobj state;
+    pthread_mutex_lock(thread->state_lock);
+    state = thread->state;
+    pthread_mutex_unlock(thread->state_lock);
+    return state;
+}
+
+static inline void
+set_thread_state(struct thread *thread, lispobj state)
+{
+    pthread_mutex_lock(thread->state_lock);
+    thread->state = state;
+    pthread_cond_broadcast(thread->state_cond);
+    pthread_mutex_unlock(thread->state_lock);
+}
+
+static inline void
+wait_for_thread_state_change(struct thread *thread, lispobj state)
+{
+    pthread_mutex_lock(thread->state_lock);
+    while (thread->state == state)
+        pthread_cond_wait(thread->state_cond, thread->state_lock);
+    pthread_mutex_unlock(thread->state_lock);
+}
+
+#endif
+
 #define THREAD_SLOT_OFFSET_WORDS(c) \
  (offsetof(struct thread,c)/(sizeof (struct thread *)))
 
@@ -31,7 +64,7 @@ union per_thread_data {
     lispobj dynamic_values[1];  /* actually more like 4000 or so */
 };
 
-extern struct thread * volatile all_threads;
+extern struct thread *all_threads;
 extern int dynamic_values_bytes;
 
 #if defined(LISP_FEATURE_DARWIN)
index f08a784..1526e8d 100644 (file)
@@ -37,7 +37,6 @@ typedef void *siginfo_t;
 
 #define SIG_INTERRUPT_THREAD (SIGRTMIN)
 #define SIG_STOP_FOR_GC (SIGRTMIN+1)
-#define SIG_RESUME_FROM_GC (SIGRTMIN+4)
 #define SIG_DEQUEUE (SIGRTMIN+2)
 #define SIG_THREAD_EXIT (SIGRTMIN+3)
 
index 575dff3..0f7cc46 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.25.28"
+"1.0.25.29"