From dca9c046b59d56ecddf36169f6994119ef8aeace Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Mon, 16 Feb 2009 22:07:55 +0000 Subject: [PATCH] 1.0.25.41: only call pthread_kill with valid thread ids ... else it segfaults (at least on Linux). Fixes sporadic "Unhandled memory fault" running timer, INTERRUPT-THREAD heavy code. And block signals while calling pthread_kill because it is not async signal safe. --- NEWS | 2 ++ src/runtime/interrupt.c | 8 ++++++ src/runtime/thread.c | 72 ++++++++++++++++++++++++++++++++++++++--------- src/runtime/thread.h | 2 ++ version.lisp-expr | 2 +- 5 files changed, 71 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index a1cd3c5..bb49133 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ changes in sbcl-1.0.26 relative to 1.0.25: recursive errors or deadlock. * bug fix: real-time signals are not used anymore, so no more hanging when the system wide real-time signal queue gets full. + * bug fix: INTERRUPT-THREAD on a dying thread could produce memory + fault. * bug fix: finalizers, gc hooks never run in a WITHOUT-INTERRUPTS * bug fix: fix deadlocks related to starting threads diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index 22ea827..ad53a4c 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -1332,6 +1332,14 @@ arrange_return_to_lisp_function(os_context_t *context, lispobj function) #ifdef LISP_FEATURE_SB_THREAD +int +signal_interrupt_thread(os_thread_t os_thread) +{ + /* FSHOW first, in case we are signalling ourselves. */ + FSHOW((stderr,"/signal_interrupt_thread: %lu\n", os_thread)); + return kill_safely(os_thread, SIG_INTERRUPT_THREAD); +} + /* FIXME: this function can go away when all lisp handlers are invoked * via arrange_return_to_lisp_function. */ void diff --git a/src/runtime/thread.c b/src/runtime/thread.c index 5a2fa87..ebdafaa 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -563,20 +563,6 @@ os_thread_t create_thread(lispobj initial_function) { return kid_tid; } -int signal_interrupt_thread(os_thread_t os_thread) -{ - int status = pthread_kill(os_thread, SIG_INTERRUPT_THREAD); - FSHOW_SIGNAL((stderr,"/signal_interrupt_thread: %lu\n", os_thread)); - if (status == 0) { - return 0; - } else if (status == ESRCH) { - return -1; - } else { - lose("cannot send SIG_INTERRUPT_THREAD to thread=%lu: %d, %s\n", - os_thread, status, strerror(status)); - } -} - /* stopping the world is a two-stage process. From this thread we signal * all the others with SIG_STOP_FOR_GC. The handler for this signal does * the usual pseudo-atomic checks (we don't want to stop a thread while @@ -612,6 +598,8 @@ void gc_stop_the_world() if((p!=th) && ((thread_state(p)==STATE_RUNNING))) { FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending thread %lu\n", p->os_thread)); + /* We already hold all_thread_lock, P can become DEAD but + * cannot exit, ergo it's safe to use pthread_kill. */ status=pthread_kill(p->os_thread,SIG_STOP_FOR_GC); if (status==ESRCH) { /* This thread has exited. */ @@ -682,3 +670,59 @@ thread_yield() return 0; #endif } + +/* If the thread id given does not belong to a running thread (it has + * exited or never even existed) pthread_kill _may_ fail with ESRCH, + * but it is also allowed to just segfault, see + * . + * + * Relying on thread ids can easily backfire since ids are recycled + * (NPTL recycles them extremely fast) so a signal can be sent to + * another process if the one it was sent to exited. + * + * We send signals in two places: signal_interrupt_thread sends a + * signal that's harmless if delivered to another thread, but + * SIG_STOP_FOR_GC is fatal. + * + * For these reasons, we must make sure that the thread is still alive + * when the pthread_kill is called and return if the thread is + * exiting. */ +int +kill_safely(os_thread_t os_thread, int signal) +{ +#ifdef LISP_FEATURE_SB_THREAD + sigset_t oldset; + struct thread *thread; + /* pthread_kill is not async signal safe and we don't want to be + * interrupted while holding the lock. */ + thread_sigmask(SIG_BLOCK, &deferrable_sigset, &oldset); + pthread_mutex_lock(&all_threads_lock); + for (thread = all_threads; thread; thread = thread->next) { + if (thread->os_thread == os_thread) { + int status = pthread_kill(os_thread, signal); + if (status) + lose("kill_safely: pthread_kill failed with %d\n", status); + break; + } + } + pthread_mutex_unlock(&all_threads_lock); + thread_sigmask(SIG_SETMASK,&oldset,0); + if (thread) + return 0; + else + return -1; +#else + int status; + if (os_thread != getpid()) + lose("kill_safely: who do you want to kill? %d?\n", os_thread); + status = kill(os_thread, signal); + if (status == 0) { + return 0; + } else if (status == ESRCH) { + return -1; + } else { + lose("cannot send signal %d to process %lu: %d, %s\n", + signal, os_thread, status, strerror(status)); + } +#endif +} diff --git a/src/runtime/thread.h b/src/runtime/thread.h index c123077..306d133 100644 --- a/src/runtime/thread.h +++ b/src/runtime/thread.h @@ -56,6 +56,8 @@ wait_for_thread_state_change(struct thread *thread, lispobj state) #endif +extern int kill_safely(os_thread_t os_thread, int signal); + #define THREAD_SLOT_OFFSET_WORDS(c) \ (offsetof(struct thread,c)/(sizeof (struct thread *))) diff --git a/version.lisp-expr b/version.lisp-expr index c6df9ac..40e6205 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.25.40" +"1.0.25.41" -- 1.7.10.4