From ce027af4a3f623440d33e7be119fe46caaecee65 Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Mon, 16 Feb 2009 21:31:32 +0000 Subject: [PATCH] 1.0.25.18: it's only SHOW - fix compilation with QSHOW - SHOW prints thread id on threaded builds - SHOWing os_threads - do not print pthread_self() that's the job of SHOW - always print thread ids with %lu - states with %x - add more SHOW to ease debugging - gc_stop_the_world: don't flood with FSHOW_SIGNAL when waiting for another thread to change states - signal safe SHOW (if QSHOW_SAFE is defined) --- doc/internals/signals.texinfo | 14 ++++++++++++ src/runtime/gencgc.c | 2 +- src/runtime/interrupt.c | 35 ++++++++++++++--------------- src/runtime/interrupt.h | 3 +++ src/runtime/runtime.h | 49 +++++++++++++++++++++++++++++++++++++++-- src/runtime/thread.c | 47 +++++++++++++++++++++++---------------- src/runtime/thread.h | 14 ------------ version.lisp-expr | 2 +- 8 files changed, 111 insertions(+), 55 deletions(-) diff --git a/doc/internals/signals.texinfo b/doc/internals/signals.texinfo index 1f27d3f..dc2b3a6 100644 --- a/doc/internals/signals.texinfo +++ b/doc/internals/signals.texinfo @@ -165,3 +165,17 @@ For the reasons above, calling user code, i.e. functions passed in, or in other words code that one cannot reason about, from non-reentrant code (holding locks), @code{WITHOUT-INTERRUPTS}, @code{WITHOUT-GCING} is dangerous and best avoided. + +@section Debugging + +It is not easy to debug signal problems. The best bet probably is to +enable @code{QSHOW} and @code{QSHOW_SIGNALS} in runtime.h and once +SBCL runs into problems attach gdb. A simple @code{thread apply all +ba} is already tremendously useful. Another possibility is to send a +SIGABRT to SBCL to provoke landing in LDB, if it's compiled with it +and it has not yet done so on its own. 
+ +Note that fprintf used by QSHOW is not reentrant and at least on x86 +Linux it is known to cause deadlocks, so place SHOW and co carefully, +ideally in places where blockable signals are blocked. Use +@code{QSHOW_SAFE} if you like. diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index ab6aa6b..abd4918 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -4087,7 +4087,7 @@ garbage_collect_generation(generation_index_t generation, int raise) fprintf(stderr, "/non-movable pages due to conservative pointers = %d (%d bytes)\n", num_dont_move_pages, - npage_bytes(num_dont_move_pages); + npage_bytes(num_dont_move_pages)); } #endif diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index 5fd173e..95f50be 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -117,8 +117,8 @@ sigaddset_blockable(sigset_t *s) } /* initialized in interrupt_init */ -static sigset_t deferrable_sigset; -static sigset_t blockable_sigset; +sigset_t deferrable_sigset; +sigset_t blockable_sigset; #endif void @@ -639,9 +639,8 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, store_signal_data_for_later(data,handler,signal,info,context); SetSymbolValue(INTERRUPT_PENDING, T,thread); FSHOW_SIGNAL((stderr, - "/maybe_defer_handler(%x,%d),thread=%lu: deferred\n", - (unsigned int)handler,signal, - (unsigned long)thread->os_thread)); + "/maybe_defer_handler(%x,%d): deferred\n", + (unsigned int)handler,signal)); return 1; } /* a slightly confusing test. 
arch_pseudo_atomic_atomic() doesn't @@ -651,15 +650,13 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, store_signal_data_for_later(data,handler,signal,info,context); arch_set_pseudo_atomic_interrupted(context); FSHOW_SIGNAL((stderr, - "/maybe_defer_handler(%x,%d),thread=%lu: deferred(PA)\n", - (unsigned int)handler,signal, - (unsigned long)thread->os_thread)); + "/maybe_defer_handler(%x,%d): deferred(PA)\n", + (unsigned int)handler,signal)); return 1; } FSHOW_SIGNAL((stderr, - "/maybe_defer_handler(%x,%d),thread=%lu: not deferred\n", - (unsigned int)handler,signal, - (unsigned long)thread->os_thread)); + "/maybe_defer_handler(%x,%d): not deferred\n", + (unsigned int)handler,signal)); return 0; } @@ -748,15 +745,12 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) if (arch_pseudo_atomic_atomic(context)) { SetSymbolValue(STOP_FOR_GC_PENDING,T,thread); arch_set_pseudo_atomic_interrupted(context); - FSHOW_SIGNAL((stderr,"thread=%lu sig_stop_for_gc deferred (PA)\n", - thread->os_thread)); + FSHOW_SIGNAL((stderr, "sig_stop_for_gc deferred (PA)\n")); return; } else if (SymbolValue(GC_INHIBIT,thread) != NIL) { SetSymbolValue(STOP_FOR_GC_PENDING,T,thread); - FSHOW_SIGNAL((stderr, - "thread=%lu sig_stop_for_gc deferred (*GC-INHIBIT*)\n", - thread->os_thread)); + FSHOW_SIGNAL((stderr, "sig_stop_for_gc deferred (*GC-INHIBIT*)\n")); return; } @@ -779,7 +773,7 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) } thread->state=STATE_SUSPENDED; - FSHOW_SIGNAL((stderr,"thread=%lu suspended\n",thread->os_thread)); + FSHOW_SIGNAL((stderr,"suspended\n")); sigemptyset(&ss); #if defined(SIG_RESUME_FROM_GC) @@ -800,7 +794,7 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) while (sigwaitinfo(&ss,0) != SIG_STOP_FOR_GC); #endif - FSHOW_SIGNAL((stderr,"thread=%lu resumed\n",thread->os_thread)); + FSHOW_SIGNAL((stderr,"resumed\n")); if(thread->state!=STATE_RUNNING) { lose("sig_stop_for_gc_handler: 
wrong thread state on wakeup: %ld\n", fixnum_value(thread->state)); @@ -1002,6 +996,8 @@ arrange_return_to_lisp_function(os_context_t *context, lispobj function) *os_context_register_addr(context,reg_CODE) = (os_context_register_t)(fun + FUN_POINTER_LOWTAG); #endif + FSHOW((stderr, "/arranged return to Lisp function (0x%lx)\n", + (long)function)); } #ifdef LISP_FEATURE_SB_THREAD @@ -1013,6 +1009,9 @@ interrupt_thread_handler(int num, siginfo_t *info, void *v_context) { os_context_t *context = (os_context_t*)arch_os_get_context(&v_context); + FSHOW_SIGNAL((stderr,"/interrupt_thread_handler\n")); + check_blockables_blocked_or_lose(); + /* let the handler enable interrupts again when it sees fit */ sigaddset_deferrable(os_context_sigmask_addr(context)); arrange_return_to_lisp_function(context, diff --git a/src/runtime/interrupt.h b/src/runtime/interrupt.h index fb115e5..09a63f2 100644 --- a/src/runtime/interrupt.h +++ b/src/runtime/interrupt.h @@ -26,6 +26,9 @@ /* FIXME: do not rely on NSIG being a multiple of 8 */ #define REAL_SIGSET_SIZE_BYTES ((NSIG/8)) +extern sigset_t deferrable_sigset; +extern sigset_t blockable_sigset; + extern void check_blockables_blocked_or_lose(void); extern void check_gc_signals_unblocked_or_lose(void); extern void unblock_gc_signals(void); diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 30bccd7..9f38dab 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -15,14 +15,59 @@ #ifndef _SBCL_RUNTIME_H_ #define _SBCL_RUNTIME_H_ -/*#define QSHOW */ /* Enable low-level debugging output? 
*/ +#if defined(LISP_FEATURE_SB_THREAD) +#define thread_self pthread_self +#define thread_kill pthread_kill +#define thread_sigmask pthread_sigmask +#define thread_mutex_lock(l) pthread_mutex_lock(l) +#define thread_mutex_unlock(l) pthread_mutex_unlock(l) +#else +#define thread_self getpid +#define thread_kill kill +#define thread_sigmask sigprocmask +#define thread_mutex_lock(l) 0 +#define thread_mutex_unlock(l) 0 +#endif + +/* #define QSHOW */ /* Enable low-level debugging output? */ +/* #define QSHOW_SAFE */ /* Enable blocking interrupts for each SHOW. */ #ifdef QSHOW -#define FSHOW(args) fprintf args + +#ifdef QSHOW_SAFE + +#include <signal.h> +extern sigset_t blockable_sigset; + +#define QSHOW_BLOCK \ + sigset_t oldset; \ + thread_sigmask(SIG_BLOCK, &blockable_sigset, &oldset); +#define QSHOW_UNBLOCK thread_sigmask(SIG_SETMASK,&oldset,0); +#else +#define QSHOW_BLOCK +#define QSHOW_UNBLOCK +#endif + +#ifdef LISP_FEATURE_SB_THREAD +#define QSHOW_PREFIX fprintf(stderr, "%lu ", pthread_self()); +#else +#define QSHOW_PREFIX +#endif + +#define FSHOW(args) \ + do { \ + QSHOW_BLOCK \ + QSHOW_PREFIX \ + fprintf args; \ + QSHOW_UNBLOCK \ + } while (0) #define SHOW(string) FSHOW((stderr, "/%s\n", string)) + #else + #define FSHOW(args) #define SHOW(string) + #endif /* Enable extra-verbose low-level debugging output for signals? 
(You diff --git a/src/runtime/thread.c b/src/runtime/thread.c index 63b39d7..69a32b3 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -507,8 +507,6 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) (kid_tid,th->os_attr,(void *(*)(void *))new_thread_trampoline,th))) { FSHOW_SIGNAL((stderr, "init = %d\n", initcode)); FSHOW_SIGNAL((stderr, printf("pthread_create returned %d, errno %d\n", retcode, errno))); - FSHOW_SIGNAL((stderr, "wanted stack size %d, min stack size %d\n", - cstack_size, PTHREAD_STACK_MIN)); if(retcode < 0) { perror("create_os_thread"); } @@ -568,6 +566,7 @@ kill_thread_safely(os_thread_t os_thread, int signo) int signal_interrupt_thread(os_thread_t os_thread) { int status = kill_thread_safely(os_thread, SIG_INTERRUPT_THREAD); + FSHOW_SIGNAL((stderr,"/signal_interrupt_thread: %lu\n", os_thread)); if (status == 0) { return 0; } else if (status == ESRCH) { @@ -594,28 +593,25 @@ void gc_stop_the_world() #ifdef LOCK_CREATE_THREAD /* KLUDGE: Stopping the thread during pthread_create() causes deadlock * on FreeBSD. */ - FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on create_thread_lock, thread=%lu\n", - th->os_thread)); + FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on create_thread_lock\n")); lock_ret = pthread_mutex_lock(&create_thread_lock); gc_assert(lock_ret == 0); - FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got create_thread_lock, thread=%lu\n", - th->os_thread)); + FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got create_thread_lock\n")); #endif - FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on lock, thread=%lu\n", - th->os_thread)); + FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on lock\n")); /* keep threads from starting while the world is stopped. 
*/ lock_ret = pthread_mutex_lock(&all_threads_lock); \ gc_assert(lock_ret == 0); - FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got lock, thread=%lu\n", - th->os_thread)); + FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got lock\n")); /* stop all other threads by sending them SIG_STOP_FOR_GC */ for(p=all_threads; p; p=p->next) { gc_assert(p->os_thread != 0); - FSHOW_SIGNAL((stderr,"/gc_stop_the_world: p->state: %x\n", p->state)); + FSHOW_SIGNAL((stderr,"/gc_stop_the_world: thread=%lu, state=%x\n", + p->os_thread, p->state)); if((p!=th) && ((p->state==STATE_RUNNING))) { - FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending %x, os_thread %x\n", - p, p->os_thread)); + FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending thread %lu\n", + p->os_thread)); status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC); if (status==ESRCH) { /* This thread has exited. */ @@ -628,12 +624,25 @@ void gc_stop_the_world() } FSHOW_SIGNAL((stderr,"/gc_stop_the_world:signals sent\n")); /* wait for the running threads to stop or finish */ - for(p=all_threads;p;) { - FSHOW_SIGNAL((stderr,"/gc_stop_the_world: th: %p, p: %p\n", th, p)); - if((p!=th) && (p->state==STATE_RUNNING)) { - sched_yield(); - } else { - p=p->next; + { +#if QSHOW_SIGNALS + struct thread *prev = 0; +#endif + for(p=all_threads;p;) { +#if QSHOW_SIGNALS + if ((p!=th)&&(p!=prev)&&(p->state==STATE_RUNNING)) { + FSHOW_SIGNAL + ((stderr, + "/gc_stop_the_world: waiting for thread=%lu: state=%x\n", + p->os_thread, p->state)); + prev=p; + } +#endif + if((p!=th) && (p->state==STATE_RUNNING)) { + sched_yield(); + } else { + p=p->next; + } } } FSHOW_SIGNAL((stderr,"/gc_stop_the_world:end\n")); diff --git a/src/runtime/thread.h b/src/runtime/thread.h index c252afa..cf13ac9 100644 --- a/src/runtime/thread.h +++ b/src/runtime/thread.h @@ -186,20 +186,6 @@ static inline struct thread *arch_os_get_current_thread(void) #define EXCEPTION_PORT_TO_THREAD_STRUCT(th) ((struct thread *) th) #endif -#if defined(LISP_FEATURE_SB_THREAD) -#define 
thread_self pthread_self -#define thread_kill pthread_kill -#define thread_sigmask pthread_sigmask -#define thread_mutex_lock(l) pthread_mutex_lock(l) -#define thread_mutex_unlock(l) pthread_mutex_unlock(l) -#else -#define thread_self getpid -#define thread_kill kill -#define thread_sigmask sigprocmask -#define thread_mutex_lock(l) 0 -#define thread_mutex_unlock(l) 0 -#endif - extern void create_initial_thread(lispobj); extern int kill_thread_safely(os_thread_t os_thread, int signo); diff --git a/version.lisp-expr b/version.lisp-expr index 1de3dc6..f588699 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.25.17" +"1.0.25.18" -- 1.7.10.4