From 4bf2de6a2adff75459cf218c8bff67f6cdb91211 Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Tue, 17 Mar 2009 14:05:45 +0000 Subject: [PATCH] 1.0.26.8: QSHOW changes, bug reporting guidelines - change runtime.h so that a simple '#define QSHOW_SIGNALS 1' turns QSHOW on automatically and defaults to blocking signals during printing - add notes to BUGS on how to report bugs related to signal handling - kill a warning in thread.c in code conditional on QSHOW_SIGNALS - add #include to x86{-64,}-darwin-os.c so that it compiles with QSHOW - add comment explaining the previous commit --- BUGS | 30 ++++++++++++++++++++++++++++++ src/runtime/gencgc.c | 10 +++++----- src/runtime/interrupt.c | 2 +- src/runtime/linux-os.h | 4 ++++ src/runtime/runtime.h | 35 +++++++++++++++++++---------------- src/runtime/thread.c | 10 ++++++---- src/runtime/x86-64-darwin-os.c | 1 - src/runtime/x86-darwin-os.c | 3 +-- version.lisp-expr | 2 +- 9 files changed, 67 insertions(+), 30 deletions(-) diff --git a/BUGS b/BUGS index 0c6e139..1cb8b2c 100644 --- a/BUGS +++ b/BUGS @@ -21,6 +21,36 @@ but instead (MAKE-FOO) the program loops endlessly instead of printing the object. +If you run into a signal related bug, you are getting fatal errors +such as 'signal N is [un]blocked' or just hangs, and you want to send +a useful bug report then: + +- compile sbcl with ldb support (feature :sb-ldb, see + base-target-features.lisp-expr) and change '#define QSHOW_SIGNALS 0' + to '#define QSHOW_SIGNALS 1' in src/runtime/runtime.h. + +- isolate a smallish test case, run it + +- if it just hangs kill it with sigabrt: kill -ABRT <pidof sbcl> + +- print the backtrace from ldb by typing 'ba' + +- attach gdb: gdb -p <pidof sbcl> and get backtraces for all threads: + thread apply all ba + +- if multiple threads are in play then still in gdb, try to get Lisp + backtrace for all threads: 'thread apply all + call_backtrace_from_fp($ebp, 100)'. Substitute $ebp with $rbp on + x86-64.
+ +- send a report with the backtraces and the output (both stdout, + stderr) produced by sbcl + +- don't forget to include OS and SBCL version + +- if available include info on outcome of the same test with other + versions of SBCL, OS, ... + NOTES: diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index 3353425..c87669e 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -89,7 +89,7 @@ long large_object_size = 4 * PAGE_BYTES; /* the verbosity level. All non-error messages are disabled at level 0; * and only a few rare messages are printed at level 1. */ -#ifdef QSHOW +#if QSHOW boolean gencgc_verbose = 1; #else boolean gencgc_verbose = 0; @@ -384,7 +384,7 @@ count_generation_pages(generation_index_t generation) return count; } -#ifdef QSHOW +#if QSHOW static long count_dont_move_pages(void) { @@ -1684,7 +1684,7 @@ sniff_code_object(struct code *code, unsigned long displacement) unsigned d2 = *((unsigned char *)p - 2); unsigned d3 = *((unsigned char *)p - 3); unsigned d4 = *((unsigned char *)p - 4); -#ifdef QSHOW +#if QSHOW unsigned d5 = *((unsigned char *)p - 5); unsigned d6 = *((unsigned char *)p - 6); #endif @@ -4096,7 +4096,7 @@ garbage_collect_generation(generation_index_t generation, int raise) } #endif -#ifdef QSHOW +#if QSHOW if (gencgc_verbose > 1) { long num_dont_move_pages = count_dont_move_pages(); fprintf(stderr, @@ -4808,7 +4808,7 @@ gencgc_handle_wp_violation(void* fault_addr) { page_index_t page_index = find_page_index(fault_addr); -#ifdef QSHOW_SIGNALS +#if QSHOW_SIGNALS FSHOW((stderr, "heap WP violation? 
fault_addr=%x, page_index=%d\n", fault_addr, page_index)); #endif diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index b6c5c0d..dc210e9 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -682,7 +682,7 @@ interrupt_internal_error(os_context_t *context, boolean continuable) #endif SHOW("in interrupt_internal_error"); -#ifdef QSHOW +#if QSHOW /* Display some rudimentary debugging information about the * error, so that even if the Lisp error handler gets badly * confused, we have a chance to determine what's going on. */ diff --git a/src/runtime/linux-os.h b/src/runtime/linux-os.h index f1a84e9..6f1003c 100644 --- a/src/runtime/linux-os.h +++ b/src/runtime/linux-os.h @@ -39,4 +39,8 @@ typedef int os_vm_prot_t; #define SIG_MEMORY_FAULT SIGSEGV +/* Note that this must be higher than the highest numbered + * synchronously generated signal that we handle (that is SIGSEGV), + * due to Linux signal handling peculiarities. See thread "Signal + * delivery order" from 2009-03-14 on kernel-devel@vger.kernel.org. */ #define SIG_STOP_FOR_GC (SIGUSR2) diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 01f4031..bda2896 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -29,12 +29,27 @@ #define thread_mutex_unlock(l) 0 #endif -/* #define QSHOW */ /* Enable low-level debugging output? */ -/* #define QSHOW_SAFE */ /* Enable blocking interrupts for each SHOW. */ +/* Block blockable interrupts for each SHOW, if not 0. */ +#define QSHOW_SIGNAL_SAFE 1 +/* Enable extra-verbose low-level debugging output for signals? (You + * probably don't want this unless you're trying to debug very early + * cold boot on a new machine, or one where you've just messed up + * signal handling.) + * + * Note: It may be that doing this is fundamentally unsound, since it + * causes output from signal handlers, and the i/o libraries aren't + * necessarily reentrant.
But it can still be very convenient for + * figuring out what's going on when you have a signal handling + * problem. */ +#define QSHOW_SIGNALS 0 +/* Enable low-level debugging output, if not zero. Defaults to enabled + * if QSHOW_SIGNALS, disabled otherwise. Change it to 1 if you want + * low-level debugging output but not the whole signal mess. */ +#define QSHOW QSHOW_SIGNALS -#ifdef QSHOW +#if QSHOW -#ifdef QSHOW_SAFE +#if QSHOW_SIGNAL_SAFE #include <signal.h> extern sigset_t blockable_sigset; @@ -70,18 +85,6 @@ extern sigset_t blockable_sigset; #endif -/* Enable extra-verbose low-level debugging output for signals? (You - * probably don't want this unless you're trying to debug very early - * cold boot on a new machine, or one where you've just messed up - * signal handling.) - * - * Note: It may be that doing this is fundamentally unsound, since it - * causes output from signal handlers, and the i/o libraries aren't - * necessarily reentrant. But it can still be very convenient for - * figuring out what's going on when you have a signal handling - * problem.. */ -#define QSHOW_SIGNALS 0 - #if QSHOW_SIGNALS #define FSHOW_SIGNAL FSHOW #else diff --git a/src/runtime/thread.c b/src/runtime/thread.c index e158680..f8cb1ca 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -523,13 +523,15 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) #endif if((initcode = pthread_attr_init(th->os_attr)) || - /* call_into_lisp_first_time switches the stack for the initial thread. For the - * others, we use this. */ - (pthread_attr_setstack(th->os_attr,th->control_stack_start,thread_control_stack_size)) || + /* call_into_lisp_first_time switches the stack for the initial + * thread. For the others, we use this.
*/ + (pthread_attr_setstack(th->os_attr,th->control_stack_start, + thread_control_stack_size)) || (retcode = pthread_create (kid_tid,th->os_attr,(void *(*)(void *))new_thread_trampoline,th))) { FSHOW_SIGNAL((stderr, "init = %d\n", initcode)); - FSHOW_SIGNAL((stderr, printf("pthread_create returned %d, errno %d\n", retcode, errno))); + FSHOW_SIGNAL((stderr, "pthread_create returned %d, errno %d\n", + retcode, errno)); if(retcode < 0) { perror("create_os_thread"); } diff --git a/src/runtime/x86-64-darwin-os.c b/src/runtime/x86-64-darwin-os.c index 1b7df60..35ac2f9 100644 --- a/src/runtime/x86-64-darwin-os.c +++ b/src/runtime/x86-64-darwin-os.c @@ -1,4 +1,3 @@ - #ifdef LISP_FEATURE_SB_THREAD #include #include diff --git a/src/runtime/x86-darwin-os.c b/src/runtime/x86-darwin-os.c index 2c2b978..1cc837c 100644 --- a/src/runtime/x86-darwin-os.c +++ b/src/runtime/x86-darwin-os.c @@ -1,5 +1,3 @@ - - #ifdef LISP_FEATURE_SB_THREAD #include #include @@ -25,6 +23,7 @@ #include #include #include +#include #ifdef LISP_FEATURE_SB_THREAD diff --git a/version.lisp-expr b/version.lisp-expr index 27ef673..31175e6 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.26.7" +"1.0.26.8" -- 1.7.10.4