From 7cca1cabd213d38218a40e973b06ca11c8546396 Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Wed, 8 Jun 2005 08:49:49 +0000 Subject: [PATCH] 0.9.1.29: A number of signal handling cleanup/fixes: * fix gencgc/maybe_defer_handler race (thanks to Thiemo) * interrupt_maybe_gc (on cheneygc platforms): check for already pending handler before calling maybe_defer_handler in order not to lose interrupts * interrupt_handle_pending: check for the pending handler being null * run_deferred_handler: set the pending handler to null, before calling it to guard against the handler enabling interrupts ... * more defensiveness: enforce invariants: checks for signal masks, interrupts * refactoring: undoably_install_low_level_interrupt_handler wraps blockable handlers in low_level_maybe_now_maybe_later * don't unblock signals unconditionally in interrupt_maybe_gc_int just restore the sigmask from the interrupted context (kludge removed) * removed misguided sigprocmask calls from mips, hppa and sparc sig{trap,ill} handlers * removed non-x86 version of handle_breakpoint (interrupts are enabled in now common handle_breakpoint) * fixed arrange_return_to_lisp_function/post_signal_tramp to save and restore eflags (interrupt-threads seems to work) --- NEWS | 2 + src/code/signal.lisp | 36 ++--- src/code/target-thread.lisp | 2 + src/compiler/x86/macros.lisp | 3 +- src/runtime/alloc.c | 3 +- src/runtime/breakpoint.c | 28 +--- src/runtime/gencgc.c | 15 +- src/runtime/hppa-arch.c | 1 - src/runtime/interrupt.c | 354 ++++++++++++++++++++++++------------------ src/runtime/interrupt.h | 17 ++ src/runtime/mips-arch.c | 9 -- src/runtime/sparc-arch.c | 1 - src/runtime/x86-64-arch.c | 4 - src/runtime/x86-64-assem.S | 1 + src/runtime/x86-arch.c | 6 +- src/runtime/x86-assem.S | 3 +- tests/threads.impure.lisp | 29 ++++ version.lisp-expr | 2 +- 18 files changed, 297 insertions(+), 219 deletions(-) diff --git a/NEWS b/NEWS index d1b9a94..855a0c0 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,6 @@ changes in sbcl-0.9.2 relative to sbcl-0.9.1: + * numerous signal handling fixes to increase stability + * bug fix: interrupt-thread restores the eflags register on x86 * minor incompatible change: we now correctly canonize default initargs, making them be a list of (INITARG INITFORM INITFUNCTION) as per the MOP, rather than the historical (INITARG INITFUNCTION diff --git a/src/code/signal.lisp b/src/code/signal.lisp index 91d7a13..b3dd8eb 100644 --- a/src/code/signal.lisp +++ b/src/code/signal.lisp @@ -45,25 +45,23 @@ "Execute BODY in a context impervious to interrupts." (let ((name (gensym "WITHOUT-INTERRUPTS-BODY-"))) `(flet ((,name () ,@body)) - (if *interrupts-enabled* - (unwind-protect - (let ((*interrupts-enabled* nil)) - (,name)) - ;; FIXME: Does it matter that an interrupt coming in here - ;; could be executed before any of the pending interrupts? - ;; Or do incoming interrupts have the good grace to check - ;; whether interrupts are pending before executing themselves - ;; immediately? - ;; - ;; FIXME: from the kernel's POV we've already handled the - ;; signal the moment we return from the handler having - ;; decided to defer it, meaning that the kernel is free - ;; to deliver _more_ signals to us... of which we save, - ;; and subsequently handle here, only the last one. - ;; PSEUDO-ATOMIC has the same issue. -- NS 2005-05-19 - (when *interrupt-pending* - (receive-pending-interrupt))) - (,name))))) + (if *interrupts-enabled* + (unwind-protect + (let ((*interrupts-enabled* nil)) + (,name)) + ;; If we were interrupted in the protected section, then + ;; the interrupts are still blocked and it remains so + ;; until the pending interrupt is handled. + ;; + ;; If we were not interrupted in the protected section, + ;; but here, then even if the interrupt handler enters + ;; another WITHOUT-INTERRUPTS, the pending interrupt will + ;; be handled immediately upon exit from said + ;; WITHOUT-INTERRUPTS, so it is as if nothing has + ;; happened. + (when *interrupt-pending* + (receive-pending-interrupt))) + (,name))))) (sb!xc:defmacro with-interrupts (&body body) #!+sb-doc diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index 97c81f1..8a42b39 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -222,6 +222,8 @@ time we reacquire LOCK and return to the caller." (defun interrupt-thread (thread function) "Interrupt THREAD and make it run FUNCTION." (let ((function (coerce function 'function))) + ;; FIXME: FUNCTION is pinned only for the signalling of the + ;; SIG_INTERRUPT_THREAD signal. (sb!sys:with-pinned-objects (function) (multiple-value-bind (res err) diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp index 378ba2d..bd782a4 100644 --- a/src/compiler/x86/macros.lisp +++ b/src/compiler/x86/macros.lisp @@ -326,7 +326,8 @@ (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-interrupted-slot)) 0) (inst fs-segment-prefix) - (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 1) + (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) + (fixnumize 1)) ,@forms (inst fs-segment-prefix) (inst mov (make-ea :byte :disp (* 4 thread-pseudo-atomic-atomic-slot)) 0) diff --git a/src/runtime/alloc.c b/src/runtime/alloc.c index 9d9c6d6..7b19856 100644 --- a/src/runtime/alloc.c +++ b/src/runtime/alloc.c @@ -45,11 +45,12 @@ pa_alloc(int bytes) { lispobj *result=0; struct thread *th=arch_os_get_current_thread(); + /* FIXME: OOAO violation: see arch_pseudo_* */ SetSymbolValue(PSEUDO_ATOMIC_INTERRUPTED, make_fixnum(0),th); SetSymbolValue(PSEUDO_ATOMIC_ATOMIC, make_fixnum(1),th); result=alloc(bytes); SetSymbolValue(PSEUDO_ATOMIC_ATOMIC, make_fixnum(0),th); - if (SymbolValue(PSEUDO_ATOMIC_INTERRUPTED,th)) + if (fixnum_value(SymbolValue(PSEUDO_ATOMIC_INTERRUPTED,th))) /* even if we gc at this point, the new allocation will be * protected from being moved, because result is on the c stack * and points to it */ diff --git a/src/runtime/breakpoint.c b/src/runtime/breakpoint.c index ebe6ac6..c90bd0e 100644 --- a/src/runtime/breakpoint.c +++ b/src/runtime/breakpoint.c @@ -131,28 +131,7 @@ static long compute_offset(os_context_t *context, lispobj code) } } } -/* FIXME: I can see no really good reason these couldn't be merged, but haven't - * tried. The sigprocmask() call would work just as well on alpha as it - * presumably does on x86 -dan 2001.08.10 - */ -#if !(defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)) -void handle_breakpoint(int signal, siginfo_t *info, os_context_t *context) -{ - lispobj code; - - fake_foreign_function_call(context); - code = find_code(context); - /* FIXME we're calling into Lisp with signals masked here. Is this - * the right thing to do? */ - funcall3(SymbolFunction(HANDLE_BREAKPOINT), - compute_offset(context, code), - code, - alloc_sap(context)); - - undo_fake_foreign_function_call(context); -} -#else void handle_breakpoint(int signal, siginfo_t* info, os_context_t *context) { lispobj code, context_sap = alloc_sap(context); @@ -172,7 +151,6 @@ void handle_breakpoint(int signal, siginfo_t* info, os_context_t *context) undo_fake_foreign_function_call(context); } -#endif #if !(defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)) void *handle_fun_end_breakpoint(int signal, siginfo_t *info, @@ -186,8 +164,10 @@ void *handle_fun_end_breakpoint(int signal, siginfo_t *info, code = find_code(context); codeptr = (struct code *)native_pointer(code); - /* FIXME again, calling into Lisp with signals masked. Is this - * sensible? */ + /* Don't disallow recursive breakpoint traps. Otherwise, we can't + * use debugger breakpoints anywhere in here. */ + sigprocmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); + funcall3(SymbolFunction(HANDLE_BREAKPOINT), compute_offset(context, code), code, diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index 4cac73a..b7666e7 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -4147,9 +4147,20 @@ alloc(long nbytes) * already, in case it was a gc. If it wasn't a GC, the next * allocation will get us back to this point anyway, so no harm done */ + sigset_t new_mask,old_mask; + sigemptyset(&new_mask); + sigaddset_blockable(&new_mask); + sigprocmask(SIG_BLOCK,&new_mask,&old_mask); + struct interrupt_data *data=th->interrupt_data; - if(!data->pending_handler) - maybe_defer_handler(interrupt_maybe_gc_int,data,0,0,0); + if((!data->pending_handler) && + maybe_defer_handler(interrupt_maybe_gc_int,data,0,0,0)) { + /* Leave the signals blocked just as if it was deferred + * the normal way and set the pending_mask. */ + sigcopyset(&(data->pending_mask),&old_mask); + } else { + sigprocmask(SIG_SETMASK,&old_mask,0); + } } new_obj = gc_alloc_with_region(nbytes,0,region,0); return (new_obj); diff --git a/src/runtime/hppa-arch.c b/src/runtime/hppa-arch.c index f91a065..aadb9a0 100644 --- a/src/runtime/hppa-arch.c +++ b/src/runtime/hppa-arch.c @@ -181,7 +181,6 @@ static void sigtrap_handler(int signal, siginfo_t *siginfo, void *void_context) os_context_t *context = arch_os_get_context(&void_context); unsigned long bad_inst; - sigprocmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); #if 0 printf("sigtrap_handler, pc=0x%08x, alloc=0x%08x\n", scp->sc_pcoqh, SC_REG(scp,reg_ALLOC)); diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index c2ef993..d23df7b 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -75,17 +75,6 @@ boolean interrupt_maybe_gc_int(int signal, siginfo_t *info, void *v_context); extern volatile lispobj all_threads_lock; -/* - * This is a workaround for some slightly silly Linux/GNU Libc - * behaviour: glibc defines sigset_t to support 1024 signals, which is - * more than the kernel. This is usually not a problem, but becomes - * one when we want to save a signal mask from a ucontext, and restore - * it later into another ucontext: the ucontext is allocated on the - * stack by the kernel, so copying a libc-sized sigset_t into it will - * overflow and cause other data on the stack to be corrupted */ - -#define REAL_SIGSET_SIZE_BYTES ((NSIG/8)) - void sigaddset_blockable(sigset_t *s) { sigaddset(s, SIGHUP); @@ -111,6 +100,34 @@ void sigaddset_blockable(sigset_t *s) #endif } +static sigset_t blockable_sigset; + +inline static void check_blockables_blocked_or_lose() +{ + /* Get the current sigmask, by blocking the empty set. */ + sigset_t empty,current; + sigemptyset(&empty); + sigprocmask(SIG_BLOCK, &empty, ¤t); + int i; + for(i=0;iinterrupt_data; - /* FIXME: This is almost certainly wrong if we're here as the - * result of a pseudo-atomic as opposed to WITHOUT-INTERRUPTS. */ - SetSymbolValue(INTERRUPT_PENDING, NIL,thread); - - /* restore the saved signal mask from the original signal (the - * one that interrupted us during the critical section) into the - * os_context for the signal we're currently in the handler for. - * This should ensure that when we return from the handler the - * blocked signals are unblocked */ - - memcpy(os_context_sigmask_addr(context), &data->pending_mask, - REAL_SIGSET_SIZE_BYTES); - - sigemptyset(&data->pending_mask); - /* This will break on sparc linux: the deferred handler really wants - * to be called with a void_context */ - run_deferred_handler(data,(void *)context); + /* Pseudo atomic may trigger several times for a single interrupt, + * and while without-interrupts should not, a false trigger by + * pseudo-atomic may eat a pending handler even from + * without-interrupts. */ + if (data->pending_handler) { + + /* If we're here as the result of a pseudo-atomic as opposed + * to WITHOUT-INTERRUPTS, then INTERRUPT_PENDING is already + * NIL, because maybe_defer_handler sets + * PSEUDO_ATOMIC_INTERRUPTED only if interrupts are enabled.*/ + SetSymbolValue(INTERRUPT_PENDING, NIL,thread); + + /* restore the saved signal mask from the original signal (the + * one that interrupted us during the critical section) into the + * os_context for the signal we're currently in the handler for. + * This should ensure that when we return from the handler the + * blocked signals are unblocked */ + sigcopyset(os_context_sigmask_addr(context), &data->pending_mask); + + sigemptyset(&data->pending_mask); + /* This will break on sparc linux: the deferred handler really wants + * to be called with a void_context */ + run_deferred_handler(data,(void *)context); + } } /* @@ -349,6 +380,8 @@ interrupt_handle_now(int signal, siginfo_t *info, void *void_context) boolean were_in_lisp; #endif union interrupt_handler handler; + check_blockables_blocked_or_lose(); + check_interrupts_enabled_or_lose(context); #ifdef LISP_FEATURE_LINUX /* Under Linux on some architectures, we appear to have to restore @@ -443,9 +476,14 @@ interrupt_handle_now(int signal, siginfo_t *info, void *void_context) void run_deferred_handler(struct interrupt_data *data, void *v_context) { - (*(data->pending_handler)) - (data->pending_signal,&(data->pending_info), v_context); + /* The pending_handler may enable interrupts (see + * interrupt_maybe_gc_int) and then another interrupt may hit, + * overwrite interrupt_data, so reset the pending handler before + * calling it. Trust the handler to finish with the siginfo before + * enabling interrupts. */ + void (*pending_handler) (int, siginfo_t*, void*)=data->pending_handler; data->pending_handler=0; + (*pending_handler)(data->pending_signal,&(data->pending_info), v_context); } boolean @@ -453,9 +491,23 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, int signal, siginfo_t *info, os_context_t *context) { struct thread *thread=arch_os_get_current_thread(); + + check_blockables_blocked_or_lose(); + + if (SymbolValue(INTERRUPT_PENDING,thread) != NIL) + lose("interrupt already pending"); + /* If interrupts are disabled then INTERRUPT_PENDING is set and + * not PSEDUO_ATOMIC_INTERRUPTED. This is important for a pseudo + * atomic section inside a without-interrupts. + */ if (SymbolValue(INTERRUPTS_ENABLED,thread) == NIL) { store_signal_data_for_later(data,handler,signal,info,context); SetSymbolValue(INTERRUPT_PENDING, T,thread); +#ifdef QSHOW_SIGNALS + FSHOW((stderr, + "/maybe_defer_handler(%x,%d),thread=%d: deferred\n", + (unsigned int)handler,signal,thread->pid)); +#endif return 1; } /* a slightly confusing test. arch_pseudo_atomic_atomic() doesn't @@ -468,15 +520,31 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, arch_pseudo_atomic_atomic(context)) { store_signal_data_for_later(data,handler,signal,info,context); arch_set_pseudo_atomic_interrupted(context); +#ifdef QSHOW_SIGNALS + FSHOW((stderr, + "/maybe_defer_handler(%x,%d),thread=%d: deferred(PA)\n", + (unsigned int)handler,signal,thread->pid)); +#endif return 1; } +#ifdef QSHOW_SIGNALS + FSHOW((stderr, + "/maybe_defer_handler(%x,%d),thread=%d: not deferred\n", + (unsigned int)handler,signal,thread->pid)); +#endif return 0; } + static void store_signal_data_for_later (struct interrupt_data *data, void *handler, int signal, siginfo_t *info, os_context_t *context) { + if (data->pending_handler) + lose("tried to overwrite pending interrupt handler %x with %x\n", + data->pending_handler, handler); + if (!handler) + lose("tried to defer null interrupt handler\n"); data->pending_handler = handler; data->pending_signal = signal; if(info) @@ -487,22 +555,11 @@ store_signal_data_for_later (struct interrupt_data *data, void *handler, * run_deferred_handler happens. Then the usually-blocked * signals are added to the mask in the context so that we are * running with blocked signals when the handler returns */ - sigemptyset(&(data->pending_mask)); - memcpy(&(data->pending_mask), - os_context_sigmask_addr(context), - REAL_SIGSET_SIZE_BYTES); + sigcopyset(&(data->pending_mask),os_context_sigmask_addr(context)); sigaddset_blockable(os_context_sigmask_addr(context)); - } else { - /* this is also called from gencgc alloc(), in which case - * there has been no signal and is therefore no context. */ - sigset_t new; - sigemptyset(&new); - sigaddset_blockable(&new); - sigprocmask(SIG_BLOCK,&new,&(data->pending_mask)); } } - static void maybe_now_maybe_later(int signal, siginfo_t *info, void *void_context) { @@ -522,20 +579,53 @@ maybe_now_maybe_later(int signal, siginfo_t *info, void *void_context) #endif } +static void +low_level_interrupt_handle_now(int signal, siginfo_t *info, void *void_context) +{ + os_context_t *context = (os_context_t*)void_context; + struct thread *thread=arch_os_get_current_thread(); + +#ifdef LISP_FEATURE_LINUX + os_restore_fp_control(context); +#endif + check_blockables_blocked_or_lose(); + check_interrupts_enabled_or_lose(context); + (*thread->interrupt_data->interrupt_low_level_handlers[signal]) + (signal, info, void_context); +#ifdef LISP_FEATURE_DARWIN + /* Work around G5 bug */ + DARWIN_FIX_CONTEXT(context); +#endif +} + +static void +low_level_maybe_now_maybe_later(int signal, siginfo_t *info, void *void_context) +{ + os_context_t *context = arch_os_get_context(&void_context); + struct thread *thread=arch_os_get_current_thread(); + struct interrupt_data *data=thread->interrupt_data; +#ifdef LISP_FEATURE_LINUX + os_restore_fp_control(context); +#endif + if(maybe_defer_handler(low_level_interrupt_handle_now,data, + signal,info,context)) + return; + low_level_interrupt_handle_now(signal, info, context); +#ifdef LISP_FEATURE_DARWIN + /* Work around G5 bug */ + DARWIN_FIX_CONTEXT(context); +#endif +} + #ifdef LISP_FEATURE_SB_THREAD void sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) { os_context_t *context = arch_os_get_context(&void_context); struct thread *thread=arch_os_get_current_thread(); - struct interrupt_data *data=thread->interrupt_data; sigset_t ss; int i; - if(maybe_defer_handler(sig_stop_for_gc_handler,data, - signal,info,context)) { - return; - } /* need the context stored so it can have registers scavenged */ fake_foreign_function_call(context); @@ -600,6 +690,10 @@ gc_trigger_hit(int signal, siginfo_t *info, os_context_t *context) * previously */ +#if (defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)) +int *context_eflags_addr(os_context_t *context); +#endif + extern lispobj call_into_lisp(lispobj fun, lispobj *args, int nargs); extern void post_signal_tramp(void); void arrange_return_to_lisp_function(os_context_t *context, lispobj function) @@ -611,6 +705,8 @@ void arrange_return_to_lisp_function(os_context_t *context, lispobj function) /* Build a stack frame showing `interrupted' so that the * user's backtrace makes (as much) sense (as usual) */ + + /* FIXME: what about restoring fp state? */ #ifdef LISP_FEATURE_X86 /* Suppose the existence of some function that saved all * registers, called call_into_lisp, then restored GP registers and @@ -641,44 +737,47 @@ void arrange_return_to_lisp_function(os_context_t *context, lispobj function) u32 *sp=(u32 *)*os_context_register_addr(context,reg_ESP); - *(sp-14) = post_signal_tramp; /* return address for call_into_lisp */ - *(sp-13) = function; /* args for call_into_lisp : function*/ - *(sp-12) = 0; /* arg array */ - *(sp-11) = 0; /* no. args */ + *(sp-15) = post_signal_tramp; /* return address for call_into_lisp */ + *(sp-14) = function; /* args for call_into_lisp : function*/ + *(sp-13) = 0; /* arg array */ + *(sp-12) = 0; /* no. args */ /* this order matches that used in POPAD */ - *(sp-10)=*os_context_register_addr(context,reg_EDI); - *(sp-9)=*os_context_register_addr(context,reg_ESI); - - *(sp-8)=*os_context_register_addr(context,reg_ESP)-8; - *(sp-7)=0; - *(sp-6)=*os_context_register_addr(context,reg_EBX); - - *(sp-5)=*os_context_register_addr(context,reg_EDX); - *(sp-4)=*os_context_register_addr(context,reg_ECX); - *(sp-3)=*os_context_register_addr(context,reg_EAX); + *(sp-11)=*os_context_register_addr(context,reg_EDI); + *(sp-10)=*os_context_register_addr(context,reg_ESI); + + *(sp-9)=*os_context_register_addr(context,reg_ESP)-8; + /* POPAD ignores the value of ESP: */ + *(sp-8)=0; + *(sp-7)=*os_context_register_addr(context,reg_EBX); + + *(sp-6)=*os_context_register_addr(context,reg_EDX); + *(sp-5)=*os_context_register_addr(context,reg_ECX); + *(sp-4)=*os_context_register_addr(context,reg_EAX); + *(sp-3)=*context_eflags_addr(context); *(sp-2)=*os_context_register_addr(context,reg_EBP); *(sp-1)=*os_context_pc_addr(context); #elif defined(LISP_FEATURE_X86_64) u64 *sp=(u64 *)*os_context_register_addr(context,reg_RSP); - *(sp-19) = post_signal_tramp; /* return address for call_into_lisp */ - - *(sp-18)=*os_context_register_addr(context,reg_R15); - *(sp-17)=*os_context_register_addr(context,reg_R14); - *(sp-16)=*os_context_register_addr(context,reg_R13); - *(sp-15)=*os_context_register_addr(context,reg_R12); - *(sp-14)=*os_context_register_addr(context,reg_R11); - *(sp-13)=*os_context_register_addr(context,reg_R10); - *(sp-12)=*os_context_register_addr(context,reg_R9); - *(sp-11)=*os_context_register_addr(context,reg_R8); - *(sp-10)=*os_context_register_addr(context,reg_RDI); - *(sp-9)=*os_context_register_addr(context,reg_RSI); - *(sp-8)=*os_context_register_addr(context,reg_RSP)-16; - *(sp-7)=0; - *(sp-6)=*os_context_register_addr(context,reg_RBX); - *(sp-5)=*os_context_register_addr(context,reg_RDX); - *(sp-4)=*os_context_register_addr(context,reg_RCX); - *(sp-3)=*os_context_register_addr(context,reg_RAX); + *(sp-20) = post_signal_tramp; /* return address for call_into_lisp */ + + *(sp-19)=*os_context_register_addr(context,reg_R15); + *(sp-18)=*os_context_register_addr(context,reg_R14); + *(sp-17)=*os_context_register_addr(context,reg_R13); + *(sp-16)=*os_context_register_addr(context,reg_R12); + *(sp-15)=*os_context_register_addr(context,reg_R11); + *(sp-14)=*os_context_register_addr(context,reg_R10); + *(sp-13)=*os_context_register_addr(context,reg_R9); + *(sp-12)=*os_context_register_addr(context,reg_R8); + *(sp-11)=*os_context_register_addr(context,reg_RDI); + *(sp-10)=*os_context_register_addr(context,reg_RSI); + *(sp-9)=*os_context_register_addr(context,reg_RSP)-16; + *(sp-8)=0; + *(sp-7)=*os_context_register_addr(context,reg_RBX); + *(sp-6)=*os_context_register_addr(context,reg_RDX); + *(sp-5)=*os_context_register_addr(context,reg_RCX); + *(sp-4)=*os_context_register_addr(context,reg_RAX); + *(sp-3)=*context_eflags_addr(context); *(sp-2)=*os_context_register_addr(context,reg_RBP); *(sp-1)=*os_context_pc_addr(context); @@ -695,15 +794,15 @@ void arrange_return_to_lisp_function(os_context_t *context, lispobj function) *os_context_register_addr(context,reg_ECX) = 0; *os_context_register_addr(context,reg_EBP) = sp-2; #ifdef __NetBSD__ - *os_context_register_addr(context,reg_UESP) = sp-14; + *os_context_register_addr(context,reg_UESP) = sp-15; #else - *os_context_register_addr(context,reg_ESP) = sp-14; + *os_context_register_addr(context,reg_ESP) = sp-15; #endif #elif defined(LISP_FEATURE_X86_64) *os_context_pc_addr(context) = call_into_lisp; *os_context_register_addr(context,reg_RCX) = 0; *os_context_register_addr(context,reg_RBP) = sp-2; - *os_context_register_addr(context,reg_RSP) = sp-19; + *os_context_register_addr(context,reg_RSP) = sp-20; #else /* this much of the calling convention is common to all non-x86 ports */ @@ -727,12 +826,6 @@ void arrange_return_to_lisp_function(os_context_t *context, lispobj function) void interrupt_thread_handler(int num, siginfo_t *info, void *v_context) { os_context_t *context = (os_context_t*)arch_os_get_context(&v_context); - struct thread *th=arch_os_get_current_thread(); - struct interrupt_data *data= - th ? th->interrupt_data : global_interrupt_data; - if(maybe_defer_handler(interrupt_thread_handler,data,num,info,context)){ - return ; - } arrange_return_to_lisp_function(context,info->si_value.sival_int); } @@ -805,71 +898,26 @@ interrupt_maybe_gc(int signal, siginfo_t *info, void *void_context) struct interrupt_data *data= th ? th->interrupt_data : global_interrupt_data; - if(!foreign_function_call_active && gc_trigger_hit(signal, info, context)){ - clear_auto_gc_trigger(); - if(!maybe_defer_handler - (interrupt_maybe_gc_int,data,signal,info,void_context)) - interrupt_maybe_gc_int(signal,info,void_context); - return 1; + if(!data->pending_handler && !foreign_function_call_active && + gc_trigger_hit(signal, info, context)){ + clear_auto_gc_trigger(); + if(!maybe_defer_handler(interrupt_maybe_gc_int, + data,signal,info,void_context)) + interrupt_maybe_gc_int(signal,info,void_context); + return 1; } return 0; } #endif -void -kludge_sigset_for_gc(sigset_t * set) -{ -#ifndef LISP_FEATURE_GENCGC - /* FIXME: It is not sure if GENCGC is really right here: maybe this - * really affects eg. only Sparc and PPC. And the following KLUDGE - * could really use real fixing as well. - * - * KLUDGE: block some async signals that seem to have the ability - * to hang us in an uninterruptible state during GC -- at least - * part of the time. The main beneficiary of this is SB-SPROF, as - * SIGPROF was almost certain to be eventually triggered at a bad - * moment, rendering it virtually useless. SIGINT and SIGIO from - * user or eg. Slime also seemed to occasionally do this. - * - * The problem this papers over appears to be something going awry - * in SB-UNIX:RECEIVE-PENDING-SIGNALS at the end of the - * WITHOUT-INTERRUPTS in SUB-GC: adding debugging output shows us - * leaving the body of W-I, but never entering sigtrap_handler. - * - * Empirically, it seems that the problem is only triggered if the - * GC was triggered/deferred during a PA section, but this is not - * a sufficient condition: some collections triggered in such a - * manner seem to be able to receive and defer a signal during the - * GC without issues. Likewise empirically, it seems that the - * problem arises more often with floating point code then not. Eg - * (LOOP (* (RANDOM 1.0) (RANDOM 1.0))) will eventually hang if - * run with SB-SPROF on, but (LOOP (FOO (MAKE-LIST 24))) will not. - * All this makes some badnesss in the interaction between PA and - * W-I seem likely, possibly in the form of one or more bad VOPs. - * - * For additional entertainment on the affected platforms we - * currently use an actual illegal instruction to receive pending - * interrupts instead of a trap: whether this has any bearing on - * the matter is unknown. - * - * Apparently CMUCL blocks everything but SIGILL for GC on Sparc, - * possibly for this very reason. - * - * -- NS 2005-05-20 - */ - sigdelset(set, SIGPROF); - sigdelset(set, SIGIO); - sigdelset(set, SIGINT); -#endif -} - /* this is also used by gencgc, in alloc() */ boolean interrupt_maybe_gc_int(int signal, siginfo_t *info, void *void_context) { - sigset_t new; os_context_t *context=(os_context_t *) void_context; + + check_blockables_blocked_or_lose(); fake_foreign_function_call(context); /* SUB-GC may return without GCing if *GC-INHIBIT* is set, in @@ -881,11 +929,10 @@ interrupt_maybe_gc_int(int signal, siginfo_t *info, void *void_context) * and signal a storage condition from there. */ - /* enable some signals before calling into Lisp */ - sigemptyset(&new); - sigaddset_blockable(&new); - kludge_sigset_for_gc(&new); - sigprocmask(SIG_UNBLOCK,&new,0); + /* restore the signal mask from the interrupted context before + * calling into Lisp */ + if (context) + sigprocmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); funcall0(SymbolFunction(SUB_GC)); @@ -913,7 +960,11 @@ undoably_install_low_level_interrupt_handler (int signal, lose("bad signal number %d", signal); } - sa.sa_sigaction = handler; + if (sigismember(&blockable_sigset,signal)) + sa.sa_sigaction = low_level_maybe_now_maybe_later; + else + sa.sa_sigaction = handler; + sigemptyset(&sa.sa_mask); sigaddset_blockable(&sa.sa_mask); sa.sa_flags = SA_SIGINFO | SA_RESTART; @@ -951,8 +1002,8 @@ install_handler(int signal, void handler(int, siginfo_t*, void*)) sigemptyset(&new); sigaddset_blockable(&new); - FSHOW((stderr, "/data->interrupt_low_level_handlers[signal]=%d\n", - data->interrupt_low_level_handlers[signal])); + FSHOW((stderr, "/data->interrupt_low_level_handlers[signal]=%x\n", + (unsigned int)data->interrupt_low_level_handlers[signal])); if (data->interrupt_low_level_handlers[signal]==0) { if (ARE_SAME_HANDLER(handler, SIG_DFL) || ARE_SAME_HANDLER(handler, SIG_IGN)) { @@ -984,6 +1035,9 @@ interrupt_init() { int i; SHOW("entering interrupt_init()"); + sigemptyset(&blockable_sigset); + sigaddset_blockable(&blockable_sigset); + global_interrupt_data=calloc(sizeof(struct interrupt_data), 1); /* Set up high level handler information. */ diff --git a/src/runtime/interrupt.h b/src/runtime/interrupt.h index 642160e..8a99fd4 100644 --- a/src/runtime/interrupt.h +++ b/src/runtime/interrupt.h @@ -14,6 +14,23 @@ #include +/* + * This is a workaround for some slightly silly Linux/GNU Libc + * behaviour: glibc defines sigset_t to support 1024 signals, which is + * more than the kernel. This is usually not a problem, but becomes + * one when we want to save a signal mask from a ucontext, and restore + * it later into another ucontext: the ucontext is allocated on the + * stack by the kernel, so copying a libc-sized sigset_t into it will + * overflow and cause other data on the stack to be corrupted */ +/* FIXME: do not rely on NSIG being a multiple of 8 */ +#define REAL_SIGSET_SIZE_BYTES ((NSIG/8)) + +static inline void +sigcopyset(sigset_t *new, sigset_t *old) +{ + memcpy(new, old, REAL_SIGSET_SIZE_BYTES); +} + /* maximum signal nesting depth * * Note: In CMU CL, this was 4096, but there was no explanation given, diff --git a/src/runtime/mips-arch.c b/src/runtime/mips-arch.c index b07d8da..0ec497b 100644 --- a/src/runtime/mips-arch.c +++ b/src/runtime/mips-arch.c @@ -229,14 +229,8 @@ static void sigtrap_handler(int signal, siginfo_t *info, void *void_context) { os_context_t *context = arch_os_get_context(&void_context); - sigset_t *mask; unsigned int code; - sigset_t ss; - /* Don't disallow recursive breakpoint traps. Otherwise, we can't - use debugger breakpoints anywhere in here. */ - mask = os_context_sigmask_addr(context); - sigprocmask(SIG_SETMASK, mask, NULL); code = ((*(int *) (*os_context_pc_addr(context))) >> 16) & 0x1f; switch (code) { @@ -246,9 +240,6 @@ sigtrap_handler(int signal, siginfo_t *info, void *void_context) case trap_PendingInterrupt: arch_skip_instruction(context); - sigemptyset(&ss); - sigaddset(&ss,SIGTRAP); - sigprocmask(SIG_UNBLOCK,&ss,0); interrupt_handle_pending(context); break; diff --git a/src/runtime/sparc-arch.c b/src/runtime/sparc-arch.c index 9cc1780..736edbe 100644 --- a/src/runtime/sparc-arch.c +++ b/src/runtime/sparc-arch.c @@ -188,7 +188,6 @@ static void sigill_handler(int signal, siginfo_t *siginfo, void *void_context) /* FIXME: Check that this is necessary -- CSR, 2002-07-15 */ os_restore_fp_control(context); #endif - sigprocmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); if ((siginfo->si_code) == ILL_ILLOPC #ifdef LISP_FEATURE_LINUX diff --git a/src/runtime/x86-64-arch.c b/src/runtime/x86-64-arch.c index 1b1238b..323ff39 100644 --- a/src/runtime/x86-64-arch.c +++ b/src/runtime/x86-64-arch.c @@ -192,7 +192,6 @@ sigtrap_handler(int signal, siginfo_t *info, void *void_context) int code = info->si_code; os_context_t *context = (os_context_t*)void_context; unsigned int trap; - sigset_t ss; if (single_stepping && (signal==SIGTRAP)) { @@ -243,9 +242,6 @@ sigtrap_handler(int signal, siginfo_t *info, void *void_context) case trap_PendingInterrupt: FSHOW((stderr, "/\n")); arch_skip_instruction(context); - sigemptyset(&ss); - sigaddset(&ss,SIGTRAP); - sigprocmask(SIG_UNBLOCK,&ss,0); interrupt_handle_pending(context); break; diff --git a/src/runtime/x86-64-assem.S b/src/runtime/x86-64-assem.S index 1751d5d..b806ce1 100644 --- a/src/runtime/x86-64-assem.S +++ b/src/runtime/x86-64-assem.S @@ -334,6 +334,7 @@ GNAME(post_signal_tramp): popq %rbx popq %rcx popq %rax + popfl leave ret .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp) diff --git a/src/runtime/x86-arch.c b/src/runtime/x86-arch.c index f2a3225..a62d204 100644 --- a/src/runtime/x86-arch.c +++ b/src/runtime/x86-arch.c @@ -193,7 +193,6 @@ sigtrap_handler(int signal, siginfo_t *info, void *void_context) int code = info->si_code; os_context_t *context = (os_context_t*)void_context; unsigned int trap; - sigset_t ss; if (single_stepping && (signal==SIGTRAP)) { @@ -244,10 +243,7 @@ sigtrap_handler(int signal, siginfo_t *info, void *void_context) case trap_PendingInterrupt: FSHOW((stderr, "/\n")); arch_skip_instruction(context); - sigemptyset(&ss); - sigaddset(&ss,SIGTRAP); - sigprocmask(SIG_UNBLOCK,&ss,0); - interrupt_handle_pending(context); + interrupt_handle_pending(context); break; case trap_Halt: diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index d512676..4af6507 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -802,7 +802,8 @@ GNAME(post_signal_tramp): * doesn't exist. This is where call_into_lisp returns when called * using return_to_lisp_function */ addl $12,%esp /* clear call_into_lisp args from stack */ - popa /* restore registers */ + popal /* restore registers */ + popfl leave ret .size GNAME(post_signal_tramp),.-GNAME(post_signal_tramp) diff --git a/tests/threads.impure.lisp b/tests/threads.impure.lisp index 6dca7f9..a312f3b 100644 --- a/tests/threads.impure.lisp +++ b/tests/threads.impure.lisp @@ -187,6 +187,35 @@ (assert (zerop SB-KERNEL:*PSEUDO-ATOMIC-ATOMIC*))))) (terminate-thread c)) +(defparameter *interrupt-count* 0) + +(declaim (notinline check-interrupt-count)) +(defun check-interrupt-count (i) + (declare (optimize (debug 1) (speed 1))) + ;; This used to lose if eflags were not restored after an interrupt. + (unless (typep i 'fixnum) + (error "!!!!!!!!!!!"))) + +(let ((c (make-thread + (lambda () + (handler-bind ((error #'(lambda (cond) + (princ cond) + (sb-debug:backtrace + most-positive-fixnum)))) + (loop (check-interrupt-count *interrupt-count*))))))) + (let ((func (lambda () + (princ ".") + (force-output) + (sb-impl::atomic-incf/symbol *interrupt-count*)))) + (sb-sys:with-pinned-objects (func) + (setq *interrupt-count* 0) + (dotimes (i 100) + (sleep (random 1d0)) + (interrupt-thread c func)) + (sleep 1) + (assert (= 100 *interrupt-count*)) + (terminate-thread c)))) + (format t "~&interrupt test done~%") (let (a-done b-done) diff --git a/version.lisp-expr b/version.lisp-expr index 9c0ad36..b7ba5dd 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.1.28" +"0.9.1.29" -- 1.7.10.4