From: Nikodemus Siivola Date: Tue, 15 May 2007 14:14:33 +0000 (+0000) Subject: 1.0.5.49: interrupt & GC & PA handling X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=b5c69cfe906a31ae57bb0f18c67af9d2eaa1dfef;p=sbcl.git 1.0.5.49: interrupt & GC & PA handling * On all FOREIGN_FUNCTION_CALL_FLAG platforms arch_pseudo_atomic_atomic is always accompanied by !foreign_function_call_active. For clarity move checking it to the a_p_a_a definitions on platforms that need it. TEST ALERT: This touches all the non-x86oid platforms, but I have not been able to test on them -- so a thinko/typo build-breakage is not impossible. * If we somehow ended up in interrupt_handle_pending in the middle of a PA section we might GC in the middle of it, or lose PA interrupted flags. Now we punt straight up if we get there in the middle of PA. * If we handled a pending interrupt outside GC-INHIBIT, but inside a PA section, with both GC_PENDING and SIG_STOP_FOR_GC_PENDING was true, we would be left running with GC_PENDING cleared without actually taking any action. The previous item actually fixes this, but for clarity make sig_stop_for_gc_handler clear the GC_PENDING and SIG_STOP_FOR_GC_PENDING iff it actually stops the thread. * The single UWP implementation for WITHOUT-GCING was incorrect: if we had a GC but no interrupts pending when checking for pending interrupts and GCs, and caught an asynch unwind at that point we could be left running with the GC pending (and/or related signals blocked). Due to the increased cost of WITHOUT-GCING, check first if GC is already disabled before taking the UWP code path. --- diff --git a/src/code/sysmacs.lisp b/src/code/sysmacs.lisp index aca1cc5..ad91f85 100644 --- a/src/code/sysmacs.lisp +++ b/src/code/sysmacs.lisp @@ -48,14 +48,30 @@ stopped for GC while T2 is waiting for the lock inside WITHOUT-GCING the system will be deadlocked. Since SBCL does not currently document its internal locks, application code can never be certain that this invariant is maintained." - `(unwind-protect - (let* ((*interrupts-enabled* nil) - (*gc-inhibit* t)) - ,@body) - (when (or (and *interrupts-enabled* *interrupt-pending*) - (and (not *gc-inhibit*) - (or *gc-pending* #!+sb-thread *stop-for-gc-pending*))) - (sb!unix::receive-pending-interrupt)))) + (with-unique-names (without-gcing-body) + `(flet ((,without-gcing-body () + ,@body)) + (if *gc-inhibit* + (,without-gcing-body) + (without-interrupts + ;; We need to disable interrupts before disabling GC, so that + ;; signal handlers using locks don't accidentally try to grab + ;; them with GC inhibited. + ;; + ;; It would be nice to implement this with just a single UWP, but + ;; unfortunately it seems that it cannot be done: the naive + ;; solution of binding both *INTERRUPTS-ENABLED* and + ;; *GC-INHIBIT*, and checking for both pending GC and interrupts + ;; in the cleanup breaks if we have a GC pending, but no + ;; interrupts, and we receive an asynch unwind while checking for + ;; the pending GC: we unwind before handling the pending GC, and + ;; will be left running with further GCs blocked due to the GC + ;; pending flag. + (unwind-protect + (let ((*gc-inhibit* t)) + (,without-gcing-body)) + (when (or *gc-pending* #!+sb-thread *stop-for-gc-pending*) + (sb!unix::receive-pending-interrupt)))))))) ;;; EOF-OR-LOSE is a useful macro that handles EOF. diff --git a/src/runtime/alpha-arch.c b/src/runtime/alpha-arch.c index 5c71fa7..3f3dafe 100644 --- a/src/runtime/alpha-arch.c +++ b/src/runtime/alpha-arch.c @@ -98,7 +98,18 @@ arch_internal_error_arguments(os_context_t *context) boolean arch_pseudo_atomic_atomic(os_context_t *context) { - return ((*os_context_register_addr(context,reg_ALLOC)) & 1); + /* FIXME: this foreign_function_call_active test is dubious at + * best. If a foreign call is made in a pseudo atomic section + * (?) or more likely a pseudo atomic section is in a foreign + * call then an interrupt is executed immediately. Maybe it + * has to do with C code not maintaining pseudo atomic + * properly. MG - 2005-08-10 + * + * The foreign_function_call_active used to live at each call-site + * to arch_pseudo_atomic_atomic, but this seems clearer. + * --NS 2007-05-15 */ + return (!foreign_function_call_active) + && ((*os_context_register_addr(context,reg_ALLOC)) & 1); } void arch_set_pseudo_atomic_interrupted(os_context_t *context) diff --git a/src/runtime/gc-common.c b/src/runtime/gc-common.c index 11b3add..4f95952 100644 --- a/src/runtime/gc-common.c +++ b/src/runtime/gc-common.c @@ -2474,7 +2474,11 @@ maybe_gc(os_context_t *context) * blocked? Just skip the GC: we can't set GC_PENDING, because * that would block the next attempt, and we don't know when * we'd next check for it -- and it's hard to be sure that - * unblocking would be safe. */ + * unblocking would be safe. + * + * FIXME: This is not actually much better: we may already have + * GC_PENDING set, and presumably our caller assumes that we will + * clear it. Perhaps we should, even though we don't actually GC? */ if (sigismember(context_sigmask,SIG_STOP_FOR_GC)) { undo_fake_foreign_function_call(context); return 1; diff --git a/src/runtime/hppa-arch.c b/src/runtime/hppa-arch.c index 46fdb34..bf9a464 100644 --- a/src/runtime/hppa-arch.c +++ b/src/runtime/hppa-arch.c @@ -76,7 +76,18 @@ unsigned char *arch_internal_error_arguments(os_context_t *context) boolean arch_pseudo_atomic_atomic(os_context_t *context) { - return ((*os_context_register_addr(context,reg_ALLOC)) & 4); + /* FIXME: this foreign_function_call_active test is dubious at + * best. If a foreign call is made in a pseudo atomic section + * (?) or more likely a pseudo atomic section is in a foreign + * call then an interrupt is executed immediately. Maybe it + * has to do with C code not maintaining pseudo atomic + * properly. MG - 2005-08-10 + * + * The foreign_function_call_active used to live at each call-site + * to arch_pseudo_atomic_atomic, but this seems clearer. + * --NS 2007-05-15 */ + return (!foreign_function_call_active) + && ((*os_context_register_addr(context,reg_ALLOC)) & 4); } void arch_set_pseudo_atomic_interrupted(os_context_t *context) diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index 86b7413..78bafeb 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -155,11 +155,7 @@ check_interrupts_enabled_or_lose(os_context_t *context) struct thread *thread=arch_os_get_current_thread(); if (SymbolValue(INTERRUPTS_ENABLED,thread) == NIL) lose("interrupts not enabled\n"); - if ( -#ifdef FOREIGN_FUNCTION_CALL_FLAG - (!foreign_function_call_active) && -#endif - arch_pseudo_atomic_atomic(context)) + if (arch_pseudo_atomic_atomic(context)) lose ("in pseudo atomic section\n"); } @@ -397,7 +393,19 @@ interrupt_handle_pending(os_context_t *context) /* Win32 only needs to handle the GC cases (for now?) */ - struct thread *thread = arch_os_get_current_thread(); + struct thread *thread; + + /* Punt if in PA section, marking it as interrupted. This can + * happenat least if we pick up a GC request while in a + * WITHOUT-GCING with an outer PA -- it is not immediately clear + * to me that this should/could ever happen, but better safe then + * sorry. --NS 2007-05-15 */ + if (arch_pseudo_atomic_atomic(context)) { + arch_set_pseudo_atomic_interrupted(context); + return; + } + + thread = arch_os_get_current_thread(); FSHOW_SIGNAL((stderr, "/entering interrupt_handle_pending\n")); @@ -410,10 +418,8 @@ interrupt_handle_pending(os_context_t *context) if (SymbolValue(GC_INHIBIT,thread)==NIL) { #ifdef LISP_FEATURE_SB_THREAD if (SymbolValue(STOP_FOR_GC_PENDING,thread) != NIL) { - /* another thread has already initiated a gc, this attempt - * might as well be cancelled */ - SetSymbolValue(GC_PENDING,NIL,thread); - SetSymbolValue(STOP_FOR_GC_PENDING,NIL,thread); + /* STOP_FOR_GC_PENDING and GC_PENDING are cleared by + * the signal handler if it actually stops us. */ sig_stop_for_gc_handler(SIG_STOP_FOR_GC,NULL,context); } else #endif @@ -429,12 +435,7 @@ interrupt_handle_pending(os_context_t *context) #ifndef LISP_FEATURE_WIN32 /* we may be here only to do the gc stuff, if interrupts are * enabled run the pending handler */ - if (!((SymbolValue(INTERRUPTS_ENABLED,thread) == NIL) || - ( -#ifdef FOREIGN_FUNCTION_CALL_FLAG - (!foreign_function_call_active) && -#endif - arch_pseudo_atomic_atomic(context)))) { + if (SymbolValue(INTERRUPTS_ENABLED,thread) != NIL) { struct interrupt_data *data = thread->interrupt_data; /* There may be no pending handler, because it was only a gc @@ -449,7 +450,7 @@ interrupt_handle_pending(os_context_t *context) * to WITHOUT-INTERRUPTS, then INTERRUPT_PENDING is already * NIL, because maybe_defer_handler sets * PSEUDO_ATOMIC_INTERRUPTED only if interrupts are enabled.*/ - SetSymbolValue(INTERRUPT_PENDING, NIL,thread); + SetSymbolValue(INTERRUPT_PENDING, NIL, thread); /* restore the saved signal mask from the original signal (the * one that interrupted us during the critical section) into the @@ -631,20 +632,10 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, (unsigned long)thread->os_thread)); return 1; } - /* a slightly confusing test. arch_pseudo_atomic_atomic() doesn't + /* a slightly confusing test. arch_pseudo_atomic_atomic() doesn't * actually use its argument for anything on x86, so this branch * may succeed even when context is null (gencgc alloc()) */ - if ( -#ifdef FOREIGN_FUNCTION_CALL_FLAG - /* FIXME: this foreign_function_call_active test is dubious at - * best. If a foreign call is made in a pseudo atomic section - * (?) or more likely a pseudo atomic section is in a foreign - * call then an interrupt is executed immediately. Maybe it - * has to do with C code not maintaining pseudo atomic - * properly. MG - 2005-08-10 */ - (!foreign_function_call_active) && -#endif - arch_pseudo_atomic_atomic(context)) { + if (arch_pseudo_atomic_atomic(context)) { store_signal_data_for_later(data,handler,signal,info,context); arch_set_pseudo_atomic_interrupted(context); FSHOW_SIGNAL((stderr, @@ -748,53 +739,67 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) struct thread *thread=arch_os_get_current_thread(); sigset_t ss; - if ((arch_pseudo_atomic_atomic(context) || - SymbolValue(GC_INHIBIT,thread) != NIL)) { + if (arch_pseudo_atomic_atomic(context)) { SetSymbolValue(STOP_FOR_GC_PENDING,T,thread); - if (SymbolValue(GC_INHIBIT,thread) == NIL) - arch_set_pseudo_atomic_interrupted(context); - FSHOW_SIGNAL((stderr,"thread=%lu sig_stop_for_gc deferred\n", + arch_set_pseudo_atomic_interrupted(context); + FSHOW_SIGNAL((stderr,"thread=%lu sig_stop_for_gc deferred (PA)\n", thread->os_thread)); - } else { - /* need the context stored so it can have registers scavenged */ - fake_foreign_function_call(context); + return; + } + else if (SymbolValue(GC_INHIBIT,thread) != NIL) { + SetSymbolValue(STOP_FOR_GC_PENDING,T,thread); + FSHOW_SIGNAL((stderr, + "thread=%lu sig_stop_for_gc deferred (*GC-INHIBIT*)\n", + thread->os_thread)); + return; + } - sigfillset(&ss); /* Block everything. */ - thread_sigmask(SIG_BLOCK,&ss,0); + /* Not PA and GC not inhibited -- we can stop now. */ - if(thread->state!=STATE_RUNNING) { - lose("sig_stop_for_gc_handler: wrong thread state: %ld\n", - fixnum_value(thread->state)); - } - thread->state=STATE_SUSPENDED; - FSHOW_SIGNAL((stderr,"thread=%lu suspended\n",thread->os_thread)); + /* need the context stored so it can have registers scavenged */ + fake_foreign_function_call(context); + + /* Block everything. */ + sigfillset(&ss); + thread_sigmask(SIG_BLOCK,&ss,0); + + /* Not pending anymore. */ + SetSymbolValue(GC_PENDING,NIL,thread); + SetSymbolValue(STOP_FOR_GC_PENDING,NIL,thread); + + if(thread->state!=STATE_RUNNING) { + lose("sig_stop_for_gc_handler: wrong thread state: %ld\n", + fixnum_value(thread->state)); + } + + thread->state=STATE_SUSPENDED; + FSHOW_SIGNAL((stderr,"thread=%lu suspended\n",thread->os_thread)); #if defined(SIG_RESUME_FROM_GC) - sigemptyset(&ss); sigaddset(&ss,SIG_RESUME_FROM_GC); + sigemptyset(&ss); sigaddset(&ss,SIG_RESUME_FROM_GC); #else - sigemptyset(&ss); sigaddset(&ss,SIG_STOP_FOR_GC); + sigemptyset(&ss); sigaddset(&ss,SIG_STOP_FOR_GC); #endif - /* It is possible to get SIGCONT (and probably other - * non-blockable signals) here. */ + /* It is possible to get SIGCONT (and probably other non-blockable + * signals) here. */ #ifdef SIG_RESUME_FROM_GC - { - int sigret; - do { sigwait(&ss, &sigret); } - while (sigret != SIG_RESUME_FROM_GC); - } + { + int sigret; + do { sigwait(&ss, &sigret); } + while (sigret != SIG_RESUME_FROM_GC); + } #else - while (sigwaitinfo(&ss,0) != SIG_STOP_FOR_GC); + while (sigwaitinfo(&ss,0) != SIG_STOP_FOR_GC); #endif - FSHOW_SIGNAL((stderr,"thread=%lu resumed\n",thread->os_thread)); - if(thread->state!=STATE_RUNNING) { - lose("sig_stop_for_gc_handler: wrong thread state on wakeup: %ld\n", - fixnum_value(thread->state)); - } - - undo_fake_foreign_function_call(context); + FSHOW_SIGNAL((stderr,"thread=%lu resumed\n",thread->os_thread)); + if(thread->state!=STATE_RUNNING) { + lose("sig_stop_for_gc_handler: wrong thread state on wakeup: %ld\n", + fixnum_value(thread->state)); } + + undo_fake_foreign_function_call(context); } #endif diff --git a/src/runtime/mips-arch.c b/src/runtime/mips-arch.c index 6da8579..ac3b0ff 100644 --- a/src/runtime/mips-arch.c +++ b/src/runtime/mips-arch.c @@ -264,7 +264,18 @@ arch_internal_error_arguments(os_context_t *context) boolean arch_pseudo_atomic_atomic(os_context_t *context) { - return os_context_register(context, reg_ALLOC) & 1; + /* FIXME: this foreign_function_call_active test is dubious at + * best. If a foreign call is made in a pseudo atomic section + * (?) or more likely a pseudo atomic section is in a foreign + * call then an interrupt is executed immediately. Maybe it + * has to do with C code not maintaining pseudo atomic + * properly. MG - 2005-08-10 + * + * The foreign_function_call_active used to live at each call-site + * to arch_pseudo_atomic_atomic, but this seems clearer. + * --NS 2007-05-15 */ + return (!foreign_function_call_active) + && os_context_register(context, reg_ALLOC) & 1; } void diff --git a/src/runtime/ppc-arch.c b/src/runtime/ppc-arch.c index dca032e..e2d5c67 100644 --- a/src/runtime/ppc-arch.c +++ b/src/runtime/ppc-arch.c @@ -84,7 +84,18 @@ arch_internal_error_arguments(os_context_t *context) boolean arch_pseudo_atomic_atomic(os_context_t *context) { - return ((*os_context_register_addr(context,reg_ALLOC)) & 4); + /* FIXME: this foreign_function_call_active test is dubious at + * best. If a foreign call is made in a pseudo atomic section + * (?) or more likely a pseudo atomic section is in a foreign + * call then an interrupt is executed immediately. Maybe it + * has to do with C code not maintaining pseudo atomic + * properly. MG - 2005-08-10 + * + * The foreign_function_call_active used to live at each call-site + * to arch_pseudo_atomic_atomic, but this seems clearer. + * --NS 2007-05-15 */ + return (!foreign_function_call_active) + && ((*os_context_register_addr(context,reg_ALLOC)) & 4); } void diff --git a/src/runtime/sparc-arch.c b/src/runtime/sparc-arch.c index 9095f02..f32c06d 100644 --- a/src/runtime/sparc-arch.c +++ b/src/runtime/sparc-arch.c @@ -101,7 +101,18 @@ unsigned char *arch_internal_error_arguments(os_context_t *context) boolean arch_pseudo_atomic_atomic(os_context_t *context) { - return ((*os_context_register_addr(context,reg_ALLOC)) & 4); + /* FIXME: this foreign_function_call_active test is dubious at + * best. If a foreign call is made in a pseudo atomic section + * (?) or more likely a pseudo atomic section is in a foreign + * call then an interrupt is executed immediately. Maybe it + * has to do with C code not maintaining pseudo atomic + * properly. MG - 2005-08-10 + * + * The foreign_function_call_active used to live at each call-site + * to arch_pseudo_atomic_atomic, but this seems clearer. + * --NS 2007-05-15 */ + return (!foreign_function_call_active) + && ((*os_context_register_addr(context,reg_ALLOC)) & 4); } void arch_set_pseudo_atomic_interrupted(os_context_t *context) diff --git a/version.lisp-expr b/version.lisp-expr index 6a181b0..47045ec 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.5.48" +"1.0.5.49"