From: Gabor Melis Date: Mon, 16 Feb 2009 22:01:15 +0000 (+0000) Subject: 1.0.25.37: block deferrables when gc pending in PA X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=aa0ed5a420ea5295d586b3f323b5375d3b506860;p=sbcl.git 1.0.25.37: block deferrables when gc pending in PA Consider this: set_pseudo_atomic_atomic() alloc and set pseudo_atomic_interrupted and GC_PENDING clear_pseudo_atomic_atomic() if (get_pseudo_atomic_interrupted()) gc(); If an async interrupt happens and unwinds between clear_pseudo_atomic_atomic and gc() we have lost a gc trigger until the next alloc. Same with SIG_STOP_FOR_GC instead of alloc. This patch addresses the above problem by blocking deferrables when a gc is requested in a pseudo atomic section. On exit from the protected section there is no race because no async unwinding interrupts can happen. --- diff --git a/src/runtime/cheneygc.c b/src/runtime/cheneygc.c index 2ba6250..78c0ea4 100644 --- a/src/runtime/cheneygc.c +++ b/src/runtime/cheneygc.c @@ -654,6 +654,8 @@ cheneygc_handle_wp_violation(os_context_t *context, void *addr) * the PA section */ SetSymbolValue(GC_PENDING,T,thread); arch_set_pseudo_atomic_interrupted(context); + maybe_save_gc_mask_and_block_deferrables + (os_context_sigmask_addr(context)); } else { maybe_gc(context); } diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index 60db5a8..81ef8b2 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -4723,8 +4723,23 @@ general_alloc_internal(long nbytes, int page_type_flag, struct alloc_region *reg /* set things up so that GC happens when we finish the PA * section */ SetSymbolValue(GC_PENDING,T,thread); - if (SymbolValue(GC_INHIBIT,thread) == NIL) - set_pseudo_atomic_interrupted(thread); + if (SymbolValue(GC_INHIBIT,thread) == NIL) { + set_pseudo_atomic_interrupted(thread); +#ifdef LISP_FEATURE_PPC + /* PPC calls alloc() from a trap, look up the most + * recent one and frob that. */ + { + int context_index = + fixnum_value(SymbolValue(FREE_INTERRUPT_CONTEXT_INDEX, + thread)); + os_context_t *context = + thread->interrupt_contexts[context_index - 1]; + maybe_save_gc_mask_and_block_deferrables(context); + } +#else + maybe_save_gc_mask_and_block_deferrables(NULL); +#endif + } } } new_obj = gc_alloc_with_region(nbytes, page_type_flag, region, 0); diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index b9e8b11..78f34fb 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -244,6 +244,48 @@ check_interrupts_enabled_or_lose(os_context_t *context) lose ("in pseudo atomic section\n"); } +/* Save sigset (or the current sigmask if 0) if there is no pending + * handler, because that means that deferabbles are already blocked. + * The purpose is to avoid losing the pending gc signal if a + * deferrable interrupt async unwinds between clearing the pseudo + * atomic and trapping to GC.*/ +void +maybe_save_gc_mask_and_block_deferrables(sigset_t *sigset) +{ + struct thread *thread = arch_os_get_current_thread(); + struct interrupt_data *data = thread->interrupt_data; + sigset_t oldset; + /* Obviously, this function is called when signals may not be + * blocked. Let's make sure we are not interrupted. */ + thread_sigmask(SIG_BLOCK, &blockable_sigset, &oldset); +#ifndef LISP_FEATURE_SB_THREAD + /* With threads a SIG_STOP_FOR_GC and a normal GC may also want to + * block. */ + if (data->gc_blocked_deferrables) + lose("gc_blocked_deferrables already true\n"); +#endif + if ((!data->pending_handler) && + (!data->gc_blocked_deferrables)) { + FSHOW_SIGNAL((stderr,"/setting gc_blocked_deferrables\n")); + data->gc_blocked_deferrables = 1; + if (sigset) { + /* This is the sigmask of some context. */ + sigcopyset(&data->pending_mask, sigset); + sigaddset_deferrable(sigset); + thread_sigmask(SIG_SETMASK,&oldset,0); + return; + } else { + /* Operating on the current sigmask. Save oldset and + * unblock gc signals. In the end, this is equivalent to + * blocking the deferrables. */ + sigcopyset(&data->pending_mask, &oldset); + unblock_gc_signals(); + return; + } + } + thread_sigmask(SIG_SETMASK,&oldset,0); +} + /* Are we leaving WITH-GCING and already running with interrupts * enabled, without the protection of *GC-INHIBIT* T and there is gc * (or stop for gc) pending, but we haven't trapped yet? */ @@ -268,50 +310,50 @@ check_interrupt_context_or_lose(os_context_t *context) struct interrupt_data *data = thread->interrupt_data; int interrupt_deferred_p = (data->pending_handler != 0); int interrupt_pending = (SymbolValue(INTERRUPT_PENDING,thread) != NIL); + sigset_t *sigset = os_context_sigmask_addr(context); /* On PPC pseudo_atomic_interrupted is cleared when coming out of * handle_allocation_trap. */ #if defined(LISP_FEATURE_GENCGC) && !defined(LISP_FEATURE_PPC) -#if 0 int interrupts_enabled = (SymbolValue(INTERRUPTS_ENABLED,thread) != NIL); int gc_inhibit = (SymbolValue(GC_INHIBIT,thread) != NIL); int gc_pending = (SymbolValue(GC_PENDING,thread) == T); int pseudo_atomic_interrupted = get_pseudo_atomic_interrupted(thread); int in_race_p = in_leaving_without_gcing_race_p(thread); -#endif /* In the time window between leaving the *INTERRUPTS-ENABLED* NIL * section and trapping, a SIG_STOP_FOR_GC would see the next * check fail, for this reason sig_stop_for_gc handler does not - * call this function. Plus, there may be interrupt lossage when a - * pseudo atomic is interrupted by a deferrable signal and gc is - * triggered, too. */ -#if 0 - if (interrupt_deferred_p) + * call this function. */ + if (interrupt_deferred_p) { if (!(!interrupts_enabled || pseudo_atomic_interrupted || in_race_p)) - lose("Stray deferred interrupt."); -#endif -#if 0 + lose("Stray deferred interrupt.\n"); + } if (gc_pending) if (!(pseudo_atomic_interrupted || gc_inhibit || in_race_p)) - lose("GC_PENDING, but why?."); + lose("GC_PENDING, but why?\n"); #if defined(LISP_FEATURE_SB_THREAD) { int stop_for_gc_pending = (SymbolValue(STOP_FOR_GC_PENDING,thread) != NIL); if (stop_for_gc_pending) if (!(pseudo_atomic_interrupted || gc_inhibit || in_race_p)) - lose("STOP_FOR_GC_PENDING, but why?."); + lose("STOP_FOR_GC_PENDING, but why?\n"); } #endif #endif -#endif if (interrupt_pending && !interrupt_deferred_p) - lose("INTERRUPT_PENDING but not pending handler."); - if (interrupt_deferred_p) - check_deferrables_blocked_in_sigset_or_lose - (os_context_sigmask_addr(context)); - else - check_deferrables_unblocked_in_sigset_or_lose - (os_context_sigmask_addr(context)); + lose("INTERRUPT_PENDING but not pending handler.\n"); + if ((data->gc_blocked_deferrables) && interrupt_pending) + lose("gc_blocked_deferrables and interrupt pending\n."); + if (data->gc_blocked_deferrables) + check_deferrables_blocked_in_sigset_or_lose(sigset); + if (interrupt_pending || interrupt_deferred_p) + check_deferrables_blocked_in_sigset_or_lose(sigset); + else { + check_deferrables_unblocked_in_sigset_or_lose(sigset); + /* If deferrables are unblocked then we are open to signals + * that run lisp code. */ + check_gc_signals_unblocked_in_sigset_or_lose(sigset); + } } /* When we catch an internal error, should we pass it back to Lisp to @@ -616,27 +658,47 @@ interrupt_handle_pending(os_context_t *context) /* Win32 only needs to handle the GC cases (for now?) */ - struct thread *thread; + struct thread *thread = arch_os_get_current_thread(); + struct interrupt_data *data = thread->interrupt_data; if (arch_pseudo_atomic_atomic(context)) { lose("Handling pending interrupt in pseduo atomic."); } - thread = arch_os_get_current_thread(); - FSHOW_SIGNAL((stderr, "/entering interrupt_handle_pending\n")); check_blockables_blocked_or_lose(); - /* If pseudo_atomic_interrupted is set then the interrupt is going - * to be handled now, ergo it's safe to clear it. */ - arch_clear_pseudo_atomic_interrupted(context); + /* If GC/SIG_STOP_FOR_GC stroke during PA and there was no pending + * handler, then the pending mask was saved and + * gc_blocked_deferrables set. Hence, there can be no pending + * handler and it's safe to restore the pending mask. + * + * Note, that if gc_blocked_deferrables is false we may still have + * to GC. In this case, we are coming out of a WITHOUT-GCING. */ + if (data->gc_blocked_deferrables) { + if (data->pending_handler) + lose("GC blocked deferrables but still got a pending handler."); + if (SymbolValue(GC_INHIBIT,thread)!=NIL) + lose("GC blocked deferrables while GC is inhibited."); + /* restore the saved signal mask from the original signal + * (the one that interrupted us during the critical + * section) into the os_context for the signal we're + * currently in the handler for. This should ensure that + * when we return from the handler the blocked signals are + * unblocked */ + sigcopyset(os_context_sigmask_addr(context), &data->pending_mask); + data->gc_blocked_deferrables = 0; + } if (SymbolValue(GC_INHIBIT,thread)==NIL) { + void *original_pending_handler = data->pending_handler; + #ifdef LISP_FEATURE_SB_THREAD if (SymbolValue(STOP_FOR_GC_PENDING,thread) != NIL) { /* STOP_FOR_GC_PENDING and GC_PENDING are cleared by * the signal handler if it actually stops us. */ + arch_clear_pseudo_atomic_interrupted(context); sig_stop_for_gc_handler(SIG_STOP_FOR_GC,NULL,context); } else #endif @@ -644,46 +706,81 @@ interrupt_handle_pending(os_context_t *context) * is used in SUB-GC as part of the mechanism to supress * recursive gcs.*/ if (SymbolValue(GC_PENDING,thread) == T) { + /* Two reasons for doing this. First, if there is a + * pending handler we don't want to run. Second, we are + * going to clear pseudo atomic interrupted to avoid + * spurious trapping on every allocation in SUB_GC and + * having a pending handler with interrupts enabled and + * without pseudo atomic interrupted breaks an + * invariant. */ + if (data->pending_handler) { + bind_variable(ALLOW_WITH_INTERRUPTS, NIL, thread); + bind_variable(INTERRUPTS_ENABLED, NIL, thread); + } + + arch_clear_pseudo_atomic_interrupted(context); + /* GC_PENDING is cleared in SUB-GC, or if another thread * is doing a gc already we will get a SIG_STOP_FOR_GC and * that will clear it. */ - maybe_gc(context); + if (!maybe_gc(context)) + lose("GC not inhibited but maybe_gc did not GC."); + + if (data->pending_handler) { + unbind(thread); + unbind(thread); + } + } else if (SymbolValue(GC_PENDING,thread) != NIL) { + /* It's not NIL or T so GC_PENDING is :IN-PROGRESS. If + * GC-PENDING is not NIL then we cannot trap on pseudo + * atomic due to GC (see if(GC_PENDING) logic in + * cheneygc.c an gengcgc.c), plus there is a outer + * WITHOUT-INTERRUPTS SUB_GC, so how did we end up + * here? */ + lose("Trapping to run pending handler while GC in progress."); } + check_blockables_blocked_or_lose(); + + /* No GC shall be lost. If SUB_GC triggers another GC then + * that should be handled on the spot. */ + if (SymbolValue(GC_PENDING,thread) != NIL) + lose("GC_PENDING after doing gc."); +#ifdef LISP_FEATURE_SB_THREAD + if (SymbolValue(STOP_FOR_GC_PENDING,thread) != NIL) + lose("STOP_FOR_GC_PENDING after doing gc."); +#endif + /* Check two things. First, that gc does not clobber a handler + * that's already pending. Second, that there is no interrupt + * lossage: if original_pending_handler was NULL then even if + * an interrupt arrived during GC (POST-GC, really) it was + * handled. */ + if (original_pending_handler != data->pending_handler) + lose("pending handler changed in gc: %x -> %d.", + original_pending_handler, data->pending_handler); } #ifndef LISP_FEATURE_WIN32 - /* we may be here only to do the gc stuff, if interrupts are - * enabled run the pending handler */ - if (SymbolValue(INTERRUPTS_ENABLED,thread) != NIL) { - struct interrupt_data *data = thread->interrupt_data; - - /* There may be no pending handler, because it was only a gc - * that had to be executed or because pseudo atomic triggered - * twice for a single interrupt. For the interested reader, - * that may happen if an interrupt hits after the interrupted - * flag is cleared but before pseudo-atomic is set and a - * pseudo atomic is interrupted in that interrupt. */ - if (data->pending_handler) { - - /* If we're here as the result of a pseudo-atomic as opposed - * to WITHOUT-INTERRUPTS, then INTERRUPT_PENDING is already - * NIL, because maybe_defer_handler sets - * PSEUDO_ATOMIC_INTERRUPTED only if interrupts are enabled.*/ - SetSymbolValue(INTERRUPT_PENDING, NIL, thread); - - /* restore the saved signal mask from the original signal (the - * one that interrupted us during the critical section) into the - * os_context for the signal we're currently in the handler for. - * This should ensure that when we return from the handler the - * blocked signals are unblocked */ - sigcopyset(os_context_sigmask_addr(context), &data->pending_mask); - - /* This will break on sparc linux: the deferred handler really wants - * to be called with a void_context */ - run_deferred_handler(data,(void *)context); - } + /* There may be no pending handler, because it was only a gc that + * had to be executed or because Lisp is a bit too eager to call + * DO-PENDING-INTERRUPT. */ + if ((SymbolValue(INTERRUPTS_ENABLED,thread) != NIL) && + (data->pending_handler)) { + /* No matter how we ended up here, clear both + * INTERRUPT_PENDING and pseudo atomic interrupted. It's safe + * because we checked above that there is no GC pending. */ + SetSymbolValue(INTERRUPT_PENDING, NIL, thread); + arch_clear_pseudo_atomic_interrupted(context); + /* Restore the sigmask in the context. */ + sigcopyset(os_context_sigmask_addr(context), &data->pending_mask); + /* This will break on sparc linux: the deferred handler really + * wants to be called with a void_context */ + run_deferred_handler(data,(void *)context); } + /* It is possible that the end of this function was reached + * without never actually doing anything, the tests in Lisp for + * when to call receive-pending-interrupt are not exact. */ + FSHOW_SIGNAL((stderr, "/exiting interrupt_handle_pending\n")); #endif } @@ -779,6 +876,7 @@ interrupt_handle_now(int signal, siginfo_t *info, os_context_t *context) info_sap, context_sap); } else { + /* This cannot happen in sane circumstances. */ FSHOW_SIGNAL((stderr,"/calling C-level handler\n")); @@ -816,6 +914,7 @@ run_deferred_handler(struct interrupt_data *data, void *v_context) void (*pending_handler) (int, siginfo_t*, void*)=data->pending_handler; data->pending_handler=0; + FSHOW_SIGNAL((stderr, "/running deferred handler %p\n", pending_handler)); (*pending_handler)(data->pending_signal,&(data->pending_info), v_context); } @@ -830,6 +929,10 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, if (SymbolValue(INTERRUPT_PENDING,thread) != NIL) lose("interrupt already pending\n"); + if (thread->interrupt_data->pending_handler) + lose("there is a pending handler already (PA)\n"); + if (data->gc_blocked_deferrables) + lose("maybe_defer_handler: gc_blocked_deferrables true\n"); check_interrupt_context_or_lose(context); /* If interrupts are disabled then INTERRUPT_PENDING is set and * not PSEDUO_ATOMIC_INTERRUPTED. This is important for a pseudo @@ -844,8 +947,9 @@ maybe_defer_handler(void *handler, struct interrupt_data *data, store_signal_data_for_later(data,handler,signal,info,context); SetSymbolValue(INTERRUPT_PENDING, T,thread); FSHOW_SIGNAL((stderr, - "/maybe_defer_handler(%x,%d): deferred\n", - (unsigned int)handler,signal)); + "/maybe_defer_handler(%x,%d): deferred (RACE=%d)\n", + (unsigned int)handler,signal, + in_leaving_without_gcing_race_p(thread))); check_interrupt_context_or_lose(context); return 1; } @@ -885,15 +989,16 @@ store_signal_data_for_later (struct interrupt_data *data, void *handler, FSHOW_SIGNAL((stderr, "/store_signal_data_for_later: signal: %d\n", signal)); - if(context) { - /* the signal mask in the context (from before we were - * interrupted) is copied to be restored when - * run_deferred_handler happens. Then the usually-blocked - * signals are added to the mask in the context so that we are - * running with blocked signals when the handler returns */ - sigcopyset(&(data->pending_mask),os_context_sigmask_addr(context)); - sigaddset_deferrable(os_context_sigmask_addr(context)); - } + if(!context) + lose("Null context"); + + /* the signal mask in the context (from before we were + * interrupted) is copied to be restored when run_deferred_handler + * happens. Then the usually-blocked signals are added to the mask + * in the context so that we are running with blocked signals when + * the handler returns */ + sigcopyset(&(data->pending_mask),os_context_sigmask_addr(context)); + sigaddset_deferrable(os_context_sigmask_addr(context)); } static void @@ -941,6 +1046,7 @@ low_level_maybe_now_maybe_later(int signal, siginfo_t *info, void *void_context) #ifdef LISP_FEATURE_SB_THREAD +/* This function must not cons, because that may trigger a GC. */ void sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) { @@ -958,10 +1064,14 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) } else if (arch_pseudo_atomic_atomic(context)) { SetSymbolValue(STOP_FOR_GC_PENDING,T,thread); arch_set_pseudo_atomic_interrupted(context); + maybe_save_gc_mask_and_block_deferrables + (os_context_sigmask_addr(context)); FSHOW_SIGNAL((stderr,"sig_stop_for_gc deferred (PA)\n")); return; } + FSHOW_SIGNAL((stderr, "/sig_stop_for_gc_handler\n")); + /* Not PA and GC not inhibited -- we can stop now. */ /* need the context stored so it can have registers scavenged */ diff --git a/src/runtime/interrupt.h b/src/runtime/interrupt.h index bf9fe42..1a4c2af 100644 --- a/src/runtime/interrupt.h +++ b/src/runtime/interrupt.h @@ -37,6 +37,7 @@ extern sigset_t deferrable_sigset; extern sigset_t blockable_sigset; extern sigset_t gc_sigset; +extern void block_deferrable_signals(void); extern void block_blockable_signals(void); extern void unblock_deferrable_signals(void); extern void unblock_gc_signals(void); @@ -47,6 +48,8 @@ extern void check_blockables_blocked_or_lose(void); extern void check_gc_signals_unblocked_or_lose(void); extern void check_gc_signals_unblocked_in_sigset_or_lose(sigset_t *sigset); +extern void maybe_save_gc_mask_and_block_deferrables(sigset_t *sigset); + static inline void sigcopyset(sigset_t *new, sigset_t *old) { @@ -94,6 +97,11 @@ struct interrupt_data { int pending_signal; siginfo_t pending_info; sigset_t pending_mask; + /* Was pending mask saved for gc request? True if GC_PENDING or + * SIG_STOP_FOR_GC happened in a pseudo atomic with no GC_INHIBIT + * NIL. Both deferrable interrupt handlers and gc are careful not + * to clobber each other's pending_mask. */ + boolean gc_blocked_deferrables; }; extern boolean interrupt_handler_pending_p(void); diff --git a/src/runtime/thread.c b/src/runtime/thread.c index 600d168..8e6c3c2 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -467,6 +467,7 @@ create_thread_struct(lispobj initial_function) { return 0; } th->interrupt_data->pending_handler = 0; + th->interrupt_data->gc_blocked_deferrables = 0; th->no_tls_value_marker=initial_function; th->stepping = NIL; diff --git a/version.lisp-expr b/version.lisp-expr index a77e414..76c2dbc 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.25.36" +"1.0.25.37"