From c01ff86b012283af04641a02e45f066aa7cdb10c Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Sun, 29 May 2005 21:08:45 +0000 Subject: [PATCH] 0.9.1.7: "fix" SB-SPROF on non-gencgc platforms * block some "potentially dangerous, but not really important" signals for GC on non-gencgc platforms. see kludge_sigset_for_gc for commentary. --- NEWS | 1 + src/code/signal.lisp | 11 ++++++-- src/runtime/interrupt.c | 64 ++++++++++++++++++++++++++++++++++++++++++++--- src/runtime/parse.c | 1 + src/runtime/ppc-arch.c | 3 ++- version.lisp-expr | 2 +- 6 files changed, 74 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index 0d8a1b8..d553f1e 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,5 @@ changes in sbcl-0.9.2 relative to sbcl-0.9.1: + * SB-SPROF now works (more) reliably on non-GENCGC platforms. * fixed some lockups due to gc/thread interaction * dynamic space size on PPC has been increased to 768Mb. (thanks to Cyrus Harmon) diff --git a/src/code/signal.lisp b/src/code/signal.lisp index a4c5298..91d7a13 100644 --- a/src/code/signal.lisp +++ b/src/code/signal.lisp @@ -47,13 +47,20 @@ `(flet ((,name () ,@body)) (if *interrupts-enabled* (unwind-protect - (let ((*interrupts-enabled* nil)) - (,name)) + (let ((*interrupts-enabled* nil)) + (,name)) ;; FIXME: Does it matter that an interrupt coming in here ;; could be executed before any of the pending interrupts? ;; Or do incoming interrupts have the good grace to check ;; whether interrupts are pending before executing themselves ;; immediately? + ;; + ;; FIXME: from the kernel's POV we've already handled the + ;; signal the moment we return from the handler having + ;; decided to defer it, meaning that the kernel is free + ;; to deliver _more_ signals to us... of which we save, + ;; and subsequently handle here, only the last one. + ;; PSEUDO-ATOMIC has the same issue. -- NS 2005-05-19 (when *interrupt-pending* (receive-pending-interrupt))) (,name))))) diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index 502c2cd..c2ef993 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -305,8 +305,9 @@ interrupt_handle_pending(os_context_t *context) thread=arch_os_get_current_thread(); data=thread->interrupt_data; - /* FIXME I'm not altogether sure this is appropriate if we're - * here as the result of a pseudo-atomic */ + + /* FIXME: This is almost certainly wrong if we're here as the + * result of a pseudo-atomic as opposed to WITHOUT-INTERRUPTS. */ SetSymbolValue(INTERRUPT_PENDING, NIL,thread); /* restore the saved signal mask from the original signal (the @@ -816,6 +817,53 @@ interrupt_maybe_gc(int signal, siginfo_t *info, void *void_context) #endif +void +kludge_sigset_for_gc(sigset_t * set) +{ +#ifndef LISP_FEATURE_GENCGC + /* FIXME: It is not sure if GENCGC is really right here: maybe this + * really affects eg. only Sparc and PPC. And the following KLUDGE + * could really use real fixing as well. + * + * KLUDGE: block some async signals that seem to have the ability + * to hang us in an uninterruptible state during GC -- at least + * part of the time. The main beneficiary of this is SB-SPROF, as + * SIGPROF was almost certain to be eventually triggered at a bad + * moment, rendering it virtually useless. SIGINT and SIGIO from + * user or eg. Slime also seemed to occasionally do this. + * + * The problem this papers over appears to be something going awry + * in SB-UNIX:RECEIVE-PENDING-SIGNALS at the end of the + * WITHOUT-INTERRUPTS in SUB-GC: adding debugging output shows us + * leaving the body of W-I, but never entering sigtrap_handler. + * + * Empirically, it seems that the problem is only triggered if the + * GC was triggered/deferred during a PA section, but this is not + * a sufficient condition: some collections triggered in such a + * manner seem to be able to receive and defer a signal during the + * GC without issues. Likewise empirically, it seems that the + * problem arises more often with floating point code then not. Eg + * (LOOP (* (RANDOM 1.0) (RANDOM 1.0))) will eventually hang if + * run with SB-SPROF on, but (LOOP (FOO (MAKE-LIST 24))) will not. + * All this makes some badnesss in the interaction between PA and + * W-I seem likely, possibly in the form of one or more bad VOPs. + * + * For additional entertainment on the affected platforms we + * currently use an actual illegal instruction to receive pending + * interrupts instead of a trap: whether this has any bearing on + * the matter is unknown. + * + * Apparently CMUCL blocks everything but SIGILL for GC on Sparc, + * possibly for this very reason. + * + * -- NS 2005-05-20 + */ + sigdelset(set, SIGPROF); + sigdelset(set, SIGIO); + sigdelset(set, SIGINT); +#endif +} + /* this is also used by gencgc, in alloc() */ boolean interrupt_maybe_gc_int(int signal, siginfo_t *info, void *void_context) @@ -823,16 +871,24 @@ interrupt_maybe_gc_int(int signal, siginfo_t *info, void *void_context) sigset_t new; os_context_t *context=(os_context_t *) void_context; fake_foreign_function_call(context); + /* SUB-GC may return without GCing if *GC-INHIBIT* is set, in * which case we will be running with no gc trigger barrier * thing for a while. But it shouldn't be long until the end - * of WITHOUT-GCING. */ + * of WITHOUT-GCING. + * + * FIXME: It would be good to protect the end of dynamic space + * and signal a storage condition from there. + */ + /* enable some signals before calling into Lisp */ sigemptyset(&new); sigaddset_blockable(&new); - /* enable signals before calling into Lisp */ + kludge_sigset_for_gc(&new); sigprocmask(SIG_UNBLOCK,&new,0); + funcall0(SymbolFunction(SUB_GC)); + undo_fake_foreign_function_call(context); return 1; } diff --git a/src/runtime/parse.c b/src/runtime/parse.c index fd89930..8054641 100644 --- a/src/runtime/parse.c +++ b/src/runtime/parse.c @@ -27,6 +27,7 @@ #include "interrupt.h" #include "lispregs.h" #include "monitor.h" +#include "validate.h" #include "arch.h" #include "search.h" #include "thread.h" diff --git a/src/runtime/ppc-arch.c b/src/runtime/ppc-arch.c index fb5958e..df302e1 100644 --- a/src/runtime/ppc-arch.c +++ b/src/runtime/ppc-arch.c @@ -158,7 +158,8 @@ sigtrap_handler(int signal, siginfo_t *siginfo, os_context_t *context) break; case trap_PendingInterrupt: - /* when do we run this branch instead of the twlti code above? */ + /* This is supposed run after WITHOUT-INTERRUPTS if there + * were pending signals. */ arch_skip_instruction(context); interrupt_handle_pending(context); break; diff --git a/version.lisp-expr b/version.lisp-expr index e1e3aa6..3044841 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.9.1.6" +"0.9.1.7" -- 1.7.10.4