From: Nikodemus Siivola Date: Mon, 26 Mar 2007 10:30:21 +0000 (+0000) Subject: 1.0.4.3: interrupt and GC issues X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=dcf8b8ccc1e15a5c1c6aba00204b7d3a81827acc;p=sbcl.git 1.0.4.3: interrupt and GC issues * Add WITHOUT-INTERRUPTS to WITHOUT-GCING. * Warn if WITH-INTERRUPTS nested in WITHOUT-GCING. * Make sure that SIG_STOP_FOR_GC and SIG_RESUME_FROM_GC are enabled on threaded builds before calling into SUB-GC from the runtime. * Better WITHOUT-GCING, WITHOUT-INTERRUPTS, and WITH-INTERRUPTS documentation. * Internals documentation about POSIX signal safety rules. --- diff --git a/NEWS b/NEWS index c8131fe..34d975b 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,13 @@ ;;;; -*- coding: utf-8; -*- +changes in sbcl-1.0.5 relative to sbcl-1.0.4: + * documentation: unwinding from asyncronous events has been + documented as unsafe. + * documentation: SB-SYS:WITHOUT-GCING has been documented as unsafe + in multithreaded application code. + * bug fix: GC deadlocks from asynchronous interrupts has been fixed + by disabling interrupts for the duration of any + SB-SYS:WITHOUT-GCING section. + changes in sbcl-1.0.4 relative to sbcl-1.0.3: * new platform: experimental support for x86-64/darwin (MacOS). * incompatible change: the thread-safe (on most platforms) getaddrinfo diff --git a/doc/internals/signals.texinfo b/doc/internals/signals.texinfo index ebdc421..1f27d3f 100644 --- a/doc/internals/signals.texinfo +++ b/doc/internals/signals.texinfo @@ -92,9 +92,36 @@ filling up the queue and then a gc hits and tries to send Signal handlers should automatically restore errno and fp state. Currently, this is not the case. -Furthormore, while @code{arrange_return_to_lisp_function} exits, most -signal handlers invoke unsafe functions without hesitation: gc and all -lisp level handlers think nothing of it. +@subsection POSIX -- Letter and Spirit + +POSIX restricts signal handlers to a use only a narrow subset of POSIX +functions, and declares anything else to have undefined semantics. + +Apparently the real reason is that a signal handler is potentially +interrupting a POSIX call: so the signal safety requirement is really +a re-entrancy requirement. We can work around the letter of the +standard by arranging to handle the interrupt when the signal handler +returns (see: @code{arrange_return_to_lisp_function}.) This does, +however, in no way protect us from the real issue of re-entrancy: even +though we would no longer be in a signal handler, we might still be in +the middle of an interrupted POSIX call. + +For some signals this appears to be a non-issue: @code{SIGSEGV} and +other semi-synchronous signals are raised by our code for our code, +and so we can be sure that we are not interrupting a POSIX call with +any of them. + +For asynchronous signals like @code{SIGALARM} and @code{SIGINT} this +is a real issue. + +The right thing to do in multithreaded builds would probably be to use +POSIX semaphores (which are signal safe) to inform a separate handler +thread about such asynchronous events. In single-threaded builds there +does not seem to be any other option aside from generally blocking +asynch signals and listening for them every once and a while at safe +points. Neither of these is implemented as of SBCL 1.0.4. + +Currently all our handlers invoke unsafe functions without hesitation. @node Programming with signal handling in mind @section Programming with signal handling in mind diff --git a/src/code/signal.lisp b/src/code/signal.lisp index 55eb7ca..9d2290a 100644 --- a/src/code/signal.lisp +++ b/src/code/signal.lisp @@ -42,7 +42,11 @@ (sb!xc:defmacro without-interrupts (&body body) #!+sb-doc - "Execute BODY in a context impervious to interrupts." + "Execute BODY with all deferrable interrupts deferred. Deferrable interrupts +include most blockable POSIX signals, and SB-THREAD:INTERRUPT-THREAD. Does not +interfere with garbage collection, and unlike in many traditional Lisps using +userspace threads, in SBCL WITHOUT-INTERRUPTS does not inhibit scheduling of +other threads." (let ((name (gensym "WITHOUT-INTERRUPTS-BODY-"))) `(flet ((,name () ,@body)) (if *interrupts-enabled* @@ -66,12 +70,16 @@ (sb!xc:defmacro with-interrupts (&body body) #!+sb-doc "Allow interrupts while executing BODY. As interrupts are normally allowed, - this is only useful inside a WITHOUT-INTERRUPTS." +this is only useful inside a SB-SYS:WITHOUT-INTERRUPTS. Signals a runtime +warning if used inside the dynamic countour of SB-SYS:WITHOUT-GCING." (let ((name (gensym))) `(flet ((,name () ,@body)) (if *interrupts-enabled* (,name) - (let ((*interrupts-enabled* t)) - (when *interrupt-pending* - (receive-pending-interrupt)) - (,name)))))) + (progn + (when sb!kernel:*gc-inhibit* + (warn "Re-enabling interrupts while GC is inhibited.")) + (let ((*interrupts-enabled* t)) + (when *interrupt-pending* + (receive-pending-interrupt)) + (,name))))))) diff --git a/src/code/sysmacs.lisp b/src/code/sysmacs.lisp index b61e1b3..3556bc1 100644 --- a/src/code/sysmacs.lisp +++ b/src/code/sysmacs.lisp @@ -32,14 +32,26 @@ (defmacro without-gcing (&body body) #!+sb-doc - "Executes the forms in the body without doing a garbage -collection. It inhibits both automatically and explicitly triggered -gcs. Finally, upon leaving the BODY if gc is not inhibited it runs the -pending gc. Similarly, if gc is triggered in another thread then it -waits until gc is enabled in this thread." + "Executes the forms in the body without doing a garbage collection. It +inhibits both automatically and explicitly triggered collections. Finally, +upon leaving the BODY if gc is not inhibited it runs the pending gc. +Similarly, if gc is triggered in another thread then it waits until gc is +enabled in this thread. + +Implies SB-SYS:WITHOUT-INTERRUPTS for BODY, and causes any nested +SB-SYS:WITH-INTERRUPTS to signal a warning during execution of the BODY. + +Should be used with great care, and not at all in multithreaded application +code: Any locks that are ever acquired while GC is inhibited need to be always +held with GC inhibited to prevent deadlocks: if T1 holds the lock and is +stopped for GC while T2 is waiting for the lock inside WITHOUT-GCING the +system will be deadlocked. Since SBCL does not currently document its internal +locks, application code can never be certain that this invariant is +maintained." `(unwind-protect - (let ((*gc-inhibit* t)) - ,@body) + (without-interrupts + (let ((*gc-inhibit* t)) + ,@body)) ;; the test is racy, but it can err only on the overeager side (sb!kernel::maybe-handle-pending-gc))) diff --git a/src/code/timer.lisp b/src/code/timer.lisp index 2d4c67a..5352385 100644 --- a/src/code/timer.lisp +++ b/src/code/timer.lisp @@ -361,8 +361,26 @@ triggers." (defmacro sb!ext:with-timeout (expires &body body) #!+sb-doc - "Execute the body, asynchronously interrupting it and signalling a -TIMEOUT condition after at least EXPIRES seconds have passed." + "Execute the body, asynchronously interrupting it and signalling a TIMEOUT +condition after at least EXPIRES seconds have passed. + +Note that it is never safe to unwind from an asynchronous condition. Consider: + + (defun call-with-foo (function) + (let (foo) + (unwind-protect + (progn + (setf foo (get-foo)) + (funcall function foo)) + (when foo + (release-foo foo))))) + +If TIMEOUT occurs after GET-FOO has executed, but before the assignment, then +RELEASE-FOO will be missed. While individual sites like this can be made proof +against asynchronous unwinds, this doesn't solve the fundamental issue, as all +the frames potentially unwound through need to be proofed, which includes both +system and application code -- and in essence proofing everything will make +the system uninterruptible." (with-unique-names (timer) ;; FIXME: a temporary compatibility workaround for CLX, if unsafe ;; unwinds are handled revisit it. diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index 00c920e..eb1e834 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -123,7 +123,7 @@ static sigset_t blockable_sigset; #endif void -check_blockables_blocked_or_lose() +check_blockables_blocked_or_lose(void) { #if !defined(LISP_FEATURE_WIN32) /* Get the current sigmask, by blocking the empty set. */ @@ -138,6 +138,41 @@ check_blockables_blocked_or_lose() #endif } +void +check_gc_signals_unblocked_or_lose(void) +{ +#ifdef LISP_FEATURE_SB_THREAD +# if !defined(LISP_FEATURE_WIN32) + /* Get the current sigmask, by blocking the empty set. */ + sigset_t empty,current; + sigemptyset(&empty); + thread_sigmask(SIG_BLOCK, &empty, ¤t); + if (sigismember(¤t, SIG_STOP_FOR_GC)) + lose("SIG_STOP_FOR_GC blocked in thread %p at a bad place\n", + arch_os_get_current_thread()); +# if defined(SIG_RESUME_FROM_GC) + if (sigismember(¤t, SIG_RESUME_FROM_GC)) + lose("SIG_RESUME_FROM_GC blocked in thread %p at a bad place\n", + arch_os_get_current_thread()); +# endif +# endif +#endif +} + +void +unblock_gc_signals(void) +{ +#ifdef LISP_FEATURE_SB_THREAD + sigset_t new; + sigemptyset(&new); +#if defined(SIG_RESUME_FROM_GC) + sigaddset(&new,SIG_RESUME_FROM_GC); +#endif + sigaddset(&new,SIG_STOP_FOR_GC); + thread_sigmask(SIG_UNBLOCK,&new,0); +#endif +} + inline static void check_interrupts_enabled_or_lose(os_context_t *context) { @@ -1147,22 +1182,14 @@ interrupt_maybe_gc_int(int signal, siginfo_t *info, void *void_context) * outer context. */ #ifndef LISP_FEATURE_WIN32 - if(SymbolValue(INTERRUPTS_ENABLED,thread)!=NIL) + if(SymbolValue(INTERRUPTS_ENABLED,thread)!=NIL) { thread_sigmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); -#ifdef LISP_FEATURE_SB_THREAD - else { - sigset_t new; - sigemptyset(&new); -#if defined(SIG_RESUME_FROM_GC) - sigaddset(&new,SIG_RESUME_FROM_GC); -#endif - sigaddset(&new,SIG_STOP_FOR_GC); - thread_sigmask(SIG_UNBLOCK,&new,0); + check_gc_signals_unblocked_or_lose(); } -#endif + else + unblock_gc_signals(); #endif funcall0(SymbolFunction(SUB_GC)); - undo_fake_foreign_function_call(context); return 1; } diff --git a/version.lisp-expr b/version.lisp-expr index 8ed9c1c..9682fa0 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.4.2" +"1.0.4.3"