1.0.25.30: INTERRUPT-THREAD without RT signals

author Gabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 21:45:22 +0000 (21:45 +0000)
committer Gabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 21:45:22 +0000 (21:45 +0000)
diff --git a/NEWS b/NEWS

index e175a0f..9a946f9 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ changes in sbcl-1.0.26 relative to 1.0.25:
      fault, etc. In the absence of --lose-on-corruption a warning is
      printed to stderr.
    * optimization: slightly faster gc on multithreaded builds
+  * bug fix: real-time signals are not used anymore, so no more
+    hanging when the system wide real-time signal queue gets full.
  
  changes in sbcl-1.0.25 relative to 1.0.24:
    * incompatible change: SB-INTROSPECT:FUNCTION-ARGLIST is deprecated, to be
diff --git a/doc/internals/signals.texinfo b/doc/internals/signals.texinfo

index dc2b3a6..33c0adf 100644 (file)
--- a/doc/internals/signals.texinfo
+++ b/doc/internals/signals.texinfo
@@ -73,20 +73,6 @@ is deferred by pseudo atomic and @code{WITHOUT-GCING}.
  @node Implementation warts
  @section Implementation warts
  
-@subsection RT signals
-
-Sending and receiving the same number of signals is crucial for
-@code{INTERRUPT-THREAD} and @code{sig_stop_for_gc}, hence they are
-real-time signals for which the kernel maintains a queue as opposed to
-just setting a flag for ``sigint pending''.
-
-Note, however, that the rt signal queue is finite and on current linux
-kernels a system wide resource. If the queue is full, SBCL tries to
-signal until it succeeds. This behaviour can lead to deadlocks, if a
-thread in a @code{WITHOUT-INTERRUPTS} is interrupted many times,
-filling up the queue and then a gc hits and tries to send
-@code{SIG_STOP_FOR_GC}.
-
  @subsection Miscellaneous issues
  
  Signal handlers should automatically restore errno and fp
diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp

index d4a90a5..a0cf2e0 100644 (file)
--- a/src/code/target-thread.lisp
+++ b/src/code/target-thread.lisp
@@ -892,12 +892,18 @@ return DEFAULT if given or else signal JOIN-THREAD-ERROR."
  (defun run-interruption ()
    (in-interruption ()
      (loop
-       (let ((interruption (with-interruptions-lock (*current-thread*)
-                             (pop (thread-interruptions *current-thread*)))))
-         (if interruption
-             (with-interrupts
-               (funcall interruption))
-             (return))))))
+     (let ((interruption (with-interruptions-lock (*current-thread*)
+                           (pop (thread-interruptions *current-thread*)))))
+       ;; Resignalling after popping one works fine, because from the
+       ;; OS's point of view we have returned from the signal handler
+       ;; (thanks to arrange_return_to_lisp_function) so at least one
+       ;; more signal will be delivered.
+       (when (thread-interruptions *current-thread*)
+         (signal-interrupt-thread (thread-os-thread *current-thread*)))
+       (if interruption
+           (with-interrupts
+             (funcall interruption))
+           (return))))))
  
  ;;; The order of interrupt execution is peculiar. If thread A
  ;;; interrupts thread B with I1, I2 and B for some reason receives I1
diff --git a/src/runtime/linux-os.h b/src/runtime/linux-os.h

index 02851a3..e930693 100644 (file)
--- a/src/runtime/linux-os.h
+++ b/src/runtime/linux-os.h
@@ -39,5 +39,5 @@ typedef int os_vm_prot_t;
  
  #define SIG_MEMORY_FAULT SIGSEGV
  
-#define SIG_INTERRUPT_THREAD (SIGRTMIN)
+#define SIG_INTERRUPT_THREAD (SIGPWR)
  #define SIG_STOP_FOR_GC (SIGUSR1)
diff --git a/src/runtime/sunos-os.h b/src/runtime/sunos-os.h

index b8560c7..dda6f4f 100644 (file)
--- a/src/runtime/sunos-os.h
+++ b/src/runtime/sunos-os.h
@@ -32,7 +32,7 @@ typedef int os_vm_prot_t;
  
  #define SIG_MEMORY_FAULT SIGSEGV
  
-#define SIG_INTERRUPT_THREAD (SIGRTMIN)
+#define SIG_INTERRUPT_THREAD (SIGPWR)
  #define SIG_STOP_FOR_GC (SIGUSR1)
  
  /* Yaargh?! */
diff --git a/src/runtime/thread.c b/src/runtime/thread.c

index 24daaae..600d168 100644 (file)
--- a/src/runtime/thread.c
+++ b/src/runtime/thread.c
@@ -560,31 +560,9 @@ os_thread_t create_thread(lispobj initial_function) {
      }
  }
  
-/* Send the signo to os_thread, retry if the rt signal queue is
- * full. */
-int
-kill_thread_safely(os_thread_t os_thread, int signo)
-{
-    int r;
-    /* The man page does not mention EAGAIN as a valid return value
-     * for either pthread_kill or kill. But that's theory, this is
-     * practice. By waiting here we assume that the delivery of this
-     * signal is not necessary for the delivery of the signals in the
-     * queue. In other words, we _assume_ there are no deadlocks. */
-    while ((r=pthread_kill(os_thread,signo))==EAGAIN) {
-        /* wait a bit then try again in the hope of the rt signal
-         * queue not being full */
-        FSHOW_SIGNAL((stderr,"/rt signal queue full\n"));
-        /* FIXME: some kind of backoff (random, exponential) would be
-         * nice. */
-        sleep(1);
-    }
-    return r;
-}
-
  int signal_interrupt_thread(os_thread_t os_thread)
  {
-    int status = kill_thread_safely(os_thread, SIG_INTERRUPT_THREAD);
+    int status = pthread_kill(os_thread, SIG_INTERRUPT_THREAD);
      FSHOW_SIGNAL((stderr,"/signal_interrupt_thread: %lu\n", os_thread));
      if (status == 0) {
          return 0;
diff --git a/src/runtime/thread.h b/src/runtime/thread.h

index f8ec4e7..c123077 100644 (file)
--- a/src/runtime/thread.h
+++ b/src/runtime/thread.h
@@ -235,6 +235,5 @@ static inline struct thread *arch_os_get_current_thread(void)
  #endif
  
  extern void create_initial_thread(lispobj);
-extern int kill_thread_safely(os_thread_t os_thread, int signo);
  
  #endif /* _INCLUDE_THREAD_H_ */
diff --git a/version.lisp-expr b/version.lisp-expr

index 0f7cc46..48cffab 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
  ;;; checkins which aren't released. (And occasionally for internal
  ;;; versions, especially for internal versions off the main CVS
  ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.25.29"
+"1.0.25.30"
author	Gabor Melis <mega@hotpop.com>
	Mon, 16 Feb 2009 21:45:22 +0000 (21:45 +0000)
committer	Gabor Melis <mega@hotpop.com>
	Mon, 16 Feb 2009 21:45:22 +0000 (21:45 +0000)
NEWS		patch | blob | history
doc/internals/signals.texinfo		patch | blob | history
src/code/target-thread.lisp		patch | blob | history
src/runtime/linux-os.h		patch | blob | history
src/runtime/sunos-os.h		patch | blob | history
src/runtime/thread.c		patch | blob | history
src/runtime/thread.h		patch | blob | history
version.lisp-expr		patch | blob | history