;;;; -*- coding: utf-8; -*-
+changes in sbcl-0.9.18 (1.0.beta?) relative to sbcl-0.9.16:
+ * bug fix: two potential GC deadlocks affecting threaded builds.
+
changes in sbcl-0.9.17 (0.9.99?) relative to sbcl-0.9.16:
* feature: weak hash tables, see MAKE-HASH-TABLE documentation
* incompatible change: External-format support for FFI calls. The
;;;; GC hooks
(defvar *after-gc-hooks* nil
- "Called after each garbage collection. In a multithreaded
-environment these hooks may run in any thread.")
+ "Called after each garbage collection, except for garbage collections
+triggered during thread exits. In a multithreaded environment these hooks may
+run in any thread.")
\f
;;;; internal GC
(sb!thread:make-mutex :name "GC lock") "ID of thread running SUB-GC")
(defun sub-gc (&key (gen 0))
- (unless (eq sb!thread:*current-thread*
+ (unless (eq sb!thread:*current-thread*
(sb!thread::mutex-value *already-in-gc*))
;; With gencgc, unless *GC-PENDING* every allocation in this
;; function triggers another gc, potentially exceeding maximum
;;
;; Can that be avoided by having the finalizers and hooks run only
;; from the outermost SUB-GC?
- (run-pending-finalizers)
- (dolist (hook *after-gc-hooks*)
- (handler-case
- (funcall hook)
- (error (c)
- (warn "Error calling after GC hook ~S:~% ~S" hook c)))))))
+ ;;
+ ;; KLUDGE: Don't run the hooks in GC's triggered by dying threads,
+ ;; so that user-code never runs with
+ ;; (thread-alive-p *current-thread*) => nil
+ ;; The long-term solution will be to keep a separate thread for
+ ;; finalizers and after-gc hooks.
+ (when (sb!thread:thread-alive-p sb!thread:*current-thread*)
+ (run-pending-finalizers)
+ (dolist (hook *after-gc-hooks*)
+ (handler-case
+ (funcall hook)
+ (error (c)
+ (warn "Error calling after-GC hook ~S:~% ~A" hook c))))))))
;;; This is the user-advertised garbage collection function.
(defun gc (&key (gen 0) (full nil) &allow-other-keys)
(defvar *all-threads* ())
(defvar *all-threads-lock* (make-mutex :name "all threads lock"))
+(defmacro with-all-threads-lock (&body body)
+ #!-sb-thread
+ `(locally ,@body)
+ #!+sb-thread
+ `(without-interrupts
+ (with-mutex (*all-threads-lock*)
+ ,@body)))
+
(defun list-all-threads ()
#!+sb-doc
"Return a list of the live threads."
- (with-mutex (*all-threads-lock*)
+ (with-all-threads-lock
(copy-list *all-threads*)))
(declaim (inline current-thread-sap))
(define-alien-routine "signal_interrupt_thread"
integer (os-thread unsigned-long))
- (define-alien-routine "block_blockable_signals"
+ (define-alien-routine "block_deferrable_signals"
void)
#!+sb-lutex
;;; Remove thread from its session, if it has one.
#!+sb-thread
(defun handle-thread-exit (thread)
- (with-mutex (*all-threads-lock*)
- (/show0 "HANDLING THREAD EXIT")
- #!+sb-lutex
- (when (thread-interruptions-lock thread)
- (/show0 "FREEING MUTEX LUTEX")
- (with-lutex-address (lutex (mutex-lutex (thread-interruptions-lock thread)))
- (%lutex-destroy lutex)))
- (setq *all-threads* (delete thread *all-threads*)))
- (when *session*
- (%delete-thread-from-session thread *session*)))
+ (/show0 "HANDLING THREAD EXIT")
+ ;; We're going down, can't handle interrupts sanely anymore.
+ ;; GC remains enabled.
+ (block-deferrable-signals)
+ ;; Lisp-side cleanup
+ (with-all-threads-lock
+ (setf (thread-%alive-p thread) nil)
+ (setf (thread-os-thread thread) nil)
+ (setq *all-threads* (delete thread *all-threads*))
+ (when *session*
+ (%delete-thread-from-session thread *session*)))
+ #!+sb-lutex
+ (when (thread-interruptions-lock thread)
+ (/show0 "FREEING MUTEX LUTEX")
+ (with-lutex-address (lutex (mutex-lutex (thread-interruptions-lock thread)))
+ (%lutex-destroy lutex))))
(defun terminate-session ()
#!+sb-doc
(sb!impl::*internal-symbol-output-fun* nil)
(sb!impl::*descriptor-handlers* nil)) ; serve-event
(setf (thread-os-thread thread) (current-thread-sap-id))
- (with-mutex (*all-threads-lock*)
+ (with-all-threads-lock
(push thread *all-threads*))
(with-session-lock (*session*)
(push thread (session-threads *session*)))
;; threads, it's time to enable signals
(sb!unix::reset-signal-mask)
(funcall real-function))
- ;; we're going down, can't handle
- ;; interrupts sanely anymore
- (let ((sb!impl::*gc-inhibit* t))
- (block-blockable-signals)
- (setf (thread-%alive-p thread) nil)
- (setf (thread-os-thread thread) nil)
- ;; and remove what can be the last
- ;; reference to this thread
- (handle-thread-exit thread)))))))
+ (handle-thread-exit thread))))))
(values))))
;; Keep INITIAL-FUNCTION pinned until the child thread is
;; initialized properly.
#endif
}
+void
+block_deferrable_signals(void)
+{
+#ifndef LISP_FEATURE_WIN32
+ thread_sigmask(SIG_BLOCK, &deferrable_sigset, 0);
+#endif
+}
+
\f
/*
* utility routines used by various signal handlers
{
lispobj function;
int result, lock_ret;
+
FSHOW((stderr,"/creating thread %lu\n", thread_self()));
function = th->no_tls_value_marker;
th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
gc_assert(lock_ret == 0);
result = funcall0(function);
+
+ /* Block GC */
+ block_blockable_signals();
th->state=STATE_DEAD;
/* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
(wait-for-threads threads)))
(format t "backtrace test done~%")
+
+(format t "~&starting gc deadlock test: WARNING: THIS TEST WILL HANG ON FAILURE!~%")
+
+(with-test (:name (:gc-deadlock))
+ ;; Prior to 0.9.16.46 thread exit potentially deadlocked the
+ ;; GC due to *all-threads-lock* and session lock. On earlier
+ ;; versions and at least on one specific box this test is good enough
+ ;; to catch that typically well before the 1500th iteration.
+ (loop
+ with i = 0
+ with n = 3000
+ while (< i n)
+ do
+ (incf i)
+ (when (zerop (mod i 100))
+ (write-char #\.)
+ (force-output))
+ (handler-case
+ (if (oddp i)
+ (sb-thread:make-thread
+ (lambda ()
+ (sleep (random 0.001)))
+ :name (list :sleep i))
+ (sb-thread:make-thread
+ (lambda ()
+ ;; KLUDGE: what we are doing here is explicit,
+ ;; but the same can happen because of a regular
+ ;; MAKE-THREAD or LIST-ALL-THREADS, and various
+ ;; session functions.
+ (sb-thread:with-mutex (sb-thread::*all-threads-lock*)
+ (sb-thread::with-session-lock (sb-thread::*session*)
+ (sb-ext:gc))))
+ :name (list :gc i)))
+ (error (e)
+ (format t "~%error creating thread ~D: ~A -- backing off for retry~%" i e)
+ (sleep 0.1)
+ (incf i)))))
+
+(format t "~&gc deadlock test done~%")
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.17.1"
+"0.9.17.2"