From 1d238a6b36387151202940a95b7cec7ad7d14e9b Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Mon, 28 Nov 2011 19:45:16 +0200 Subject: [PATCH] more robust deadlock detection Lock around building the deadlock chain using WITH-CAS-LOCK after the tentative deadlock has been detected, and break the deadlock chain before signaling the error. This means that a single deadlock is reported only in a single thread. Fixes occasional failures of deadlock-detection.1 due to a bogus vicious metacircle. (Two threads detecting the same deadlock, then racing to report the error detected as another deadlock, the reporting of which in turn looked like a metacircle to CLOS if PRINT-OBJECT didn't yet have the right method in cache.) --- NEWS | 2 ++ src/code/target-thread.lisp | 36 +++++++++++++++++++++++++++++------- tests/threads.pure.lisp | 5 +---- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index 60733ce..122a0ea 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,8 @@ changes relative to sbcl-1.0.54: ** --arch option can be used to specify the architecture to build for. (Mainly useful for building 32-bit SBCL's on x86-64 hosts, not full-blows cross-compilation.) + * bug fix: deadlock detection could report the same deadlock twice, for + two different threads. Now a single deadlock is reported exactly once. changes in sbcl-1.0.54 relative to sbcl-1.0.53: * minor incompatible changes: diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index 7ec3309..4059d14 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -362,6 +362,8 @@ HOLDING-MUTEX-P." ;; Make sure to get the current value. (sb!ext:compare-and-swap (mutex-%owner mutex) nil nil)) +(sb!ext:defglobal **deadlock-lock** nil) + ;;; Signals an error if owner of LOCK is waiting on a lock whose release ;;; depends on the current thread. Does not detect deadlocks from sempahores. (defun check-deadlock () @@ -373,16 +375,36 @@ HOLDING-MUTEX-P." (let ((other-thread (mutex-%owner lock))) (cond ((not other-thread)) ((eq self other-thread) - (let* ((chain (deadlock-chain self origin)) + (let* ((chain + (with-cas-lock ((symbol-value '**deadlock-lock**)) + (prog1 (deadlock-chain self origin) + ;; We're now committed to signaling the + ;; error and breaking the deadlock, so + ;; mark us as no longer waiting on the + ;; lock. This ensures that a single + ;; deadlock is reported in only one + ;; thread, and that we don't look like + ;; we're waiting on the lock when print + ;; stuff -- because that may lead to + ;; further deadlock checking, in turn + ;; possibly leading to a bogus vicious + ;; metacycle on PRINT-OBJECT. + (setf (thread-waiting-for self) nil)))) (barf - (format nil - "~%WARNING: DEADLOCK CYCLE DETECTED:~%~@< ~@;~ - ~{~:@_~S~:@_~}~:@>~ - ~%END OF CYCLE~%" - (mapcar #'car chain)))) + (with-output-to-string (s) + (funcall (formatter + "~%WARNING: DEADLOCK CYCLE DETECTED:~%~@< ~@;~ + ~{~:@_~S~:@_~}~:@>~ + ~%END OF CYCLE~%") + s + (mapcar #'car chain))))) ;; Barf to stderr in case the system is too tied up - ;; to report the error properly -- to avoid cross-talk + ;; to report the error properly -- and to avoid cross-talk ;; build the whole string up first. + ;; + ;; ...would be even better if we had + ;; sensible locks on streams, but what can + ;; you do... (write-string barf sb!sys:*stderr*) (finish-output sb!sys:*stderr*) (error 'thread-deadlock diff --git a/tests/threads.pure.lisp b/tests/threads.pure.lisp index adcf13a..e58cda7 100644 --- a/tests/threads.pure.lisp +++ b/tests/threads.pure.lisp @@ -307,13 +307,10 @@ (t1 (sb-thread:make-thread (test m1 m2 s1 s2) :name "T1")) (t2 (sb-thread:make-thread (test m2 m1 s2 s1) :name "T2"))) ;; One will deadlock, and the other will then complete normally. - ;; ...except sometimes, when we get unlucky, and both will do - ;; the deadlock detection in parallel and both signal. (let ((res (list (sb-thread:join-thread t1) (sb-thread:join-thread t2)))) (assert (or (equal '(:deadlock :ok) res) - (equal '(:ok :deadlock) res) - (equal '(:deadlock :deadlock) res)))))))) + (equal '(:ok :deadlock) res)))))))) (with-test (:name deadlock-detection.2 :skipped-on '(not :sb-thread)) (let* ((m1 (sb-thread:make-mutex :name "M1")) -- 1.7.10.4