(lp#819269)
* bug fix: &REST to &MORE conversion still works in unsafe call to known
functions; reported by Lutz Euler (lp#826459).
+ * bug fix: bogus deadlocks from interrupts and GCs. (lp#807475, regression
+ since 1.0.48)
changes in sbcl-1.0.50 relative to sbcl-1.0.49:
* enhancement: errors from FD handlers now provide a restart to remove
;; Now, if GET-MUTEX did not cons, that would be enough.
;; Because it does, we need the :IN-PROGRESS bit above to
;; tell the runtime not to trigger gcs.
- (let ((sb!impl::*in-without-gcing* t)
- (sb!impl::*deadline* nil)
- (sb!impl::*deadline-seconds* nil))
- (sb!thread:with-mutex (*already-in-gc*)
- (let ((*gc-inhibit* t))
- (let ((old-usage (dynamic-usage))
- (new-usage 0))
- (unsafe-clear-roots gen)
- (gc-stop-the-world)
- (let ((start-time (get-internal-run-time)))
- (collect-garbage gen)
- (setf *gc-epoch* (cons nil nil))
- (let ((run-time (- (get-internal-run-time) start-time)))
- ;; KLUDGE: Sometimes we see the second getrusage() call
- ;; return a smaller value than the first, which can
- ;; lead to *GC-RUN-TIME* to going negative, which in
- ;; turn is a type-error.
- (when (plusp run-time)
- (incf *gc-run-time* run-time))))
- (setf *gc-pending* nil
- new-usage (dynamic-usage))
- #!+sb-thread
- (assert (not *stop-for-gc-pending*))
- (gc-start-the-world)
- ;; In a multithreaded environment the other threads
- ;; will see *n-b-f-o-p* change a little late, but
- ;; that's OK.
- (let ((freed (- old-usage new-usage)))
- ;; GENCGC occasionally reports negative here, but
- ;; the current belief is that it is part of the
- ;; normal order of things and not a bug.
- (when (plusp freed)
- (incf *n-bytes-freed-or-purified* freed)))))))
+ (sb!thread::without-thread-waiting-for (:already-without-interrupts t)
+ (let* ((sb!impl::*in-without-gcing* t)
+ (sb!impl::*deadline* nil)
+ (sb!impl::*deadline-seconds* nil))
+ (sb!thread:with-mutex (*already-in-gc*)
+ (let ((*gc-inhibit* t))
+ (let ((old-usage (dynamic-usage))
+ (new-usage 0))
+ (unsafe-clear-roots gen)
+ (gc-stop-the-world)
+ (let ((start-time (get-internal-run-time)))
+ (collect-garbage gen)
+ (setf *gc-epoch* (cons nil nil))
+ (let ((run-time (- (get-internal-run-time) start-time)))
+ ;; KLUDGE: Sometimes we see the second getrusage() call
+ ;; return a smaller value than the first, which can
+ ;; lead to *GC-RUN-TIME* to going negative, which in
+ ;; turn is a type-error.
+ (when (plusp run-time)
+ (incf *gc-run-time* run-time))))
+ (setf *gc-pending* nil
+ new-usage (dynamic-usage))
+ #!+sb-thread
+ (assert (not *stop-for-gc-pending*))
+ (gc-start-the-world)
+ ;; In a multithreaded environment the other threads
+ ;; will see *n-b-f-o-p* change a little late, but
+ ;; that's OK.
+ (let ((freed (- old-usage new-usage)))
+ ;; GENCGC occasionally reports negative here, but
+ ;; the current belief is that it is part of the
+ ;; normal order of things and not a bug.
+ (when (plusp freed)
+ (incf *n-bytes-freed-or-purified* freed))))))))
;; While holding the mutex we were protected from
;; SIG_STOP_FOR_GC and recursive GCs. Now, in order to
;; preserve the invariant (*GC-PENDING* ->
;; finalizers and after-gc hooks.
(when (sb!thread:thread-alive-p sb!thread:*current-thread*)
(when *allow-with-interrupts*
- (with-interrupts
- (run-pending-finalizers)
- (call-hooks "after-GC" *after-gc-hooks* :on-error :warn)))))
+ (sb!thread::without-thread-waiting-for ()
+ (with-interrupts
+ (run-pending-finalizers)
+ (call-hooks "after-GC" *after-gc-hooks* :on-error :warn))))))
;;; This is the user-advertised garbage collection function.
(defun gc (&key (gen 0) (full nil) &allow-other-keys)
(let ((*unblock-deferrables-on-enabling-interrupts-p* t))
(with-interrupt-bindings
(let ((sb!debug:*stack-top-hint*
- (nth-value 1 (sb!kernel:find-interrupted-name-and-frame))))
- (allow-with-interrupts
- (nlx-protect (funcall function)
- ;; We've been running with deferrables
- ;; blocked in Lisp called by a C signal
- ;; handler. If we return normally the sigmask
- ;; in the interrupted context is restored.
- ;; However, if we do an nlx the operating
- ;; system will not restore it for us.
- (when *unblock-deferrables-on-enabling-interrupts-p*
- ;; This means that storms of interrupts
- ;; doing an nlx can still run out of stack.
- (unblock-deferrable-signals)))))))))
+ (nth-value 1 (sb!kernel:find-interrupted-name-and-frame))))
+ (sb!thread::without-thread-waiting-for (:already-without-interrupts t)
+ (allow-with-interrupts
+ (nlx-protect (funcall function)
+ ;; We've been running with deferrables
+ ;; blocked in Lisp called by a C signal
+ ;; handler. If we return normally the sigmask
+ ;; in the interrupted context is restored.
+ ;; However, if we do an nlx the operating
+ ;; system will not restore it for us.
+ (when *unblock-deferrables-on-enabling-interrupts-p*
+ ;; This means that storms of interrupts
+ ;; doing an nlx can still run out of stack.
+ (unblock-deferrable-signals))))))))))
(defmacro in-interruption ((&key) &body body)
#!+sb-doc
(defmacro with-deadlocks ((thread lock &optional timeout) &body forms)
(declare (ignorable timeout))
- (with-unique-names (prev n-thread n-lock n-timeout new)
+ (with-unique-names (n-thread n-lock n-timeout new)
`(let* ((,n-thread ,thread)
(,n-lock ,lock)
(,n-timeout #!-sb-lutex
`(or ,timeout
(when sb!impl::*deadline*
sb!impl::*deadline-seconds*))))
- ;; If we get interrupted while waiting for a lock, etc.
- (,prev (thread-waiting-for ,n-thread))
(,new (if ,n-timeout
(cons ,n-timeout ,n-lock)
,n-lock)))
(progn
(setf (thread-waiting-for ,n-thread) ,new)
,@forms)
- (setf (thread-waiting-for ,n-thread) ,prev)))))
+ ;; Interrupt handlers and GC save and restore any
+ ;; previous wait marks using WITHOUT-DEADLOCKS below.
+ (setf (thread-waiting-for ,n-thread) nil)))))
(declaim (inline get-spinlock release-spinlock))
(name nil :type (or null thread-name))
(value nil))
+(sb!xc:defmacro without-thread-waiting-for ((&key already-without-interrupts) &body body)
+ (with-unique-names (thread prev)
+ (let ((without (if already-without-interrupts
+ 'progn
+ 'without-interrupts))
+ (with (if already-without-interrupts
+ 'progn
+ 'with-local-interrupts)))
+ `(let* ((,thread *current-thread*)
+ (,prev (thread-waiting-for ,thread)))
+ (flet ((exec () ,@body))
+ (if ,prev
+ (,without
+ (unwind-protect
+ (progn
+ (setf (thread-waiting-for ,thread) nil)
+ (,with (exec)))
+ (setf (thread-waiting-for ,thread) ,prev)))
+ (exec)))))))
+
(sb!xc:defmacro with-mutex ((mutex &key (value '*current-thread*) (wait-p t))
&body body)
#!+sb-doc
(format t "~%joined ~S~%" (sb-thread:thread-name th)))
(list d1 d2 d3 i))))
(format t "parallel defclass test done~%")
+
+(with-test (:name (:deadlock-detection :interrupts))
+ (let* ((m1 (sb-thread:make-mutex :name "M1"))
+ (m2 (sb-thread:make-mutex :name "M2"))
+ (t1 (sb-thread:make-thread
+ (lambda ()
+ (sb-thread:with-mutex (m1)
+ (sleep 0.3)
+ :ok))
+ :name "T1"))
+ (t2 (sb-thread:make-thread
+ (lambda ()
+ (sleep 0.1)
+ (sb-thread:with-mutex (m1 :wait-p t)
+ (sleep 0.2)
+ :ok))
+ :name "T2")))
+ (sleep 0.2)
+ (sb-thread:interrupt-thread t2 (lambda ()
+ (sb-thread:with-mutex (m2 :wait-p t)
+ (sleep 0.3))))
+ (sleep 0.05)
+ (sb-thread:interrupt-thread t1 (lambda ()
+ (sb-thread:with-mutex (m2 :wait-p t)
+ (sleep 0.3))))
+ ;; both threads should finish without a deadlock or deadlock
+ ;; detection error
+ (let ((res (list (sb-thread:join-thread t1)
+ (sb-thread:join-thread t2))))
+ (assert (equal '(:ok :ok) res)))))
+
+(with-test (:name (:deadlock-detection :gc))
+ ;; To semi-reliably trigger the error (in SBCL's where)
+ ;; it was present you had to run this for > 30 seconds,
+ ;; but that's a bit long for a single test.
+ (let* ((stop (+ 5 (get-universal-time)))
+ (m1 (sb-thread:make-mutex :name "m1"))
+ (t1 (sb-thread:make-thread
+ (lambda ()
+ (loop until (> (get-universal-time) stop)
+ do (sb-thread:with-mutex (m1)
+ (eval `(make-array 24))))
+ :ok)))
+ (t2 (sb-thread:make-thread
+ (lambda ()
+ (loop until (> (get-universal-time) stop)
+ do (sb-thread:with-mutex (m1)
+ (eval `(make-array 24))))
+ :ok))))
+ (let ((res (list (sb-thread:join-thread t1)
+ (sb-thread:join-thread t2))))
+ (assert (equal '(:ok :ok) res)))))