From 8a33054f6541596c61b091e2b77118deda1511e2 Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Thu, 3 May 2012 13:25:09 +0300 Subject: [PATCH] better timeout handling in EXIT and %EXIT-OTHER-THREADS Account the timeout against all the threads being joined, not each separately. Also move handling of "main thread exiting even though another thread got the call" handling to %EXIT. ...and one missing #!+sb-doc --- package-data-list.lisp-expr | 1 + src/code/cold-init.lisp | 11 +++++----- src/code/late-extensions.lisp | 23 +++++++++++++++++++++ src/code/target-thread.lisp | 46 ++++++++++++++++++++--------------------- src/code/toplevel.lisp | 19 ++++++++++------- 5 files changed, 64 insertions(+), 36 deletions(-) diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 13ede30..dde04ce 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1059,6 +1059,7 @@ possibly temporariliy, because it might be used internally." "!DEF-BOOLEAN-ATTRIBUTE" "WITH-REBOUND-IO-SYNTAX" "WITH-SANE-IO-SYNTAX" + "WITH-PROGRESSIVE-TIMEOUT" ;; ..and CONDITIONs.. "BUG" diff --git a/src/code/cold-init.lisp b/src/code/cold-init.lisp index 802b325..30f3eaa 100644 --- a/src/code/cold-init.lisp +++ b/src/code/cold-init.lisp @@ -282,7 +282,7 @@ (critically-unreachable "after trying to die in QUIT")) (declaim (ftype (sfunction (&key (:code (or null exit-code)) - (:timeout (or null real)) + (:timeout (or null real)) (:abort t)) nil) exit)) @@ -307,10 +307,11 @@ TIMEOUT controls waiting for other threads to terminate when ABORT is NIL. Once current thread has been unwound and *EXIT-HOOKS* have been run, spawning new threads is prevented and all other threads are terminated by calling TERMINATE-THREAD on them. The system then waits -for them to finish using JOIN-THREAD with the specified TIMEOUT. If a -thread does not finish in TIMEOUT seconds, it is left to its own -devices while the exit protocol continues. TIMEOUT defaults to -*EXIT-TIMEOUT*, which in turn defaults to 60. +for them to finish using JOIN-THREAD, waiting at most a total TIMEOUT +seconds for all threads to join. Those threads that do not finish +in time are simply ignored while the exit protocol continues. TIMEOUT +defaults to *EXIT-TIMEOUT*, which in turn defaults to 60. TIMEOUT NIL +means to wait indefinitely. Note that TIMEOUT applies only to JOIN-THREAD, not *EXIT-HOOKS*. Since TERMINATE-THREAD is asynchronous, getting multithreaded application diff --git a/src/code/late-extensions.lisp b/src/code/late-extensions.lisp index 59a2fd6..d18939b 100644 --- a/src/code/late-extensions.lisp +++ b/src/code/late-extensions.lisp @@ -331,6 +331,7 @@ See also the declarations SB-EXT:GLOBAL and SB-EXT:ALWAYS-BOUND." (go :restart))))))) (defmacro wait-for (test-form &key timeout) + #!+sb-doc "Wait until TEST-FORM evaluates to true, then return its primary value. If TIMEOUT is provided, waits at most approximately TIMEOUT seconds before returning NIL. @@ -342,3 +343,25 @@ deadline. Experimental: subject to change without prior notice." `(dx-flet ((wait-for-test () (progn ,test-form))) (%wait-for #'wait-for-test ,timeout))) + +(defmacro with-progressive-timeout ((name &key seconds) + &body body) + #!+sb-doc + "Binds NAME as a local function for BODY. Each time #'NAME is called, it +returns SECONDS minus the time that has elapsed since BODY was entered, or +zero if more time than SECONDS has elapsed. If SECONDS is NIL, #'NAME +returns NIL each time." + (with-unique-names (deadline time-left sec) + `(let* ((,sec ,seconds) + (,deadline + (when ,sec + (+ (get-internal-real-time) + (round (* ,seconds internal-time-units-per-second)))))) + (flet ((,name () + (when ,deadline + (let ((,time-left (- ,deadline (get-internal-real-time)))) + (if (plusp ,time-left) + (* (coerce ,time-left 'single-float) + ,(/ 1.0 internal-time-units-per-second)) + 0))))) + ,@body)))) diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index d5a3331..6d44ead 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -1186,12 +1186,7 @@ on this semaphore, then N of them is woken up." (defun handle-thread-exit (thread) (/show0 "HANDLING THREAD EXIT") (when *exit-in-process* - (if (consp *exit-in-process*) - ;; This means we're the main thread, but someone else - ;; requested the exit and exiting with the right code is the - ;; only thing left to do. - (os-exit (car *exit-in-process*) :abort nil) - (%exit))) + (%exit)) ;; Lisp-side cleanup (with-all-threads-lock (setf (thread-%alive-p thread) nil) @@ -1208,10 +1203,11 @@ on this semaphore, then N of them is woken up." (grab-mutex *make-thread-lock*) (let ((timeout sb!ext:*exit-timeout*) (code *exit-in-process*) + (current *current-thread*) (joinees nil) (main nil)) (dolist (thread (list-all-threads)) - (cond ((eq thread *current-thread*)) + (cond ((eq thread current)) ((main-thread-p thread) (setf main thread)) (t @@ -1220,23 +1216,25 @@ on this semaphore, then N of them is woken up." (terminate-thread thread) (push thread joinees)) (interrupt-thread-error ()))))) - (dolist (thread (nreverse joinees)) - (join-thread thread :default t :timeout timeout)) - ;; Need to defer till others have joined, because when main - ;; thread exits, we're gone. Can't use TERMINATE-THREAD -- would - ;; get the exit code wrong. - (when main - (handler-case - (interrupt-thread - main - (lambda () - (setf *exit-in-process* (list code)) - (throw 'sb!impl::%end-of-the-world t))) - (interrupt-thread-error ())) - ;; Normally this never finishes, as once the main-thread - ;; unwinds we exit with the right code, but if times out - ;; before that happens, we will exit after returning. - (join-thread main :default t :timeout timeout))))) + (with-progressive-timeout (time-left :seconds timeout) + (dolist (thread joinees) + (join-thread thread :default t :timeout (time-left))) + ;; Need to defer till others have joined, because when main + ;; thread exits, we're gone. Can't use TERMINATE-THREAD -- would + ;; get the exit code wrong. + (when main + (handler-case + (interrupt-thread + main + (lambda () + (setf *exit-in-process* (list code)) + (throw 'sb!impl::%end-of-the-world t))) + (interrupt-thread-error ())) + ;; Normally this never finishes, as once the main-thread unwinds we + ;; exit with the right code, but if times out before that happens, + ;; we will exit after returning -- or rathe racing the main thread + ;; to calling OS-EXIT. + (join-thread main :default t :timeout (time-left))))))) (defun terminate-session () #!+sb-doc diff --git a/src/code/toplevel.lisp b/src/code/toplevel.lisp index 9fa8189..714fc4a 100644 --- a/src/code/toplevel.lisp +++ b/src/code/toplevel.lisp @@ -107,13 +107,18 @@ means to wait indefinitely.") (defun %exit () ;; If anything goes wrong, we will exit immediately and forcibly. (handler-bind ((serious-condition *exit-error-handler*)) - (let (ok) - (unwind-protect - (progn - (flush-standard-output-streams) - (sb!thread::%exit-other-threads) - (setf ok t)) - (os-exit *exit-in-process* :abort (not ok)))))) + (let ((ok nil) + (code *exit-in-process*)) + (if (consp code) + ;; Another thread called EXIT, and passed the buck to us -- only + ;; final call left to do. + (os-exit (car code) :abort nil) + (unwind-protect + (progn + (flush-standard-output-streams) + (sb!thread::%exit-other-threads) + (setf ok t)) + (os-exit code :abort (not ok))))))) ;;;; working with *CURRENT-ERROR-DEPTH* and *MAXIMUM-ERROR-DEPTH* -- 1.7.10.4