From 2d266da0f5a288eaf571fc7c03621cda71aafb3f Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Tue, 21 Sep 2010 13:10:37 +0000 Subject: [PATCH] 1.0.42.50: workaround a Darwin nanosleep() bug Fixes lp#640516. It turns out that on Darwin, if a nanosleep() call is interrupted, and the signal handler takes longer than the requested sleep time was, then the call will return with EINTR and (unsigned)-1 in the remaining seconds. Since we call nanosleep() again when it returns with EINTR with the remaining time, this would cause us to sleep ~136 years... So, check that the remainder is not increasing before calling nanosleep() again. Many, many thanks to Joe Lobraco who reported and diagnosed the issue. --- NEWS | 2 ++ src/code/unix.lisp | 27 +++++++++++++++++++++------ tests/signals.impure.lisp | 16 ++++++++++++++++ version.lisp-expr | 2 +- 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index feef931..bc45c8a 100644 --- a/NEWS +++ b/NEWS @@ -59,6 +59,8 @@ changes relative to sbcl-1.0.42 (lp#316068) * bug fix: (SETF DOCUMENTATION) of a macro works properly. (lp#643958, thanks to Stas Boukarev) + * bug fix: interrupt taking longer than the requested period caused SLEEP + to hang on Darwin. (lp#640516, thanks to Joe Lebroco for the analysis) changes in sbcl-1.0.42 relative to sbcl-1.0.41 * build changes diff --git a/src/code/unix.lisp b/src/code/unix.lisp index 37d715f..47eba3d 100644 --- a/src/code/unix.lisp +++ b/src/code/unix.lisp @@ -916,12 +916,27 @@ corresponds to NAME, or NIL if there is none." (rem (struct timespec))) (setf (slot req 'tv-sec) secs) (setf (slot req 'tv-nsec) nsecs) - (loop while (eql sb!unix:eintr - (nth-value 1 - (int-syscall ("nanosleep" (* (struct timespec)) - (* (struct timespec))) - (addr req) (addr rem)))) - do (rotatef req rem)))) + (loop while (and (eql sb!unix:eintr + (nth-value 1 + (int-syscall ("nanosleep" (* (struct timespec)) + (* (struct timespec))) + (addr req) (addr rem)))) + ;; KLUDGE: On Darwin, if an interrupt cases nanosleep to + ;; take longer than the requested time, the call will + ;; return with EINT and (unsigned)-1 seconds in the + ;; remainder timespec, which would cause us to enter + ;; nanosleep again for ~136 years. So, we check that the + ;; remainder time is actually decreasing. Since the cost + ;; of this check is neglible, do it on all platforms. + ;; http://osdir.com/ml/darwin-kernel/2010-03/msg00007.html + (let ((rem-sec (slot rem 'tv-sec)) + (rem-nsec (slot rem 'tv-nsec))) + (when (or (> secs rem-sec) + (and (= secs rem-sec) (>= nsecs rem-nsec))) + (setf secs rem-sec + nsecs rem-nsec) + t))) + do (rotatef req rem)))) (defun unix-get-seconds-west (secs) (multiple-value-bind (ignore seconds dst) (get-timezone secs) diff --git a/tests/signals.impure.lisp b/tests/signals.impure.lisp index 7be8bac..093bf23 100644 --- a/tests/signals.impure.lisp +++ b/tests/signals.impure.lisp @@ -65,3 +65,19 @@ sb-unix:sigint) (sb-sys:interactive-interrupt () :condition))))) + +(with-test (:name :bug-640516) + ;; On Darwin interrupting a SLEEP so that it took longer than + ;; the requested amount caused it to hang. + (assert + (handler-case + (sb-ext:with-timeout 10 + (let (to) + (handler-bind ((sb-ext:timeout (lambda (c) + (unless to + (setf to t) + (sleep 2) + (continue c))))) + (sb-ext:with-timeout 0.1 (sleep 1) t)))) + (sb-ext:timeout () + nil)))) diff --git a/version.lisp-expr b/version.lisp-expr index 5ec2273..7c64c3e 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.42.49" +"1.0.42.50" -- 1.7.10.4