X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-thread.lisp;h=080c17323b7f8a16df28af8eee3a7a01d84811a6;hb=3f3033a6c0ddf0af8dd1b5a17c2a4b82ea59b94f;hp=553dc675e8804803229542be4ba385f2a2736500;hpb=4255b37e50876702d2563f3418a44a3f5bf8a2e8;p=sbcl.git diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index 553dc67..080c173 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -35,32 +35,34 @@ WITH-CAS-LOCK can be entered recursively." (%with-cas-lock (,place) ,@body))) (defmacro %with-cas-lock ((place) &body body &environment env) - (with-unique-names (self owner) - ;; Take care not to multiply-evaluate anything. - ;; - ;; FIXME: Once we get DEFCAS this can use GET-CAS-EXPANSION. - (let* ((placex (sb!xc:macroexpand place env)) - (place-op (if (consp placex) - (car placex) - (error "~S: ~S is not a valid place for ~S" - 'with-cas-lock - place 'sb!ext:compare-and-swap))) - (place-args (cdr placex)) - (temps (make-gensym-list (length place-args) t)) - (place `(,place-op ,@temps))) - `(let* (,@(mapcar #'list temps place-args) + (with-unique-names (owner self) + (multiple-value-bind (vars vals old new cas-form read-form) + (sb!ext:get-cas-expansion place env) + `(let* (,@(mapcar #'list vars vals) + (,owner (progn + (barrier (:read)) + ,read-form)) (,self *current-thread*) - (,owner ,place)) + (,old nil) + (,new ,self)) (unwind-protect (progn (unless (eq ,owner ,self) - (loop while (setf ,owner - (or ,place - (sb!ext:compare-and-swap ,place nil ,self))) + (loop until (loop repeat 100 + when (and (progn + (barrier (:read)) + (not ,read-form)) + (not (setf ,owner ,cas-form))) + return t + else + do (sb!ext:spin-loop-hint)) do (thread-yield))) ,@body) (unless (eq ,owner ,self) - (sb!ext:compare-and-swap ,place ,self nil))))))) + (let ((,old ,self) + (,new nil)) + (unless (eq ,old ,cas-form) + (bug "Failed to release CAS lock!"))))))))) ;;; Conditions @@ -76,10 +78,16 @@ read by the function THREAD-ERROR-THREAD.")) ((cycle :initarg :cycle :reader thread-deadlock-cycle)) (:report (lambda (condition stream) - (let ((*print-circle* t)) - (format stream "Deadlock cycle detected:~%~@< ~@;~ - ~{~:@_~S~:@_~}~:@>" - (mapcar #'car (thread-deadlock-cycle condition))))))) + (let* ((*print-circle* t) + (cycle (thread-deadlock-cycle condition)) + (start (caar cycle))) + (format stream "Deadlock cycle detected:~%") + (loop for part = (pop cycle) + while part + do (format stream " ~S~% waited for:~% ~S~% owned by:~%" + (car part) + (cdr part))) + (format stream " ~S~%" start))))) #!+sb-doc (setf @@ -168,7 +176,9 @@ arbitrary printable objects, and need not be unique.") (multiple-value-list (join-thread thread :default cookie)))) (state (if (eq :running info) - (let* ((thing (thread-waiting-for thread))) + (let* ((thing (progn + (barrier (:read)) + (thread-waiting-for thread)))) (typecase thing (cons (list "waiting on:" (cdr thing) @@ -323,10 +333,12 @@ created and old ones may exit at any time." (unwind-protect (progn (setf (thread-waiting-for ,n-thread) ,new) + (barrier (:write)) ,@forms) ;; Interrupt handlers and GC save and restore any ;; previous wait marks using WITHOUT-DEADLOCKS below. - (setf (thread-waiting-for ,n-thread) nil))))) + (setf (thread-waiting-for ,n-thread) nil) + (barrier (:write)))))) ;;;; Mutexes @@ -356,32 +368,41 @@ HOLDING-MUTEX-P." ;; Make sure to get the current value. (sb!ext:compare-and-swap (mutex-%owner mutex) nil nil)) +(sb!ext:defglobal **deadlock-lock** nil) + ;;; Signals an error if owner of LOCK is waiting on a lock whose release ;;; depends on the current thread. Does not detect deadlocks from sempahores. (defun check-deadlock () (let* ((self *current-thread*) - (origin (thread-waiting-for self))) + (origin (progn + (barrier (:read)) + (thread-waiting-for self)))) (labels ((detect-deadlock (lock) (let ((other-thread (mutex-%owner lock))) (cond ((not other-thread)) ((eq self other-thread) - (let* ((chain (deadlock-chain self origin)) - (barf - (format nil - "~%WARNING: DEADLOCK CYCLE DETECTED:~%~@< ~@;~ - ~{~:@_~S~:@_~}~:@>~ - ~%END OF CYCLE~%" - (mapcar #'car chain)))) - ;; Barf to stderr in case the system is too tied up - ;; to report the error properly -- to avoid cross-talk - ;; build the whole string up first. - (write-string barf sb!sys:*stderr*) - (finish-output sb!sys:*stderr*) + (let ((chain + (with-cas-lock ((symbol-value '**deadlock-lock**)) + (prog1 (deadlock-chain self origin) + ;; We're now committed to signaling the + ;; error and breaking the deadlock, so + ;; mark us as no longer waiting on the + ;; lock. This ensures that a single + ;; deadlock is reported in only one + ;; thread, and that we don't look like + ;; we're waiting on the lock when print + ;; stuff -- because that may lead to + ;; further deadlock checking, in turn + ;; possibly leading to a bogus vicious + ;; metacycle on PRINT-OBJECT. + (setf (thread-waiting-for self) nil))))) (error 'thread-deadlock :thread *current-thread* :cycle chain))) (t - (let ((other-lock (thread-waiting-for other-thread))) + (let ((other-lock (progn + (barrier (:read)) + (thread-waiting-for other-thread)))) ;; If the thread is waiting with a timeout OTHER-LOCK ;; is a cons, and we don't consider it a deadlock -- since ;; it will time out on its own sooner or later. @@ -390,6 +411,7 @@ HOLDING-MUTEX-P." (deadlock-chain (thread lock) (let* ((other-thread (mutex-owner lock)) (other-lock (when other-thread + (barrier (:read)) (thread-waiting-for other-thread)))) (cond ((not other-thread) ;; The deadlock is gone -- maybe someone unwound @@ -406,7 +428,7 @@ HOLDING-MUTEX-P." (list (list thread lock))) (t (if other-lock - (cons (list thread lock) + (cons (cons thread lock) (deadlock-chain other-thread other-lock)) ;; Again, the deadlock is gone? (return-from check-deadlock nil))))))) @@ -424,10 +446,11 @@ HOLDING-MUTEX-P." #!-sb-thread (when old (error "Strange deadlock on ~S in an unithreaded build?" mutex)) - #!-sb-futex - (and (not (mutex-%owner mutex)) + #!-(and sb-thread sb-futex) + (and (not old) + ;; Don't even bother to try to CAS if it looks bad. (not (sb!ext:compare-and-swap (mutex-%owner mutex) nil new-owner))) - #!+sb-futex + #!+(and sb-thread sb-futex) ;; From the Mutex 2 algorithm from "Futexes are Tricky" by Ulrich Drepper. (when (eql +lock-free+ (sb!ext:compare-and-swap (mutex-state mutex) +lock-free+ @@ -444,11 +467,16 @@ HOLDING-MUTEX-P." (declare (ignore to-sec to-usec)) #!-sb-futex (flet ((cas () - (loop repeat 24 - when (and (not (mutex-%owner mutex)) + (loop repeat 100 + when (and (progn + (barrier (:read)) + (not (mutex-%owner mutex))) (not (sb!ext:compare-and-swap (mutex-%owner mutex) nil new-owner))) - do (return-from cas t)) + do (return-from cas t) + else + do + (sb!ext:spin-loop-hint)) ;; Check for pending interrupts. (with-interrupts nil))) (declare (dynamic-extent #'cas)) @@ -589,7 +617,7 @@ IF-NOT-OWNER is :FORCE)." ;; FIXME: Is a :memory barrier too strong here? Can we use a :write ;; barrier instead? (barrier (:memory))) - #!+sb-futex + #!+(and sb-thread sb-futex) (when old-owner ;; FIXME: once ATOMIC-INCF supports struct slots with word sized ;; unsigned-byte type this can be used: @@ -616,7 +644,7 @@ IF-NOT-OWNER is :FORCE)." #!+sb-doc "Waitqueue type." (name nil :type (or null thread-name)) - #!+sb-futex + #!+(and sb-thread sb-futex) (token nil)) #!+(and sb-thread (not sb-futex)) @@ -649,17 +677,18 @@ IF-NOT-OWNER is :FORCE)." (setf (thread-waiting-for thread) nil) (let ((head (waitqueue-%head queue))) (do ((list head (cdr list)) - (prev nil)) - ((eq (car list) thread) - (let ((rest (cdr list))) - (cond (prev - (setf (cdr prev) rest)) - (t - (setf (waitqueue-%head queue) rest - prev rest))) - (unless rest - (setf (waitqueue-%tail queue) prev)))) - (setf prev list))) + (prev nil list)) + ((or (null list) + (eq (car list) thread)) + (when list + (let ((rest (cdr list))) + (cond (prev + (setf (cdr prev) rest)) + (t + (setf (waitqueue-%head queue) rest + prev rest))) + (unless rest + (setf (waitqueue-%tail queue) prev))))))) nil) (defun %waitqueue-wakeup (queue n) (declare (fixnum n)) @@ -673,7 +702,8 @@ IF-NOT-OWNER is :FORCE)." (setf (waitqueue-%head queue) (cdr head))) (car head))) while next - do (when (eq queue (sb!ext:compare-and-swap (thread-waiting-for next) queue nil)) + do (when (eq queue (sb!ext:compare-and-swap + (thread-waiting-for next) queue nil)) (decf n))) nil)) @@ -737,7 +767,7 @@ around the call, checking the the associated data: (declare (ignore queue)) (assert mutex) #!-sb-thread - (wait-for nil :timeout timeout) ; Yeah... + (sb!ext:wait-for nil :timeout timeout) ; Yeah... #!+sb-thread (let ((me *current-thread*)) (barrier (:read)) @@ -752,11 +782,13 @@ around the call, checking the the associated data: (progn #!-sb-futex (progn - (%waitqueue-enqueue me queue) + (%with-cas-lock ((waitqueue-%owner queue)) + (%waitqueue-enqueue me queue)) (release-mutex mutex) (setf status (or (flet ((wakeup () - (when (neq queue (thread-waiting-for me)) + (barrier (:read)) + (unless (eq queue (thread-waiting-for me)) :ok))) (declare (dynamic-extent #'wakeup)) (allow-with-interrupts @@ -888,8 +920,38 @@ future." (setf (fdocumentation 'semaphore-name 'function) "The name of the semaphore INSTANCE. Setfable.") +(defstruct (semaphore-notification (:constructor make-semaphore-notification ()) + (:copier nil)) + #!+sb-doc + "Semaphore notification object. Can be passed to WAIT-ON-SEMAPHORE and +TRY-SEMAPHORE as the :NOTIFICATION argument. Consequences are undefined if +multiple threads are using the same notification object in parallel." + (%status nil :type boolean)) + +(setf (fdocumentation 'make-semaphore-notification 'function) + "Constructor for SEMAPHORE-NOTIFICATION objects. SEMAPHORE-NOTIFICATION-STATUS +is initially NIL.") + +(declaim (inline semaphore-notification-status)) +(defun semaphore-notification-status (semaphore-notification) + #!+sb-doc + "Returns T if a WAIT-ON-SEMAPHORE or TRY-SEMAPHORE using +SEMAPHORE-NOTICATION has succeeded since the notification object was created +or cleared." + (barrier (:read)) + (semaphore-notification-%status semaphore-notification)) + +(declaim (inline clear-semaphore-notification)) +(defun clear-semaphore-notification (semaphore-notification) + #!+sb-doc + "Resets the SEMAPHORE-NOTIFICATION object for use with another call to +WAIT-ON-SEMAPHORE or TRY-SEMAPHORE." + (barrier (:write) + (setf (semaphore-notification-%status semaphore-notification) nil))) + (declaim (inline semaphore-count)) (defun semaphore-count (instance) + #!+sb-doc "Returns the current count of the semaphore INSTANCE." (barrier (:read)) (semaphore-%count instance)) @@ -899,14 +961,23 @@ future." "Create a semaphore with the supplied COUNT and NAME." (%make-semaphore name count)) -(defun wait-on-semaphore (semaphore &key timeout) +(defun wait-on-semaphore (semaphore &key timeout notification) #!+sb-doc "Decrement the count of SEMAPHORE if the count would not be negative. Else blocks until the semaphore can be decremented. Returns T on success. If TIMEOUT is given, it is the maximum number of seconds to wait. If the count cannot be decremented in that time, returns NIL without decrementing the -count." +count. + +If NOTIFICATION is given, it must be a SEMAPHORE-NOTIFICATION object whose +SEMAPHORE-NOTIFICATION-STATUS is NIL. If WAIT-ON-SEMAPHORE succeeds and +decrements the count, the status is set to T." + (when (and notification (semaphore-notification-status notification)) + (with-simple-restart (continue "Clear notification status and continue.") + (error "~@" + 'wait-on-semaphore semaphore)) + (clear-semaphore-notification notification)) ;; A more direct implementation based directly on futexes should be ;; possible. ;; @@ -918,36 +989,55 @@ count." (with-system-mutex ((semaphore-mutex semaphore) :allow-with-interrupts t) ;; Quick check: is it positive? If not, enter the wait loop. (let ((count (semaphore-%count semaphore))) - (if (plusp count) - (setf (semaphore-%count semaphore) (1- count)) - (unwind-protect - (progn - ;; Need to use ATOMIC-INCF despite the lock, because on our - ;; way out from here we might not be locked anymore -- so - ;; another thread might be tweaking this in parallel using - ;; ATOMIC-DECF. No danger over overflow, since there it - ;; at most one increment per thread waiting on the semaphore. - (sb!ext:atomic-incf (semaphore-waitcount semaphore)) - (loop until (plusp (setf count (semaphore-%count semaphore))) - do (or (condition-wait (semaphore-queue semaphore) - (semaphore-mutex semaphore) - :timeout timeout) - (return-from wait-on-semaphore nil))) - (setf (semaphore-%count semaphore) (1- count))) - ;; Need to use ATOMIC-DECF instead of DECF, as CONDITION-WAIT - ;; may unwind without the lock being held due to timeouts. - (sb!ext:atomic-decf (semaphore-waitcount semaphore)))))) + (cond ((plusp count) + (setf (semaphore-%count semaphore) (1- count)) + (when notification + (setf (semaphore-notification-%status notification) t))) + (t + (unwind-protect + (progn + ;; Need to use ATOMIC-INCF despite the lock, because on our + ;; way out from here we might not be locked anymore -- so + ;; another thread might be tweaking this in parallel using + ;; ATOMIC-DECF. No danger over overflow, since there it + ;; at most one increment per thread waiting on the semaphore. + (sb!ext:atomic-incf (semaphore-waitcount semaphore)) + (loop until (plusp (setf count (semaphore-%count semaphore))) + do (or (condition-wait (semaphore-queue semaphore) + (semaphore-mutex semaphore) + :timeout timeout) + (return-from wait-on-semaphore nil))) + (setf (semaphore-%count semaphore) (1- count)) + (when notification + (setf (semaphore-notification-%status notification) t))) + ;; Need to use ATOMIC-DECF as we may unwind without the lock + ;; being held! + (sb!ext:atomic-decf (semaphore-waitcount semaphore))))))) t) -(defun try-semaphore (semaphore &optional (n 1)) +(defun try-semaphore (semaphore &optional (n 1) notification) #!+sb-doc "Try to decrement the count of SEMAPHORE by N. If the count were to -become negative, punt and return NIL, otherwise return true." +become negative, punt and return NIL, otherwise return true. + +If NOTIFICATION is given it must be a semaphore notification object +with SEMAPHORE-NOTIFICATION-STATUS of NIL. If the count is decremented, +the status is set to T." (declare (type (integer 1) n)) + (when (and notification (semaphore-notification-status notification)) + (with-simple-restart (continue "Clear notification status and continue.") + (error "~@" + 'try-semaphore semaphore)) + (clear-semaphore-notification notification)) (with-system-mutex ((semaphore-mutex semaphore) :allow-with-interrupts t) (let ((new-count (- (semaphore-%count semaphore) n))) (when (not (minusp new-count)) - (setf (semaphore-%count semaphore) new-count))))) + (setf (semaphore-%count semaphore) new-count) + (when notification + (setf (semaphore-notification-%status notification) t)) + ;; FIXME: We don't actually document this -- should we just + ;; return T, or document new count as the return? + new-count)))) (defun signal-semaphore (semaphore &optional (n 1)) #!+sb-doc @@ -1301,17 +1391,59 @@ change." (defun interrupt-thread (thread function) #!+sb-doc - "Interrupt the live THREAD and make it run FUNCTION. A moderate -degree of care is expected for use of INTERRUPT-THREAD, due to its -nature: if you interrupt a thread that was holding important locks -then do something that turns out to need those locks, you probably -won't like the effect. FUNCTION runs with interrupts disabled, but -WITH-INTERRUPTS is allowed in it. Keep in mind that many things may -enable interrupts (GET-MUTEX when contended, for instance) so the -first thing to do is usually a WITH-INTERRUPTS or a -WITHOUT-INTERRUPTS. Within a thread interrupts are queued, they are -run in same the order they were sent." - #!+win32 + "Interrupt THREAD and make it run FUNCTION. + +The interrupt is asynchronous, and can occur anywhere with the exception of +sections protected using SB-SYS:WITHOUT-INTERRUPTS. + +FUNCTION is called with interrupts disabled, under +SB-SYS:ALLOW-WITH-INTERRUPTS. Since functions such as GRAB-MUTEX may try to +enable interrupts internally, in most cases FUNCTION should either enter +SB-SYS:WITH-INTERRUPTS to allow nested interrupts, or +SB-SYS:WITHOUT-INTERRUPTS to prevent them completely. + +When a thread receives multiple interrupts, they are executed in the order +they were sent -- first in, first out. + +This means that a great degree of care is required to use INTERRUPT-THREAD +safely and sanely in a production environment. The general recommendation is +to limit uses of INTERRUPT-THREAD for interactive debugging, banning it +entirely from production environments -- it is simply exceedingly hard to use +correctly. + +With those caveats in mind, what you need to know when using it: + + * If calling FUNCTION causes a non-local transfer of control (ie. an + unwind), all normal cleanup forms will be executed. + + However, if the interrupt occurs during cleanup forms of an UNWIND-PROTECT, + it is just as if that had happened due to a regular GO, THROW, or + RETURN-FROM: the interrupted cleanup form and those following it in the + same UNWIND-PROTECT do not get executed. + + SBCL tries to keep its own internals asynch-unwind-safe, but this is + frankly an unreasonable expectation for third party libraries, especially + given that asynch-unwind-safety does not compose: a function calling + only asynch-unwind-safe function isn't automatically asynch-unwind-safe. + + This means that in order for an asych unwind to be safe, the entire + callstack at the point of interruption needs to be asynch-unwind-safe. + + * In addition to asynch-unwind-safety you must consider the issue of + re-entrancy. INTERRUPT-THREAD can cause function that are never normally + called recursively to be re-entered during their dynamic contour, + which may cause them to misbehave. (Consider binding of special variables, + values of global variables, etc.) + +Take togather, these two restrict the \"safe\" things to do using +INTERRUPT-THREAD to a fairly minimal set. One useful one -- exclusively for +interactive development use is using it to force entry to debugger to inspect +the state of a thread: + + (interrupt-thread thread #'break) + +Short version: be careful out there." + #!+win32 (declare (ignore thread)) #!+win32 (with-interrupt-bindings @@ -1336,8 +1468,43 @@ run in same the order they were sent." (defun terminate-thread (thread) #!+sb-doc - "Terminate the thread identified by THREAD, by causing it to run -SB-EXT:QUIT - the usual cleanup forms will be evaluated" + "Terminate the thread identified by THREAD, by interrupting it and causing +it to call SB-EXT:QUIT. + +The unwind caused by TERMINATE-THREAD is asynchronous, meaning that eg. thread +executing + + (let (foo) + (unwind-protect + (progn + (setf foo (get-foo)) + (work-on-foo foo)) + (when foo + ;; An interrupt occurring inside the cleanup clause + ;; will cause cleanups from the current UNWIND-PROTECT + ;; to be dropped. + (release-foo foo)))) + +might miss calling RELEASE-FOO despite GET-FOO having returned true if the +interrupt occurs inside the cleanup clause, eg. during execution of +RELEASE-FOO. + +Thus, in order to write an asynch unwind safe UNWIND-PROTECT you need to use +WITHOUT-INTERRUPTS: + + (let (foo) + (sb-sys:without-interrupts + (unwind-protect + (progn + (setf foo (sb-sys:allow-with-interrupts + (get-foo))) + (sb-sys:with-local-interrupts + (work-on-foo foo))) + (when foo + (release-foo foo))))) + +Since most libraries using UNWIND-PROTECT do not do this, you should never +assume that unknown code can safely be terminated using TERMINATE-THREAD." (interrupt-thread thread 'sb!ext:quit)) (define-alien-routine "thread_yield" int) @@ -1371,34 +1538,17 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" (with-all-threads-lock (loop (if (thread-alive-p thread) - (let* ((epoch sb!kernel::*gc-epoch*) - (offset (sb!kernel:get-lisp-obj-address + (let* ((offset (sb!kernel:get-lisp-obj-address (sb!vm::symbol-tls-index symbol))) - (tl-val (sap-ref-word (%thread-sap thread) offset))) + (obj (sap-ref-lispobj (%thread-sap thread) offset)) + (tl-val (sb!kernel:get-lisp-obj-address obj))) (cond ((zerop offset) (return (values nil :no-tls-value))) ((or (eql tl-val sb!vm:no-tls-value-marker-widetag) (eql tl-val sb!vm:unbound-marker-widetag)) (return (values nil :unbound-in-thread))) (t - (multiple-value-bind (obj ok) (make-lisp-obj tl-val nil) - ;; The value we constructed may be invalid if a GC has - ;; occurred. That is harmless, though, since OBJ is - ;; either in a register or on stack, and we are - ;; conservative on both on GENCGC -- so a bogus object - ;; is safe here as long as we don't return it. If we - ;; ever port threads to a non-conservative GC we must - ;; pin the TL-VAL address before constructing OBJ, or - ;; make WITH-ALL-THREADS-LOCK imply WITHOUT-GCING. - ;; - ;; The reason we don't just rely on TL-VAL pinning the - ;; object is that the call to MAKE-LISP-OBJ may cause - ;; bignum allocation, at which point TL-VAL might not - ;; be alive anymore -- hence the epoch check. - (when (eq epoch sb!kernel::*gc-epoch*) - (if ok - (return (values obj :ok)) - (return (values obj :invalid-tls-value)))))))) + (return (values obj :ok))))) (return (values nil :thread-dead)))))) (defun %set-symbol-value-in-thread (symbol thread value) @@ -1412,8 +1562,8 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" (cond ((zerop offset) (values nil :no-tls-value)) (t - (setf (sap-ref-word (%thread-sap thread) offset) - (get-lisp-obj-address value)) + (setf (sap-ref-lispobj (%thread-sap thread) offset) + value) (values value :ok)))) (values nil :thread-dead)))))