1.0.37.20: Make stress test for SB-CONCURRENCY:MAILBOX more robust.
authorTobias C. Rittweiler <trittweiler@users.sourceforge.net>
Wed, 31 Mar 2010 19:35:11 +0000 (19:35 +0000)
committerTobias C. Rittweiler <trittweiler@users.sourceforge.net>
Wed, 31 Mar 2010 19:35:11 +0000 (19:35 +0000)
  * The MAILBOX.INTERRUPT-SAFETY.1 test kills threads randomly while
    stress-testing a mailbox--in the test we made sure that at least
    one receiver remains to properly finish the test; however, some of
    the receiver threads were written with a specific upper bound of
    runs. In the unlikely event that only those receivers remain,
    there was a chance that they could not properly finish the test.

  * We rewrite the receivers to do their work until they receive a
    special FIN token as message. After all senders are done, we make
    sure to send enough FIN for all receivers to shut down.

  * Also gather some more information during the test so in case of
    failure we can gain understanding on what went wrong.

contrib/sb-concurrency/tests/test-mailbox.lisp
contrib/sb-concurrency/tests/test-utils.lisp
version.lisp-expr

index 87d76d8..f69628a 100644 (file)
 (defstruct counter
   (ref 0 :type sb-vm:word))
 
+(defun receiver-distribution (n-receivers)
+  (let* ((aux              (floor n-receivers 2))
+         (n-recv-msg       (- n-receivers aux))
+         (n-recv-pend-msgs (floor aux 3))
+         (n-recv-msg-n-h   (- aux n-recv-pend-msgs)))
+    (values n-recv-msg
+            n-recv-msg-n-h
+            n-recv-pend-msgs)))
+
 (defun test-mailbox-producers-consumers
-    (&key n-senders n-receivers n-messages mailbox interruptor)
-  (let* ((cnt  (make-counter))
-         (mbox (or mailbox (make-mailbox)))
-         (senders
-          (make-threads n-senders "SENDER"
-            #'(lambda ()
-                (dotimes (i n-messages)
-                  (send-message mbox i)
-                  (sleep (random 0.001))))))
-         (receivers
-          ;; We have three groups of receivers, one using
-          ;; RECEIVE-MESSAGE, one RECEIVE-MESSAGE-NO-HANG, and another
-          ;; one RECEIVE-PENDING-MESSAGES.
-          (let* ((aux              (floor n-receivers 2))
-                 (n-recv-msg       (- n-receivers aux))
-                 (n-recv-pend-msgs (floor aux 3))
-                 (n-recv-msg-n-h   (- aux n-recv-pend-msgs)))
-            (append
-             (make-threads n-recv-msg "RECV-MSG"
-               #'(lambda ()
-                   (sleep (random 0.001))
-                   (handler-case
-                       (loop
-                         (sb-sys:with-deadline (:seconds 1.0)
-                           (let ((msg (receive-message mbox)))
-                             (sb-ext:atomic-incf (counter-ref cnt))
-                             (unless (< -1 msg n-messages)
-                               (hang)))))
-                     (sb-ext:timeout ()))))
-             (make-threads n-recv-pend-msgs "RECV-PEND-MSGS"
-               #'(lambda ()
-                   (sleep (random 0.001))
-                   (dotimes (i 10)
-                     (thread-yield)
-                     (let ((msgs (receive-pending-messages mbox (random 5))))
-                       (mapc #'(lambda (msg)
-                                 (sb-ext:atomic-incf (counter-ref cnt))
-                                 (unless (< -1 msg n-messages)
-                                   (hang)))
-                             msgs)))))
-             (make-threads n-recv-msg-n-h "RECV-MSG-NO-HANG"
-               #'(lambda ()
-                   (sleep (random 0.001))
-                   (dotimes (i 30)
-                     (thread-yield)
-                     (multiple-value-bind (msg ok)
-                         (receive-message-no-hang mbox)
-                       (when ok
-                         (sb-ext:atomic-incf (counter-ref cnt))
-                         (unless (< -1 msg n-messages)
-                           (hang))))))))))
-         (threads (append receivers senders)))
-    (when interruptor (funcall interruptor threads))
-    (mapc #'timed-join-thread threads)
-    (values mbox (counter-ref cnt) (* n-senders n-messages))))
+    (&key n-senders n-receivers n-messages interruptor)
+  (let ((mbox    (make-mailbox))
+        (counter (make-counter))
+        (+sleep+ 0.0001)
+        (+fin-token+ :finish) ; end token for receivers to stop
+        (+blksize+ 5))        ; "block size" for RECEIVE-PENDING-MESSAGES
+    (multiple-value-bind (n-recv-msg
+                          n-recv-msg-n-h
+                          n-recv-pend-msgs)
+        ;; We have three groups of receivers, one using
+        ;; RECEIVE-MESSAGE, one RECEIVE-MESSAGE-NO-HANG, and
+        ;; another one RECEIVE-PENDING-MESSAGES.
+        (receiver-distribution n-receivers)
+      (let ((senders
+             (make-threads n-senders "SENDER"
+                           #'(lambda ()
+                               (dotimes (i n-messages t)
+                                 (send-message mbox i)
+                                 (sleep (random +sleep+))))))
+            (receivers
+             (flet ((process-msg (msg out)
+                      (cond
+                        ((eq msg +fin-token+)
+                         (funcall out t))
+                        ((not (< -1 msg n-messages))
+                         (funcall out nil))
+                        (t
+                         (sb-ext:atomic-incf (counter-ref counter))))))
+               (append
+                (make-threads n-recv-msg "RECV-MSG"
+                  #'(lambda ()
+                      (sleep (random +sleep+))
+                      (loop (process-msg (receive-message mbox)
+                                         #'(lambda (x) (return x))))))
+                (make-threads n-recv-pend-msgs "RECV-PEND-MSGS"
+                  #'(lambda ()
+                      (loop
+                        (sleep (random +sleep+))
+                        (mapc #'(lambda (msg)
+                                  (process-msg msg #'(lambda (x) (return x))))
+                              (receive-pending-messages mbox +blksize+)))))
+                (make-threads n-recv-msg-n-h "RECV-MSG-NO-HANG"
+                  #'(lambda ()
+                      (loop
+                        (sleep (random +sleep+))
+                        (multiple-value-bind (msg ok)
+                            (receive-message-no-hang mbox)
+                          (when ok
+                            (process-msg msg #'(lambda (x)
+                                                 (return x))))))))))))
+
+        (when interruptor
+          (funcall interruptor (append receivers senders)))
+        (let ((garbage  0)
+              (errors   0)
+              (timeouts 0))
+          (flet ((wait-for (threads)
+                   (mapc #'(lambda (thread)
+                             (ecase (timed-join-thread thread)
+                               ((t))
+                               ((nil)      (incf garbage))
+                               ((:aborted) (incf errors))
+                               ((:timeout) (incf timeouts)
+                                           (kill-thread thread))))
+                         threads)))
+            ;; First wait until all messages are propagating.
+            (wait-for senders)
+            ;; Senders are finished, inform and wait for the
+            ;; receivers.
+            (loop repeat (+ n-recv-msg
+                            n-recv-msg-n-h
+                            (* n-recv-pend-msgs +blksize+))
+                  ;; The number computed above is an upper bound; if
+                  ;; we send as many FINs as that, we can be sure that
+                  ;; every receiver must have got at least one FIN.
+                  do (send-message mbox +fin-token+))
+            (wait-for receivers)
+            ;; We may in fact have sent too many FINs, so make sure
+            ;; it's only FINs in the mailbox now.
+            (mapc #'(lambda (msg) (unless (eq msg +fin-token+)
+                                    (incf garbage)))
+                  (list-mailbox-messages mbox))
+            (values  `(:received . ,(counter-ref counter))
+                     `(:garbage  . ,garbage)
+                     `(:errors   . ,errors)
+                     `(:timeouts . ,timeouts))))))))
+
 
 (deftest mailbox.single-producer-single-consumer
-    (multiple-value-bind (mbox received total)
-        (test-mailbox-producers-consumers :n-senders 1
-                                          :n-receivers 1
-                                          :n-messages 10000)
-      (values
-       (= received total)
-       (mailbox-count mbox)
-       (list-mailbox-messages mbox)))
-  t
-  0
-  nil)
+    (test-mailbox-producers-consumers :n-senders 1
+                                      :n-receivers 1
+                                      :n-messages 10000)
+  (:received . 10000)
+  (:garbage  . 0)
+  (:errors   . 0)
+  (:timeouts . 0))
 
 (deftest mailbox.single-producer-multiple-consumers
-    (multiple-value-bind (mbox received total)
-        (test-mailbox-producers-consumers :n-senders 1
-                                          :n-receivers 100
-                                          :n-messages 10000)
-      (values
-       (= received total)
-       (mailbox-count mbox)
-       (list-mailbox-messages mbox)))
-  t
-  0
-  nil)
+    (test-mailbox-producers-consumers :n-senders 1
+                                      :n-receivers 100
+                                      :n-messages 10000)
+  (:received . 10000)
+  (:garbage  . 0)
+  (:errors   . 0)
+  (:timeouts . 0))
 
 (deftest mailbox.multiple-producers-single-consumer
-    (multiple-value-bind (mbox received total)
-        (test-mailbox-producers-consumers :n-senders 100
-                                          :n-receivers 10
-                                          :n-messages 1000)
-      (values
-       (= received total)
-       (mailbox-count mbox)
-       (list-mailbox-messages mbox)))
-  t
-  0
-  nil)
+    (test-mailbox-producers-consumers :n-senders 100
+                                      :n-receivers 1
+                                      :n-messages 100)
+  (:received . 10000)
+  (:garbage  . 0)
+  (:errors   . 0)
+  (:timeouts . 0))
 
 (deftest mailbox.multiple-producers-multiple-consumers
-    (multiple-value-bind (mbox received total)
-        (test-mailbox-producers-consumers :n-senders 100
-                                          :n-receivers 100
-                                          :n-messages 1000)
-      (values
-       (= received total)
-       (mailbox-count mbox)
-       (list-mailbox-messages mbox)))
-  t
-  0
-  nil)
+    (test-mailbox-producers-consumers :n-senders 100
+                                      :n-receivers 100
+                                      :n-messages 10000)
+  (:received . 1000000)
+  (:garbage  . 0)
+  (:errors   . 0)
+  (:timeouts . 0))
 
 (deftest mailbox.interrupts-safety.1
-    (multiple-value-bind (mbox received total)
+    (multiple-value-bind (received garbage errors timeouts)
         (test-mailbox-producers-consumers
          :n-senders 100
          :n-receivers 100
          :n-messages 1000
-         :interruptor #'(lambda (threads)
-                          (let ((n (length threads)))
-                            ;; 99 so even in the unlikely case that only
-                            ;; receivers (or only senders) are shot
-                            ;; dead, there's still one that survives to
-                            ;; properly end the test.
-                            (loop repeat 99 do
-                              (kill-thread (nth (random n) threads))))))
+         :interruptor #'(lambda (threads &aux (n (length threads)))
+                          ;; 99 so even in the unlikely case that only
+                          ;; receivers (or only senders) are shot
+                          ;; dead, there's still one that survives to
+                          ;; properly end the test.
+                          (loop repeat 99
+                                for victim = (nth (random n) threads)
+                                do (kill-thread victim)
+                                   (sleep (random 0.0001)))))
       (values
        ;; We may have killed a receiver before it got to incrementing
        ;; the counter.
-       (<= received total)
-       (mailbox-count mbox)
-       (list-mailbox-messages mbox)))
-  t
-  0
-  nil)
+       (if (<= (cdr received) 1000000)
+           `(:received . :ok)
+           received)
+       garbage
+       ;; we may have gotten errors due to our killing spree.
+       timeouts))
+  (:received . :ok)
+  (:garbage  . 0)
+  (:timeouts . 0))
 
 ) ; #+sb-thread (progn ...
\ No newline at end of file
index 6a5e82a..bc21ebc 100644 (file)
@@ -3,15 +3,17 @@
 #+sb-thread
 (progn
 
-(defparameter +timeout+ 60.0)
+(defparameter +timeout+ 30.0)
 
 (defun make-threads (n name fn)
   (loop for i from 1 to n
         collect (make-thread fn :name (format nil "~A-~D" name i))))
 
 (defun timed-join-thread (thread &optional (timeout +timeout+))
-  (sb-sys:with-deadline (:seconds timeout)
-    (join-thread thread :default :aborted)))
+  (handler-case (sb-sys:with-deadline (:seconds timeout)
+                  (join-thread thread :default :aborted))
+    (sb-ext:timeout ()
+      :timeout)))
 
 (defun hang ()
   (join-thread *current-thread*))
index cbdba5e..8650ceb 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.37.19"
+"1.0.37.20"