1.0.29.34: hopefully thread-safe SB-PROFILE
authorNikodemus Siivola <nikodemus@random-state.net>
Mon, 22 Jun 2009 16:06:22 +0000 (16:06 +0000)
committerNikodemus Siivola <nikodemus@random-state.net>
Mon, 22 Jun 2009 16:06:22 +0000 (16:06 +0000)
* Nuke PCOUNTER stuff, and replace it with a COUNTER local to
  profile.lisp:

  ** New counter uses ATOMIC-INCF for atomicity, plus a lock and
     an overflow counter to handle counts over word in size.

  ** Stack allocate counters and counter value cells when possible
     to reduce overhead.

* Nuke the FASTBIG-stuff. A generic arithmetic call with fixnum args
  is not that slow -- and if it turns out to be too slow after all,
  then the compiler should take care of this under appropriate policy
  instead of us using hacks like this.

* Test case from Volkan Yazici.

build-order.lisp-expr
doc/manual/profiling.texinfo
package-data-list.lisp-expr
src/code/early-pcounter.lisp [deleted file]
src/code/pcounter.lisp [deleted file]
src/code/profile.lisp
src/compiler/fndb.lisp
tests/profile.impure.lisp [new file with mode: 0644]
version.lisp-expr

index 9c5d805..55fa294 100644 (file)
  ;; accessors.)
  ("src/code/type-class")
 
- ("src/code/early-pcounter")
- ("src/code/pcounter" :not-host)
-
  ("src/code/ansi-stream" :not-host)
 
  ("src/code/sysmacs" :not-host)
index 53c8c48..7770c27 100644 (file)
@@ -21,6 +21,9 @@ Inlined functions do not appear in the results reported by either.
 The package @code{sb-profile} provides a classic, per-function-call
 profiler.
 
+@strong{NOTE}: When profiling code executed by multiple threads in
+parallel, the consing attributed to each function is inaccurate.
+
 @include macro-sb-profile-profile.texinfo
 @include macro-sb-profile-unprofile.texinfo
 @include fun-sb-profile-report.texinfo
index 1e99f35..ed9736c 100644 (file)
@@ -938,15 +938,6 @@ possibly temporariliy, because it might be used internally."
                "SBCL-HOMEDIR-PATHNAME"
                "SIMPLIFY-NAMESTRING"
 
-               ;; PCOUNTERs
-               "FASTBIG-INCF-PCOUNTER-OR-FIXNUM"
-               "INCF-PCOUNTER"
-               "INCF-PCOUNTER-OR-FIXNUM"
-               "MAKE-PCOUNTER"
-               "PCOUNTER"
-               "PCOUNTER->INTEGER"
-               "PCOUNTER-OR-FIXNUM->INTEGER"
-               "PCOUNTER-P"
                "*N-BYTES-FREED-OR-PURIFIED*"
 
                ;; miscellaneous non-standard but handy user-level functions..
diff --git a/src/code/early-pcounter.lisp b/src/code/early-pcounter.lisp
deleted file mode 100644 (file)
index c14f30e..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-;;;; PCOUNTERs
-;;;;
-;;;; a PCOUNTER is used to represent an unsigned integer quantity which
-;;;; can grow bigger than a fixnum, but typically does so, if at all,
-;;;; in many small steps, where we don't want to cons on every step.
-;;;; Such quantities typically arise in profiling, e.g.
-;;;; total system consing, time spent in a profiled function, and
-;;;; bytes consed in a profiled function are all examples of such
-;;;; quantities. The name is an abbreviation for "Profiling COUNTER".
-
-;;; This stuff is implemented in the SB!PROFILE package because the
-;;; profiling code is currently the only code which wants to poke
-;;; around in the implementation details. This needs to be done on the
-;;; host for type information.
-
-(in-package "SB!PROFILE")
-
-(def!struct (pcounter (:copier nil))
-  (integer 0 :type unsigned-byte)
-  (fixnum 0 :type (and fixnum unsigned-byte)))
-
diff --git a/src/code/pcounter.lisp b/src/code/pcounter.lisp
deleted file mode 100644 (file)
index aee1cad..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-;;;; PCOUNTERs
-;;;;
-;;;; a PCOUNTER is used to represent an unsigned integer quantity which
-;;;; can grow bigger than a fixnum, but typically does so, if at all,
-;;;; in many small steps, where we don't want to cons on every step.
-;;;; Such quantities typically arise in profiling, e.g.
-;;;; total system consing, time spent in a profiled function, and
-;;;; bytes consed in a profiled function are all examples of such
-;;;; quantities. The name is an abbreviation for "Profiling COUNTER".
-;;;;
-;;;; (This isn't one of my more brilliant names, so if you have a
-;;;; better suggestion, let me know. -- WHN 2001-06-22)
-
-;;; This stuff is implemented in the SB!PROFILE because the profiling
-;;; code is currently the only code which wants to poke around in the
-;;; implementation details.
-(in-package "SB!PROFILE")
-\f
-;;;; basic PCOUNTER stuff
-
-(/show0 "pcounter.lisp 21")
-
-(declaim (maybe-inline incf-pcounter))
-(defun incf-pcounter (pcounter delta)
-  (aver (typep delta 'unsigned-byte))
-  (let ((sum (+ (pcounter-fixnum pcounter) delta)))
-    (cond ((typep sum 'fixnum)
-           (setf (pcounter-fixnum pcounter) sum))
-          (t
-           (incf (pcounter-integer pcounter) sum)
-           (setf (pcounter-fixnum pcounter) 0))))
-  pcounter)
-
-(/show0 "pcounter.lisp 34")
-
-;;;(declaim (inline pcounter->integer)) ; FIXME: maybe inline when more stable
-(defun pcounter->integer (pcounter)
-  (+ (pcounter-integer pcounter)
-     (pcounter-fixnum pcounter)))
-\f
-;;;; operations on (OR PCOUNTER FIXNUM)
-;;;;
-;;;; When we don't want to cons a PCOUNTER unless we're forced to, we
-;;;; start with a FIXNUM counter and only create a PCOUNTER if the
-;;;; FIXNUM overflows.
-
-(/show0 "pcounter.lisp 47")
-
-(declaim (inline %incf-pcounter-or-fixnum))
-(defun %incf-pcounter-or-fixnum (x delta)
-  (etypecase x
-    (fixnum
-     (let ((sum (+ x delta)))
-       (if (typep sum 'fixnum)
-           sum
-           (make-pcounter :integer sum))))
-    (pcounter
-     (incf-pcounter x delta))))
-
-(define-modify-macro incf-pcounter-or-fixnum (delta) %incf-pcounter-or-fixnum)
-
-(/show0 "pcounter.lisp 62")
-
-;;; Trade off space for execution time by handling the common fast
-;;; (TYPEP DELTA 'FIXNUM) case inline and only calling generic
-;;; arithmetic as a last resort.
-(defmacro fastbig-incf-pcounter-or-fixnum (x delta)
-  (let ((delta-sym (gensym "DELTA")))
-    `(let ((,delta-sym ,delta))
-       (aver (typep ,delta-sym 'unsigned-byte))
-       ;;(declare (type unsigned-byte ,delta-sym))
-       (if (typep ,delta-sym 'fixnum)
-           (incf-pcounter-or-fixnum ,x ,delta)
-           (incf-pcounter-or-fixnum ,x ,delta)))))
-
-(/show0 "pcounter.lisp 76")
-
-(declaim (maybe-inline pcounter-or-fixnum->integer))
-(defun pcounter-or-fixnum->integer (x)
-  (etypecase x
-    (fixnum x)
-    (pcounter (pcounter->integer x))))
-
-(/show0 "pcounter.lisp end")
index ddfd722..2e30d72 100644 (file)
@@ -9,33 +9,61 @@
 
 (in-package "SB-PROFILE") ; (SB-, not SB!, since we're built in warm load.)
 \f
-;;;; reading internal run time with high resolution and low overhead
+
+;;;; COUNTER object
+;;;;
+;;;; Thread safe, and reasonably fast: in common case increment is just an
+;;;; ATOMIC-INCF, in overflow case grab a lock and increment overflow counter.
+
+(defconstant +most-positive-word+ (1- (expt 2 sb-vm:n-word-bits)))
+
+(declaim (inline make-counter))
+(defstruct (counter (:copier nil))
+  (count 0 :type sb-vm:word)
+  (overflow 0 :type unsigned-byte)
+  (overflow-lock (sb-thread::make-spinlock) :type sb-thread::spinlock))
+
+(defun incf-counter (counter delta)
+  ;; When running multi-threaded we can easily get negative numbers for the
+  ;; cons-counter. Don't count them at all.
+  (when (plusp delta)
+    ;; ATOMIC-INCF is restricted to signed-word, but delta can be bigger: first
+    ;; count the number of full overflows...
+    (loop while (>= delta +most-positive-word+)
+          do (sb-thread::with-spinlock ((counter-overflow-lock counter))
+               (incf (counter-overflow counter) 1))
+          (decf delta +most-positive-word+))
+    (flet ((%incf (d)
+             (let ((prev (atomic-incf (counter-count counter) d)))
+               (when (< (logand +most-positive-word+ (+ prev d)) prev)
+                 (sb-thread::with-spinlock ((counter-overflow-lock counter))
+                   (incf (counter-overflow counter)))))))
+      (if (typep delta '(signed-byte 32))
+          (%incf delta)
+          ;; ...and if delta is still too big, split it into four parts: they
+          ;; are guaranteed to fit into a signed word.
+          (let ((quarter (truncate delta 4)))
+            (%incf quarter)
+            (%incf quarter)
+            (%incf quarter)
+            (%incf quarter)))))
+  counter)
+
+(defun counter->integer (counter)
+  (+ (counter-count counter)
+     (* (counter-overflow counter)
+        +most-positive-word+)))
+\f
+;;;; High resolution timer
+
+;;; FIXME: High resolution this is not. Build a microsecond-accuracy version
+;;; on top of unix-getrusage, maybe.
 
 (defconstant +ticks-per-second+ internal-time-units-per-second)
 
 (declaim (inline get-internal-ticks))
-(defun get-internal-ticks () (get-internal-run-time))
-\f
-;;;; implementation-dependent interfaces
-
-#|
-;;; To avoid unnecessary consing in the "encapsulation" code, we want
-;;; find out the number of required arguments, and use &REST to
-;;; capture only non-required arguments. This function returns (VALUES
-;;; MIN-ARGS OPTIONALS-P), where MIN-ARGS is the number of required
-;;; arguments and OPTIONALS-P is true iff there are any non-required
-;;; arguments (such as &OPTIONAL, &REST, or &KEY).
-(declaim (ftype (function ((or symbol cons)) (values fixnum t)) fun-signature))
-(defun fun-signature (name)
-  (let ((type (info :function :type name)))
-    (cond ((not (fun-type-p type))
-           (values 0 t))
-          (t
-           (values (length (fun-type-required type))
-                   (or (fun-type-optional type)
-                       (fun-type-keyp type)
-                       (fun-type-rest type)))))))
-|#
+(defun get-internal-ticks ()
+  (get-internal-run-time))
 \f
 ;;;; global data structures
 
@@ -60,9 +88,9 @@
 ;;; resource consumed for each nested call is added into the
 ;;; appropriate variable. When the outer function returns, these
 ;;; amounts are subtracted from the total.
-(defvar *enclosed-ticks* 0)
-(defvar *enclosed-consing* 0)
-(declaim (type (or pcounter fixnum) *enclosed-ticks* *enclosed-consing*))
+(declaim (counter *enclosed-ticks* *enclosed-consing*))
+(defvar *enclosed-ticks*)
+(defvar *enclosed-consing*)
 
 ;;; This variable is also used to subtract out time for nested
 ;;; profiled calls. The time inside the profile wrapper call --
 ;;; GET-INTERNAL-TICKS, and after we get to the second call. By
 ;;; keeping track of the count of enclosed profiled calls, we can try
 ;;; to compensate for that.
-(defvar *enclosed-profiles* 0)
-(declaim (type (or pcounter fixnum) *enclosed-profiles*))
+(declaim (counter *enclosed-profiles*))
+(defvar *enclosed-profiles*)
 
 ;;; the encapsulated function we're currently computing profiling data
 ;;; for, recorded so that we can detect the problem of
 \f
 ;;;; profile encapsulations
 
-;;; Trade off space for time by handling the usual all-FIXNUM cases inline.
-(defmacro fastbig- (x y)
-  (once-only ((x x) (y y))
-    `(if (and (typep ,x '(and fixnum unsigned-byte))
-              (typep ,y '(and fixnum unsigned-byte)))
-         ;; special case: can use fixnum arithmetic and be guaranteed
-         ;; the result is also a fixnum
-         (- ,x ,y)
-         ;; general case
-         (- ,x ,y))))
-(defmacro fastbig-1+ (x)
-  (once-only ((x x))
-    `(if (typep ,x 'index)
-         (1+ ,x)
-         (1+ ,x))))
-
 ;;; Return a collection of closures over the same lexical context,
 ;;;   (VALUES ENCAPSULATION-FUN READ-STATS-FUN CLEAR-STATS-FUN).
 ;;;
 ;;; will minimize profiling overhead.)
 (defun profile-encapsulation-lambdas (encapsulated-fun)
   (declare (type function encapsulated-fun))
-  (let* ((count 0)
-         (ticks 0)
-         (consing 0)
-         (profiles 0))
-    (declare (type (or pcounter fixnum) count ticks consing profiles))
+  (let* ((count (make-counter))
+         (ticks (make-counter))
+         (consing (make-counter))
+         (profiles (make-counter)))
+    (declare (counter count ticks consing profiles))
     (values
      ;; ENCAPSULATION-FUN
      (lambda (&more arg-context arg-count)
        ;; Make sure that we're not recursing infinitely.
        (when (boundp '*computing-profiling-data-for*)
          (unprofile-all) ; to avoid further recursion
-         (error "~@<When computing profiling data for ~S, the profiled function ~S was called. To get out of this infinite recursion, all functions have been unprofiled. (Since the profiling system evidently uses ~S in its computations, it looks as though it's a bad idea to profile it.)~:@>"
-                *computing-profiling-data-for*
-                encapsulated-fun
+         (error "~@<When computing profiling data for ~S, the profiled ~
+                    function ~S was called. To get out of this infinite recursion, all ~
+                    functions have been unprofiled. (Since the profiling system evidently ~
+                    uses ~S in its computations, it looks as though it's a bad idea to ~
+                    profile it.)~:@>"
+                *computing-profiling-data-for* encapsulated-fun
                 encapsulated-fun))
-       ;; FIXME: Probably when this is stable, we should optimize (SAFETY 0).
-       (fastbig-incf-pcounter-or-fixnum count 1)
+       (incf-counter count 1)
        (let ((dticks 0)
              (dconsing 0)
              (inner-enclosed-profiles 0))
-         (declare (type unsigned-byte dticks dconsing inner-enclosed-profiles)
-                  (dynamic-extent dticks dconsing inner-enclosed-profiles))
-         (aver (typep dticks 'unsigned-byte))
-         (aver (typep dconsing 'unsigned-byte))
-         (aver (typep inner-enclosed-profiles 'unsigned-byte))
+         (declare (truly-dynamic-extent dticks dconsing inner-enclosed-profiles))
          (unwind-protect
              (let* ((start-ticks (get-internal-ticks))
-                    (*enclosed-ticks* 0)
-                    (*enclosed-consing* 0)
-                    (*enclosed-profiles* 0)
+                    (*enclosed-ticks* (make-counter))
+                    (*enclosed-consing* (make-counter))
+                    (*enclosed-profiles* (make-counter))
                     (nbf0 *n-bytes-freed-or-purified*)
                     (dynamic-usage-0 (sb-kernel:dynamic-usage)))
-               (declare (inline pcounter-or-fixnum->integer))
+               (declare (dynamic-extent *enclosed-ticks* *enclosed-consing* *enclosed-profiles*))
                (unwind-protect
                    (multiple-value-call encapsulated-fun
                                         (sb-c:%more-arg-values arg-context
                                                                arg-count))
                  (let ((*computing-profiling-data-for* encapsulated-fun)
                        (dynamic-usage-1 (sb-kernel:dynamic-usage)))
-                   (setf dticks (fastbig- (get-internal-ticks) start-ticks))
-                   (setf dconsing
-                         (if (eql *n-bytes-freed-or-purified* nbf0)
-                             ;; common special case where we can avoid
-                             ;; bignum arithmetic
-                             (- dynamic-usage-1 dynamic-usage-0)
-                             ;; general case
-                             (- (get-bytes-consed) nbf0 dynamic-usage-0)))
-                   (setf inner-enclosed-profiles
-                         (pcounter-or-fixnum->integer *enclosed-profiles*))
-                   (let ((net-dticks (fastbig- dticks *enclosed-ticks*)))
-                     (fastbig-incf-pcounter-or-fixnum ticks net-dticks))
-                   (let ((net-dconsing (fastbig- dconsing
-                                                 (pcounter-or-fixnum->integer
-                                                  *enclosed-consing*))))
-                     (fastbig-incf-pcounter-or-fixnum consing net-dconsing))
-                   (fastbig-incf-pcounter-or-fixnum profiles
-                                                    inner-enclosed-profiles))))
-           (fastbig-incf-pcounter-or-fixnum *enclosed-ticks* dticks)
-           (fastbig-incf-pcounter-or-fixnum *enclosed-consing* dconsing)
-           (fastbig-incf-pcounter-or-fixnum *enclosed-profiles*
-                                            (fastbig-1+
-                                             inner-enclosed-profiles)))))
+                   (setf dticks (- (get-internal-ticks) start-ticks)
+                         dconsing (if (eql *n-bytes-freed-or-purified* nbf0)
+                                      ;; common special case where we can avoid
+                                      ;; bignum arithmetic
+                                      (- dynamic-usage-1 dynamic-usage-0)
+                                      ;; general case
+                                      (- (get-bytes-consed) nbf0 dynamic-usage-0))
+                         inner-enclosed-profiles (counter->integer *enclosed-profiles*))
+                   (incf-counter ticks (- dticks (counter->integer *enclosed-ticks*)))
+                   (incf-counter consing (- dconsing (counter->integer *enclosed-consing*)))
+                   (incf-counter profiles inner-enclosed-profiles))))
+           (when (boundp '*enclosed-ticks*)
+             (incf-counter *enclosed-ticks* dticks)
+             (incf-counter *enclosed-consing* dconsing)
+             (incf-counter *enclosed-profiles* (1+ inner-enclosed-profiles))))))
      ;; READ-STATS-FUN
      (lambda ()
-       (values (pcounter-or-fixnum->integer count)
-               (pcounter-or-fixnum->integer ticks)
-               (pcounter-or-fixnum->integer consing)
-               (pcounter-or-fixnum->integer profiles)))
+       (values (counter->integer count)
+               (counter->integer ticks)
+               (counter->integer consing)
+               (counter->integer profiles)))
      ;; CLEAR-STATS-FUN
      (lambda ()
-       (setf count 0
-             ticks 0
-             consing 0
-             profiles 0)))))
+       (setf count (make-counter)
+             ticks (make-counter)
+             consing (make-counter)
+             profiles (make-counter))))))
 \f
 ;;;; interfaces
 
index a070e62..25a45dc 100644 (file)
 (defknown get-bytes-consed () unsigned-byte (flushable))
 (defknown mask-signed-field ((integer 0 *) integer) integer
           (movable flushable foldable))
-
-;;; PCOUNTERs
-(defknown incf-pcounter (pcounter unsigned-byte) pcounter)
-(defknown pcounter->integer (pcounter) unsigned-byte)
-(defknown %incf-pcounter-or-fixnum ((or pcounter fixnum) unsigned-byte)
-  (or pcounter fixnum))
-(defknown pcounter-or-fixnum->integer ((or pcounter fixnum)) unsigned-byte)
 \f
 ;;;; magical compiler frobs
 
diff --git a/tests/profile.impure.lisp b/tests/profile.impure.lisp
new file mode 100644 (file)
index 0000000..ab1c95c
--- /dev/null
@@ -0,0 +1,91 @@
+;;;; tests PROFILE with multiple threads
+
+;;;; This software is part of the SBCL system. See the README file for
+;;;; more information.
+;;;;
+;;;; While most of SBCL is derived from the CMU CL system, the test
+;;;; files (like this one) were written from scratch after the fork
+;;;; from CMU CL.
+;;;;
+;;;; This software is in the public domain and is provided with
+;;;; absolutely no warranty. See the COPYING and CREDITS files for
+;;;; more information.
+
+(load "assertoid.lisp")
+(load "test-util.lisp")
+
+(defpackage :profile-test
+  (:use :cl :sb-thread))
+
+(in-package :profile-test)
+
+(defun miller-rabin-prime-p (n &optional (s 50))
+ "Miller-Rabin primality test written by R. Matthew Emerson."
+ (flet ((witness-p (a n)
+          (loop with b = (- n 1)
+                for i from (integer-length b) downto 0
+                for d = 1 then (mod (* d d) n)
+                for x = d
+                do (progn
+                     (when (and (= d 1) (/= x 1) (/= x (- n 1)))
+                       (return-from witness-p t))
+                     (when (logbitp i b)
+                       (setf d (mod (* d a) n))))
+                finally (return (/= d 1)))))
+   (dotimes (i s n)
+     (let ((w (1+ (random (- n 1)))))
+       (when (witness-p w n)
+         (return-from miller-rabin-prime-p nil))))))
+
+(defun random-of-bit-size (n-bits)
+ "Returns a random number of maximum size `N-BITS'."
+ (random (ash 1 n-bits)))
+
+(defun prime-of-bit-size (n-bits)
+ "Returns a prime number of maximum size `N-BITS'."
+ (loop for maybe-prime = (random-of-bit-size n-bits)
+       when (miller-rabin-prime-p maybe-prime)
+         do (return maybe-prime)))
+
+(defun waste-cpu-cycles (n-primes n-prime-bits n-workers)
+  (if (= n-workers 1)
+      (handler-case
+          (progn
+            (loop repeat n-primes
+                  do (prime-of-bit-size n-prime-bits))
+            (list t))
+        (serious-condition (s)
+          s))
+      (let* ((r (make-semaphore))
+             (w (make-semaphore))
+             (workers
+              (loop repeat n-workers
+                    collect (sb-thread:make-thread
+                             (let ((rs (make-random-state)))
+                               (lambda ()
+                                 (block nil
+                                     (handler-bind ((serious-condition (lambda (c)
+                                                                         (princ c)
+                                                                         (sb-debug:backtrace)
+                                                                         (return c))))
+                                       (let ((*random-state* rs))
+                                         (signal-semaphore r)
+                                         (wait-on-semaphore w)
+                                         (loop repeat n-primes
+                                               do (prime-of-bit-size n-prime-bits))
+                                         t)))))))))
+        (loop repeat n-workers do (wait-on-semaphore r))
+        (signal-semaphore w n-workers)
+        (mapcar #'sb-thread:join-thread workers))))
+
+(in-package :cl-user)
+
+(with-test (:name (profile threads))
+  (profile "PROFILE-TEST")
+  ;; This used to signal an error with threads
+  (let* ((n #+sb-thread 5 #-sb-thread 1)
+         (res (profile-test::waste-cpu-cycles 10 256 n))
+         (want (make-list n :initial-element t)))
+    (unless (equal res want)
+      (error "wanted ~S, got ~S" want res)))
+  (report))
index 3d8f13d..10c7499 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.29.33"
+"1.0.29.34"