From: Nikodemus Siivola Date: Mon, 22 Jun 2009 16:06:22 +0000 (+0000) Subject: 1.0.29.34: hopefully thread-safe SB-PROFILE X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=c461e239d10c94d77649856bbde06431666da4fd;p=sbcl.git 1.0.29.34: hopefully thread-safe SB-PROFILE * Nuke PCOUNTER stuff, and replace it with a COUNTER local to profile.lisp: ** New counter uses ATOMIC-INCF for atomicity, plus a lock and an overflow counter to handle counts over word in size. ** Stack allocate counters and counter value cells when possible to reduce overhead. * Nuke the FASTBIG-stuff. A generic arithmetic call with fixnum args is not that slow -- and if it turns out to be too slow after all, then the compiler should take care of this under appropriate policy instead of us using hacks like this. * Test case from Volkan Yazici. --- diff --git a/build-order.lisp-expr b/build-order.lisp-expr index 9c5d805..55fa294 100644 --- a/build-order.lisp-expr +++ b/build-order.lisp-expr @@ -160,9 +160,6 @@ ;; accessors.) ("src/code/type-class") - ("src/code/early-pcounter") - ("src/code/pcounter" :not-host) - ("src/code/ansi-stream" :not-host) ("src/code/sysmacs" :not-host) diff --git a/doc/manual/profiling.texinfo b/doc/manual/profiling.texinfo index 53c8c48..7770c27 100644 --- a/doc/manual/profiling.texinfo +++ b/doc/manual/profiling.texinfo @@ -21,6 +21,9 @@ Inlined functions do not appear in the results reported by either. The package @code{sb-profile} provides a classic, per-function-call profiler. +@strong{NOTE}: When profiling code executed by multiple threads in +parallel, the consing attributed to each function is inaccurate. + @include macro-sb-profile-profile.texinfo @include macro-sb-profile-unprofile.texinfo @include fun-sb-profile-report.texinfo diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 1e99f35..ed9736c 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -938,15 +938,6 @@ possibly temporariliy, because it might be used internally." "SBCL-HOMEDIR-PATHNAME" "SIMPLIFY-NAMESTRING" - ;; PCOUNTERs - "FASTBIG-INCF-PCOUNTER-OR-FIXNUM" - "INCF-PCOUNTER" - "INCF-PCOUNTER-OR-FIXNUM" - "MAKE-PCOUNTER" - "PCOUNTER" - "PCOUNTER->INTEGER" - "PCOUNTER-OR-FIXNUM->INTEGER" - "PCOUNTER-P" "*N-BYTES-FREED-OR-PURIFIED*" ;; miscellaneous non-standard but handy user-level functions.. diff --git a/src/code/early-pcounter.lisp b/src/code/early-pcounter.lisp deleted file mode 100644 index c14f30e..0000000 --- a/src/code/early-pcounter.lisp +++ /dev/null @@ -1,21 +0,0 @@ -;;;; PCOUNTERs -;;;; -;;;; a PCOUNTER is used to represent an unsigned integer quantity which -;;;; can grow bigger than a fixnum, but typically does so, if at all, -;;;; in many small steps, where we don't want to cons on every step. -;;;; Such quantities typically arise in profiling, e.g. -;;;; total system consing, time spent in a profiled function, and -;;;; bytes consed in a profiled function are all examples of such -;;;; quantities. The name is an abbreviation for "Profiling COUNTER". - -;;; This stuff is implemented in the SB!PROFILE package because the -;;; profiling code is currently the only code which wants to poke -;;; around in the implementation details. This needs to be done on the -;;; host for type information. - -(in-package "SB!PROFILE") - -(def!struct (pcounter (:copier nil)) - (integer 0 :type unsigned-byte) - (fixnum 0 :type (and fixnum unsigned-byte))) - diff --git a/src/code/pcounter.lisp b/src/code/pcounter.lisp deleted file mode 100644 index aee1cad..0000000 --- a/src/code/pcounter.lisp +++ /dev/null @@ -1,84 +0,0 @@ -;;;; PCOUNTERs -;;;; -;;;; a PCOUNTER is used to represent an unsigned integer quantity which -;;;; can grow bigger than a fixnum, but typically does so, if at all, -;;;; in many small steps, where we don't want to cons on every step. -;;;; Such quantities typically arise in profiling, e.g. -;;;; total system consing, time spent in a profiled function, and -;;;; bytes consed in a profiled function are all examples of such -;;;; quantities. The name is an abbreviation for "Profiling COUNTER". -;;;; -;;;; (This isn't one of my more brilliant names, so if you have a -;;;; better suggestion, let me know. -- WHN 2001-06-22) - -;;; This stuff is implemented in the SB!PROFILE because the profiling -;;; code is currently the only code which wants to poke around in the -;;; implementation details. -(in-package "SB!PROFILE") - -;;;; basic PCOUNTER stuff - -(/show0 "pcounter.lisp 21") - -(declaim (maybe-inline incf-pcounter)) -(defun incf-pcounter (pcounter delta) - (aver (typep delta 'unsigned-byte)) - (let ((sum (+ (pcounter-fixnum pcounter) delta))) - (cond ((typep sum 'fixnum) - (setf (pcounter-fixnum pcounter) sum)) - (t - (incf (pcounter-integer pcounter) sum) - (setf (pcounter-fixnum pcounter) 0)))) - pcounter) - -(/show0 "pcounter.lisp 34") - -;;;(declaim (inline pcounter->integer)) ; FIXME: maybe inline when more stable -(defun pcounter->integer (pcounter) - (+ (pcounter-integer pcounter) - (pcounter-fixnum pcounter))) - -;;;; operations on (OR PCOUNTER FIXNUM) -;;;; -;;;; When we don't want to cons a PCOUNTER unless we're forced to, we -;;;; start with a FIXNUM counter and only create a PCOUNTER if the -;;;; FIXNUM overflows. - -(/show0 "pcounter.lisp 47") - -(declaim (inline %incf-pcounter-or-fixnum)) -(defun %incf-pcounter-or-fixnum (x delta) - (etypecase x - (fixnum - (let ((sum (+ x delta))) - (if (typep sum 'fixnum) - sum - (make-pcounter :integer sum)))) - (pcounter - (incf-pcounter x delta)))) - -(define-modify-macro incf-pcounter-or-fixnum (delta) %incf-pcounter-or-fixnum) - -(/show0 "pcounter.lisp 62") - -;;; Trade off space for execution time by handling the common fast -;;; (TYPEP DELTA 'FIXNUM) case inline and only calling generic -;;; arithmetic as a last resort. -(defmacro fastbig-incf-pcounter-or-fixnum (x delta) - (let ((delta-sym (gensym "DELTA"))) - `(let ((,delta-sym ,delta)) - (aver (typep ,delta-sym 'unsigned-byte)) - ;;(declare (type unsigned-byte ,delta-sym)) - (if (typep ,delta-sym 'fixnum) - (incf-pcounter-or-fixnum ,x ,delta) - (incf-pcounter-or-fixnum ,x ,delta))))) - -(/show0 "pcounter.lisp 76") - -(declaim (maybe-inline pcounter-or-fixnum->integer)) -(defun pcounter-or-fixnum->integer (x) - (etypecase x - (fixnum x) - (pcounter (pcounter->integer x)))) - -(/show0 "pcounter.lisp end") diff --git a/src/code/profile.lisp b/src/code/profile.lisp index ddfd722..2e30d72 100644 --- a/src/code/profile.lisp +++ b/src/code/profile.lisp @@ -9,33 +9,61 @@ (in-package "SB-PROFILE") ; (SB-, not SB!, since we're built in warm load.) -;;;; reading internal run time with high resolution and low overhead + +;;;; COUNTER object +;;;; +;;;; Thread safe, and reasonably fast: in common case increment is just an +;;;; ATOMIC-INCF, in overflow case grab a lock and increment overflow counter. + +(defconstant +most-positive-word+ (1- (expt 2 sb-vm:n-word-bits))) + +(declaim (inline make-counter)) +(defstruct (counter (:copier nil)) + (count 0 :type sb-vm:word) + (overflow 0 :type unsigned-byte) + (overflow-lock (sb-thread::make-spinlock) :type sb-thread::spinlock)) + +(defun incf-counter (counter delta) + ;; When running multi-threaded we can easily get negative numbers for the + ;; cons-counter. Don't count them at all. + (when (plusp delta) + ;; ATOMIC-INCF is restricted to signed-word, but delta can be bigger: first + ;; count the number of full overflows... + (loop while (>= delta +most-positive-word+) + do (sb-thread::with-spinlock ((counter-overflow-lock counter)) + (incf (counter-overflow counter) 1)) + (decf delta +most-positive-word+)) + (flet ((%incf (d) + (let ((prev (atomic-incf (counter-count counter) d))) + (when (< (logand +most-positive-word+ (+ prev d)) prev) + (sb-thread::with-spinlock ((counter-overflow-lock counter)) + (incf (counter-overflow counter))))))) + (if (typep delta '(signed-byte 32)) + (%incf delta) + ;; ...and if delta is still too big, split it into four parts: they + ;; are guaranteed to fit into a signed word. + (let ((quarter (truncate delta 4))) + (%incf quarter) + (%incf quarter) + (%incf quarter) + (%incf quarter))))) + counter) + +(defun counter->integer (counter) + (+ (counter-count counter) + (* (counter-overflow counter) + +most-positive-word+))) + +;;;; High resolution timer + +;;; FIXME: High resolution this is not. Build a microsecond-accuracy version +;;; on top of unix-getrusage, maybe. (defconstant +ticks-per-second+ internal-time-units-per-second) (declaim (inline get-internal-ticks)) -(defun get-internal-ticks () (get-internal-run-time)) - -;;;; implementation-dependent interfaces - -#| -;;; To avoid unnecessary consing in the "encapsulation" code, we want -;;; find out the number of required arguments, and use &REST to -;;; capture only non-required arguments. This function returns (VALUES -;;; MIN-ARGS OPTIONALS-P), where MIN-ARGS is the number of required -;;; arguments and OPTIONALS-P is true iff there are any non-required -;;; arguments (such as &OPTIONAL, &REST, or &KEY). -(declaim (ftype (function ((or symbol cons)) (values fixnum t)) fun-signature)) -(defun fun-signature (name) - (let ((type (info :function :type name))) - (cond ((not (fun-type-p type)) - (values 0 t)) - (t - (values (length (fun-type-required type)) - (or (fun-type-optional type) - (fun-type-keyp type) - (fun-type-rest type))))))) -|# +(defun get-internal-ticks () + (get-internal-run-time)) ;;;; global data structures @@ -60,9 +88,9 @@ ;;; resource consumed for each nested call is added into the ;;; appropriate variable. When the outer function returns, these ;;; amounts are subtracted from the total. -(defvar *enclosed-ticks* 0) -(defvar *enclosed-consing* 0) -(declaim (type (or pcounter fixnum) *enclosed-ticks* *enclosed-consing*)) +(declaim (counter *enclosed-ticks* *enclosed-consing*)) +(defvar *enclosed-ticks*) +(defvar *enclosed-consing*) ;;; This variable is also used to subtract out time for nested ;;; profiled calls. The time inside the profile wrapper call -- @@ -72,8 +100,8 @@ ;;; GET-INTERNAL-TICKS, and after we get to the second call. By ;;; keeping track of the count of enclosed profiled calls, we can try ;;; to compensate for that. -(defvar *enclosed-profiles* 0) -(declaim (type (or pcounter fixnum) *enclosed-profiles*)) +(declaim (counter *enclosed-profiles*)) +(defvar *enclosed-profiles*) ;;; the encapsulated function we're currently computing profiling data ;;; for, recorded so that we can detect the problem of @@ -98,22 +126,6 @@ ;;;; profile encapsulations -;;; Trade off space for time by handling the usual all-FIXNUM cases inline. -(defmacro fastbig- (x y) - (once-only ((x x) (y y)) - `(if (and (typep ,x '(and fixnum unsigned-byte)) - (typep ,y '(and fixnum unsigned-byte))) - ;; special case: can use fixnum arithmetic and be guaranteed - ;; the result is also a fixnum - (- ,x ,y) - ;; general case - (- ,x ,y)))) -(defmacro fastbig-1+ (x) - (once-only ((x x)) - `(if (typep ,x 'index) - (1+ ,x) - (1+ ,x)))) - ;;; Return a collection of closures over the same lexical context, ;;; (VALUES ENCAPSULATION-FUN READ-STATS-FUN CLEAR-STATS-FUN). ;;; @@ -137,11 +149,11 @@ ;;; will minimize profiling overhead.) (defun profile-encapsulation-lambdas (encapsulated-fun) (declare (type function encapsulated-fun)) - (let* ((count 0) - (ticks 0) - (consing 0) - (profiles 0)) - (declare (type (or pcounter fixnum) count ticks consing profiles)) + (let* ((count (make-counter)) + (ticks (make-counter)) + (consing (make-counter)) + (profiles (make-counter))) + (declare (counter count ticks consing profiles)) (values ;; ENCAPSULATION-FUN (lambda (&more arg-context arg-count) @@ -149,28 +161,26 @@ ;; Make sure that we're not recursing infinitely. (when (boundp '*computing-profiling-data-for*) (unprofile-all) ; to avoid further recursion - (error "~@" - *computing-profiling-data-for* - encapsulated-fun + (error "~@" + *computing-profiling-data-for* encapsulated-fun encapsulated-fun)) - ;; FIXME: Probably when this is stable, we should optimize (SAFETY 0). - (fastbig-incf-pcounter-or-fixnum count 1) + (incf-counter count 1) (let ((dticks 0) (dconsing 0) (inner-enclosed-profiles 0)) - (declare (type unsigned-byte dticks dconsing inner-enclosed-profiles) - (dynamic-extent dticks dconsing inner-enclosed-profiles)) - (aver (typep dticks 'unsigned-byte)) - (aver (typep dconsing 'unsigned-byte)) - (aver (typep inner-enclosed-profiles 'unsigned-byte)) + (declare (truly-dynamic-extent dticks dconsing inner-enclosed-profiles)) (unwind-protect (let* ((start-ticks (get-internal-ticks)) - (*enclosed-ticks* 0) - (*enclosed-consing* 0) - (*enclosed-profiles* 0) + (*enclosed-ticks* (make-counter)) + (*enclosed-consing* (make-counter)) + (*enclosed-profiles* (make-counter)) (nbf0 *n-bytes-freed-or-purified*) (dynamic-usage-0 (sb-kernel:dynamic-usage))) - (declare (inline pcounter-or-fixnum->integer)) + (declare (dynamic-extent *enclosed-ticks* *enclosed-consing* *enclosed-profiles*)) (unwind-protect (multiple-value-call encapsulated-fun (sb-c:%more-arg-values arg-context @@ -178,41 +188,33 @@ arg-count)) (let ((*computing-profiling-data-for* encapsulated-fun) (dynamic-usage-1 (sb-kernel:dynamic-usage))) - (setf dticks (fastbig- (get-internal-ticks) start-ticks)) - (setf dconsing - (if (eql *n-bytes-freed-or-purified* nbf0) - ;; common special case where we can avoid - ;; bignum arithmetic - (- dynamic-usage-1 dynamic-usage-0) - ;; general case - (- (get-bytes-consed) nbf0 dynamic-usage-0))) - (setf inner-enclosed-profiles - (pcounter-or-fixnum->integer *enclosed-profiles*)) - (let ((net-dticks (fastbig- dticks *enclosed-ticks*))) - (fastbig-incf-pcounter-or-fixnum ticks net-dticks)) - (let ((net-dconsing (fastbig- dconsing - (pcounter-or-fixnum->integer - *enclosed-consing*)))) - (fastbig-incf-pcounter-or-fixnum consing net-dconsing)) - (fastbig-incf-pcounter-or-fixnum profiles - inner-enclosed-profiles)))) - (fastbig-incf-pcounter-or-fixnum *enclosed-ticks* dticks) - (fastbig-incf-pcounter-or-fixnum *enclosed-consing* dconsing) - (fastbig-incf-pcounter-or-fixnum *enclosed-profiles* - (fastbig-1+ - inner-enclosed-profiles))))) + (setf dticks (- (get-internal-ticks) start-ticks) + dconsing (if (eql *n-bytes-freed-or-purified* nbf0) + ;; common special case where we can avoid + ;; bignum arithmetic + (- dynamic-usage-1 dynamic-usage-0) + ;; general case + (- (get-bytes-consed) nbf0 dynamic-usage-0)) + inner-enclosed-profiles (counter->integer *enclosed-profiles*)) + (incf-counter ticks (- dticks (counter->integer *enclosed-ticks*))) + (incf-counter consing (- dconsing (counter->integer *enclosed-consing*))) + (incf-counter profiles inner-enclosed-profiles)))) + (when (boundp '*enclosed-ticks*) + (incf-counter *enclosed-ticks* dticks) + (incf-counter *enclosed-consing* dconsing) + (incf-counter *enclosed-profiles* (1+ inner-enclosed-profiles)))))) ;; READ-STATS-FUN (lambda () - (values (pcounter-or-fixnum->integer count) - (pcounter-or-fixnum->integer ticks) - (pcounter-or-fixnum->integer consing) - (pcounter-or-fixnum->integer profiles))) + (values (counter->integer count) + (counter->integer ticks) + (counter->integer consing) + (counter->integer profiles))) ;; CLEAR-STATS-FUN (lambda () - (setf count 0 - ticks 0 - consing 0 - profiles 0))))) + (setf count (make-counter) + ticks (make-counter) + consing (make-counter) + profiles (make-counter)))))) ;;;; interfaces diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index a070e62..25a45dc 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -1384,13 +1384,6 @@ (defknown get-bytes-consed () unsigned-byte (flushable)) (defknown mask-signed-field ((integer 0 *) integer) integer (movable flushable foldable)) - -;;; PCOUNTERs -(defknown incf-pcounter (pcounter unsigned-byte) pcounter) -(defknown pcounter->integer (pcounter) unsigned-byte) -(defknown %incf-pcounter-or-fixnum ((or pcounter fixnum) unsigned-byte) - (or pcounter fixnum)) -(defknown pcounter-or-fixnum->integer ((or pcounter fixnum)) unsigned-byte) ;;;; magical compiler frobs diff --git a/tests/profile.impure.lisp b/tests/profile.impure.lisp new file mode 100644 index 0000000..ab1c95c --- /dev/null +++ b/tests/profile.impure.lisp @@ -0,0 +1,91 @@ +;;;; tests PROFILE with multiple threads + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; While most of SBCL is derived from the CMU CL system, the test +;;;; files (like this one) were written from scratch after the fork +;;;; from CMU CL. +;;;; +;;;; This software is in the public domain and is provided with +;;;; absolutely no warranty. See the COPYING and CREDITS files for +;;;; more information. + +(load "assertoid.lisp") +(load "test-util.lisp") + +(defpackage :profile-test + (:use :cl :sb-thread)) + +(in-package :profile-test) + +(defun miller-rabin-prime-p (n &optional (s 50)) + "Miller-Rabin primality test written by R. Matthew Emerson." + (flet ((witness-p (a n) + (loop with b = (- n 1) + for i from (integer-length b) downto 0 + for d = 1 then (mod (* d d) n) + for x = d + do (progn + (when (and (= d 1) (/= x 1) (/= x (- n 1))) + (return-from witness-p t)) + (when (logbitp i b) + (setf d (mod (* d a) n)))) + finally (return (/= d 1))))) + (dotimes (i s n) + (let ((w (1+ (random (- n 1))))) + (when (witness-p w n) + (return-from miller-rabin-prime-p nil)))))) + +(defun random-of-bit-size (n-bits) + "Returns a random number of maximum size `N-BITS'." + (random (ash 1 n-bits))) + +(defun prime-of-bit-size (n-bits) + "Returns a prime number of maximum size `N-BITS'." + (loop for maybe-prime = (random-of-bit-size n-bits) + when (miller-rabin-prime-p maybe-prime) + do (return maybe-prime))) + +(defun waste-cpu-cycles (n-primes n-prime-bits n-workers) + (if (= n-workers 1) + (handler-case + (progn + (loop repeat n-primes + do (prime-of-bit-size n-prime-bits)) + (list t)) + (serious-condition (s) + s)) + (let* ((r (make-semaphore)) + (w (make-semaphore)) + (workers + (loop repeat n-workers + collect (sb-thread:make-thread + (let ((rs (make-random-state))) + (lambda () + (block nil + (handler-bind ((serious-condition (lambda (c) + (princ c) + (sb-debug:backtrace) + (return c)))) + (let ((*random-state* rs)) + (signal-semaphore r) + (wait-on-semaphore w) + (loop repeat n-primes + do (prime-of-bit-size n-prime-bits)) + t))))))))) + (loop repeat n-workers do (wait-on-semaphore r)) + (signal-semaphore w n-workers) + (mapcar #'sb-thread:join-thread workers)))) + +(in-package :cl-user) + +(with-test (:name (profile threads)) + (profile "PROFILE-TEST") + ;; This used to signal an error with threads + (let* ((n #+sb-thread 5 #-sb-thread 1) + (res (profile-test::waste-cpu-cycles 10 256 n)) + (want (make-list n :initial-element t))) + (unless (equal res want) + (error "wanted ~S, got ~S" want res))) + (report)) diff --git a/version.lisp-expr b/version.lisp-expr index 3d8f13d..10c7499 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.29.33" +"1.0.29.34"