X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Fprofile.lisp;h=59929eb316cf60910dda45737b1a86798bc58bb0;hb=b4f7516350d9dd848552a3b38890fc7c908cea8e;hp=5f5c2ffaaab9b2c89453582eb50dd334cd9f25a4;hpb=2e22486d5a66f899a2aeb08898b0cdd42dfc11f8;p=sbcl.git diff --git a/src/code/profile.lisp b/src/code/profile.lisp index 5f5c2ff..59929eb 100644 --- a/src/code/profile.lisp +++ b/src/code/profile.lisp @@ -9,33 +9,66 @@ (in-package "SB-PROFILE") ; (SB-, not SB!, since we're built in warm load.) -;;;; reading internal run time with high resolution and low overhead + +;;;; COUNTER object +;;;; +;;;; Thread safe, and reasonably fast: in common case increment is just an +;;;; ATOMIC-INCF, in overflow case grab a lock and increment overflow counter. + +(defconstant +most-positive-word+ (1- (expt 2 sb-vm:n-word-bits))) + +(declaim (inline make-counter)) +(defstruct (counter (:constructor make-counter) (:copier nil)) + (word 0 :type sb-vm:word) + (overflow 0 :type unsigned-byte)) + +(defun incf-counter (counter delta) + ;; When running multi-threaded we can easily get negative numbers for the + ;; cons-counter. Don't count them at all. + (when (plusp delta) + (labels ((%incf-overflow (&optional (n 1)) + ;; Overflow-counter can run into bignums... so we need to loop + ;; around CAS till the increment succeeds. + (loop for old = (counter-overflow counter) + until (eq old (compare-and-swap (counter-overflow counter) + old (+ old n))))) + (%incf (d) + ;; Increment the word-sized counter. If it overflows, record the + ;; overflow. + (let ((prev (atomic-incf (counter-word counter) d))) + (when (< (logand +most-positive-word+ (+ prev d)) prev) + (%incf-overflow))))) + ;; DELTA can potentially be a bignum -- cut it down to word-size. + (unless (typep delta 'sb-vm:word) + (multiple-value-bind (n r) (truncate delta (1+ +most-positive-word+)) + (%incf-overflow n) + (setf delta r))) + ;; ATOMIC-INCF can at most handle SIGNED-WORD: if DELTA doesn't fit that, + ;; DELTA/2 will. + (if (typep delta 'sb-vm:signed-word) + (%incf delta) + ;; ...and if delta is still too big, split it into four parts: they + ;; are guaranteed to fit into a signed word. + (multiple-value-bind (n r) (truncate delta 2) + (%incf n) + (%incf n) + (%incf r))))) + counter) + +(defun counter-count (counter) + (+ (counter-word counter) + (* (counter-overflow counter) (1+ +most-positive-word+)))) + +;;;; High resolution timer + +;;; FIXME: High resolution this is not. Build a microsecond-accuracy version +;;; on top of unix-getrusage, maybe. (defconstant +ticks-per-second+ internal-time-units-per-second) (declaim (inline get-internal-ticks)) -(defun get-internal-ticks () (get-internal-run-time)) - -;;;; implementation-dependent interfaces - -#| -;;; To avoid unnecessary consing in the "encapsulation" code, we want -;;; find out the number of required arguments, and use &REST to -;;; capture only non-required arguments. This function returns (VALUES -;;; MIN-ARGS OPTIONALS-P), where MIN-ARGS is the number of required -;;; arguments and OPTIONALS-P is true iff there are any non-required -;;; arguments (such as &OPTIONAL, &REST, or &KEY). -(declaim (ftype (function ((or symbol cons)) (values fixnum t)) fun-signature)) -(defun fun-signature (name) - (let ((type (info :function :type name))) - (cond ((not (fun-type-p type)) - (values 0 t)) - (t - (values (length (fun-type-required type)) - (or (fun-type-optional type) - (fun-type-keyp type) - (fun-type-rest type))))))) -|# +(defun get-internal-ticks () + (get-internal-run-time)) ;;;; global data structures @@ -46,7 +79,8 @@ (make-hash-table ;; EQL testing isn't good enough for generalized function names ;; like (SETF FOO). - :test 'equal)) + :test 'equal + :synchronized t)) (defstruct (profile-info (:copier nil)) (name (missing-arg) :read-only t) (encapsulated-fun (missing-arg) :type function :read-only t) @@ -59,9 +93,9 @@ ;;; resource consumed for each nested call is added into the ;;; appropriate variable. When the outer function returns, these ;;; amounts are subtracted from the total. -(defvar *enclosed-ticks* 0) -(defvar *enclosed-consing* 0) -(declaim (type (or pcounter fixnum) *enclosed-ticks* *enclosed-consing*)) +(declaim (counter *enclosed-ticks* *enclosed-consing*)) +(defvar *enclosed-ticks*) +(defvar *enclosed-consing*) ;;; This variable is also used to subtract out time for nested ;;; profiled calls. The time inside the profile wrapper call -- @@ -71,8 +105,11 @@ ;;; GET-INTERNAL-TICKS, and after we get to the second call. By ;;; keeping track of the count of enclosed profiled calls, we can try ;;; to compensate for that. -(defvar *enclosed-profiles* 0) -(declaim (type (or pcounter fixnum) *enclosed-profiles*)) +(declaim (counter *enclosed-profiles*)) +(defvar *enclosed-profiles*) + +(declaim (counter *enclosed-gc-run-time*)) +(defvar *enclosed-gc-run-time*) ;;; the encapsulated function we're currently computing profiling data ;;; for, recorded so that we can detect the problem of @@ -97,27 +134,11 @@ ;;;; profile encapsulations -;;; Trade off space for time by handling the usual all-FIXNUM cases inline. -(defmacro fastbig- (x y) - (once-only ((x x) (y y)) - `(if (and (typep ,x '(and fixnum unsigned-byte)) - (typep ,y '(and fixnum unsigned-byte))) - ;; special case: can use fixnum arithmetic and be guaranteed - ;; the result is also a fixnum - (- ,x ,y) - ;; general case - (- ,x ,y)))) -(defmacro fastbig-1+ (x) - (once-only ((x x)) - `(if (typep ,x 'index) - (1+ ,x) - (1+ ,x)))) - ;;; Return a collection of closures over the same lexical context, ;;; (VALUES ENCAPSULATION-FUN READ-STATS-FUN CLEAR-STATS-FUN). ;;; ;;; ENCAPSULATION-FUN is a plug-in replacement for ENCAPSULATED-FUN, -;;; which updates statistics whenver it's called. +;;; which updates statistics whenever it's called. ;;; ;;; READ-STATS-FUN returns the statistics: ;;; (VALUES COUNT TIME CONSING PROFILE). @@ -136,11 +157,12 @@ ;;; will minimize profiling overhead.) (defun profile-encapsulation-lambdas (encapsulated-fun) (declare (type function encapsulated-fun)) - (let* ((count 0) - (ticks 0) - (consing 0) - (profiles 0)) - (declare (type (or pcounter fixnum) count ticks consing profiles)) + (let* ((count (make-counter)) + (ticks (make-counter)) + (consing (make-counter)) + (profiles (make-counter)) + (gc-run-time (make-counter))) + (declare (counter count ticks consing profiles gc-run-time)) (values ;; ENCAPSULATION-FUN (lambda (&more arg-context arg-count) @@ -148,28 +170,29 @@ ;; Make sure that we're not recursing infinitely. (when (boundp '*computing-profiling-data-for*) (unprofile-all) ; to avoid further recursion - (error "~@" - *computing-profiling-data-for* - encapsulated-fun + (error "~@" + *computing-profiling-data-for* encapsulated-fun encapsulated-fun)) - ;; FIXME: Probably when this is stable, we should optimize (SAFETY 0). - (fastbig-incf-pcounter-or-fixnum count 1) + (incf-counter count 1) (let ((dticks 0) (dconsing 0) - (inner-enclosed-profiles 0)) - (declare (type unsigned-byte dticks dconsing)) - (declare (type unsigned-byte inner-enclosed-profiles)) - (aver (typep dticks 'unsigned-byte)) - (aver (typep dconsing 'unsigned-byte)) - (aver (typep inner-enclosed-profiles 'unsigned-byte)) + (inner-enclosed-profiles 0) + (dgc-run-time 0)) + (declare (truly-dynamic-extent dticks dconsing inner-enclosed-profiles)) (unwind-protect (let* ((start-ticks (get-internal-ticks)) - (*enclosed-ticks* 0) - (*enclosed-consing* 0) - (*enclosed-profiles* 0) + (start-gc-run-time *gc-run-time*) + (*enclosed-ticks* (make-counter)) + (*enclosed-consing* (make-counter)) + (*enclosed-profiles* (make-counter)) (nbf0 *n-bytes-freed-or-purified*) - (dynamic-usage-0 (sb-kernel:dynamic-usage))) - (declare (inline pcounter-or-fixnum->integer)) + (dynamic-usage-0 (sb-kernel:dynamic-usage)) + (*enclosed-gc-run-time* (make-counter))) + (declare (dynamic-extent *enclosed-ticks* *enclosed-consing* *enclosed-profiles* *enclosed-gc-run-time*)) (unwind-protect (multiple-value-call encapsulated-fun (sb-c:%more-arg-values arg-context @@ -177,41 +200,38 @@ arg-count)) (let ((*computing-profiling-data-for* encapsulated-fun) (dynamic-usage-1 (sb-kernel:dynamic-usage))) - (setf dticks (fastbig- (get-internal-ticks) start-ticks)) - (setf dconsing - (if (eql *n-bytes-freed-or-purified* nbf0) - ;; common special case where we can avoid - ;; bignum arithmetic - (- dynamic-usage-1 dynamic-usage-0) - ;; general case - (- (get-bytes-consed) nbf0 dynamic-usage-0))) - (setf inner-enclosed-profiles - (pcounter-or-fixnum->integer *enclosed-profiles*)) - (let ((net-dticks (fastbig- dticks *enclosed-ticks*))) - (fastbig-incf-pcounter-or-fixnum ticks net-dticks)) - (let ((net-dconsing (fastbig- dconsing - (pcounter-or-fixnum->integer - *enclosed-consing*)))) - (fastbig-incf-pcounter-or-fixnum consing net-dconsing)) - (fastbig-incf-pcounter-or-fixnum profiles - inner-enclosed-profiles)))) - (fastbig-incf-pcounter-or-fixnum *enclosed-ticks* dticks) - (fastbig-incf-pcounter-or-fixnum *enclosed-consing* dconsing) - (fastbig-incf-pcounter-or-fixnum *enclosed-profiles* - (fastbig-1+ - inner-enclosed-profiles))))) + (setf dticks (- (get-internal-ticks) start-ticks) + dconsing (if (eql *n-bytes-freed-or-purified* nbf0) + ;; common special case where we can avoid + ;; bignum arithmetic + (- dynamic-usage-1 dynamic-usage-0) + ;; general case + (- (get-bytes-consed) nbf0 dynamic-usage-0)) + inner-enclosed-profiles (counter-count *enclosed-profiles*) + dgc-run-time (- *gc-run-time* start-gc-run-time)) + (incf-counter ticks (- dticks (counter-count *enclosed-ticks*))) + (incf-counter gc-run-time (- dgc-run-time (counter-count *enclosed-gc-run-time*))) + (incf-counter consing (- dconsing (counter-count *enclosed-consing*))) + (incf-counter profiles inner-enclosed-profiles)))) + (when (boundp '*enclosed-ticks*) + (incf-counter *enclosed-ticks* dticks) + (incf-counter *enclosed-consing* dconsing) + (incf-counter *enclosed-profiles* (1+ inner-enclosed-profiles)) + (incf-counter *enclosed-gc-run-time* dgc-run-time))))) ;; READ-STATS-FUN (lambda () - (values (pcounter-or-fixnum->integer count) - (pcounter-or-fixnum->integer ticks) - (pcounter-or-fixnum->integer consing) - (pcounter-or-fixnum->integer profiles))) + (values (counter-count count) + (counter-count ticks) + (counter-count consing) + (counter-count profiles) + (counter-count gc-run-time))) ;; CLEAR-STATS-FUN (lambda () - (setf count 0 - ticks 0 - consing 0 - profiles 0))))) + (setf count (make-counter) + ticks (make-counter) + consing (make-counter) + profiles (make-counter) + gc-run-time (make-counter)))))) ;;;; interfaces @@ -309,13 +329,14 @@ `(unprofile-all))) (defun unprofile-all () - (dohash (name profile-info *profiled-fun-name->info*) + (dohash ((name profile-info) *profiled-fun-name->info* + :locked t) (declare (ignore profile-info)) (unprofile-1-fun name))) (defun reset () "Reset the counters for all profiled functions." - (dohash (name profile-info *profiled-fun-name->info*) + (dohash ((name profile-info) *profiled-fun-name->info* :locked t) (declare (ignore name)) (funcall (profile-info-clear-stats-fun profile-info)))) @@ -325,7 +346,8 @@ name calls seconds - consing) + consing + gc-run-time) ;;; Return our best guess for the run time in a function, subtracting ;;; out factors for profiling overhead. We subtract out the internal @@ -358,13 +380,13 @@ Lisp process." (compute-overhead))) (let ((time-info-list ()) (no-call-name-list ())) - (dohash (name pinfo *profiled-fun-name->info*) + (dohash ((name pinfo) *profiled-fun-name->info* :locked t) (unless (eq (fdefinition name) (profile-info-encapsulation-fun pinfo)) (warn "Function ~S has been redefined, so times may be inaccurate.~@ PROFILE it again to record calls to the new definition." name)) - (multiple-value-bind (calls ticks consing profile) + (multiple-value-bind (calls ticks consing profile gc-run-time) (funcall (profile-info-read-stats-fun pinfo)) (if (zerop calls) (push name no-call-name-list) @@ -373,7 +395,8 @@ Lisp process." :seconds (compensate-time calls ticks profile) - :consing consing) + :consing consing + :gc-run-time gc-run-time) time-info-list)))) (setf time-info-list @@ -396,21 +419,26 @@ Lisp process." (let ((total-seconds 0.0) (total-consed 0) (total-calls 0) + (total-gc-run-time 0) (seconds-width (length "seconds")) (consed-width (length "consed")) (calls-width (length "calls")) (sec/call-width 10) + (gc-run-time-width (length "gc")) (name-width 6)) (dolist (time-info time-info-list) (incf total-seconds (time-info-seconds time-info)) (incf total-consed (time-info-consing time-info)) - (incf total-calls (time-info-calls time-info))) + (incf total-calls (time-info-calls time-info)) + (incf total-gc-run-time (time-info-gc-run-time time-info))) (setf seconds-width (max (length (format nil "~10,3F" total-seconds)) seconds-width) calls-width (max (length (format nil "~:D" total-calls)) calls-width) consed-width (max (length (format nil "~:D" total-consed)) - consed-width)) + consed-width) + gc-run-time-width (max (length (format nil "~10,3F" (/ total-gc-run-time internal-time-units-per-second))) + gc-run-time-width)) (flet ((dashes () (dotimes (i (+ seconds-width consed-width calls-width @@ -420,6 +448,7 @@ Lisp process." (terpri *trace-output*))) (format *trace-output* "~&~@{ ~v:@<~A~>~^|~}~%" seconds-width "seconds" + (1+ gc-run-time-width) "gc" (1+ consed-width) "consed" (1+ calls-width) "calls" (1+ sec/call-width) "sec/call" @@ -428,8 +457,9 @@ Lisp process." (dashes) (dolist (time-info time-info-list) - (format *trace-output* "~v,3F | ~v:D | ~v:D | ~10,6F | ~S~%" + (format *trace-output* "~v,3F | ~v,3F | ~v:D | ~v:D | ~10,6F | ~S~%" seconds-width (time-info-seconds time-info) + gc-run-time-width (/ (time-info-gc-run-time time-info) internal-time-units-per-second) consed-width (time-info-consing time-info) calls-width (time-info-calls time-info) (/ (time-info-seconds time-info) @@ -438,8 +468,9 @@ Lisp process." (dashes) - (format *trace-output* "~v,3F | ~v:D | ~v:D | | Total~%" + (format *trace-output* "~v,3F | ~v,3F | ~v:D | ~v:D | | Total~%" seconds-width total-seconds + gc-run-time-width (/ total-gc-run-time internal-time-units-per-second) consed-width total-consed calls-width total-calls)