X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Fprofile.lisp;h=da4f0b656038d75ae8e2593c7720c83a531c39a8;hb=cee8ef591040db9a79cdd19297867672a9529051;hp=15b8c74a339066abeae102b8df20641d2eadf29f;hpb=81880593109f9f359cd06dc5c4323750ccc2bf21;p=sbcl.git diff --git a/src/code/profile.lisp b/src/code/profile.lisp index 15b8c74..da4f0b6 100644 --- a/src/code/profile.lisp +++ b/src/code/profile.lisp @@ -9,33 +9,64 @@ (in-package "SB-PROFILE") ; (SB-, not SB!, since we're built in warm load.) -;;;; reading internal run time with high resolution and low overhead + +;;;; COUNTER object +;;;; +;;;; Thread safe, and reasonably fast: in common case increment is just an +;;;; ATOMIC-INCF, in overflow case grab a lock and increment overflow counter. + +(declaim (inline make-counter)) +(defstruct (counter (:constructor make-counter) (:copier nil)) + (word 0 :type sb-vm:word) + (overflow 0 :type unsigned-byte)) + +(defun incf-counter (counter delta) + ;; When running multi-threaded we can easily get negative numbers for the + ;; cons-counter. Don't count them at all. + (when (plusp delta) + (labels ((%incf-overflow (&optional (n 1)) + ;; Overflow-counter can run into bignums... so we need to loop + ;; around CAS till the increment succeeds. + (loop for old = (counter-overflow counter) + until (eq old (compare-and-swap (counter-overflow counter) + old (+ old n))))) + (%incf (d) + ;; Increment the word-sized counter. If it overflows, record the + ;; overflow. + (let ((prev (atomic-incf (counter-word counter) d))) + (when (< (logand most-positive-word (+ prev d)) prev) + (%incf-overflow))))) + ;; DELTA can potentially be a bignum -- cut it down to word-size. + (unless (typep delta 'sb-vm:word) + (multiple-value-bind (n r) (truncate delta (1+ most-positive-word)) + (%incf-overflow n) + (setf delta r))) + ;; ATOMIC-INCF can at most handle SIGNED-WORD: if DELTA doesn't fit that, + ;; DELTA/2 will. + (if (typep delta 'sb-vm:signed-word) + (%incf delta) + ;; ...and if delta is still too big, split it into four parts: they + ;; are guaranteed to fit into a signed word. + (multiple-value-bind (n r) (truncate delta 2) + (%incf n) + (%incf n) + (%incf r))))) + counter) + +(defun counter-count (counter) + (+ (counter-word counter) + (* (counter-overflow counter) (1+ most-positive-word)))) + +;;;; High resolution timer + +;;; FIXME: High resolution this is not. Build a microsecond-accuracy version +;;; on top of unix-getrusage, maybe. (defconstant +ticks-per-second+ internal-time-units-per-second) (declaim (inline get-internal-ticks)) -(defun get-internal-ticks () (get-internal-run-time)) - -;;;; implementation-dependent interfaces - -#| -;;; To avoid unnecessary consing in the "encapsulation" code, we want -;;; find out the number of required arguments, and use &REST to -;;; capture only non-required arguments. This function returns (VALUES -;;; MIN-ARGS OPTIONALS-P), where MIN-ARGS is the number of required -;;; arguments and OPTIONALS-P is true iff there are any non-required -;;; arguments (such as &OPTIONAL, &REST, or &KEY). -(declaim (ftype (function ((or symbol cons)) (values fixnum t)) fun-signature)) -(defun fun-signature (name) - (let ((type (info :function :type name))) - (cond ((not (fun-type-p type)) - (values 0 t)) - (t - (values (length (fun-type-required type)) - (or (fun-type-optional type) - (fun-type-keyp type) - (fun-type-rest type))))))) -|# +(defun get-internal-ticks () + (get-internal-run-time)) ;;;; global data structures @@ -60,9 +91,9 @@ ;;; resource consumed for each nested call is added into the ;;; appropriate variable. When the outer function returns, these ;;; amounts are subtracted from the total. -(defvar *enclosed-ticks* 0) -(defvar *enclosed-consing* 0) -(declaim (type (or pcounter fixnum) *enclosed-ticks* *enclosed-consing*)) +(declaim (counter *enclosed-ticks* *enclosed-consing*)) +(defvar *enclosed-ticks*) +(defvar *enclosed-consing*) ;;; This variable is also used to subtract out time for nested ;;; profiled calls. The time inside the profile wrapper call -- @@ -72,8 +103,11 @@ ;;; GET-INTERNAL-TICKS, and after we get to the second call. By ;;; keeping track of the count of enclosed profiled calls, we can try ;;; to compensate for that. -(defvar *enclosed-profiles* 0) -(declaim (type (or pcounter fixnum) *enclosed-profiles*)) +(declaim (counter *enclosed-profiles*)) +(defvar *enclosed-profiles*) + +(declaim (counter *enclosed-gc-run-time*)) +(defvar *enclosed-gc-run-time*) ;;; the encapsulated function we're currently computing profiling data ;;; for, recorded so that we can detect the problem of @@ -98,22 +132,6 @@ ;;;; profile encapsulations -;;; Trade off space for time by handling the usual all-FIXNUM cases inline. -(defmacro fastbig- (x y) - (once-only ((x x) (y y)) - `(if (and (typep ,x '(and fixnum unsigned-byte)) - (typep ,y '(and fixnum unsigned-byte))) - ;; special case: can use fixnum arithmetic and be guaranteed - ;; the result is also a fixnum - (- ,x ,y) - ;; general case - (- ,x ,y)))) -(defmacro fastbig-1+ (x) - (once-only ((x x)) - `(if (typep ,x 'index) - (1+ ,x) - (1+ ,x)))) - ;;; Return a collection of closures over the same lexical context, ;;; (VALUES ENCAPSULATION-FUN READ-STATS-FUN CLEAR-STATS-FUN). ;;; @@ -137,11 +155,12 @@ ;;; will minimize profiling overhead.) (defun profile-encapsulation-lambdas (encapsulated-fun) (declare (type function encapsulated-fun)) - (let* ((count 0) - (ticks 0) - (consing 0) - (profiles 0)) - (declare (type (or pcounter fixnum) count ticks consing profiles)) + (let* ((count (make-counter)) + (ticks (make-counter)) + (consing (make-counter)) + (profiles (make-counter)) + (gc-run-time (make-counter))) + (declare (counter count ticks consing profiles gc-run-time)) (values ;; ENCAPSULATION-FUN (lambda (&more arg-context arg-count) @@ -149,28 +168,29 @@ ;; Make sure that we're not recursing infinitely. (when (boundp '*computing-profiling-data-for*) (unprofile-all) ; to avoid further recursion - (error "~@" - *computing-profiling-data-for* - encapsulated-fun + (error "~@" + *computing-profiling-data-for* encapsulated-fun encapsulated-fun)) - ;; FIXME: Probably when this is stable, we should optimize (SAFETY 0). - (fastbig-incf-pcounter-or-fixnum count 1) + (incf-counter count 1) (let ((dticks 0) (dconsing 0) - (inner-enclosed-profiles 0)) - (declare (type unsigned-byte dticks dconsing)) - (declare (type unsigned-byte inner-enclosed-profiles)) - (aver (typep dticks 'unsigned-byte)) - (aver (typep dconsing 'unsigned-byte)) - (aver (typep inner-enclosed-profiles 'unsigned-byte)) + (inner-enclosed-profiles 0) + (dgc-run-time 0)) + (declare (truly-dynamic-extent dticks dconsing inner-enclosed-profiles)) (unwind-protect (let* ((start-ticks (get-internal-ticks)) - (*enclosed-ticks* 0) - (*enclosed-consing* 0) - (*enclosed-profiles* 0) + (start-gc-run-time *gc-run-time*) + (*enclosed-ticks* (make-counter)) + (*enclosed-consing* (make-counter)) + (*enclosed-profiles* (make-counter)) (nbf0 *n-bytes-freed-or-purified*) - (dynamic-usage-0 (sb-kernel:dynamic-usage))) - (declare (inline pcounter-or-fixnum->integer)) + (dynamic-usage-0 (sb-kernel:dynamic-usage)) + (*enclosed-gc-run-time* (make-counter))) + (declare (dynamic-extent *enclosed-ticks* *enclosed-consing* *enclosed-profiles* *enclosed-gc-run-time*)) (unwind-protect (multiple-value-call encapsulated-fun (sb-c:%more-arg-values arg-context @@ -178,41 +198,38 @@ arg-count)) (let ((*computing-profiling-data-for* encapsulated-fun) (dynamic-usage-1 (sb-kernel:dynamic-usage))) - (setf dticks (fastbig- (get-internal-ticks) start-ticks)) - (setf dconsing - (if (eql *n-bytes-freed-or-purified* nbf0) - ;; common special case where we can avoid - ;; bignum arithmetic - (- dynamic-usage-1 dynamic-usage-0) - ;; general case - (- (get-bytes-consed) nbf0 dynamic-usage-0))) - (setf inner-enclosed-profiles - (pcounter-or-fixnum->integer *enclosed-profiles*)) - (let ((net-dticks (fastbig- dticks *enclosed-ticks*))) - (fastbig-incf-pcounter-or-fixnum ticks net-dticks)) - (let ((net-dconsing (fastbig- dconsing - (pcounter-or-fixnum->integer - *enclosed-consing*)))) - (fastbig-incf-pcounter-or-fixnum consing net-dconsing)) - (fastbig-incf-pcounter-or-fixnum profiles - inner-enclosed-profiles)))) - (fastbig-incf-pcounter-or-fixnum *enclosed-ticks* dticks) - (fastbig-incf-pcounter-or-fixnum *enclosed-consing* dconsing) - (fastbig-incf-pcounter-or-fixnum *enclosed-profiles* - (fastbig-1+ - inner-enclosed-profiles))))) + (setf dticks (- (get-internal-ticks) start-ticks) + dconsing (if (eql *n-bytes-freed-or-purified* nbf0) + ;; common special case where we can avoid + ;; bignum arithmetic + (- dynamic-usage-1 dynamic-usage-0) + ;; general case + (- (get-bytes-consed) nbf0 dynamic-usage-0)) + inner-enclosed-profiles (counter-count *enclosed-profiles*) + dgc-run-time (- *gc-run-time* start-gc-run-time)) + (incf-counter ticks (- dticks (counter-count *enclosed-ticks*))) + (incf-counter gc-run-time (- dgc-run-time (counter-count *enclosed-gc-run-time*))) + (incf-counter consing (- dconsing (counter-count *enclosed-consing*))) + (incf-counter profiles inner-enclosed-profiles)))) + (when (boundp '*enclosed-ticks*) + (incf-counter *enclosed-ticks* dticks) + (incf-counter *enclosed-consing* dconsing) + (incf-counter *enclosed-profiles* (1+ inner-enclosed-profiles)) + (incf-counter *enclosed-gc-run-time* dgc-run-time))))) ;; READ-STATS-FUN (lambda () - (values (pcounter-or-fixnum->integer count) - (pcounter-or-fixnum->integer ticks) - (pcounter-or-fixnum->integer consing) - (pcounter-or-fixnum->integer profiles))) + (values (counter-count count) + (counter-count ticks) + (counter-count consing) + (counter-count profiles) + (counter-count gc-run-time))) ;; CLEAR-STATS-FUN (lambda () - (setf count 0 - ticks 0 - consing 0 - profiles 0))))) + (setf count (make-counter) + ticks (make-counter) + consing (make-counter) + profiles (make-counter) + gc-run-time (make-counter)))))) ;;;; interfaces @@ -327,7 +344,8 @@ name calls seconds - consing) + consing + gc-run-time) ;;; Return our best guess for the run time in a function, subtracting ;;; out factors for profiling overhead. We subtract out the internal @@ -350,11 +368,15 @@ (float profile))))) (max raw-compensated 0.0))) -(defun report () - "Report results from profiling. The results are approximately adjusted -for profiling overhead. The compensation may be rather inaccurate when -bignums are involved in runtime calculation, as in a very-long-running -Lisp process." +(defun report (&key limit (print-no-call-list t)) + "Report results from profiling. The results are approximately +adjusted for profiling overhead. The compensation may be rather +inaccurate when bignums are involved in runtime calculation, as in a +very-long-running Lisp process. + +If LIMIT is set to an integer, only the top LIMIT results are +reported. If PRINT-NO-CALL-LIST is T (the default) then a list of +uncalled profiled functions are listed." (unless (boundp '*overhead*) (setf *overhead* (compute-overhead))) @@ -366,7 +388,7 @@ Lisp process." (warn "Function ~S has been redefined, so times may be inaccurate.~@ PROFILE it again to record calls to the new definition." name)) - (multiple-value-bind (calls ticks consing profile) + (multiple-value-bind (calls ticks consing profile gc-run-time) (funcall (profile-info-read-stats-fun pinfo)) (if (zerop calls) (push name no-call-name-list) @@ -375,16 +397,20 @@ Lisp process." :seconds (compensate-time calls ticks profile) - :consing consing) + :consing consing + :gc-run-time gc-run-time) time-info-list)))) - (setf time-info-list - (sort time-info-list - #'>= - :key #'time-info-seconds)) - (print-profile-table time-info-list) + (let ((times + (sort time-info-list + #'>= + :key #'time-info-seconds))) + (print-profile-table + (if (and limit (> (length times) limit)) + (subseq times 0 limit) + times))) - (when no-call-name-list + (when (and print-no-call-list no-call-name-list) (format *trace-output* "~%These functions were not called:~%~{~<~%~:; ~S~>~}~%" (sort no-call-name-list #'string< @@ -398,21 +424,26 @@ Lisp process." (let ((total-seconds 0.0) (total-consed 0) (total-calls 0) + (total-gc-run-time 0) (seconds-width (length "seconds")) (consed-width (length "consed")) (calls-width (length "calls")) (sec/call-width 10) + (gc-run-time-width (length "gc")) (name-width 6)) (dolist (time-info time-info-list) (incf total-seconds (time-info-seconds time-info)) (incf total-consed (time-info-consing time-info)) - (incf total-calls (time-info-calls time-info))) + (incf total-calls (time-info-calls time-info)) + (incf total-gc-run-time (time-info-gc-run-time time-info))) (setf seconds-width (max (length (format nil "~10,3F" total-seconds)) seconds-width) calls-width (max (length (format nil "~:D" total-calls)) calls-width) consed-width (max (length (format nil "~:D" total-consed)) - consed-width)) + consed-width) + gc-run-time-width (max (length (format nil "~10,3F" (/ total-gc-run-time internal-time-units-per-second))) + gc-run-time-width)) (flet ((dashes () (dotimes (i (+ seconds-width consed-width calls-width @@ -422,6 +453,7 @@ Lisp process." (terpri *trace-output*))) (format *trace-output* "~&~@{ ~v:@<~A~>~^|~}~%" seconds-width "seconds" + (1+ gc-run-time-width) "gc" (1+ consed-width) "consed" (1+ calls-width) "calls" (1+ sec/call-width) "sec/call" @@ -430,8 +462,9 @@ Lisp process." (dashes) (dolist (time-info time-info-list) - (format *trace-output* "~v,3F | ~v:D | ~v:D | ~10,6F | ~S~%" + (format *trace-output* "~v,3F | ~v,3F | ~v:D | ~v:D | ~10,6F | ~S~%" seconds-width (time-info-seconds time-info) + gc-run-time-width (/ (time-info-gc-run-time time-info) internal-time-units-per-second) consed-width (time-info-consing time-info) calls-width (time-info-calls time-info) (/ (time-info-seconds time-info) @@ -440,8 +473,9 @@ Lisp process." (dashes) - (format *trace-output* "~v,3F | ~v:D | ~v:D | | Total~%" + (format *trace-output* "~v,3F | ~v,3F | ~v:D | ~v:D | | Total~%" seconds-width total-seconds + gc-run-time-width (/ total-gc-run-time internal-time-units-per-second) consed-width total-consed calls-width total-calls)