(:export #:*sample-interval* #:*max-samples* #:*alloc-interval*
#:start-sampling #:stop-sampling #:with-sampling
#:with-profiling #:start-profiling #:stop-profiling
+ #:profile-call-counts #:unprofile-call-counts
#:reset #:report))
(in-package #:sb-sprof)
;; the sampling-mode that was used for the profiling run
(sampling-mode (sb-impl::missing-arg) :type (member :cpu :alloc))
;; number of samples taken
- (nsamples (sb-impl::missing-arg) :type sb-impl::index)
+ (nsamples (sb-impl::missing-arg) :type sb-int:index)
;; sample count for samples not in any function
- (elsewhere-count (sb-impl::missing-arg) :type sb-impl::index)
+ (elsewhere-count (sb-impl::missing-arg) :type sb-int:index)
;; a flat list of NODEs, sorted by sample count
(flat-nodes () :type list))
;; the debug-info that this node was created from
(debug-info nil :type t)
;; list of NODEs for functions calling this one
- (callers () :type list))
+ (callers () :type list)
+ ;; the call count for the function that corresponds to this node (or NIL
+ ;; if call counting wasn't enabled for this function)
+ (call-count nil :type (or null integer)))
;;; A cycle in a call graph. The functions forming the cycle are
;;; found in the SCC-VERTICES slot of the VERTEX structure.
(defstruct (call (:include edge)
(:constructor make-call (vertex)))
;; number of times the call was sampled
- (count 1 :type sb-impl::index))
+ (count 1 :type sb-int:index))
;;; Encapsulate all the information about a sampling run
(defstruct (samples)
;; Each sample takes two cells in the vector
2))
:type simple-vector)
- (trace-count 0 :type sb-impl::index)
- (index 0 :type sb-impl::index)
+ (trace-count 0 :type sb-int:index)
+ (index 0 :type sb-int:index)
(mode nil :type (member :cpu :alloc))
(sample-interval *sample-interval* :type number)
(alloc-interval *alloc-interval* :type number)
(max-depth most-positive-fixnum :type number)
- (max-samples *max-samples* :type sb-impl::index))
+ (max-samples *max-samples* :type sb-int:index))
(defmethod print-object ((call-graph call-graph) stream)
(print-unreadable-object (call-graph stream :type t :identity t)
"Default number of traces taken. This variable is somewhat misnamed:
each trace may actually consist of an arbitrary number of samples, depending
on the depth of the call stack.")
-(declaim (type sb-impl::index *max-samples*))
+(declaim (type sb-int:index *max-samples*))
(defvar *samples* nil)
(declaim (type (or null samples) *samples*))
(defvar *old-sampling* nil)
+;; Call count encapsulation information
+(defvar *encapsulations* (make-hash-table :test 'equal))
+
(defun turn-off-sampling ()
(setq *old-sampling* *sampling*)
(setq *sampling* nil))
*samples* (make-samples :max-depth max-depth
:max-samples max-samples
:mode mode))
+ (enable-call-counting)
(sb-sys:enable-interrupt sb-unix:sigprof #'sigprof-handler)
(if (eq mode :alloc)
(setf sb-vm:*alloc-signal* (1- alloc-interval))
"Stop profiling if profiling."
(when *profiling*
(unix-setitimer :profile 0 0 0 0)
+ (disable-call-counting)
;; Even with the timer shut down we cannot be sure that there is
;; no undelivered sigprof. Besides, leaving the signal handler
;; installed won't hurt.
(when info
(multiple-value-bind (new key)
(make-node info)
+ (when (eql (node-name new) 'call-counter)
+ (return-from lookup-node (values nil nil)))
(let* ((key (cons (node-name new) key))
(found (gethash key *name->node*)))
(cond (found
(node-end-pc-or-offset new)))
found)
(t
+ (let ((call-count-info (gethash (node-name new)
+ *encapsulations*)))
+ (when call-count-info
+ (setf (node-call-count new)
+ (car call-count-info))))
(setf (gethash key *name->node*) new)
new))))))
(when print-header
(print-call-graph-header call-graph))
(format t "~& Self Total Cumul~%")
- (format t "~& Nr Count % Count % Count % Function~%")
+ (format t "~& Nr Count % Count % Count % Calls Function~%")
(print-separator)
(let ((elsewhere-count (call-graph-elsewhere-count call-graph))
(i 0))
(accrued-percent (samples-percent call-graph accrued-count)))
(incf total-count count)
(incf total-percent percent)
- (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~s~%"
+ (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~8@a ~s~%"
(node-index node)
count
percent
accrued-percent
total-count
total-percent
+ (or (node-call-count node) "-")
(node-name node))
(finish-output)))
(print-separator)
- (format t "~& ~6d ~5,1f elsewhere~%"
+ (format t "~& ~6d ~5,1f~36a elsewhere~%"
elsewhere-count
- (samples-percent call-graph elsewhere-count)))))
+ (samples-percent call-graph elsewhere-count)
+ ""))))
(defun print-cycles (call-graph)
(when (some #'cycle-p (graph-vertices call-graph))
(pushnew 'add-disassembly-profile-note sb-disassem::*default-dstate-hooks*)
+\f
+;;;; Call counting
+
+;;; The following functions tell sb-sprof to do call count profiling
+;;; for the named functions in addition to normal statistical
+;;; profiling. The benefit of this over using SB-PROFILE is that this
+;;; encapsulation is a lot more lightweight, due to not needing to
+;;; track cpu usage / consing. (For example, compiling asdf 20 times
+;;; took 13s normally, 15s with call counting for all functions in
+;;; SB-C, and 94s with SB-PROFILE profiling SB-C).
+
+(defun profile-call-counts (&rest names)
+ "Mark the functions named by NAMES as being subject to call counting
+during statistical profiling. If a string is used as a name, it will
+be interpreted as a package name. In this case call counting will be
+done for all functions with names like X or (SETF X), where X is a symbol
+with the package as its home package."
+ (dolist (name names)
+ (if (stringp name)
+ (let ((package (find-package name)))
+ (do-symbols (symbol package)
+ (when (eql (symbol-package symbol) package)
+ (dolist (function-name (list symbol (list 'setf symbol)))
+ (profile-call-counts-for-function function-name)))))
+ (profile-call-counts-for-function name))))
+
+(defun profile-call-counts-for-function (function-name)
+ (unless (gethash function-name *encapsulations*)
+ (setf (gethash function-name *encapsulations*) nil)))
+
+(defun unprofile-call-counts ()
+ "Clear all call counting information. Call counting will be done for no
+functions during statistical profiling."
+ (clrhash *encapsulations*))
+
+;;; Called when profiling is started to enable the call counting
+;;; encapsulation. Wrap all the call counted functions
+(defun enable-call-counting ()
+ (maphash (lambda (k v)
+ (declare (ignore v))
+ (enable-call-counting-for-function k))
+ *encapsulations*))
+
+;;; Called when profiling is stopped to disable the encapsulation. Restore
+;;; the original functions.
+(defun disable-call-counting ()
+ (maphash (lambda (k v)
+ (when v
+ (assert (cdr v))
+ (without-package-locks
+ (setf (fdefinition k) (cdr v)))
+ (setf (cdr v) nil)))
+ *encapsulations*))
+
+(defun enable-call-counting-for-function (function-name)
+ (let ((info (gethash function-name *encapsulations*)))
+ ;; We should never try to encapsulate an fdefn multiple times.
+ (assert (or (null info)
+ (null (cdr info))))
+ (when (and (fboundp function-name)
+ (or (not (symbolp function-name))
+ (and (not (special-operator-p function-name))
+ (not (macro-function function-name)))))
+ (let* ((original-fun (fdefinition function-name))
+ (info (cons 0 original-fun)))
+ (setf (gethash function-name *encapsulations*) info)
+ (without-package-locks
+ (setf (fdefinition function-name)
+ (sb-int:named-lambda call-counter (sb-int:&more more-context more-count)
+ (declare (optimize speed (safety 0)))
+ ;; 2^59 calls should be enough for anybody, and it
+ ;; allows using fixnum arithmetic on x86-64. 2^32
+ ;; isn't enough, so we can't do that on 32 bit platforms.
+ (incf (the (unsigned-byte 59)
+ (car info)))
+ (multiple-value-call original-fun
+ (sb-c:%more-arg-values more-context
+ 0
+ more-count)))))))))
+
+\f
;;; silly examples
(defun test-0 (n &optional (depth 0))
functions in the @code{common-lisp}-package, SBCL internals, or code
where the instrumenting overhead is excessive.
+Additionally @code{sb-sprof} includes a limited deterministic profiler
+which can be used for reporting the amounts of calls to some functions
+during
+
@subsection Example Usage
@lisp
+(in-package :cl-user)
+
(require :sb-sprof)
(declaim (optimize speed))
+(defun cpu-test-inner (a i)
+ (logxor a
+ (* i 5)
+ (+ a i)))
+
(defun cpu-test (n)
(let ((a 0))
(dotimes (i (expt 2 n) a)
- (setf a (logxor a
- (* i 5)
- (+ a i))))))
+ (setf a (cpu-test-inner a i)))))
;;;; CPU profiling
:loop nil)
(cpu-test 26))
+;;; Record call counts for functions defined on symbols in the CL-USER
+;;; package.
+(sb-sprof:profile-call-counts "CL-USER")
+
;;; Take 1000 samples of running (CPU-TEST 24), and give a flat
;;; table report at the end. The body will be re-evaluated in a loop
;;; until 1000 samples have been taken. A sample count will be printed
@lisp
Self Total Cumul
- Nr Count % Count % Count % Function
+ Nr Count % Count % Count % Calls Function
------------------------------------------------------------------------
- 1 165 38.3 165 38.3 165 38.3 SB-KERNEL:TWO-ARG-XOR
- 2 141 32.7 141 32.7 306 71.0 SB-VM::GENERIC-+
- 3 67 15.5 145 33.6 373 86.5 CPU-TEST-2
+ 1 69 24.4 97 34.3 69 24.4 67108864 CPU-TEST-INNER
+ 2 64 22.6 64 22.6 133 47.0 - SB-VM::GENERIC-+
+ 3 39 13.8 256 90.5 172 60.8 1 CPU-TEST
+ 4 31 11.0 31 11.0 203 71.7 - SB-KERNEL:TWO-ARG-XOR
@end lisp
For each function, the table will show three absolute and relative
platform-specific depth). The Cumul column shows the sum of all
Self columns up to and including that line in the table.
+Additionally the Calls column will record the amount of calls that were
+made to the function during the profiling run. This value will only
+be reported for functions that have been explicitly marked for call counting
+with @code{profile-call-counts}.
+
The profiler also hooks into the disassembler such that instructions which
have been sampled are annotated with their relative frequency of
sampling. This information is not stored across different sampling
@include fun-sb-sprof-stop-profiling.texinfo
+@include fun-sb-sprof-profile-call-counts.texinfo
+
+@include fun-sb-sprof-unprofile-call-counts.texinfo
+
@subsection Variables
@include var-sb-sprof-star-max-samples-star.texinfo