1 ;;; Copyright (C) 2003 Gerd Moellmann <gerd.moellmann@t-online.de>
2 ;;; All rights reserved.
4 ;;; Redistribution and use in source and binary forms, with or without
5 ;;; modification, are permitted provided that the following conditions
8 ;;; 1. Redistributions of source code must retain the above copyright
9 ;;; notice, this list of conditions and the following disclaimer.
10 ;;; 2. Redistributions in binary form must reproduce the above copyright
11 ;;; notice, this list of conditions and the following disclaimer in the
12 ;;; documentation and/or other materials provided with the distribution.
13 ;;; 3. The name of the author may not be used to endorse or promote
14 ;;; products derived from this software without specific prior written
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
21 ;;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 ;;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
23 ;;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 ;;; BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27 ;;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
30 ;;; Statistical profiler.
34 ;;; This profiler arranges for SIGPROF interrupts to interrupt a
35 ;;; running program at regular intervals. Each time a SIGPROF occurs,
36 ;;; the current program counter and return address is recorded in a
37 ;;; vector, until a configurable maximum number of samples have been
40 ;;; A profiling report is generated from the samples array by
41 ;;; determining the Lisp functions corresponding to the recorded
42 ;;; addresses. Each program counter/return address pair forms one
43 ;;; edge in a call graph.
47 ;;; The code being generated on x86 makes determining callers reliably
48 ;;; something between extremely difficult and impossible. Example:
50 ;;; 10979F00: .entry eval::eval-stack-args(arg-count)
51 ;;; 18: pop dword ptr [ebp-8]
52 ;;; 1B: lea esp, [ebp-32]
57 ;;; 29: mov [ebp-12], edi
58 ;;; 2C: mov dword ptr [ebp-16], #x28F0000B ; nil
59 ;;; ; No-arg-parsing entry point
60 ;;; 33: mov dword ptr [ebp-20], 0
62 ;;; 3C: L0: mov edx, esp
64 ;;; 41: mov eax, [#x10979EF8] ; #<FDEFINITION object for eval::eval-stack-pop>
66 ;;; 49: mov [edx-4], ebp
68 ;;; 4E: call dword ptr [eax+5]
71 ;;; Suppose this function is interrupted by SIGPROF at 4E. At that
72 ;;; point, the frame pointer EBP has been modified so that the
73 ;;; original return address of the caller of eval-stack-args is no
74 ;;; longer where it can be found by x86-call-context, and the new
75 ;;; return address, for the call to eval-stack-pop, is not yet on the
76 ;;; stack. The effect is that x86-call-context returns something
77 ;;; bogus, which leads to wrong edges in the call graph.
79 ;;; One thing that one might try is filtering cases where the program
80 ;;; is interrupted at a call instruction. But since the above example
81 ;;; of an interrupt at a call instruction isn't the only case where
82 ;;; the stack is something x86-call-context can't really cope with,
83 ;;; this is not a general solution.
85 ;;; Random ideas for implementation:
87 ;;; * Show a disassembly of a function annotated with sampling
90 ;;; * Space profiler. Sample when new pages are allocated instead of
93 ;;; * Record a configurable number of callers up the stack. That
94 ;;; could give a more complete graph when there are many small
97 ;;; * Print help strings for reports, include hints to the problem
100 ;;; * Make flat report the default since call-graph isn't that
103 (defpackage #:sb-sprof
104 (:use #:cl #:sb-ext #:sb-unix #:sb-alien #:sb-sys)
105 (:export #:*sample-interval* #:*max-samples*
106 #:start-sampling #:stop-sampling #:with-sampling
107 #:with-profiling #:start-profiling #:stop-profiling
110 (in-package #:sb-sprof)
115 (defstruct (vertex (:constructor make-vertex)
116 (:constructor make-scc (scc-vertices edges)))
117 (visited nil :type boolean)
118 (root nil :type (or null vertex))
120 (edges () :type list)
121 (scc-vertices () :type list))
124 (vertex (sb-impl::missing-arg) :type vertex))
127 (vertices () :type list))
129 (declaim (inline scc-p))
130 (defun scc-p (vertex)
131 (not (null (vertex-scc-vertices vertex))))
133 (defmacro do-vertices ((vertex graph) &body body)
134 `(dolist (,vertex (graph-vertices ,graph))
137 (defmacro do-edges ((edge edge-to vertex) &body body)
138 `(dolist (,edge (vertex-edges ,vertex))
139 (let ((,edge-to (edge-vertex ,edge)))
142 (defun self-cycle-p (vertex)
143 (do-edges (e to vertex)
147 (defun map-vertices (fn vertices)
149 (setf (vertex-visited v) nil))
151 (unless (vertex-visited v)
154 ;;; Eeko Nuutila, Eljas Soisalon-Soininen, around 1992. Improves on
155 ;;; Tarjan's original algorithm by not using the stack when processing
156 ;;; trivial components. Trivial components should appear frequently
157 ;;; in a call-graph such as ours, I think. Same complexity O(V+E) as
159 (defun strong-components (vertices)
160 (let ((in-component (make-array (length vertices)
161 :element-type 'boolean
162 :initial-element nil))
166 (labels ((min-root (x y)
167 (let ((rx (vertex-root x))
168 (ry (vertex-root y)))
169 (if (< (vertex-dfn rx) (vertex-dfn ry))
173 (aref in-component (vertex-dfn v)))
174 ((setf in-component) (in v)
175 (setf (aref in-component (vertex-dfn v)) in))
177 (> (vertex-dfn x) (vertex-dfn y)))
179 (setf (vertex-dfn v) (incf dfn)
182 (vertex-visited v) t)
184 (unless (vertex-visited w)
186 (unless (in-component w)
187 (setf (vertex-root v) (min-root v w))))
188 (if (eq v (vertex-root v))
189 (loop while (and stack (vertex-> (car stack) v))
191 collect w into this-component
192 do (setf (in-component w) t)
194 (setf (in-component v) t)
195 (push (cons v this-component) components))
197 (map-vertices #'visit vertices)
200 ;;; Given a dag as a list of vertices, return the list sorted
201 ;;; topologically, children first.
202 (defun topological-sort (dag)
205 (labels ((rec-sort (v)
206 (setf (vertex-visited v) t)
207 (setf (vertex-dfn v) (incf dfn))
208 (dolist (e (vertex-edges v))
209 (unless (vertex-visited (edge-vertex e))
210 (rec-sort (edge-vertex e))))
212 (map-vertices #'rec-sort dag)
215 ;;; Reduce graph G to a dag by coalescing strongly connected components
216 ;;; into vertices. Sort the result topologically.
217 (defun reduce-graph (graph &optional (scc-constructor #'make-scc))
218 (sb-int:collect ((sccs) (trivial))
219 (dolist (c (strong-components (graph-vertices graph)))
220 (if (or (cdr c) (self-cycle-p (car c)))
221 (sb-int:collect ((outgoing))
226 (sccs (funcall scc-constructor c (outgoing))))
229 (dolist (v (trivial))
231 (when (member w (vertex-scc-vertices scc))
232 (setf (edge-vertex e) scc)))))
233 (setf (graph-vertices graph)
234 (topological-sort (nconc (sccs) (trivial))))))
239 ;;; An AA tree is a red-black tree with the extra condition that left
240 ;;; children may not be red. This condition simplifies the red-black
241 ;;; algorithm. It eliminates half of the restructuring cases, and
242 ;;; simplifies the delete algorithm.
244 (defstruct (aa-node (:conc-name aa-))
245 (left nil :type (or null aa-node))
246 (right nil :type (or null aa-node))
247 (level 0 :type integer)
251 (let ((node (make-aa-node)))
252 (setf (aa-left node) node)
253 (setf (aa-right node) node)
257 (root *null-node* :type aa-node))
259 (declaim (inline skew split rotate-with-left-child rotate-with-right-child))
261 (defun rotate-with-left-child (k2)
262 (let ((k1 (aa-left k2)))
263 (setf (aa-left k2) (aa-right k1))
264 (setf (aa-right k1) k2)
267 (defun rotate-with-right-child (k1)
268 (let ((k2 (aa-right k1)))
269 (setf (aa-right k1) (aa-left k2))
270 (setf (aa-left k2) k1)
274 (if (= (aa-level (aa-left aa)) (aa-level aa))
275 (rotate-with-left-child aa)
279 (when (= (aa-level (aa-right (aa-right aa)))
281 (setq aa (rotate-with-right-child aa))
282 (incf (aa-level aa)))
285 (macrolet ((def (name () &body body)
286 (let ((name (sb-int::symbolicate 'aa- name)))
287 `(defun ,name (item tree &key
288 (test-< #'<) (test-= #'=)
289 (node-key #'identity) (item-key #'identity))
290 (let ((.item-key. (funcall item-key item)))
291 (flet ((item-< (node)
292 (funcall test-< .item-key.
293 (funcall node-key (aa-data node))))
295 (funcall test-= .item-key.
296 (funcall node-key (aa-data node)))))
297 (declare (inline item-< item-=))
301 (labels ((insert-into (aa)
302 (cond ((eq aa *null-node*)
303 (setq aa (make-aa-node :data item
305 :right *null-node*)))
307 (return-from insert-into aa))
309 (setf (aa-left aa) (insert-into (aa-left aa))))
311 (setf (aa-right aa) (insert-into (aa-right aa)))))
313 (setf (aa-tree-root tree)
314 (insert-into (aa-tree-root tree)))))
317 (let ((deleted-node *null-node*)
319 (labels ((remove-from (aa)
320 (unless (eq aa *null-node*)
323 (setf (aa-left aa) (remove-from (aa-left aa)))
325 (setq deleted-node aa)
326 (setf (aa-right aa) (remove-from (aa-right aa)))))
327 (cond ((eq aa last-node)
329 ;; If at the bottom of the tree, and item
330 ;; is present, delete it.
331 (when (and (not (eq deleted-node *null-node*))
332 (item-= deleted-node))
333 (setf (aa-data deleted-node) (aa-data aa))
334 (setq deleted-node *null-node*)
335 (setq aa (aa-right aa))))
337 ;; Otherwise not at bottom of tree; rebalance.
338 ((or (< (aa-level (aa-left aa))
340 (< (aa-level (aa-right aa))
343 (when (> (aa-level (aa-right aa)) (aa-level aa))
344 (setf (aa-level (aa-right aa)) (aa-level aa)))
346 (setf (aa-right aa) (skew (aa-right aa)))
347 (setf (aa-right (aa-right aa))
348 (skew (aa-right (aa-right aa))))
350 (setf (aa-right aa) (split (aa-right aa))))))
352 (setf (aa-tree-root tree)
353 (remove-from (aa-tree-root tree))))))
356 (let ((current (aa-tree-root tree)))
357 (setf (aa-data *null-node*) item)
359 (cond ((eq current *null-node*)
360 (return (values nil nil)))
362 (return (values (aa-data current) t)))
364 (setq current (aa-left current)))
366 (setq current (aa-right current))))))))
371 ;;; Sort the subsequence of Vec in the interval [From To] using
372 ;;; comparison function Test. Assume each element to sort consists of
373 ;;; Element-Size array slots, and that the slot Key-Offset contains
375 (defun qsort (vec &key (test #'<) (element-size 1) (key-offset 0)
376 (from 0) (to (- (length vec) element-size)))
377 (declare (fixnum to from element-size)
379 (labels ((rotate (i j)
380 (loop repeat element-size
381 for i from i and j from j do
382 (rotatef (aref vec i) (aref vec j))))
384 (aref vec (+ i key-offset)))
387 (let* ((mid (* element-size
388 (round (+ (/ from element-size)
392 (j (+ to element-size))
394 (declare (fixnum i j))
397 (loop do (incf i element-size)
399 (funcall test p (key i))))
400 (loop do (decf j element-size)
401 until (or (<= j from)
402 (funcall test (key j) p)))
403 (when (< j i) (return))
406 (rec-sort from (- j element-size))
415 "Type used for addresses, for instance, program counters,
416 code start/end locations etc."
417 '(unsigned-byte #+alpha 64 #-alpha 32))
419 (defconstant +unknown-address+ 0
420 "Constant representing an address that cannot be determined.")
422 ;;; A call graph. Vertices are NODE structures, edges are CALL
424 (defstruct (call-graph (:include graph)
425 (:constructor %make-call-graph))
426 ;; the value of *Sample-Interval* at the time the graph was created
427 (sample-interval (sb-impl::missing-arg) :type number)
428 ;; number of samples taken
429 (nsamples (sb-impl::missing-arg) :type sb-impl::index)
430 ;; sample count for samples not in any function
431 (elsewhere-count (sb-impl::missing-arg) :type sb-impl::index)
432 ;; a flat list of NODEs, sorted by sample count
433 (flat-nodes () :type list))
435 ;;; A node in a call graph, representing a function that has been
436 ;;; sampled. The edges of a node are CALL structures that represent
437 ;;; functions called from a given node.
438 (defstruct (node (:include vertex)
439 (:constructor %make-node))
440 ;; A numeric label for the node. The most frequently called function
441 ;; gets label 1. This is just for identification purposes in the
443 (index 0 :type fixnum)
444 ;; start and end address of the function's code
445 (start-pc 0 :type address)
446 (end-pc 0 :type address)
447 ;; the name of the function
449 ;; sample count for this function
450 (count 0 :type fixnum)
451 ;; count including time spent in functions called from this one
452 (accrued-count 0 :type fixnum)
453 ;; list of NODEs for functions calling this one
454 (callers () :type list))
456 ;;; A cycle in a call graph. The functions forming the cycle are
457 ;;; found in the SCC-VERTICES slot of the VERTEX structure.
458 (defstruct (cycle (:include node)))
460 ;;; An edge in a call graph. EDGE-VERTEX is the function being
462 (defstruct (call (:include edge)
463 (:constructor make-call (vertex)))
464 ;; number of times the call was sampled
465 (count 1 :type sb-impl::index))
467 ;;; Info about a function in dynamic-space. This is used to track
468 ;;; address changes of functions during GC.
469 (defstruct (dyninfo (:constructor make-dyninfo (code start end)))
470 ;; component this info is for
471 (code (sb-impl::missing-arg) :type sb-kernel::code-component)
472 ;; current start and end address of the component
473 (start (sb-impl::missing-arg) :type address)
474 (end (sb-impl::missing-arg) :type address)
475 ;; new start address of the component, after GC.
476 (new-start 0 :type address))
478 (defmethod print-object ((call-graph call-graph) stream)
479 (print-unreadable-object (call-graph stream :type t :identity t)
480 (format stream "~d samples" (call-graph-nsamples call-graph))))
482 (defmethod print-object ((node node) stream)
483 (print-unreadable-object (node stream :type t :identity t)
484 (format stream "~s [~d]" (node-name node) (node-index node))))
486 (defmethod print-object ((call call) stream)
487 (print-unreadable-object (call stream :type t :identity t)
488 (format stream "~s [~d]" (node-name (call-vertex call))
489 (node-index (call-vertex call)))))
491 (deftype report-type ()
492 '(member nil :flat :graph))
494 (defvar *sample-interval* 0.01
495 "Default number of seconds between samples.")
496 (declaim (number *sample-interval*))
498 (defvar *max-samples* 50000
499 "Default number of samples taken.")
500 (declaim (type sb-impl::index *max-samples*))
502 (defconstant +sample-size+ 2)
504 (defvar *samples* nil)
505 (declaim (type (or null (vector address)) *samples*))
507 (defvar *samples-index* 0)
508 (declaim (type sb-impl::index *samples-index*))
510 (defvar *profiling* nil)
511 (defvar *sampling* nil)
512 (declaim (type boolean *profiling* *sampling*))
514 (defvar *dynamic-space-code-info* ())
515 (declaim (type list *dynamic-space-code-info*))
517 (defvar *show-progress* nil)
519 (defvar *old-sampling* nil)
521 (defun turn-off-sampling ()
522 (setq *old-sampling* *sampling*)
523 (setq *sampling* nil))
525 (defun turn-on-sampling ()
526 (setq *sampling* *old-sampling*))
528 (defun show-progress (format-string &rest args)
529 (when *show-progress*
530 (apply #'format t format-string args)
533 (defun start-sampling ()
534 "Switch on statistical sampling."
537 (defun stop-sampling ()
538 "Switch off statistical sampling."
539 (setq *sampling* nil))
541 (defmacro with-sampling ((&optional (on t)) &body body)
542 "Evaluate body with statistical sampling turned on or off."
543 `(let ((*sampling* ,on))
546 (defun sort-samples (&key test (key :pc))
547 "Sort *Samples* using comparison Test. Key must be one of
548 :Pc or :Return-Pc for sorting by pc or return pc."
549 (declare (type (member :pc :return-pc) key))
550 (when (plusp *samples-index*)
553 :to (- *samples-index* +sample-size+)
555 :element-size +sample-size+
556 :key-offset (if (eq key :pc) 0 1))))
559 (declare (type address pc))
560 (setf (aref *samples* *samples-index*) pc)
561 (incf *samples-index*))
563 ;;; SIGPROF handler. Record current PC and return address in
566 (defun sigprof-handler (signal code scp)
567 (declare (ignore signal code) (type system-area-pointer scp))
568 (when (and *sampling*
569 (< *samples-index* (length *samples*)))
570 (sb-sys:without-gcing
571 (with-alien ((scp (* os-context-t) :local scp))
572 (locally (declare (optimize (inhibit-warnings 2)))
573 (let* ((pc-ptr (sb-vm:context-pc scp))
574 (fp (sb-vm::context-register scp #.sb-vm::ebp-offset))
575 (ra (sap-ref-32 (int-sap fp)
576 (- (* (1+ sb-vm::return-pc-save-offset)
577 sb-vm::n-word-bytes)))))
578 (record (sap-int pc-ptr))
582 (defun sigprof-handler (signal code scp)
583 (declare (ignore signal code))
584 (when (and *sampling*
585 (< *samples-index* (length *samples*)))
586 (sb-sys:without-gcing
587 (with-alien ((scp (* os-context-t) :local scp))
588 (locally (declare (optimize (inhibit-warnings 2)))
589 (let* ((pc-ptr (sb-vm:context-pc scp))
590 (fp (sb-vm::context-register scp #.sb-vm::cfp-offset))
593 (* sb-vm::lra-save-offset sb-vm::n-word-bytes))))
594 (record (sap-int pc-ptr))
597 ;;; Map function FN over code objects in dynamic-space. FN is called
598 ;;; with two arguments, the object and its size in bytes.
599 (defun map-dynamic-space-code (fn)
600 (flet ((call-if-code (obj obj-type size)
601 (declare (ignore obj-type))
602 (when (sb-kernel:code-component-p obj)
603 (funcall fn obj size))))
604 (sb-vm::map-allocated-objects #'call-if-code :dynamic)))
606 ;;; Return the start address of CODE.
607 (defun code-start (code)
608 (declare (type sb-kernel:code-component code))
609 (sap-int (sb-kernel:code-instructions code)))
611 ;;; Return start and end address of CODE as multiple values.
612 (defun code-bounds (code)
613 (declare (type sb-kernel:code-component code))
614 (let* ((start (code-start code))
615 (end (+ start (sb-kernel:%code-code-size code))))
618 ;;; Record the addresses of dynamic-space code objects in
619 ;;; *DYNAMIC-SPACE-CODE-INFO*. Call this with GC disabled.
620 (defun record-dyninfo ()
621 (flet ((record-address (code size)
622 (declare (ignore size))
623 (multiple-value-bind (start end)
625 (push (make-dyninfo code start end)
626 *dynamic-space-code-info*))))
627 (map-dynamic-space-code #'record-address)))
629 ;;; Adjust pcs or return-pcs in *SAMPLES* for address changes of
630 ;;; dynamic-space code objects. KEY being :PC means adjust pcs.
631 (defun adjust-samples (key)
632 (declare (type (member :pc :return-pc) key))
633 (sort-samples :test #'> :key key)
635 (offset (if (eq key :pc) 0 1)))
636 (declare (type sb-impl::index sidx))
637 (dolist (info *dynamic-space-code-info*)
638 (unless (= (dyninfo-new-start info) (dyninfo-start info))
639 (let ((pos (do ((i sidx (+ i +sample-size+)))
640 ((= i *samples-index*) nil)
641 (declare (type sb-impl::index i))
642 (when (<= (dyninfo-start info)
643 (aref *samples* (+ i offset))
648 (loop with delta = (- (dyninfo-new-start info)
649 (dyninfo-start info))
650 for j from sidx below *samples-index* by +sample-size+
651 as pc = (aref *samples* (+ j offset))
652 while (<= (dyninfo-start info) pc (dyninfo-end info)) do
653 (incf (aref *samples* (+ j offset)) delta)
654 (incf sidx +sample-size+))))))))
656 ;;; This runs from *AFTER-GC-HOOKS*. Adjust *SAMPLES* for address
657 ;;; changes of dynamic-space code objects.
658 (defun adjust-samples-for-address-changes ()
659 (sb-sys:without-gcing
661 (setq *dynamic-space-code-info*
662 (sort *dynamic-space-code-info* #'> :key #'dyninfo-start))
663 (dolist (info *dynamic-space-code-info*)
664 (setf (dyninfo-new-start info)
665 (code-start (dyninfo-code info))))
667 (adjust-samples :return-pc)
668 (dolist (info *dynamic-space-code-info*)
669 (let ((size (- (dyninfo-end info) (dyninfo-start info))))
670 (setf (dyninfo-start info) (dyninfo-new-start info))
671 (setf (dyninfo-end info) (+ (dyninfo-new-start info) size))))
674 (defmacro with-profiling ((&key (sample-interval '*sample-interval*)
675 (max-samples '*max-samples*)
678 (report nil report-p))
680 "Repeatedly evaluate Body with statistical profiling turned on.
681 The following keyword args are recognized:
683 :Sample-Interval <seconds>
684 Take a sample every <seconds> seconds. Default is
688 Repeat evaluating body until <max> samples are taken.
689 Default is *Max-Samples*.
692 If specified, call Report with :Type <type> at the end.
695 It true, call Reset at the beginning."
696 (declare (type report-type report))
697 `(let ((*sample-interval* ,sample-interval)
698 (*max-samples* ,max-samples))
699 ,@(when reset '((reset)))
702 (when (>= *samples-index* (length *samples*))
704 ,@(when show-progress
705 `((format t "~&===> ~d of ~d samples taken.~%"
706 (/ *samples-index* +sample-size+)
708 (let ((.last-index. *samples-index*))
710 (when (= .last-index. *samples-index*)
711 (warn "No sampling progress; possibly a profiler bug.")
714 ,@(when report-p `((report :type ,report)))))
716 (defun start-profiling (&key (max-samples *max-samples*)
717 (sample-interval *sample-interval*)
719 "Start profiling statistically if not already profiling.
720 The following keyword args are recognized:
722 :Sample-Interval <seconds>
723 Take a sample every <seconds> seconds. Default is
727 Maximum number of samples. Default is *Max-Samples*.
730 If true, the default, start sampling right away.
731 If false, Start-Sampling can be used to turn sampling on."
733 (multiple-value-bind (secs usecs)
734 (multiple-value-bind (secs rest)
735 (truncate sample-interval)
736 (values secs (truncate (* rest 1000000))))
737 (setq *samples* (make-array (* max-samples +sample-size+)
738 :element-type 'address))
739 (setq *samples-index* 0)
740 (setq *sampling* sampling)
741 ;; Disabled for now, since this was causing some problems with the
742 ;; sampling getting turned off completely. --JES, 2004-06-19
744 ;; BEFORE-GC-HOOKS have exceedingly bad interactions with
745 ;; threads. -- CSR, 2004-06-21
747 ;; (pushnew 'turn-off-sampling *before-gc-hooks*)
748 (pushnew 'adjust-samples-for-address-changes *after-gc-hooks*)
750 (sb-sys:enable-interrupt sb-unix::sigprof #'sigprof-handler)
751 (unix-setitimer :profile secs usecs secs usecs)
752 (setq *profiling* t)))
755 (defun stop-profiling ()
756 "Stop profiling if profiling."
758 (setq *after-gc-hooks*
759 (delete 'adjust-samples-for-address-changes *after-gc-hooks*))
760 (unix-setitimer :profile 0 0 0 0)
761 (sb-sys:enable-interrupt sb-unix::sigprof :default)
762 (setq *sampling* nil)
763 (setq *profiling* nil))
767 "Reset the profiler."
769 (setq *sampling* nil)
770 (setq *dynamic-space-code-info* ())
772 (setq *samples-index* 0)
775 ;;; Make a NODE for debug-info INFO.
776 (defun make-node (info)
778 (sb-kernel::code-component
779 (multiple-value-bind (start end)
781 (%make-node :name (or (sb-disassem::find-assembler-routine start)
782 (format nil "~a" info))
783 :start-pc start :end-pc end)))
784 (sb-di::compiled-debug-fun
785 (let* ((name (sb-di::debug-fun-name info))
786 (cdf (sb-di::compiled-debug-fun-compiler-debug-fun info))
787 (start-offset (sb-c::compiled-debug-fun-start-pc cdf))
788 (end-offset (sb-c::compiled-debug-fun-elsewhere-pc cdf))
789 (component (sb-di::compiled-debug-fun-component info))
790 (start-pc (code-start component)))
791 (%make-node :name name
792 :start-pc (+ start-pc start-offset)
793 :end-pc (+ start-pc end-offset))))
795 (%make-node :name (sb-di::debug-fun-name info)))))
797 ;;; Return something serving as debug info for address PC. If we can
798 ;;; get something from SB-DI:DEBUG-FUNCTION-FROM-PC, return that.
799 ;;; Otherwise, if we can determine a code component, return that.
800 ;;; Otherwise return nil.
801 (defun debug-info (pc)
802 (declare (type address pc))
803 (let ((ptr (sb-di::component-ptr-from-pc (int-sap pc))))
804 (unless (sap= ptr (int-sap 0))
805 (let* ((code (sb-di::component-from-component-ptr ptr))
806 (code-header-len (* (sb-kernel:get-header-data code)
809 (- (sb-kernel:get-lisp-obj-address code)
810 sb-vm:other-pointer-lowtag)
812 (df (ignore-errors (sb-di::debug-fun-from-pc code
816 ;;; One function can have more than one COMPILED-DEBUG-FUNCTION with
817 ;;; the same name. Reduce the number of calls to Debug-Info by first
818 ;;; looking for a given PC in a red-black tree. If not found in the
819 ;;; tree, get debug info, and look for a node in a hash-table by
820 ;;; function name. If not found in the hash-table, make a new node.
823 (defvar *name->node*)
825 (defmacro with-lookup-tables (() &body body)
826 `(let ((*node-tree* (make-aa-tree))
827 (*name->node* (make-hash-table :test 'equal)))
830 (defun tree-find (item)
831 (flet ((pc/node-= (pc node)
832 (<= (node-start-pc node) pc (node-end-pc node)))
834 (< pc (node-start-pc node))))
835 (aa-find item *node-tree* :test-= #'pc/node-= :test-< #'pc/node-<)))
837 (defun tree-insert (item)
838 (flet ((node/node-= (x y)
839 (<= (node-start-pc y) (node-start-pc x) (node-end-pc y)))
841 (< (node-start-pc x) (node-start-pc y))))
842 (aa-insert item *node-tree* :test-= #'node/node-= :test-< #'node/node-<)))
844 ;;; Find or make a new node for address PC. Value is the NODE found
845 ;;; or made; NIL if not enough information exists to make a NODE for
847 (defun lookup-node (pc)
848 (declare (type address pc))
850 (let ((info (debug-info pc)))
852 (let* ((new (make-node info))
853 (found (gethash (node-name new) *name->node*)))
855 (setf (node-start-pc found)
856 (min (node-start-pc found) (node-start-pc new)))
857 (setf (node-end-pc found)
858 (max (node-end-pc found) (node-end-pc new)))
861 (setf (gethash (node-name new) *name->node*) new)
865 ;;; Return a list of all nodes created by LOOKUP-NODE.
866 (defun collect-nodes ()
867 (loop for node being the hash-values of *name->node*
870 ;;; Value is a CALL-GRAPH for the current contents of *SAMPLES*.
871 (defun make-call-graph-1 ()
872 (let ((elsewhere-count 0))
873 (with-lookup-tables ()
874 (loop for i below *samples-index* by +sample-size+
875 as pc = (aref *samples* i)
876 as return-pc = (aref *samples* (1+ i))
877 as callee = (lookup-node pc)
879 (when (and callee (/= return-pc +unknown-address+))
880 (let ((caller (lookup-node return-pc)))
883 when (and *show-progress* (plusp i)) do
884 (cond ((zerop (mod i 1000))
885 (show-progress "~d" i))
887 (show-progress ".")))
889 (incf (node-count callee))
891 (incf elsewhere-count)
892 when (and callee caller) do
893 (let ((call (find callee (node-edges caller)
894 :key #'call-vertex)))
895 (pushnew caller (node-callers callee))
897 (incf (call-count call))
898 (push (make-call callee) (node-edges caller)))))
899 (let ((sorted-nodes (sort (collect-nodes) #'> :key #'node-count)))
900 (loop for node in sorted-nodes and i from 1 do
901 (setf (node-index node) i))
902 (%make-call-graph :nsamples (/ *samples-index* +sample-size+)
903 :sample-interval *sample-interval*
904 :elsewhere-count elsewhere-count
905 :vertices sorted-nodes)))))
907 ;;; Reduce CALL-GRAPH to a dag, creating CYCLE structures for call
909 (defun reduce-call-graph (call-graph)
911 (flet ((make-one-cycle (vertices edges)
912 (let* ((name (format nil "<Cycle ~d>" (incf cycle-no)))
913 (count (loop for v in vertices sum (node-count v))))
914 (make-cycle :name name
917 :scc-vertices vertices
919 (reduce-graph call-graph #'make-one-cycle))))
921 ;;; For all nodes in CALL-GRAPH, compute times including the time
922 ;;; spent in functions called from them. Note that the call-graph
923 ;;; vertices are in reverse topological order, children first, so we
924 ;;; will have computed accrued counts of called functions before they
925 ;;; are used to compute accrued counts for callers.
926 (defun compute-accrued-counts (call-graph)
927 (do-vertices (from call-graph)
928 (setf (node-accrued-count from) (node-count from))
929 (do-edges (call to from)
930 (incf (node-accrued-count from)
931 (round (* (/ (call-count call) (node-count to))
932 (node-accrued-count to)))))))
934 ;;; Return a CALL-GRAPH structure for the current contents of
935 ;;; *SAMPLES*. The result contain a list of nodes sorted by self-time
936 ;;; in the FLAT-NODES slot, and a dag in VERTICES, with call cycles
937 ;;; reduced to CYCLE structures.
938 (defun make-call-graph ()
940 (show-progress "~&Computing call graph ")
941 (let ((call-graph (without-gcing (make-call-graph-1))))
942 (setf (call-graph-flat-nodes call-graph)
943 (copy-list (graph-vertices call-graph)))
944 (show-progress "~&Finding cycles")
945 (reduce-call-graph call-graph)
946 (show-progress "~&Propagating counts")
947 (compute-accrued-counts call-graph)
953 (defun print-separator (&key (length 72) (char #\-))
954 (format t "~&~V,,,V<~>~%" length char))
956 (defun samples-percent (call-graph count)
957 (* 100.0 (/ count (call-graph-nsamples call-graph))))
959 (defun print-call-graph-header (call-graph)
960 (let ((nsamples (call-graph-nsamples call-graph))
961 (interval (call-graph-sample-interval call-graph))
962 (ncycles (loop for v in (graph-vertices call-graph)
964 (format t "~2&Number of samples: ~d~%~
965 Sample interval: ~f seconds~%~
966 Total sampling time: ~f seconds~%~
967 Number of cycles: ~d~2%"
970 (* nsamples interval)
973 (defun print-flat (call-graph &key (stream *standard-output*) max
974 min-percent (print-header t))
975 (let ((*standard-output* stream)
979 (min-count (if min-percent
980 (round (* (/ min-percent 100.0)
981 (call-graph-nsamples call-graph)))
984 (print-call-graph-header call-graph))
985 (format t "~& Self Total~%")
986 (format t "~& Nr Count % Count % Function~%")
988 (let ((elsewhere-count (call-graph-elsewhere-count call-graph))
990 (dolist (node (call-graph-flat-nodes call-graph))
991 (when (or (and max (> (incf i) max))
992 (< (node-count node) min-count))
994 (let* ((count (node-count node))
995 (percent (samples-percent call-graph count)))
996 (incf total-count count)
997 (incf total-percent percent)
998 (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~s~%"
1006 (format t "~& ~6d ~5,1f elsewhere~%"
1008 (samples-percent call-graph elsewhere-count)))))
1010 (defun print-cycles (call-graph)
1011 (when (some #'cycle-p (graph-vertices call-graph))
1012 (format t "~& Cycle~%")
1013 (format t "~& Count % Parts~%")
1014 (do-vertices (node call-graph)
1015 (when (cycle-p node)
1016 (flet ((print-info (indent index count percent name)
1017 (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
1018 count percent indent name index)))
1020 (format t "~&~6d ~5,1f ~a...~%"
1022 (samples-percent call-graph (cycle-count node))
1024 (dolist (v (vertex-scc-vertices node))
1025 (print-info 4 (node-index v) (node-count v)
1026 (samples-percent call-graph (node-count v))
1031 (defun print-graph (call-graph &key (stream *standard-output*)
1033 (let ((*standard-output* stream)
1034 (*print-pretty* nil))
1035 (print-call-graph-header call-graph)
1036 (print-cycles call-graph)
1037 (flet ((find-call (from to)
1038 (find to (node-edges from) :key #'call-vertex))
1039 (print-info (indent index count percent name)
1040 (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
1041 count percent indent name index)))
1042 (format t "~& Callers~%")
1043 (format t "~& Cumul. Function~%")
1044 (format t "~& Count % Count % Callees~%")
1045 (do-vertices (node call-graph)
1048 ;; Print caller information.
1049 (dolist (caller (node-callers node))
1050 (let ((call (find-call caller node)))
1051 (print-info 4 (node-index caller)
1053 (samples-percent call-graph (call-count call))
1054 (node-name caller))))
1055 ;; Print the node itself.
1056 (format t "~&~6d ~5,1f ~6d ~5,1f ~s [~d]~%"
1058 (samples-percent call-graph (node-count node))
1059 (node-accrued-count node)
1060 (samples-percent call-graph (node-accrued-count node))
1064 (do-edges (call called node)
1065 (print-info 4 (node-index called)
1067 (samples-percent call-graph (call-count call))
1068 (node-name called))))
1071 (print-flat call-graph :stream stream :max max
1072 :min-percent min-percent :print-header nil))))
1074 (defun report (&key (type :graph) max min-percent call-graph
1075 (stream *standard-output*) ((:show-progress *show-progress*)))
1076 "Report statistical profiling results. The following keyword
1077 args are recognized:
1080 Specifies the type of report to generate. If :FLAT, show
1081 flat report, if :GRAPH show a call graph and a flat report.
1082 If nil, don't print out a report.
1085 Specify a stream to print the report on. Default is
1089 Don't show more than <max> entries in the flat report.
1091 :Min-Percent <min-percent>
1092 Don't show functions taking less than <min-percent> of the
1093 total time in the flat report.
1095 :Show-Progress <bool>
1096 If true, print progress messages while generating the call graph.
1099 Print a report from <graph> instead of the latest profiling
1102 Value of this function is a Call-Graph object representing the
1103 resulting call-graph."
1104 (declare (type report-type type))
1105 (let ((graph (or call-graph (make-call-graph))))
1108 (print-flat graph :stream stream :max max :min-percent min-percent))
1110 (print-graph graph :stream stream :max max :min-percent min-percent))
1116 (defun test-0 (n &optional (depth 0))
1117 (declare (optimize (debug 3)))
1120 (test-0 n (1+ depth))
1121 (test-0 n (1+ depth)))))
1124 (with-profiling (:reset t :max-samples 1000 :report :graph)