;;; 18: pop dword ptr [ebp-8]
;;; 1B: lea esp, [ebp-32]
;;; 1E: mov edi, edx
-;;;
+;;;
;;; 20: cmp ecx, 4
;;; 23: jne L4
;;; 29: mov [ebp-12], edi
;;; the stack is something x86-call-context can't really cope with,
;;; this is not a general solution.
;;;
-;;; Random ideas for implementation:
+;;; Random ideas for implementation:
;;;
;;; * Space profiler. Sample when new pages are allocated instead of
;;; at SIGPROF.
(defpackage #:sb-sprof
(:use #:cl #:sb-ext #:sb-unix #:sb-alien #:sb-sys)
- (:export #:*sample-interval* #:*max-samples*
- #:start-sampling #:stop-sampling #:with-sampling
- #:with-profiling #:start-profiling #:stop-profiling
- #:reset #:report))
+ (:export #:*sample-interval* #:*max-samples* #:*alloc-interval*
+ #:*report-sort-by* #:*report-sort-order*
+ #:start-sampling #:stop-sampling #:with-sampling
+ #:with-profiling #:start-profiling #:stop-profiling
+ #:profile-call-counts #:unprofile-call-counts
+ #:reset #:report))
(in-package #:sb-sprof)
;;;; Graph Utilities
(defstruct (vertex (:constructor make-vertex)
- (:constructor make-scc (scc-vertices edges)))
+ (:constructor make-scc (scc-vertices edges)))
(visited nil :type boolean)
(root nil :type (or null vertex))
(dfn 0 :type fixnum)
;;; Tarjan.
(defun strong-components (vertices)
(let ((in-component (make-array (length vertices)
- :element-type 'boolean
- :initial-element nil))
- (stack ())
- (components ())
- (dfn -1))
+ :element-type 'boolean
+ :initial-element nil))
+ (stack ())
+ (components ())
+ (dfn -1))
(labels ((min-root (x y)
- (let ((rx (vertex-root x))
- (ry (vertex-root y)))
- (if (< (vertex-dfn rx) (vertex-dfn ry))
- rx
- ry)))
- (in-component (v)
- (aref in-component (vertex-dfn v)))
- ((setf in-component) (in v)
- (setf (aref in-component (vertex-dfn v)) in))
- (vertex-> (x y)
- (> (vertex-dfn x) (vertex-dfn y)))
- (visit (v)
- (setf (vertex-dfn v) (incf dfn)
- (in-component v) nil
- (vertex-root v) v
- (vertex-visited v) t)
- (do-edges (e w v)
- (unless (vertex-visited w)
- (visit w))
- (unless (in-component w)
- (setf (vertex-root v) (min-root v w))))
- (if (eq v (vertex-root v))
- (loop while (and stack (vertex-> (car stack) v))
- as w = (pop stack)
- collect w into this-component
- do (setf (in-component w) t)
- finally
- (setf (in-component v) t)
- (push (cons v this-component) components))
- (push v stack))))
+ (let ((rx (vertex-root x))
+ (ry (vertex-root y)))
+ (if (< (vertex-dfn rx) (vertex-dfn ry))
+ rx
+ ry)))
+ (in-component (v)
+ (aref in-component (vertex-dfn v)))
+ ((setf in-component) (in v)
+ (setf (aref in-component (vertex-dfn v)) in))
+ (vertex-> (x y)
+ (> (vertex-dfn x) (vertex-dfn y)))
+ (visit (v)
+ (setf (vertex-dfn v) (incf dfn)
+ (in-component v) nil
+ (vertex-root v) v
+ (vertex-visited v) t)
+ (do-edges (e w v)
+ (unless (vertex-visited w)
+ (visit w))
+ (unless (in-component w)
+ (setf (vertex-root v) (min-root v w))))
+ (if (eq v (vertex-root v))
+ (loop while (and stack (vertex-> (car stack) v))
+ as w = (pop stack)
+ collect w into this-component
+ do (setf (in-component w) t)
+ finally
+ (setf (in-component v) t)
+ (push (cons v this-component) components))
+ (push v stack))))
(map-vertices #'visit vertices)
components)))
;;; topologically, children first.
(defun topological-sort (dag)
(let ((sorted ())
- (dfn -1))
+ (dfn -1))
(labels ((rec-sort (v)
- (setf (vertex-visited v) t)
- (setf (vertex-dfn v) (incf dfn))
- (dolist (e (vertex-edges v))
- (unless (vertex-visited (edge-vertex e))
- (rec-sort (edge-vertex e))))
- (push v sorted)))
+ (setf (vertex-visited v) t)
+ (setf (vertex-dfn v) (incf dfn))
+ (dolist (e (vertex-edges v))
+ (unless (vertex-visited (edge-vertex e))
+ (rec-sort (edge-vertex e))))
+ (push v sorted)))
(map-vertices #'rec-sort dag)
(nreverse sorted))))
(sb-int:collect ((sccs) (trivial))
(dolist (c (strong-components (graph-vertices graph)))
(if (or (cdr c) (self-cycle-p (car c)))
- (sb-int:collect ((outgoing))
- (dolist (v c)
- (do-edges (e w v)
- (unless (member w c)
- (outgoing e))))
- (sccs (funcall scc-constructor c (outgoing))))
- (trivial (car c))))
+ (sb-int:collect ((outgoing))
+ (dolist (v c)
+ (do-edges (e w v)
+ (unless (member w c)
+ (outgoing e))))
+ (sccs (funcall scc-constructor c (outgoing))))
+ (trivial (car c))))
(dolist (scc (sccs))
(dolist (v (trivial))
- (do-edges (e w v)
- (when (member w (vertex-scc-vertices scc))
- (setf (edge-vertex e) scc)))))
+ (do-edges (e w v)
+ (when (member w (vertex-scc-vertices scc))
+ (setf (edge-vertex e) scc)))))
(setf (graph-vertices graph)
- (topological-sort (nconc (sccs) (trivial))))))
-
-\f
-;;;; AA Trees
-
-;;; An AA tree is a red-black tree with the extra condition that left
-;;; children may not be red. This condition simplifies the red-black
-;;; algorithm. It eliminates half of the restructuring cases, and
-;;; simplifies the delete algorithm.
-
-(defstruct (aa-node (:conc-name aa-))
- (left nil :type (or null aa-node))
- (right nil :type (or null aa-node))
- (level 0 :type integer)
- (data nil :type t))
-
-(defvar *null-node*
- (let ((node (make-aa-node)))
- (setf (aa-left node) node)
- (setf (aa-right node) node)
- node))
-
-(defstruct aa-tree
- (root *null-node* :type aa-node))
-
-(declaim (inline skew split rotate-with-left-child rotate-with-right-child))
-
-(defun rotate-with-left-child (k2)
- (let ((k1 (aa-left k2)))
- (setf (aa-left k2) (aa-right k1))
- (setf (aa-right k1) k2)
- k1))
-
-(defun rotate-with-right-child (k1)
- (let ((k2 (aa-right k1)))
- (setf (aa-right k1) (aa-left k2))
- (setf (aa-left k2) k1)
- k2))
-
-(defun skew (aa)
- (if (= (aa-level (aa-left aa)) (aa-level aa))
- (rotate-with-left-child aa)
- aa))
-
-(defun split (aa)
- (when (= (aa-level (aa-right (aa-right aa)))
- (aa-level aa))
- (setq aa (rotate-with-right-child aa))
- (incf (aa-level aa)))
- aa)
-
-(macrolet ((def (name () &body body)
- (let ((name (sb-int::symbolicate 'aa- name)))
- `(defun ,name (item tree &key
- (test-< #'<) (test-= #'=)
- (node-key #'identity) (item-key #'identity))
- (let ((.item-key. (funcall item-key item)))
- (flet ((item-< (node)
- (funcall test-< .item-key.
- (funcall node-key (aa-data node))))
- (item-= (node)
- (funcall test-= .item-key.
- (funcall node-key (aa-data node)))))
- (declare (inline item-< item-=))
- ,@body))))))
-
- (def insert ()
- (labels ((insert-into (aa)
- (cond ((eq aa *null-node*)
- (setq aa (make-aa-node :data item
- :left *null-node*
- :right *null-node*)))
- ((item-= aa)
- (return-from insert-into aa))
- ((item-< aa)
- (setf (aa-left aa) (insert-into (aa-left aa))))
- (t
- (setf (aa-right aa) (insert-into (aa-right aa)))))
- (split (skew aa))))
- (setf (aa-tree-root tree)
- (insert-into (aa-tree-root tree)))))
-
- (def delete ()
- (let ((deleted-node *null-node*)
- (last-node nil))
- (labels ((remove-from (aa)
- (unless (eq aa *null-node*)
- (setq last-node aa)
- (if (item-< aa)
- (setf (aa-left aa) (remove-from (aa-left aa)))
- (progn
- (setq deleted-node aa)
- (setf (aa-right aa) (remove-from (aa-right aa)))))
- (cond ((eq aa last-node)
- ;;
- ;; If at the bottom of the tree, and item
- ;; is present, delete it.
- (when (and (not (eq deleted-node *null-node*))
- (item-= deleted-node))
- (setf (aa-data deleted-node) (aa-data aa))
- (setq deleted-node *null-node*)
- (setq aa (aa-right aa))))
- ;;
- ;; Otherwise not at bottom of tree; rebalance.
- ((or (< (aa-level (aa-left aa))
- (1- (aa-level aa)))
- (< (aa-level (aa-right aa))
- (1- (aa-level aa))))
- (decf (aa-level aa))
- (when (> (aa-level (aa-right aa)) (aa-level aa))
- (setf (aa-level (aa-right aa)) (aa-level aa)))
- (setq aa (skew aa))
- (setf (aa-right aa) (skew (aa-right aa)))
- (setf (aa-right (aa-right aa))
- (skew (aa-right (aa-right aa))))
- (setq aa (split aa))
- (setf (aa-right aa) (split (aa-right aa))))))
- aa))
- (setf (aa-tree-root tree)
- (remove-from (aa-tree-root tree))))))
-
- (def find ()
- (let ((current (aa-tree-root tree)))
- (setf (aa-data *null-node*) item)
- (loop
- (cond ((eq current *null-node*)
- (return (values nil nil)))
- ((item-= current)
- (return (values (aa-data current) t)))
- ((item-< current)
- (setq current (aa-left current)))
- (t
- (setq current (aa-right current))))))))
-
-\f
-;;;; Other Utilities
-
-;;; Sort the subsequence of Vec in the interval [From To] using
-;;; comparison function Test. Assume each element to sort consists of
-;;; Element-Size array slots, and that the slot Key-Offset contains
-;;; the sort key.
-(defun qsort (vec &key (element-size 1) (key-offset 0)
- (from 0) (to (- (length vec) element-size)))
- (declare (type fixnum to from element-size key-offset))
- (declare (type (simple-array address) vec))
- (labels ((rotate (i j)
- (declare (fixnum i j))
- (loop repeat element-size
- for i from i and j from j do
- (rotatef (aref vec i) (aref vec j))))
- (key (i)
- (aref vec (+ i key-offset)))
- (rec-sort (from to)
- (declare (fixnum to from))
- (when (> to from)
- (let* ((mid (* element-size
- (round (+ (/ from element-size)
- (/ to element-size))
- 2)))
- (i from)
- (j (+ to element-size))
- (p (key mid)))
- (declare (fixnum mid i j))
- (rotate mid from)
- (loop
- (loop do (incf i element-size)
- until (or (> i to)
- ;; QSORT used to take a test
- ;; parameter which was funcalled
- ;; here. This caused some consing,
- ;; which is problematic since
- ;; QSORT is indirectly called in
- ;; an after-gc-hook. So just
- ;; hardcode >, which would've been
- ;; used for the test anyway.
- ;; --JES, 2004-07-09
- (> p (key i))))
- (loop do (decf j element-size)
- until (or (<= j from)
- ;; As above.
- (> (key j) p)))
- (when (< j i) (return))
- (rotate i j))
- (rotate from j)
- (rec-sort from (- j element-size))
- (rec-sort i to)))))
- (rec-sort from to)
- vec))
-
+ (topological-sort (nconc (sccs) (trivial))))))
\f
;;;; The Profiler
;;; A call graph. Vertices are NODE structures, edges are CALL
;;; structures.
(defstruct (call-graph (:include graph)
- (:constructor %make-call-graph))
- ;; the value of *Sample-Interval* at the time the graph was created
+ (:constructor %make-call-graph))
+ ;; the value of *SAMPLE-INTERVAL* or *ALLOC-INTERVAL* at the time
+ ;; the graph was created (depending on the current allocation mode)
(sample-interval (sb-impl::missing-arg) :type number)
+ ;; the sampling-mode that was used for the profiling run
+ (sampling-mode (sb-impl::missing-arg) :type (member :cpu :alloc :time))
;; number of samples taken
- (nsamples (sb-impl::missing-arg) :type sb-impl::index)
+ (nsamples (sb-impl::missing-arg) :type sb-int:index)
+ ;; threads that have been sampled
+ (sampled-threads nil :type list)
;; sample count for samples not in any function
- (elsewhere-count (sb-impl::missing-arg) :type sb-impl::index)
+ (elsewhere-count (sb-impl::missing-arg) :type sb-int:index)
;; a flat list of NODEs, sorted by sample count
(flat-nodes () :type list))
;;; sampled. The edges of a node are CALL structures that represent
;;; functions called from a given node.
(defstruct (node (:include vertex)
- (:constructor %make-node))
+ (:constructor %make-node))
;; A numeric label for the node. The most frequently called function
;; gets label 1. This is just for identification purposes in the
;; profiling report.
(index 0 :type fixnum)
- ;; start and end address of the function's code
- (start-pc 0 :type address)
- (end-pc 0 :type address)
+ ;; Start and end address of the function's code. Depending on the
+ ;; debug-info, this might be either as absolute addresses for things
+ ;; that won't move around in memory, or as relative offsets from
+ ;; some point for things that might move.
+ (start-pc-or-offset 0 :type address)
+ (end-pc-or-offset 0 :type address)
;; the name of the function
(name nil :type t)
;; sample count for this function
(count 0 :type fixnum)
;; count including time spent in functions called from this one
(accrued-count 0 :type fixnum)
+ ;; the debug-info that this node was created from
+ (debug-info nil :type t)
;; list of NODEs for functions calling this one
- (callers () :type list))
+ (callers () :type list)
+ ;; the call count for the function that corresponds to this node (or NIL
+ ;; if call counting wasn't enabled for this function)
+ (call-count nil :type (or null integer)))
;;; A cycle in a call graph. The functions forming the cycle are
;;; found in the SCC-VERTICES slot of the VERTEX structure.
;;; An edge in a call graph. EDGE-VERTEX is the function being
;;; called.
(defstruct (call (:include edge)
- (:constructor make-call (vertex)))
+ (:constructor make-call (vertex)))
;; number of times the call was sampled
- (count 1 :type sb-impl::index))
-
-;;; Info about a function in dynamic-space. This is used to track
-;;; address changes of functions during GC.
-(defstruct (dyninfo (:constructor make-dyninfo (code start end)))
- ;; component this info is for
- (code (sb-impl::missing-arg) :type sb-kernel::code-component)
- ;; current start and end address of the component
- (start (sb-impl::missing-arg) :type address)
- (end (sb-impl::missing-arg) :type address)
- ;; new start address of the component, after GC.
- (new-start 0 :type address))
+ (count 1 :type sb-int:index))
+
+(defvar *sample-interval* 0.01
+ "Default number of seconds between samples.")
+(declaim (type number *sample-interval*))
+
+(defvar *alloc-interval* 4
+ "Default number of allocation region openings between samples.")
+(declaim (type number *alloc-interval*))
+
+(defvar *max-samples* 50000
+ "Default number of traces taken. This variable is somewhat misnamed:
+each trace may actually consist of an arbitrary number of samples, depending
+on the depth of the call stack.")
+(declaim (type sb-int:index *max-samples*))
+
+;;; Encapsulate all the information about a sampling run
+(defstruct (samples)
+ ;; When this vector fills up, we allocate a new one and copy over
+ ;; the old contents.
+ (vector (make-array (* *max-samples*
+ ;; Arbitrary guess at how many samples we'll be
+ ;; taking for each trace. The exact amount doesn't
+ ;; matter, this is just to decrease the amount of
+ ;; re-allocation that will need to be done.
+ 10
+ ;; Each sample takes two cells in the vector
+ 2))
+ :type simple-vector)
+ (trace-count 0 :type sb-int:index)
+ (index 0 :type sb-int:index)
+ (mode nil :type (member :cpu :alloc :time))
+ (sample-interval (sb-int:missing-arg) :type number)
+ (alloc-interval (sb-int:missing-arg) :type number)
+ (max-depth most-positive-fixnum :type number)
+ (max-samples (sb-int:missing-arg) :type sb-int:index)
+ (sampled-threads nil :type list))
(defmethod print-object ((call-graph call-graph) stream)
(print-unreadable-object (call-graph stream :type t :identity t)
(defmethod print-object ((call call) stream)
(print-unreadable-object (call stream :type t :identity t)
(format stream "~s [~d]" (node-name (call-vertex call))
- (node-index (call-vertex call)))))
+ (node-index (call-vertex call)))))
(deftype report-type ()
'(member nil :flat :graph))
-(defvar *sample-interval* 0.01
- "Default number of seconds between samples.")
-(declaim (number *sample-interval*))
-
-(defvar *max-samples* 50000
- "Default number of samples taken.")
-(declaim (type sb-impl::index *max-samples*))
+(defvar *sampling-mode* :cpu
+ "Default sampling mode. :CPU for cpu profiling, :ALLOC for allocation
+profiling")
+(declaim (type (member :cpu :alloc :time) *sampling-mode*))
-(defconstant +sample-size+
- #+(or x86 x86-64) 8
- #-(or x86 x86-64) 2)
+(defvar *alloc-region-size*
+ #-gencgc
+ (get-page-size)
+ #+gencgc
+ (max sb-vm:gencgc-alloc-granularity sb-vm:gencgc-card-bytes))
+(declaim (type number *alloc-region-size*))
(defvar *samples* nil)
-(declaim (type (or null (vector address)) *samples*))
-
-(defvar *samples-index* 0)
-(declaim (type sb-impl::index *samples-index*))
+(declaim (type (or null samples) *samples*))
(defvar *profiling* nil)
+(declaim (type (member nil :alloc :cpu :time) *profiling*))
(defvar *sampling* nil)
-(declaim (type boolean *profiling* *sampling*))
-
-(defvar *dynamic-space-code-info* ())
-(declaim (type list *dynamic-space-code-info*))
+(declaim (type boolean *sampling*))
(defvar *show-progress* nil)
(defvar *old-sampling* nil)
+;; Call count encapsulation information
+(defvar *encapsulations* (make-hash-table :test 'equal))
+
(defun turn-off-sampling ()
(setq *old-sampling* *sampling*)
(setq *sampling* nil))
(defun show-progress (format-string &rest args)
(when *show-progress*
- (apply #'format t format-string args)
+ (apply #'format t format-string args)
(finish-output)))
(defun start-sampling ()
(defmacro with-sampling ((&optional (on t)) &body body)
"Evaluate body with statistical sampling turned on or off."
- `(let ((*sampling* ,on))
+ `(let ((*sampling* ,on)
+ (sb-vm:*alloc-signal* sb-vm:*alloc-signal*))
,@body))
-(defun sort-samples (key-offset)
- "Sort *Samples* using comparison Test. Key must be one of
- :Pc or :Return-Pc for sorting by pc or return pc."
- (when (plusp *samples-index*)
- (qsort *samples*
- :from 0
- :to (- *samples-index* +sample-size+)
- :element-size +sample-size+
- :key-offset key-offset)))
-
-(defun record (pc)
- (declare (type address pc))
- (setf (aref *samples* *samples-index*) pc)
- (incf *samples-index*))
-
-;;; SIGPROF handler. Record current PC and return address in
-;;; *SAMPLES*.
+;;; Return something serving as debug info for address PC.
+(declaim (inline debug-info))
+(defun debug-info (pc)
+ (declare (type system-area-pointer pc)
+ (muffle-conditions compiler-note))
+ (let ((ptr (sb-di::component-ptr-from-pc pc)))
+ (cond ((sap= ptr (int-sap 0))
+ (let ((name (sap-foreign-symbol pc)))
+ (if name
+ (values (format nil "foreign function ~a" name)
+ (sap-int pc))
+ (values nil (sap-int pc)))))
+ (t
+ (let* ((code (sb-di::component-from-component-ptr ptr))
+ (code-header-len (* (sb-kernel:get-header-data code)
+ sb-vm:n-word-bytes))
+ (pc-offset (- (sap-int pc)
+ (- (sb-kernel:get-lisp-obj-address code)
+ sb-vm:other-pointer-lowtag)
+ code-header-len))
+ (df (sb-di::debug-fun-from-pc code pc-offset)))
+ (cond ((typep df 'sb-di::bogus-debug-fun)
+ (values code (sap-int pc)))
+ (df
+ ;; The code component might be moved by the GC. Store
+ ;; a PC offset, and reconstruct the data in
+ ;; SAMPLE-PC-FROM-PC-OR-OFFSET.
+ (values df pc-offset))
+ (t
+ (values nil 0))))))))
+
+(defun ensure-samples-vector (samples)
+ (let ((vector (samples-vector samples))
+ (index (samples-index samples)))
+ ;; Allocate a new sample vector if the old one is full
+ (if (= (length vector) index)
+ (let ((new-vector (make-array (* 2 index))))
+ (format *trace-output* "Profiler sample vector full (~a traces / ~a samples), doubling the size~%"
+ (samples-trace-count samples)
+ (truncate index 2))
+ (replace new-vector vector)
+ (setf (samples-vector samples) new-vector))
+ vector)))
+
+(declaim (inline record))
+(defun record (samples pc)
+ (declare (type system-area-pointer pc)
+ (muffle-conditions compiler-note))
+ (multiple-value-bind (info pc-or-offset)
+ (debug-info pc)
+ (let ((vector (ensure-samples-vector samples))
+ (index (samples-index samples)))
+ (declare (type simple-vector vector))
+ ;; Allocate a new sample vector if the old one is full
+ (when (= (length vector) index)
+ (let ((new-vector (make-array (* 2 index))))
+ (format *trace-output* "Profiler sample vector full (~a traces / ~a samples), doubling the size~%"
+ (samples-trace-count samples)
+ (truncate index 2))
+ (replace new-vector vector)
+ (setf vector new-vector
+ (samples-vector samples) new-vector)))
+ ;; For each sample, store the debug-info and the PC/offset into
+ ;; adjacent cells.
+ (setf (aref vector index) info
+ (aref vector (1+ index)) pc-or-offset)))
+ (incf (samples-index samples) 2))
+
+(defun record-trace-start (samples)
+ ;; Mark the start of the trace.
+ (let ((vector (ensure-samples-vector samples)))
+ (declare (type simple-vector vector))
+ (setf (aref vector (samples-index samples))
+ 'trace-start))
+ (incf (samples-index samples) 2))
+
+;;; List of thread currently profiled, or :ALL for all threads.
+(defvar *profiled-threads* nil)
+(declaim (type (or list (member :all)) *profiled-threads*))
+
+;;; Thread which runs the wallclock timers, if any.
+(defvar *timer-thread* nil)
+
+(defun profiled-threads ()
+ (let ((profiled-threads *profiled-threads*))
+ (remove *timer-thread*
+ (if (eq :all profiled-threads)
+ (sb-thread:list-all-threads)
+ profiled-threads))))
+
+(defun profiled-thread-p (thread)
+ (let ((profiled-threads *profiled-threads*))
+ (or (and (eq :all profiled-threads)
+ (not (eq *timer-thread* thread)))
+ (member thread profiled-threads :test #'eq))))
+
#+(or x86 x86-64)
-(defun sigprof-handler (signal code scp)
- (declare (ignore signal code) (type system-area-pointer scp))
- (when (and *sampling*
- (< *samples-index* (length *samples*)))
- (sb-sys:without-gcing
- (locally (declare (optimize (inhibit-warnings 2)))
- (with-alien ((scp (* os-context-t) :local scp))
- ;; For some reason completely bogus small values for the
- ;; frame pointer are returned every now and then, leading
- ;; to segfaults. Try to avoid these cases.
- ;;
- ;; FIXME: Do a more thorough sanity check on ebp, or figure
- ;; out why this is happening.
- ;; -- JES, 2005-01-11
- (when (< (sb-vm::context-register scp #.sb-vm::ebp-offset)
- 4096)
- (dotimes (i +sample-size+)
- (record 0))
- (return-from sigprof-handler nil))
- (let* ((pc-ptr (sb-vm:context-pc scp))
- (fp (sb-vm::context-register scp #.sb-vm::ebp-offset)))
- (record (sap-int pc-ptr))
- (let ((fp (int-sap fp))
- ra)
- (dotimes (i (1- +sample-size+))
- (cond (fp
- (setf (values ra fp)
- (sb-di::x86-call-context fp :depth i))
- (record (if ra
- (sap-int ra)
- 0)))
- (t
- (record 0)))))))))))
+(progn
+ ;; Ensure that only one thread at a time will be doing profiling stuff.
+ (defvar *profiler-lock* (sb-thread:make-mutex :name "Statistical Profiler"))
+ (defvar *distribution-lock* (sb-thread:make-mutex :name "Wallclock profiling lock"))
+
+ (declaim (inline pthread-kill))
+ (define-alien-routine pthread-kill int (os-thread unsigned-long) (signal int))
+
+ ;;; A random thread will call this in response to either a timer firing,
+ ;;; This in turn will distribute the notice to those threads we are
+ ;;; interested using SIGPROF.
+ (defun thread-distribution-handler ()
+ (declare (optimize speed (space 0)))
+ (when *sampling*
+ #+sb-thread
+ (let ((lock *distribution-lock*))
+ ;; Don't flood the system with more interrupts if the last
+ ;; set is still being delivered.
+ (unless (sb-thread:mutex-value lock)
+ (sb-thread::with-system-mutex (lock)
+ (dolist (thread (profiled-threads))
+ ;; This may occasionally fail to deliver the signal, but that
+ ;; seems better then using kill_thread_safely with it's 1
+ ;; second backoff.
+ (let ((os-thread (sb-thread::thread-os-thread thread)))
+ (when os-thread
+ (pthread-kill os-thread sb-unix:sigprof)))))))
+ #-sb-thread
+ (unix-kill 0 sb-unix:sigprof)))
+
+ (defun sigprof-handler (signal code scp)
+ (declare (ignore signal code) (optimize speed (space 0))
+ (disable-package-locks sb-di::x86-call-context)
+ (muffle-conditions compiler-note)
+ (type system-area-pointer scp))
+ (let ((self sb-thread:*current-thread*)
+ (profiling *profiling*))
+ ;; Turn off allocation counter when it is not needed. Doing this in the
+ ;; signal handler means we don't have to worry about racing with the runtime
+ (unless (eq :alloc profiling)
+ (setf sb-vm::*alloc-signal* nil))
+ (when (and *sampling*
+ ;; Normal SIGPROF gets practically speaking delivered to threads
+ ;; depending on the run time they use, so we need to filter
+ ;; out those we don't care about. For :ALLOC and :TIME profiling
+ ;; only the interesting threads get SIGPROF in the first place.
+ ;;
+ ;; ...except that Darwin at least doesn't seem to work like we
+ ;; would want it to, which makes multithreaded :CPU profiling pretty
+ ;; pointless there -- though it may be that our mach magic is
+ ;; partially to blame?
+ (or (not (eq :cpu profiling)) (profiled-thread-p self)))
+ (sb-thread::with-system-mutex (*profiler-lock* :without-gcing t)
+ (let ((samples *samples*))
+ (when (and samples
+ (< (samples-trace-count samples)
+ (samples-max-samples samples)))
+ (with-alien ((scp (* os-context-t) :local scp))
+ (let* ((pc-ptr (sb-vm:context-pc scp))
+ (fp (sb-vm::context-register scp #.sb-vm::ebp-offset)))
+ ;; foreign code might not have a useful frame
+ ;; pointer in ebp/rbp, so make sure it looks
+ ;; reasonable before walking the stack
+ (unless (sb-di::control-stack-pointer-valid-p (sb-sys:int-sap fp))
+ (record samples pc-ptr)
+ (return-from sigprof-handler nil))
+ (incf (samples-trace-count samples))
+ (pushnew self (samples-sampled-threads samples))
+ (let ((fp (int-sap fp))
+ (ok t))
+ (declare (type system-area-pointer fp pc-ptr))
+ ;; FIXME: How annoying. The XC doesn't store enough
+ ;; type information about SB-DI::X86-CALL-CONTEXT,
+ ;; even if we declaim the ftype explicitly in
+ ;; src/code/debug-int. And for some reason that type
+ ;; information is needed for the inlined version to
+ ;; be compiled without boxing the returned saps. So
+ ;; we declare the correct ftype here manually, even
+ ;; if the compiler should be able to deduce this
+ ;; exact same information.
+ (declare (ftype (function (system-area-pointer)
+ (values (member nil t)
+ system-area-pointer
+ system-area-pointer))
+ sb-di::x86-call-context))
+ (record-trace-start samples)
+ (dotimes (i (samples-max-depth samples))
+ (record samples pc-ptr)
+ (setf (values ok pc-ptr fp)
+ (sb-di::x86-call-context fp))
+ (unless ok
+ (return))))))
+ ;; Reset thread-local allocation counter before interrupts
+ ;; are enabled.
+ (when (eq t sb-vm::*alloc-signal*)
+ (setf sb-vm:*alloc-signal* (1- (samples-alloc-interval samples)))))))))
+ nil))
;; FIXME: On non-x86 platforms we don't yet walk the call stack deeper
;; than one level.
#-(or x86 x86-64)
(defun sigprof-handler (signal code scp)
(declare (ignore signal code))
- (when (and *sampling*
- (< *samples-index* (length *samples*)))
- (sb-sys:without-gcing
- (with-alien ((scp (* os-context-t) :local scp))
- (locally (declare (optimize (inhibit-warnings 2)))
- (let* ((pc-ptr (sb-vm:context-pc scp))
- (fp (sb-vm::context-register scp #.sb-vm::cfp-offset))
- (ra (sap-ref-word
- (int-sap fp)
- (* sb-vm::lra-save-offset sb-vm::n-word-bytes))))
- (record (sap-int pc-ptr))
- (record ra)))))))
-
-;;; Map function FN over code objects in dynamic-space. FN is called
-;;; with two arguments, the object and its size in bytes.
-(defun map-dynamic-space-code (fn)
- (flet ((call-if-code (obj obj-type size)
- (declare (ignore obj-type))
- (when (sb-kernel:code-component-p obj)
- (funcall fn obj size))))
- (sb-vm::map-allocated-objects #'call-if-code :dynamic)))
+ (sb-sys:without-interrupts
+ (let ((samples *samples*))
+ (when (and *sampling*
+ samples
+ (< (samples-trace-count samples)
+ (samples-max-samples samples)))
+ (sb-sys:without-gcing
+ (with-alien ((scp (* os-context-t) :local scp))
+ (locally (declare (optimize (inhibit-warnings 2)))
+ (incf (samples-trace-count samples))
+ (record-trace-start samples)
+ (let* ((pc-ptr (sb-vm:context-pc scp))
+ (fp (sb-vm::context-register scp #.sb-vm::cfp-offset))
+ (ra (sap-ref-word
+ (int-sap fp)
+ (* sb-vm::lra-save-offset sb-vm::n-word-bytes))))
+ (record samples pc-ptr)
+ (record samples (int-sap ra))))))))))
;;; Return the start address of CODE.
(defun code-start (code)
(defun code-bounds (code)
(declare (type sb-kernel:code-component code))
(let* ((start (code-start code))
- (end (+ start (sb-kernel:%code-code-size code))))
+ (end (+ start (sb-kernel:%code-code-size code))))
(values start end)))
-;;; Record the addresses of dynamic-space code objects in
-;;; *DYNAMIC-SPACE-CODE-INFO*. Call this with GC disabled.
-(defun record-dyninfo ()
- (flet ((record-address (code size)
- (declare (ignore size))
- (multiple-value-bind (start end)
- (code-bounds code)
- (push (make-dyninfo code start end)
- *dynamic-space-code-info*))))
- (map-dynamic-space-code #'record-address)))
-
-(defun adjust-samples (offset)
- (sort-samples offset)
- (let ((sidx 0))
- (declare (type sb-impl::index sidx))
- (dolist (info *dynamic-space-code-info*)
- (unless (= (dyninfo-new-start info) (dyninfo-start info))
- (let ((pos (do ((i sidx (+ i +sample-size+)))
- ((= i *samples-index*) nil)
- (declare (type sb-impl::index i))
- (when (<= (dyninfo-start info)
- (aref *samples* (+ i offset))
- (dyninfo-end info))
- (return i)))))
- (when pos
- (setq sidx pos)
- (loop with delta = (- (dyninfo-new-start info)
- (dyninfo-start info))
- for j from sidx below *samples-index* by +sample-size+
- as pc = (aref *samples* (+ j offset))
- while (<= (dyninfo-start info) pc (dyninfo-end info)) do
- (incf (aref *samples* (+ j offset)) delta)
- (incf sidx +sample-size+))))))))
-
-;;; This runs from *AFTER-GC-HOOKS*. Adjust *SAMPLES* for address
-;;; changes of dynamic-space code objects.
-(defun adjust-samples-for-address-changes ()
- (sb-sys:without-gcing
- (turn-off-sampling)
- (setq *dynamic-space-code-info*
- (sort *dynamic-space-code-info* #'> :key #'dyninfo-start))
- (dolist (info *dynamic-space-code-info*)
- (setf (dyninfo-new-start info)
- (code-start (dyninfo-code info))))
- (progn
- (dotimes (i +sample-size+)
- (adjust-samples i)))
- (dolist (info *dynamic-space-code-info*)
- (let ((size (- (dyninfo-end info) (dyninfo-start info))))
- (setf (dyninfo-start info) (dyninfo-new-start info))
- (setf (dyninfo-end info) (+ (dyninfo-new-start info) size))))
- (turn-on-sampling)))
-
(defmacro with-profiling ((&key (sample-interval '*sample-interval*)
- (max-samples '*max-samples*)
- (reset nil)
- show-progress
- (report nil report-p))
- &body body)
- "Repeatedly evaluate Body with statistical profiling turned on.
+ (alloc-interval '*alloc-interval*)
+ (max-samples '*max-samples*)
+ (reset nil)
+ (mode '*sampling-mode*)
+ (loop t)
+ (max-depth most-positive-fixnum)
+ show-progress
+ (threads '(list sb-thread:*current-thread*))
+ (report nil report-p))
+ &body body)
+ "Repeatedly evaluate BODY with statistical profiling turned on.
+ In multi-threaded operation, only the thread in which WITH-PROFILING
+ was evaluated will be profiled by default. If you want to profile
+ multiple threads, invoke the profiler with START-PROFILING.
+
The following keyword args are recognized:
- :Sample-Interval <seconds>
- Take a sample every <seconds> seconds. Default is
- *Sample-Interval*.
+ :SAMPLE-INTERVAL <n>
+ Take a sample every <n> seconds. Default is *SAMPLE-INTERVAL*.
+
+ :ALLOC-INTERVAL <n>
+ Take a sample every time <n> allocation regions (approximately
+ 8kB) have been allocated since the last sample. Default is
+ *ALLOC-INTERVAL*.
- :Max-Samples <max>
+ :MODE <mode>
+ If :CPU, run the profiler in CPU profiling mode. If :ALLOC, run the
+ profiler in allocation profiling mode. If :TIME, run the profiler
+ in wallclock profiling mode.
+
+ :MAX-SAMPLES <max>
Repeat evaluating body until <max> samples are taken.
- Default is *Max-Samples*.
+ Default is *MAX-SAMPLES*.
+
+ :MAX-DEPTH <max>
+ Maximum call stack depth that the profiler should consider. Only
+ has an effect on x86 and x86-64.
+
+ :REPORT <type>
+ If specified, call REPORT with :TYPE <type> at the end.
- :Report <type>
- If specified, call Report with :Type <type> at the end.
+ :RESET <bool>
+ It true, call RESET at the beginning.
- :Reset <bool>
- It true, call Reset at the beginning."
+ :THREADS <list-form>
+ Form that evaluates to the list threads to profile, or :ALL to indicate
+ that all threads should be profiled. Defaults to the current
+ thread. (Note: START-PROFILING defaults to all threads.)
+
+ :THREADS has no effect on call-counting at the moment.
+
+ On some platforms (eg. Darwin) the signals used by the profiler are
+ not properly delivered to threads in proportion to their CPU usage
+ when doing :CPU profiling. If you see empty call graphs, or are obviously
+ missing several samples from certain threads, you may be falling afoul
+ of this.
+
+ :LOOP <bool>
+ If true (the default) repeatedly evaluate BODY. If false, evaluate
+ if only once."
(declare (type report-type report))
- `(let ((*sample-interval* ,sample-interval)
- (*max-samples* ,max-samples))
+ `(let* ((*sample-interval* ,sample-interval)
+ (*alloc-interval* ,alloc-interval)
+ (*sampling* nil)
+ (*sampling-mode* ,mode)
+ (*max-samples* ,max-samples))
,@(when reset '((reset)))
- (start-profiling)
- (loop
- (when (>= *samples-index* (length *samples*))
- (return))
- ,@(when show-progress
- `((format t "~&===> ~d of ~d samples taken.~%"
- (/ *samples-index* +sample-size+)
- *max-samples*)))
- (let ((.last-index. *samples-index*))
- ,@body
- (when (= .last-index. *samples-index*)
- (warn "No sampling progress; possibly a profiler bug.")
- (return))))
- (stop-profiling)
+ (unwind-protect
+ (progn
+ (start-profiling :max-depth ,max-depth :threads ,threads)
+ (loop
+ (when (>= (samples-trace-count *samples*)
+ (samples-max-samples *samples*))
+ (return))
+ ,@(when show-progress
+ `((format t "~&===> ~d of ~d samples taken.~%"
+ (samples-trace-count *samples*)
+ (samples-max-samples *samples*))))
+ (let ((.last-index. (samples-index *samples*)))
+ ,@body
+ (when (= .last-index. (samples-index *samples*))
+ (warn "No sampling progress; possibly a profiler bug.")
+ (return)))
+ (unless ,loop
+ (return))))
+ (stop-profiling))
,@(when report-p `((report :type ,report)))))
-(defun start-profiling (&key (max-samples *max-samples*)
- (sample-interval *sample-interval*)
- (sampling t))
- "Start profiling statistically if not already profiling.
- The following keyword args are recognized:
-
- :Sample-Interval <seconds>
- Take a sample every <seconds> seconds. Default is
- *Sample-Interval*.
+(defvar *timer* nil)
- :Max-Samples <max>
- Maximum number of samples. Default is *Max-Samples*.
+(defvar *old-alloc-interval* nil)
+(defvar *old-sample-interval* nil)
- :Sampling <bool>
+(defun start-profiling (&key (max-samples *max-samples*)
+ (mode *sampling-mode*)
+ (sample-interval *sample-interval*)
+ (alloc-interval *alloc-interval*)
+ (max-depth most-positive-fixnum)
+ (threads :all)
+ (sampling t))
+ "Start profiling statistically in the current thread if not already profiling.
+The following keyword args are recognized:
+
+ :SAMPLE-INTERVAL <n>
+ Take a sample every <n> seconds. Default is *SAMPLE-INTERVAL*.
+
+ :ALLOC-INTERVAL <n>
+ Take a sample every time <n> allocation regions (approximately
+ 8kB) have been allocated since the last sample. Default is
+ *ALLOC-INTERVAL*.
+
+ :MODE <mode>
+ If :CPU, run the profiler in CPU profiling mode. If :ALLOC, run
+ the profiler in allocation profiling mode. If :TIME, run the profiler
+ in wallclock profiling mode.
+
+ :MAX-SAMPLES <max>
+ Maximum number of samples. Default is *MAX-SAMPLES*.
+
+ :MAX-DEPTH <max>
+ Maximum call stack depth that the profiler should consider. Only
+ has an effect on x86 and x86-64.
+
+ :THREADS <list>
+ List threads to profile, or :ALL to indicate that all threads should be
+ profiled. Defaults to :ALL. (Note: WITH-PROFILING defaults to the current
+ thread.)
+
+ :THREADS has no effect on call-counting at the moment.
+
+ On some platforms (eg. Darwin) the signals used by the profiler are
+ not properly delivered to threads in proportion to their CPU usage
+ when doing :CPU profiling. If you see empty call graphs, or are obviously
+ missing several samples from certain threads, you may be falling afoul
+ of this.
+
+ :SAMPLING <bool>
If true, the default, start sampling right away.
- If false, Start-Sampling can be used to turn sampling on."
+ If false, START-SAMPLING can be used to turn sampling on."
+ #-gencgc
+ (when (eq mode :alloc)
+ (error "Allocation profiling is only supported for builds using the generational garbage collector."))
(unless *profiling*
(multiple-value-bind (secs usecs)
- (multiple-value-bind (secs rest)
- (truncate sample-interval)
- (values secs (truncate (* rest 1000000))))
- (setq *samples* (make-array (* max-samples +sample-size+)
- :element-type 'address))
- (setq *samples-index* 0)
- (setq *sampling* sampling)
- ;; Disabled for now, since this was causing some problems with the
- ;; sampling getting turned off completely. --JES, 2004-06-19
- ;;
- ;; BEFORE-GC-HOOKS have exceedingly bad interactions with
- ;; threads. -- CSR, 2004-06-21
- ;;
- ;; (pushnew 'turn-off-sampling *before-gc-hooks*)
- (pushnew 'adjust-samples-for-address-changes *after-gc-hooks*)
- (record-dyninfo)
- (sb-sys:enable-interrupt sb-unix::sigprof #'sigprof-handler)
- (unix-setitimer :profile secs usecs secs usecs)
- (setq *profiling* t)))
+ (multiple-value-bind (secs rest)
+ (truncate sample-interval)
+ (values secs (truncate (* rest 1000000))))
+ (setf *sampling* sampling
+ *samples* (make-samples :max-depth max-depth
+ :max-samples max-samples
+ :sample-interval sample-interval
+ :alloc-interval alloc-interval
+ :mode mode))
+ (enable-call-counting)
+ (setf *profiled-threads* threads)
+ (sb-sys:enable-interrupt sb-unix:sigprof #'sigprof-handler)
+ (ecase mode
+ (:alloc
+ (let ((alloc-signal (1- alloc-interval)))
+ #+sb-thread
+ (progn
+ (when (eq :all threads)
+ ;; Set the value new threads inherit.
+ (sb-thread::with-all-threads-lock
+ (setf sb-thread::*default-alloc-signal* alloc-signal)))
+ ;; Turn on allocation profiling in existing threads.
+ (dolist (thread (profiled-threads))
+ (sb-thread::%set-symbol-value-in-thread 'sb-vm::*alloc-signal* thread alloc-signal)))
+ #-sb-thread
+ (setf sb-vm:*alloc-signal* alloc-signal)))
+ (:cpu
+ (unix-setitimer :profile secs usecs secs usecs))
+ (:time
+ #+sb-thread
+ (let ((setup (sb-thread:make-semaphore :name "Timer thread setup semaphore")))
+ (setf *timer-thread*
+ (sb-thread:make-thread (lambda ()
+ (sb-thread:wait-on-semaphore setup)
+ (loop while (eq sb-thread:*current-thread* *timer-thread*)
+ do (sleep 1.0)))
+ :name "SB-SPROF wallclock timer thread"))
+ (sb-thread:signal-semaphore setup))
+ #-sb-thread
+ (setf *timer-thread* nil)
+ (setf *timer* (make-timer #'thread-distribution-handler :name "SB-PROF wallclock timer"
+ :thread *timer-thread*))
+ (schedule-timer *timer* sample-interval :repeat-interval sample-interval)))
+ (setq *profiling* mode)))
(values))
(defun stop-profiling ()
"Stop profiling if profiling."
- (when *profiling*
- (setq *after-gc-hooks*
- (delete 'adjust-samples-for-address-changes *after-gc-hooks*))
- (unix-setitimer :profile 0 0 0 0)
- (sb-sys:enable-interrupt sb-unix::sigprof :default)
- (setq *sampling* nil)
- (setq *profiling* nil))
+ (let ((profiling *profiling*))
+ (when profiling
+ ;; Even with the timers shut down we cannot be sure that there is no
+ ;; undelivered sigprof. The handler is also responsible for turning the
+ ;; *ALLOC-SIGNAL* off in individual threads.
+ (ecase profiling
+ (:alloc
+ #+sb-thread
+ (setf sb-thread::*default-alloc-signal* nil)
+ #-sb-thread
+ (setf sb-vm:*alloc-signal* nil))
+ (:cpu
+ (unix-setitimer :profile 0 0 0 0))
+ (:time
+ (unschedule-timer *timer*)
+ (setf *timer* nil
+ *timer-thread* nil)))
+ (disable-call-counting)
+ (setf *profiling* nil
+ *sampling* nil
+ *profiled-threads* nil)))
(values))
(defun reset ()
"Reset the profiler."
(stop-profiling)
(setq *sampling* nil)
- (setq *dynamic-space-code-info* ())
(setq *samples* nil)
- (setq *samples-index* 0)
(values))
;;; Make a NODE for debug-info INFO.
(defun make-node (info)
- (typecase info
- (sb-kernel::code-component
- (multiple-value-bind (start end)
- (code-bounds info)
- (%make-node :name (or (sb-disassem::find-assembler-routine start)
- (format nil "~a" info))
- :start-pc start :end-pc end)))
- (sb-di::compiled-debug-fun
- (let* ((name (sb-di::debug-fun-name info))
- (cdf (sb-di::compiled-debug-fun-compiler-debug-fun info))
- (start-offset (sb-c::compiled-debug-fun-start-pc cdf))
- (end-offset (sb-c::compiled-debug-fun-elsewhere-pc cdf))
- (component (sb-di::compiled-debug-fun-component info))
- (start-pc (code-start component)))
- (%make-node :name name
- :start-pc (+ start-pc start-offset)
- :end-pc (+ start-pc end-offset))))
- (sb-di::debug-fun
- (%make-node :name (sb-di::debug-fun-name info)))
- (t
- (%make-node :name (coerce info 'string)))))
-
-;;; Return something serving as debug info for address PC. If we can
-;;; get something from SB-DI:DEBUG-FUNCTION-FROM-PC, return that.
-;;; Otherwise, if we can determine a code component, return that.
-;;; Otherwise return nil.
-(defun debug-info (pc)
- (declare (type address pc))
- (let ((ptr (sb-di::component-ptr-from-pc (int-sap pc))))
- (cond ((sap= ptr (int-sap 0))
- (let ((name (sap-foreign-symbol (int-sap pc))))
- (when name
- (format nil "foreign function ~a" name))))
- (t
- (let* ((code (sb-di::component-from-component-ptr ptr))
- (code-header-len (* (sb-kernel:get-header-data code)
- sb-vm:n-word-bytes))
- (pc-offset (- pc
- (- (sb-kernel:get-lisp-obj-address code)
- sb-vm:other-pointer-lowtag)
- code-header-len))
- (df (ignore-errors (sb-di::debug-fun-from-pc code
- pc-offset))))
- (or df
- code))))))
-
+ (flet ((clean-name (name)
+ (if (and (consp name)
+ (member (first name)
+ '(sb-c::xep sb-c::tl-xep sb-c::&more-processor
+ sb-c::varargs-entry
+ sb-c::top-level-form
+ sb-c::hairy-arg-processor
+ sb-c::&optional-processor)))
+ (second name)
+ name)))
+ (typecase info
+ (sb-kernel::code-component
+ (multiple-value-bind (start end)
+ (code-bounds info)
+ (values
+ (%make-node :name (or (sb-disassem::find-assembler-routine start)
+ (format nil "~a" info))
+ :debug-info info
+ :start-pc-or-offset start
+ :end-pc-or-offset end)
+ info)))
+ (sb-di::compiled-debug-fun
+ (let* ((name (sb-di::debug-fun-name info))
+ (cdf (sb-di::compiled-debug-fun-compiler-debug-fun info))
+ (start-offset (sb-c::compiled-debug-fun-start-pc cdf))
+ (end-offset (sb-c::compiled-debug-fun-elsewhere-pc cdf))
+ (component (sb-di::compiled-debug-fun-component info))
+ (start-pc (code-start component)))
+ ;; Call graphs are mostly useless unless we somehow
+ ;; distinguish a gazillion different (LAMBDA ())'s.
+ (when (equal name '(lambda ()))
+ (setf name (format nil "Unknown component: #x~x" start-pc)))
+ (values (%make-node :name (clean-name name)
+ :debug-info info
+ :start-pc-or-offset start-offset
+ :end-pc-or-offset end-offset)
+ component)))
+ (sb-di::debug-fun
+ (%make-node :name (clean-name (sb-di::debug-fun-name info))
+ :debug-info info))
+ (t
+ (%make-node :name (coerce info 'string)
+ :debug-info info)))))
;;; One function can have more than one COMPILED-DEBUG-FUNCTION with
;;; the same name. Reduce the number of calls to Debug-Info by first
;;; tree, get debug info, and look for a node in a hash-table by
;;; function name. If not found in the hash-table, make a new node.
-(defvar *node-tree*)
(defvar *name->node*)
(defmacro with-lookup-tables (() &body body)
- `(let ((*node-tree* (make-aa-tree))
- (*name->node* (make-hash-table :test 'equal)))
+ `(let ((*name->node* (make-hash-table :test 'equal)))
,@body))
-(defun tree-find (item)
- (flet ((pc/node-= (pc node)
- (<= (node-start-pc node) pc (node-end-pc node)))
- (pc/node-< (pc node)
- (< pc (node-start-pc node))))
- (aa-find item *node-tree* :test-= #'pc/node-= :test-< #'pc/node-<)))
-
-(defun tree-insert (item)
- (flet ((node/node-= (x y)
- (<= (node-start-pc y) (node-start-pc x) (node-end-pc y)))
- (node/node-< (x y)
- (< (node-start-pc x) (node-start-pc y))))
- (aa-insert item *node-tree* :test-= #'node/node-= :test-< #'node/node-<)))
-
-;;; Find or make a new node for address PC. Value is the NODE found
-;;; or made; NIL if not enough information exists to make a NODE for
-;;; PC.
-(defun lookup-node (pc)
- (declare (type address pc))
- (or (tree-find pc)
- (let ((info (debug-info pc)))
- (when info
- (let* ((new (make-node info))
- (found (gethash (node-name new) *name->node*)))
- (cond (found
- (setf (node-start-pc found)
- (min (node-start-pc found) (node-start-pc new)))
- (setf (node-end-pc found)
- (max (node-end-pc found) (node-end-pc new)))
- found)
- (t
- (setf (gethash (node-name new) *name->node*) new)
- (tree-insert new)
- new)))))))
+;;; Find or make a new node for INFO. Value is the NODE found or
+;;; made; NIL if not enough information exists to make a NODE for INFO.
+(defun lookup-node (info)
+ (when info
+ (multiple-value-bind (new key)
+ (make-node info)
+ (when (eql (node-name new) 'call-counter)
+ (return-from lookup-node (values nil nil)))
+ (let* ((key (cons (node-name new) key))
+ (found (gethash key *name->node*)))
+ (cond (found
+ (setf (node-start-pc-or-offset found)
+ (min (node-start-pc-or-offset found)
+ (node-start-pc-or-offset new)))
+ (setf (node-end-pc-or-offset found)
+ (max (node-end-pc-or-offset found)
+ (node-end-pc-or-offset new)))
+ found)
+ (t
+ (let ((call-count-info (gethash (node-name new)
+ *encapsulations*)))
+ (when call-count-info
+ (setf (node-call-count new)
+ (car call-count-info))))
+ (setf (gethash key *name->node*) new)
+ new))))))
;;; Return a list of all nodes created by LOOKUP-NODE.
(defun collect-nodes ()
(loop for node being the hash-values of *name->node*
- collect node))
+ collect node))
;;; Value is a CALL-GRAPH for the current contents of *SAMPLES*.
-(defun make-call-graph-1 (depth)
+(defun make-call-graph-1 (max-depth)
(let ((elsewhere-count 0)
- visited-nodes)
+ visited-nodes)
(with-lookup-tables ()
- (loop for i below (1- *samples-index*) ;; by +sample-size+
- as pc = (aref *samples* i)
- as return-pc = (aref *samples* (1+ i))
- as callee = (lookup-node pc)
- as caller =
- (when (and callee (/= return-pc +unknown-address+))
- (let ((caller (lookup-node return-pc)))
- (when caller
- caller)))
- do
- (when (and *show-progress* (plusp i))
- (cond ((zerop (mod i 1000))
- (show-progress "~d" i))
- ((zerop (mod i 100))
- (show-progress "."))))
- (when (< (mod i +sample-size+) depth)
- (when (= (mod i +sample-size+) 0)
- (setf visited-nodes nil)
- (cond (callee
- (incf (node-accrued-count callee))
- (incf (node-count callee)))
- (t
- (incf elsewhere-count))))
- (when callee
- (push callee visited-nodes))
- (when caller
- (unless (member caller visited-nodes)
- (incf (node-accrued-count caller)))
- (when callee
- (let ((call (find callee (node-edges caller)
- :key #'call-vertex)))
- (pushnew caller (node-callers callee))
- (if call
- (unless (member caller visited-nodes)
- (incf (call-count call)))
- (push (make-call callee) (node-edges caller))))))))
+ (loop for i below (- (samples-index *samples*) 2) by 2
+ with depth = 0
+ for debug-info = (aref (samples-vector *samples*) i)
+ for next-info = (aref (samples-vector *samples*)
+ (+ i 2))
+ do (if (eq debug-info 'trace-start)
+ (setf depth 0)
+ (let ((callee (lookup-node debug-info))
+ (caller (unless (eq next-info 'trace-start)
+ (lookup-node next-info))))
+ (when (< depth max-depth)
+ (when (zerop depth)
+ (setf visited-nodes nil)
+ (cond (callee
+ (incf (node-accrued-count callee))
+ (incf (node-count callee)))
+ (t
+ (incf elsewhere-count))))
+ (incf depth)
+ (when callee
+ (push callee visited-nodes))
+ (when caller
+ (unless (member caller visited-nodes)
+ (incf (node-accrued-count caller)))
+ (when callee
+ (let ((call (find callee (node-edges caller)
+ :key #'call-vertex)))
+ (pushnew caller (node-callers callee))
+ (if call
+ (unless (member caller visited-nodes)
+ (incf (call-count call)))
+ (push (make-call callee)
+ (node-edges caller))))))))))
(let ((sorted-nodes (sort (collect-nodes) #'> :key #'node-count)))
- (loop for node in sorted-nodes and i from 1 do
- (setf (node-index node) i))
- (%make-call-graph :nsamples (/ *samples-index* +sample-size+)
- :sample-interval *sample-interval*
- :elsewhere-count elsewhere-count
- :vertices sorted-nodes)))))
+ (loop for node in sorted-nodes and i from 1 do
+ (setf (node-index node) i))
+ (%make-call-graph :nsamples (samples-trace-count *samples*)
+ :sample-interval (if (eq (samples-mode *samples*)
+ :alloc)
+ (samples-alloc-interval *samples*)
+ (samples-sample-interval *samples*))
+ :sampling-mode (samples-mode *samples*)
+ :sampled-threads (samples-sampled-threads *samples*)
+ :elsewhere-count elsewhere-count
+ :vertices sorted-nodes)))))
;;; Reduce CALL-GRAPH to a dag, creating CYCLE structures for call
;;; cycles.
(defun reduce-call-graph (call-graph)
(let ((cycle-no 0))
(flet ((make-one-cycle (vertices edges)
- (let* ((name (format nil "<Cycle ~d>" (incf cycle-no)))
- (count (loop for v in vertices sum (node-count v))))
- (make-cycle :name name
- :index cycle-no
- :count count
- :scc-vertices vertices
- :edges edges))))
+ (let* ((name (format nil "<Cycle ~d>" (incf cycle-no)))
+ (count (loop for v in vertices sum (node-count v))))
+ (make-cycle :name name
+ :index cycle-no
+ :count count
+ :scc-vertices vertices
+ :edges edges))))
(reduce-graph call-graph #'make-one-cycle))))
;;; For all nodes in CALL-GRAPH, compute times including the time
(setf (node-accrued-count from) (node-count from))
(do-edges (call to from)
(incf (node-accrued-count from)
- (round (* (/ (call-count call) (node-count to))
- (node-accrued-count to)))))))
+ (round (* (/ (call-count call) (node-count to))
+ (node-accrued-count to)))))))
;;; Return a CALL-GRAPH structure for the current contents of
;;; *SAMPLES*. The result contain a list of nodes sorted by self-time
;;; in the FLAT-NODES slot, and a dag in VERTICES, with call cycles
;;; reduced to CYCLE structures.
-(defun make-call-graph (depth)
+(defun make-call-graph (max-depth)
(stop-profiling)
(show-progress "~&Computing call graph ")
- (let ((call-graph (without-gcing (make-call-graph-1 depth))))
+ (let ((call-graph (without-gcing (make-call-graph-1 max-depth))))
(setf (call-graph-flat-nodes call-graph)
- (copy-list (graph-vertices call-graph)))
+ (copy-list (graph-vertices call-graph)))
(show-progress "~&Finding cycles")
+ #+nil
(reduce-call-graph call-graph)
(show-progress "~&Propagating counts")
- #+nil (compute-accrued-counts call-graph)
+ #+nil
+ (compute-accrued-counts call-graph)
call-graph))
\f
(format t "~&~V,,,V<~>~%" length char))
(defun samples-percent (call-graph count)
- (* 100.0 (/ count (call-graph-nsamples call-graph))))
+ (if (> count 0)
+ (* 100.0 (/ count (call-graph-nsamples call-graph)))
+ 0))
(defun print-call-graph-header (call-graph)
(let ((nsamples (call-graph-nsamples call-graph))
- (interval (call-graph-sample-interval call-graph))
- (ncycles (loop for v in (graph-vertices call-graph)
- count (scc-p v))))
- (format t "~2&Number of samples: ~d~%~
- Sample interval: ~f seconds~%~
- Total sampling time: ~f seconds~%~
- Number of cycles: ~d~2%"
- nsamples
- interval
- (* nsamples interval)
- ncycles)))
+ (interval (call-graph-sample-interval call-graph))
+ (ncycles (loop for v in (graph-vertices call-graph)
+ count (scc-p v))))
+ (if (eq (call-graph-sampling-mode call-graph) :alloc)
+ (format t "~2&Number of samples: ~d~%~
+ Alloc interval: ~a regions (approximately ~a kB)~%~
+ Total sampling amount: ~a regions (approximately ~a kB)~%~
+ Number of cycles: ~d~%~
+ Sampled threads:~{~% ~S~}~2%"
+ nsamples
+ interval
+ (truncate (* interval *alloc-region-size*) 1024)
+ (* nsamples interval)
+ (truncate (* nsamples interval *alloc-region-size*) 1024)
+ ncycles
+ (call-graph-sampled-threads call-graph))
+ (format t "~2&Number of samples: ~d~%~
+ Sample interval: ~f seconds~%~
+ Total sampling time: ~f seconds~%~
+ Number of cycles: ~d~%~
+ Sampled threads:~{~% ~S~}~2%"
+ nsamples
+ interval
+ (* nsamples interval)
+ ncycles
+ (call-graph-sampled-threads call-graph)))))
+
+(declaim (type (member :samples :cumulative-samples) *report-sort-by*))
+(defvar *report-sort-by* :samples
+ "Method for sorting the flat report: either by :SAMPLES or by :CUMULATIVE-SAMPLES.")
+
+(declaim (type (member :descending :ascending) *report-sort-order*))
+(defvar *report-sort-order* :descending
+ "Order for sorting the flat report: either :DESCENDING or :ASCENDING.")
(defun print-flat (call-graph &key (stream *standard-output*) max
- min-percent (print-header t))
+ min-percent (print-header t)
+ (sort-by *report-sort-by*)
+ (sort-order *report-sort-order*))
+ (declare (type (member :descending :ascending) sort-order)
+ (type (member :samples :cumulative-samples) sort-by))
(let ((*standard-output* stream)
- (*print-pretty* nil)
- (total-count 0)
- (total-percent 0)
- (min-count (if min-percent
- (round (* (/ min-percent 100.0)
- (call-graph-nsamples call-graph)))
- 0)))
+ (*print-pretty* nil)
+ (total-count 0)
+ (total-percent 0)
+ (min-count (if min-percent
+ (round (* (/ min-percent 100.0)
+ (call-graph-nsamples call-graph)))
+ 0)))
(when print-header
(print-call-graph-header call-graph))
- (format t "~& Self Cumul Total~%")
- (format t "~& Nr Count % Count % Count % Function~%")
+ (format t "~& Self Total Cumul~%")
+ (format t "~& Nr Count % Count % Count % Calls Function~%")
(print-separator)
(let ((elsewhere-count (call-graph-elsewhere-count call-graph))
- (i 0))
- (dolist (node (call-graph-flat-nodes call-graph))
- (when (or (and max (> (incf i) max))
- (< (node-count node) min-count))
- (return))
- (let* ((count (node-count node))
- (percent (samples-percent call-graph count))
- (accrued-count (node-accrued-count node))
- (accrued-percent (samples-percent call-graph accrued-count)))
- (incf total-count count)
- (incf total-percent percent)
- (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~s~%"
- (node-index node)
- count
- percent
- accrued-count
- accrued-percent
- total-count
- total-percent
- (node-name node))
- (finish-output)))
+ (i 0)
+ (nodes (stable-sort (copy-list (call-graph-flat-nodes call-graph))
+ (let ((cmp (if (eq :descending sort-order) #'> #'<)))
+ (multiple-value-bind (primary secondary)
+ (if (eq :samples sort-by)
+ (values #'node-count #'node-accrued-count)
+ (values #'node-accrued-count #'node-count))
+ (lambda (x y)
+ (let ((cx (funcall primary x))
+ (cy (funcall primary y)))
+ (if (= cx cy)
+ (funcall cmp (funcall secondary x) (funcall secondary y))
+ (funcall cmp cx cy)))))))))
+ (dolist (node nodes)
+ (when (or (and max (> (incf i) max))
+ (< (node-count node) min-count))
+ (return))
+ (let* ((count (node-count node))
+ (percent (samples-percent call-graph count))
+ (accrued-count (node-accrued-count node))
+ (accrued-percent (samples-percent call-graph accrued-count)))
+ (incf total-count count)
+ (incf total-percent percent)
+ (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~8@a ~s~%"
+ (incf i)
+ count
+ percent
+ accrued-count
+ accrued-percent
+ total-count
+ total-percent
+ (or (node-call-count node) "-")
+ (node-name node))
+ (finish-output)))
(print-separator)
- (format t "~& ~6d ~5,1f elsewhere~%"
- elsewhere-count
- (samples-percent call-graph elsewhere-count)))))
+ (format t "~& ~6d ~5,1f~36a elsewhere~%"
+ elsewhere-count
+ (samples-percent call-graph elsewhere-count)
+ ""))))
(defun print-cycles (call-graph)
(when (some #'cycle-p (graph-vertices call-graph))
(format t "~& Count % Parts~%")
(do-vertices (node call-graph)
(when (cycle-p node)
- (flet ((print-info (indent index count percent name)
- (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
- count percent indent name index)))
- (print-separator)
- (format t "~&~6d ~5,1f ~a...~%"
- (node-count node)
- (samples-percent call-graph (cycle-count node))
- (node-name node))
- (dolist (v (vertex-scc-vertices node))
- (print-info 4 (node-index v) (node-count v)
+ (flet ((print-info (indent index count percent name)
+ (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
+ count percent indent name index)))
+ (print-separator)
+ (format t "~&~6d ~5,1f ~a...~%"
+ (node-count node)
+ (samples-percent call-graph (cycle-count node))
+ (node-name node))
+ (dolist (v (vertex-scc-vertices node))
+ (print-info 4 (node-index v) (node-count v)
(samples-percent call-graph (node-count v))
(node-name v))))))
(print-separator)
(format t "~2%")))
(defun print-graph (call-graph &key (stream *standard-output*)
- max min-percent)
+ max min-percent)
(let ((*standard-output* stream)
- (*print-pretty* nil))
+ (*print-pretty* nil))
(print-call-graph-header call-graph)
(print-cycles call-graph)
(flet ((find-call (from to)
- (find to (node-edges from) :key #'call-vertex))
- (print-info (indent index count percent name)
- (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
- count percent indent name index)))
+ (find to (node-edges from) :key #'call-vertex))
+ (print-info (indent index count percent name)
+ (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%"
+ count percent indent name index)))
(format t "~& Callers~%")
- (format t "~& Cumul. Function~%")
+ (format t "~& Total. Function~%")
(format t "~& Count % Count % Callees~%")
(do-vertices (node call-graph)
- (print-separator)
- ;;
- ;; Print caller information.
- (dolist (caller (node-callers node))
- (let ((call (find-call caller node)))
- (print-info 4 (node-index caller)
+ (print-separator)
+ ;;
+ ;; Print caller information.
+ (dolist (caller (node-callers node))
+ (let ((call (find-call caller node)))
+ (print-info 4 (node-index caller)
(call-count call)
(samples-percent call-graph (call-count call))
(node-name caller))))
- ;; Print the node itself.
- (format t "~&~6d ~5,1f ~6d ~5,1f ~s [~d]~%"
- (node-count node)
- (samples-percent call-graph (node-count node))
- (node-accrued-count node)
- (samples-percent call-graph (node-accrued-count node))
- (node-name node)
- (node-index node))
- ;; Print callees.
- (do-edges (call called node)
- (print-info 4 (node-index called)
+ ;; Print the node itself.
+ (format t "~&~6d ~5,1f ~6d ~5,1f ~s [~d]~%"
+ (node-count node)
+ (samples-percent call-graph (node-count node))
+ (node-accrued-count node)
+ (samples-percent call-graph (node-accrued-count node))
+ (node-name node)
+ (node-index node))
+ ;; Print callees.
+ (do-edges (call called node)
+ (print-info 4 (node-index called)
(call-count call)
(samples-percent call-graph (call-count call))
(node-name called))))
(print-separator)
(format t "~2%")
(print-flat call-graph :stream stream :max max
- :min-percent min-percent :print-header nil))))
+ :min-percent min-percent :print-header nil))))
(defun report (&key (type :graph) max min-percent call-graph
- (stream *standard-output*) ((:show-progress *show-progress*)))
+ ((:sort-by *report-sort-by*) *report-sort-by*)
+ ((:sort-order *report-sort-order*) *report-sort-order*)
+ (stream *standard-output*) ((:show-progress *show-progress*)))
"Report statistical profiling results. The following keyword
args are recognized:
- :Type <type>
+ :TYPE <type>
Specifies the type of report to generate. If :FLAT, show
flat report, if :GRAPH show a call graph and a flat report.
If nil, don't print out a report.
- :Stream <stream>
+ :STREAM <stream>
Specify a stream to print the report on. Default is
- *Standard-Output*.
+ *STANDARD-OUTPUT*.
- :Max <max>
+ :MAX <max>
Don't show more than <max> entries in the flat report.
- :Min-Percent <min-percent>
+ :MIN-PERCENT <min-percent>
Don't show functions taking less than <min-percent> of the
total time in the flat report.
- :Show-Progress <bool>
+ :SORT-BY <column>
+ If :SAMPLES, sort flat report by number of samples taken.
+ If :CUMULATIVE-SAMPLES, sort flat report by cumulative number of samples
+ taken (shows how much time each function spent on stack.) Default
+ is *REPORT-SORT-BY*.
+
+ :SORT-ORDER <order>
+ If :DESCENDING, sort flat report in descending order. If :ASCENDING,
+ sort flat report in ascending order. Default is *REPORT-SORT-ORDER*.
+
+ :SHOW-PROGRESS <bool>
If true, print progress messages while generating the call graph.
- :Call-Graph <graph>
+ :CALL-GRAPH <graph>
Print a report from <graph> instead of the latest profiling
results.
- Value of this function is a Call-Graph object representing the
- resulting call-graph."
- (let ((graph (or call-graph (make-call-graph (1- +sample-size+)))))
- (ecase type
- (:flat
- (print-flat graph :stream stream :max max :min-percent min-percent))
- (:graph
- (print-graph graph :stream stream :max max :min-percent min-percent))
- ((nil)))
- graph))
+Value of this function is a CALL-GRAPH object representing the
+resulting call-graph, or NIL if there are no samples (eg. right after
+calling RESET.)
+
+Profiling is stopped before the call graph is generated."
+ (cond (*samples*
+ (let ((graph (or call-graph (make-call-graph most-positive-fixnum))))
+ (ecase type
+ (:flat
+ (print-flat graph :stream stream :max max :min-percent min-percent))
+ (:graph
+ (print-graph graph :stream stream :max max :min-percent min-percent))
+ ((nil)))
+ graph))
+ (t
+ (format stream "~&; No samples to report.~%")
+ nil)))
;;; Interface to DISASSEMBLE
+(defun sample-pc-from-pc-or-offset (sample pc-or-offset)
+ (etypecase sample
+ ;; Assembly routines or foreign functions don't move around, so we've
+ ;; stored a raw PC
+ ((or sb-kernel:code-component string)
+ pc-or-offset)
+ ;; Lisp functions might move, so we've stored a offset from the
+ ;; start of the code component.
+ (sb-di::compiled-debug-fun
+ (let* ((component (sb-di::compiled-debug-fun-component sample))
+ (start-pc (code-start component)))
+ (+ start-pc pc-or-offset)))))
+
(defun add-disassembly-profile-note (chunk stream dstate)
(declare (ignore chunk stream))
- (unless (zerop *samples-index*)
- (let* ((location
- (+ (sb-disassem::seg-virtual-location
- (sb-disassem:dstate-segment dstate))
- (sb-disassem::dstate-cur-offs dstate)))
- (samples (loop for x from 0 below *samples-index* by +sample-size+
- summing (if (= (aref *samples* x) location)
- 1
- 0))))
+ (when *samples*
+ (let* ((location (+ (sb-disassem::seg-virtual-location
+ (sb-disassem:dstate-segment dstate))
+ (sb-disassem::dstate-cur-offs dstate)))
+ (samples (loop with index = (samples-index *samples*)
+ for x from 0 below (- index 2) by 2
+ for last-sample = nil then sample
+ for sample = (aref (samples-vector *samples*) x)
+ for pc-or-offset = (aref (samples-vector *samples*)
+ (1+ x))
+ when (and sample (eq last-sample 'trace-start))
+ count (= location
+ (sample-pc-from-pc-or-offset sample
+ pc-or-offset)))))
(unless (zerop samples)
- (sb-disassem::note (format nil "~A/~A samples"
- samples (/ *samples-index* +sample-size+))
- dstate)))))
+ (sb-disassem::note (format nil "~A/~A samples"
+ samples (samples-trace-count *samples*))
+ dstate)))))
(pushnew 'add-disassembly-profile-note sb-disassem::*default-dstate-hooks*)
+\f
+;;;; Call counting
+
+;;; The following functions tell sb-sprof to do call count profiling
+;;; for the named functions in addition to normal statistical
+;;; profiling. The benefit of this over using SB-PROFILE is that this
+;;; encapsulation is a lot more lightweight, due to not needing to
+;;; track cpu usage / consing. (For example, compiling asdf 20 times
+;;; took 13s normally, 15s with call counting for all functions in
+;;; SB-C, and 94s with SB-PROFILE profiling SB-C).
+
+(defun profile-call-counts (&rest names)
+ "Mark the functions named by NAMES as being subject to call counting
+during statistical profiling. If a string is used as a name, it will
+be interpreted as a package name. In this case call counting will be
+done for all functions with names like X or (SETF X), where X is a symbol
+with the package as its home package."
+ (dolist (name names)
+ (if (stringp name)
+ (let ((package (find-package name)))
+ (do-symbols (symbol package)
+ (when (eql (symbol-package symbol) package)
+ (dolist (function-name (list symbol (list 'setf symbol)))
+ (profile-call-counts-for-function function-name)))))
+ (profile-call-counts-for-function name))))
+
+(defun profile-call-counts-for-function (function-name)
+ (unless (gethash function-name *encapsulations*)
+ (setf (gethash function-name *encapsulations*) nil)))
+
+(defun unprofile-call-counts ()
+ "Clear all call counting information. Call counting will be done for no
+functions during statistical profiling."
+ (clrhash *encapsulations*))
+
+;;; Called when profiling is started to enable the call counting
+;;; encapsulation. Wrap all the call counted functions
+(defun enable-call-counting ()
+ (maphash (lambda (k v)
+ (declare (ignore v))
+ (enable-call-counting-for-function k))
+ *encapsulations*))
+
+;;; Called when profiling is stopped to disable the encapsulation. Restore
+;;; the original functions.
+(defun disable-call-counting ()
+ (maphash (lambda (k v)
+ (when v
+ (assert (cdr v))
+ (without-package-locks
+ (setf (fdefinition k) (cdr v)))
+ (setf (cdr v) nil)))
+ *encapsulations*))
+
+(defun enable-call-counting-for-function (function-name)
+ (let ((info (gethash function-name *encapsulations*)))
+ ;; We should never try to encapsulate an fdefn multiple times.
+ (assert (or (null info)
+ (null (cdr info))))
+ (when (and (fboundp function-name)
+ (or (not (symbolp function-name))
+ (and (not (special-operator-p function-name))
+ (not (macro-function function-name)))))
+ (let* ((original-fun (fdefinition function-name))
+ (info (cons 0 original-fun)))
+ (setf (gethash function-name *encapsulations*) info)
+ (without-package-locks
+ (setf (fdefinition function-name)
+ (sb-int:named-lambda call-counter (sb-int:&more more-context more-count)
+ (declare (optimize speed (safety 0)))
+ ;; 2^59 calls should be enough for anybody, and it
+ ;; allows using fixnum arithmetic on x86-64. 2^32
+ ;; isn't enough, so we can't do that on 32 bit platforms.
+ (incf (the (unsigned-byte 59)
+ (car info)))
+ (multiple-value-call original-fun
+ (sb-c:%more-arg-values more-context
+ 0
+ more-count)))))))))
+
+\f
;;; silly examples
(defun test-0 (n &optional (depth 0))