X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=contrib%2Fsb-sprof%2Fsb-sprof.lisp;h=cffa4d5945bb1f7b1bb4dac9b1e8fb1b7bfae0eb;hb=905a0fc4c21ff6c8c752b9436e0616b868f1dfcc;hp=fdeba626bfd086a0f44259590f517ad69a02edc7;hpb=8b64d57b865fec6ba082dda965146b5e8aa877b3;p=sbcl.git diff --git a/contrib/sb-sprof/sb-sprof.lisp b/contrib/sb-sprof/sb-sprof.lisp index fdeba62..cffa4d5 100644 --- a/contrib/sb-sprof/sb-sprof.lisp +++ b/contrib/sb-sprof/sb-sprof.lisp @@ -84,9 +84,6 @@ ;;; ;;; Random ideas for implementation: ;;; -;;; * Show a disassembly of a function annotated with sampling -;;; information. -;;; ;;; * Space profiler. Sample when new pages are allocated instead of ;;; at SIGPROF. ;;; @@ -202,14 +199,14 @@ (defun topological-sort (dag) (let ((sorted ()) (dfn -1)) - (labels ((sort (v) + (labels ((rec-sort (v) (setf (vertex-visited v) t) (setf (vertex-dfn v) (incf dfn)) (dolist (e (vertex-edges v)) (unless (vertex-visited (edge-vertex e)) - (sort (edge-vertex e)))) + (rec-sort (edge-vertex e)))) (push v sorted))) - (map-vertices #'sort dag) + (map-vertices #'rec-sort dag) (nreverse sorted)))) ;;; Reduce graph G to a dag by coalescing strongly connected components @@ -372,18 +369,20 @@ ;;; comparison function Test. Assume each element to sort consists of ;;; Element-Size array slots, and that the slot Key-Offset contains ;;; the sort key. -(defun qsort (vec &key (test #'<) (element-size 1) (key-offset 0) +(defun qsort (vec &key (element-size 1) (key-offset 0) (from 0) (to (- (length vec) element-size))) - (declare (fixnum to from element-size) - (function test)) + (declare (type fixnum to from element-size key-offset)) + (declare (type (simple-array address) vec)) (labels ((rotate (i j) + (declare (fixnum i j)) (loop repeat element-size for i from i and j from j do (rotatef (aref vec i) (aref vec j)))) (key (i) (aref vec (+ i key-offset))) - (sort (from to) - (when (> to from) + (rec-sort (from to) + (declare (fixnum to from)) + (when (> to from) (let* ((mid (* element-size (round (+ (/ from element-size) (/ to element-size)) @@ -391,21 +390,31 @@ (i from) (j (+ to element-size)) (p (key mid))) - (declare (fixnum i j)) + (declare (fixnum mid i j)) (rotate mid from) (loop (loop do (incf i element-size) until (or (> i to) - (funcall test p (key i)))) + ;; QSORT used to take a test + ;; parameter which was funcalled + ;; here. This caused some consing, + ;; which is problematic since + ;; QSORT is indirectly called in + ;; an after-gc-hook. So just + ;; hardcode >, which would've been + ;; used for the test anyway. + ;; --JES, 2004-07-09 + (> p (key i)))) (loop do (decf j element-size) until (or (<= j from) - (funcall test (key j) p))) + ;; As above. + (> (key j) p))) (when (< j i) (return)) (rotate i j)) (rotate from j) - (sort from (- j element-size)) - (sort i to))))) - (sort from to) + (rec-sort from (- j element-size)) + (rec-sort i to))))) + (rec-sort from to) vec)) @@ -414,7 +423,7 @@ (deftype address () "Type used for addresses, for instance, program counters, code start/end locations etc." - '(unsigned-byte #+alpha 64 #-alpha 32)) + '(unsigned-byte #.sb-vm::n-machine-word-bits)) (defconstant +unknown-address+ 0 "Constant representing an address that cannot be determined.") @@ -499,7 +508,9 @@ "Default number of samples taken.") (declaim (type sb-impl::index *max-samples*)) -(defconstant +sample-size+ 2) +(defconstant +sample-size+ + #+(or x86 x86-64) 8 + #-(or x86 x86-64) 2) (defvar *samples* nil) (declaim (type (or null (vector address)) *samples*)) @@ -527,7 +538,7 @@ (defun show-progress (format-string &rest args) (when *show-progress* - (apply #'format t format-string args) + (apply #'format t format-string args) (finish-output))) (defun start-sampling () @@ -543,17 +554,15 @@ `(let ((*sampling* ,on)) ,@body)) -(defun sort-samples (&key test (key :pc)) +(defun sort-samples (key-offset) "Sort *Samples* using comparison Test. Key must be one of :Pc or :Return-Pc for sorting by pc or return pc." - (declare (type (member :pc :return-pc) key)) (when (plusp *samples-index*) (qsort *samples* :from 0 :to (- *samples-index* +sample-size+) - :test test :element-size +sample-size+ - :key-offset (if (eq key :pc) 0 1)))) + :key-offset key-offset))) (defun record (pc) (declare (type address pc)) @@ -562,23 +571,44 @@ ;;; SIGPROF handler. Record current PC and return address in ;;; *SAMPLES*. -#+x86 +#+(or x86 x86-64) (defun sigprof-handler (signal code scp) (declare (ignore signal code) (type system-area-pointer scp)) (when (and *sampling* (< *samples-index* (length *samples*))) (sb-sys:without-gcing - (with-alien ((scp (* os-context-t) :local scp)) - (locally (declare (optimize (inhibit-warnings 2))) - (let* ((pc-ptr (sb-vm:context-pc scp)) - (fp (sb-vm::context-register scp #.sb-vm::ebp-offset)) - (ra (sap-ref-32 (int-sap fp) - (- (* (1+ sb-vm::return-pc-save-offset) - sb-vm::n-word-bytes))))) - (record (sap-int pc-ptr)) - (record ra))))))) - -#-x86 + (locally (declare (optimize (inhibit-warnings 2))) + (with-alien ((scp (* os-context-t) :local scp)) + ;; For some reason completely bogus small values for the + ;; frame pointer are returned every now and then, leading + ;; to segfaults. Try to avoid these cases. + ;; + ;; FIXME: Do a more thorough sanity check on ebp, or figure + ;; out why this is happening. + ;; -- JES, 2005-01-11 + (when (< (sb-vm::context-register scp #.sb-vm::ebp-offset) + 4096) + (dotimes (i +sample-size+) + (record 0)) + (return-from sigprof-handler nil)) + (let* ((pc-ptr (sb-vm:context-pc scp)) + (fp (sb-vm::context-register scp #.sb-vm::ebp-offset))) + (record (sap-int pc-ptr)) + (let ((fp (int-sap fp)) + ra) + (dotimes (i (1- +sample-size+)) + (cond (fp + (setf (values ra fp) + (sb-di::x86-call-context fp :depth i)) + (record (if ra + (sap-int ra) + 0))) + (t + (record 0))))))))))) + +;; FIXME: On non-x86 platforms we don't yet walk the call stack deeper +;; than one level. +#-(or x86 x86-64) (defun sigprof-handler (signal code scp) (declare (ignore signal code)) (when (and *sampling* @@ -588,7 +618,7 @@ (locally (declare (optimize (inhibit-warnings 2))) (let* ((pc-ptr (sb-vm:context-pc scp)) (fp (sb-vm::context-register scp #.sb-vm::cfp-offset)) - (ra (sap-ref-32 + (ra (sap-ref-word (int-sap fp) (* sb-vm::lra-save-offset sb-vm::n-word-bytes)))) (record (sap-int pc-ptr)) @@ -626,13 +656,9 @@ *dynamic-space-code-info*)))) (map-dynamic-space-code #'record-address))) -;;; Adjust pcs or return-pcs in *SAMPLES* for address changes of -;;; dynamic-space code objects. KEY being :PC means adjust pcs. -(defun adjust-samples (key) - (declare (type (member :pc :return-pc) key)) - (sort-samples :test #'> :key key) - (let ((sidx 0) - (offset (if (eq key :pc) 0 1))) +(defun adjust-samples (offset) + (sort-samples offset) + (let ((sidx 0)) (declare (type sb-impl::index sidx)) (dolist (info *dynamic-space-code-info*) (unless (= (dyninfo-new-start info) (dyninfo-start info)) @@ -663,8 +689,9 @@ (dolist (info *dynamic-space-code-info*) (setf (dyninfo-new-start info) (code-start (dyninfo-code info)))) - (adjust-samples :pc) - (adjust-samples :return-pc) + (progn + (dotimes (i +sample-size+) + (adjust-samples i))) (dolist (info *dynamic-space-code-info*) (let ((size (- (dyninfo-end info) (dyninfo-start info)))) (setf (dyninfo-start info) (dyninfo-new-start info)) @@ -791,8 +818,10 @@ (%make-node :name name :start-pc (+ start-pc start-offset) :end-pc (+ start-pc end-offset)))) - (t - (%make-node :name (sb-di::debug-fun-name info))))) + (sb-di::debug-fun + (%make-node :name (sb-di::debug-fun-name info))) + (t + (%make-node :name (coerce info 'string))))) ;;; Return something serving as debug info for address PC. If we can ;;; get something from SB-DI:DEBUG-FUNCTION-FROM-PC, return that. @@ -801,17 +830,23 @@ (defun debug-info (pc) (declare (type address pc)) (let ((ptr (sb-di::component-ptr-from-pc (int-sap pc)))) - (unless (sap= ptr (int-sap 0)) - (let* ((code (sb-di::component-from-component-ptr ptr)) - (code-header-len (* (sb-kernel:get-header-data code) - sb-vm:n-word-bytes)) - (pc-offset (- pc - (- (sb-kernel:get-lisp-obj-address code) - sb-vm:other-pointer-lowtag) - code-header-len)) - (df (ignore-errors (sb-di::debug-fun-from-pc code - pc-offset)))) - (or df code))))) + (cond ((sap= ptr (int-sap 0)) + (let ((name (foreign-symbol-in-address (int-sap pc)))) + (when name + (format nil "foreign function ~a" name)))) + (t + (let* ((code (sb-di::component-from-component-ptr ptr)) + (code-header-len (* (sb-kernel:get-header-data code) + sb-vm:n-word-bytes)) + (pc-offset (- pc + (- (sb-kernel:get-lisp-obj-address code) + sb-vm:other-pointer-lowtag) + code-header-len)) + (df (ignore-errors (sb-di::debug-fun-from-pc code + pc-offset)))) + (or df + code)))))) + ;;; One function can have more than one COMPILED-DEBUG-FUNCTION with ;;; the same name. Reduce the number of calls to Debug-Info by first @@ -868,10 +903,11 @@ collect node)) ;;; Value is a CALL-GRAPH for the current contents of *SAMPLES*. -(defun make-call-graph-1 () - (let ((elsewhere-count 0)) +(defun make-call-graph-1 (depth) + (let ((elsewhere-count 0) + visited-nodes) (with-lookup-tables () - (loop for i below *samples-index* by +sample-size+ + (loop for i below (1- *samples-index*) ;; by +sample-size+ as pc = (aref *samples* i) as return-pc = (aref *samples* (1+ i)) as callee = (lookup-node pc) @@ -880,22 +916,33 @@ (let ((caller (lookup-node return-pc))) (when caller caller))) - when (and *show-progress* (plusp i)) do + do + (when (and *show-progress* (plusp i)) (cond ((zerop (mod i 1000)) (show-progress "~d" i)) ((zerop (mod i 100)) - (show-progress "."))) - if callee do - (incf (node-count callee)) - else do - (incf elsewhere-count) - when (and callee caller) do - (let ((call (find callee (node-edges caller) - :key #'call-vertex))) - (pushnew caller (node-callers callee)) - (if call - (incf (call-count call)) - (push (make-call callee) (node-edges caller))))) + (show-progress ".")))) + (when (< (mod i +sample-size+) depth) + (when (= (mod i +sample-size+) 0) + (setf visited-nodes nil) + (cond (callee + (incf (node-accrued-count callee)) + (incf (node-count callee))) + (t + (incf elsewhere-count)))) + (when callee + (push callee visited-nodes)) + (when caller + (unless (member caller visited-nodes) + (incf (node-accrued-count caller))) + (when callee + (let ((call (find callee (node-edges caller) + :key #'call-vertex))) + (pushnew caller (node-callers callee)) + (if call + (unless (member caller visited-nodes) + (incf (call-count call))) + (push (make-call callee) (node-edges caller)))))))) (let ((sorted-nodes (sort (collect-nodes) #'> :key #'node-count))) (loop for node in sorted-nodes and i from 1 do (setf (node-index node) i)) @@ -935,16 +982,16 @@ ;;; *SAMPLES*. The result contain a list of nodes sorted by self-time ;;; in the FLAT-NODES slot, and a dag in VERTICES, with call cycles ;;; reduced to CYCLE structures. -(defun make-call-graph () +(defun make-call-graph (depth) (stop-profiling) (show-progress "~&Computing call graph ") - (let ((call-graph (without-gcing (make-call-graph-1)))) + (let ((call-graph (without-gcing (make-call-graph-1 depth)))) (setf (call-graph-flat-nodes call-graph) (copy-list (graph-vertices call-graph))) (show-progress "~&Finding cycles") (reduce-call-graph call-graph) (show-progress "~&Propagating counts") - (compute-accrued-counts call-graph) + #+nil (compute-accrued-counts call-graph) call-graph)) @@ -982,8 +1029,8 @@ 0))) (when print-header (print-call-graph-header call-graph)) - (format t "~& Self Total~%") - (format t "~& Nr Count % Count % Function~%") + (format t "~& Self Cumul Total~%") + (format t "~& Nr Count % Count % Count % Function~%") (print-separator) (let ((elsewhere-count (call-graph-elsewhere-count call-graph)) (i 0)) @@ -992,16 +1039,21 @@ (< (node-count node) min-count)) (return)) (let* ((count (node-count node)) - (percent (samples-percent call-graph count))) + (percent (samples-percent call-graph count)) + (accrued-count (node-accrued-count node)) + (accrued-percent (samples-percent call-graph accrued-count))) (incf total-count count) (incf total-percent percent) - (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~s~%" + (format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~s~%" (node-index node) count percent + accrued-count + accrued-percent total-count total-percent - (node-name node)))) + (node-name node)) + (finish-output))) (print-separator) (format t "~& ~6d ~5,1f elsewhere~%" elsewhere-count @@ -1013,7 +1065,7 @@ (format t "~& Count % Parts~%") (do-vertices (node call-graph) (when (cycle-p node) - (flet ((print (indent index count percent name) + (flet ((print-info (indent index count percent name) (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%" count percent indent name index))) (print-separator) @@ -1022,9 +1074,9 @@ (samples-percent call-graph (cycle-count node)) (node-name node)) (dolist (v (vertex-scc-vertices node)) - (print 4 (node-index v) (node-count v) - (samples-percent call-graph (node-count v)) - (node-name v)))))) + (print-info 4 (node-index v) (node-count v) + (samples-percent call-graph (node-count v)) + (node-name v)))))) (print-separator) (format t "~2%"))) @@ -1036,7 +1088,7 @@ (print-cycles call-graph) (flet ((find-call (from to) (find to (node-edges from) :key #'call-vertex)) - (print (indent index count percent name) + (print-info (indent index count percent name) (format t "~&~6d ~5,1f ~11@t ~V@t ~s [~d]~%" count percent indent name index))) (format t "~& Callers~%") @@ -1048,10 +1100,10 @@ ;; Print caller information. (dolist (caller (node-callers node)) (let ((call (find-call caller node))) - (print 4 (node-index caller) - (call-count call) - (samples-percent call-graph (call-count call)) - (node-name caller)))) + (print-info 4 (node-index caller) + (call-count call) + (samples-percent call-graph (call-count call)) + (node-name caller)))) ;; Print the node itself. (format t "~&~6d ~5,1f ~6d ~5,1f ~s [~d]~%" (node-count node) @@ -1062,10 +1114,10 @@ (node-index node)) ;; Print callees. (do-edges (call called node) - (print 4 (node-index called) - (call-count call) - (samples-percent call-graph (call-count call)) - (node-name called)))) + (print-info 4 (node-index called) + (call-count call) + (samples-percent call-graph (call-count call)) + (node-name called)))) (print-separator) (format t "~2%") (print-flat call-graph :stream stream :max max @@ -1101,8 +1153,7 @@ Value of this function is a Call-Graph object representing the resulting call-graph." - (declare (type report-type type)) - (let ((graph (or call-graph (make-call-graph)))) + (let ((graph (or call-graph (make-call-graph (1- +sample-size+))))) (ecase type (:flat (print-flat graph :stream stream :max max :min-percent min-percent)) @@ -1111,6 +1162,26 @@ ((nil))) graph)) +;;; Interface to DISASSEMBLE + +(defun add-disassembly-profile-note (chunk stream dstate) + (declare (ignore chunk stream)) + (unless (zerop *samples-index*) + (let* ((location + (+ (sb-disassem::seg-virtual-location + (sb-disassem:dstate-segment dstate)) + (sb-disassem::dstate-cur-offs dstate))) + (samples (loop for x from 0 below *samples-index* by +sample-size+ + summing (if (= (aref *samples* x) location) + 1 + 0)))) + (unless (zerop samples) + (sb-disassem::note (format nil "~A/~A samples" + samples (/ *samples-index* +sample-size+)) + dstate))))) + +(pushnew 'add-disassembly-profile-note sb-disassem::*default-dstate-hooks*) + ;;; silly examples (defun test-0 (n &optional (depth 0))