From: Nikodemus Siivola Date: Mon, 1 Aug 2011 13:46:26 +0000 (+0300) Subject: more robust backtraces for syscalls on x86 X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=e7b2c507c364da9395ad1f9591210dac44f24afd;p=sbcl.git more robust backtraces for syscalls on x86 * new optimization policy: ALIEN-FUNCALL-SAVES-FP-AND-PC Set to 3 for self-build on x86 to get reliable more backtraces there, and 0 for other platforms. (1 matches the old SPEED <= DEBUG behaviour.) * When using a saved FP, and an interrupt context has a bogus FP, assume it is an interrupted syscall frame. --- diff --git a/NEWS b/NEWS index 8507a73..1c9327b 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ changes relative to sbcl-1.0.50: (lp#811386) * bug fix: using GCC >= 4.6 to build SBCL on x86 no longer breaks backtraces. (lp#818460) + * bug fix: better backtraces for interrupted syscall frames on x86. + (lp#549673) changes in sbcl-1.0.50 relative to sbcl-1.0.49: * enhancement: errors from FD handlers now provide a restart to remove diff --git a/make-host-2.lisp b/make-host-2.lisp index 5af5a98..72ce598 100644 --- a/make-host-2.lisp +++ b/make-host-2.lisp @@ -27,7 +27,9 @@ ;; sbcl-internal optimization declarations: ;; ;; never insert stepper conditions - (sb!c:insert-step-conditions 0))))) + (sb!c:insert-step-conditions 0) + ;; save FP and PC for alien calls -- or not + (sb!c:alien-funcall-saves-fp-and-pc #!+x86 3 #!-x86 0))))) (compile 'proclaim-target-optimization) (defun in-target-cross-compilation-mode (fun) diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index a48e6d9..e5bffc9 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -219,6 +219,7 @@ of SBCL which maintained the CMU-CL-style split into two packages.)" #!+x86 "SET-FPU-WORD-FOR-C" #!+x86 "SET-FPU-WORD-FOR-LISP" "ALIGN-STACK-POINTER" + "ALIEN-FUNCALL-SAVES-FP-AND-PC" "ALLOC-ALIEN-STACK-SPACE" "ALLOC-NUMBER-STACK-SPACE" "ALLOCATE-CODE-OBJECT" "ALLOCATE-FRAME" "ALLOCATE-DYNAMIC-CODE-OBJECT" "ALLOCATE-FULL-CALL-FRAME" diff --git a/src/code/debug-int.lisp b/src/code/debug-int.lisp index f0e34f8..5059ce9 100644 --- a/src/code/debug-int.lisp +++ b/src/code/debug-int.lisp @@ -634,7 +634,8 @@ (when saved-fp (compute-calling-frame (descriptor-sap saved-fp) (descriptor-sap saved-pc) - up-frame)))) + up-frame + t)))) ;;; Return the frame immediately below FRAME on the stack; or when ;;; FRAME is the bottom of the stack, return NIL. @@ -788,13 +789,14 @@ escaped)))))) #!+(or x86 x86-64) -(defun compute-calling-frame (caller ra up-frame) +(defun compute-calling-frame (caller ra up-frame &optional savedp) (declare (type system-area-pointer caller ra)) (/noshow0 "entering COMPUTE-CALLING-FRAME") (when (control-stack-pointer-valid-p caller) (/noshow0 "in WHEN") ;; First check for an escaped frame. - (multiple-value-bind (code pc-offset escaped) (find-escaped-frame caller) + (multiple-value-bind (code pc-offset escaped off-stack) + (find-escaped-frame caller) (/noshow0 "at COND") (cond (code ;; If it's escaped it may be a function end breakpoint trap. @@ -828,7 +830,11 @@ (code-location-from-pc d-fun pc-offset escaped) (if up-frame (1+ (frame-number up-frame)) 0) - escaped))))) + ;; If we have an interrupt-context that's not on + ;; our stack at all, and we're computing the + ;; from from a saved FP, we're probably looking + ;; at an interrupted syscall. + (or escaped (and savedp off-stack))))))) (defun nth-interrupt-context (n) (declare (type (unsigned-byte 32) n) @@ -844,101 +850,101 @@ (declare (type system-area-pointer frame-pointer)) (/noshow0 "entering FIND-ESCAPED-FRAME") (dotimes (index *free-interrupt-context-index* (values nil 0 nil)) - (/noshow0 "at head of WITH-ALIEN") - (let ((context (nth-interrupt-context index))) - (/noshow0 "got CONTEXT") - (when (= (sap-int frame-pointer) - (sb!vm:context-register context sb!vm::cfp-offset)) - (without-gcing - (/noshow0 "in WITHOUT-GCING") - (let* ((component-ptr (component-ptr-from-pc - (sb!vm:context-pc context))) - (code (unless (sap= component-ptr (int-sap #x0)) - (component-from-component-ptr component-ptr)))) - (/noshow0 "got CODE") - (when (null code) - (return (values code 0 context))) - (let* ((code-header-len (* (get-header-data code) - sb!vm:n-word-bytes)) - (pc-offset + (let* ((context (nth-interrupt-context index)) + (cfp (int-sap (sb!vm:context-register context sb!vm::cfp-offset)))) + (/noshow0 "got CONTEXT") + (unless (control-stack-pointer-valid-p cfp) + (return (values nil nil nil t))) + (when (sap= frame-pointer cfp) + (without-gcing + (/noshow0 "in WITHOUT-GCING") + (let* ((component-ptr (component-ptr-from-pc + (sb!vm:context-pc context))) + (code (unless (sap= component-ptr (int-sap #x0)) + (component-from-component-ptr component-ptr)))) + (/noshow0 "got CODE") + (when (null code) + (return (values code 0 context))) + (let* ((code-header-len (* (get-header-data code) + sb!vm:n-word-bytes)) + (pc-offset (- (sap-int (sb!vm:context-pc context)) (- (get-lisp-obj-address code) sb!vm:other-pointer-lowtag) code-header-len))) - (/noshow "got PC-OFFSET") - (unless (<= 0 pc-offset - (* (code-header-ref code sb!vm:code-code-size-slot) - sb!vm:n-word-bytes)) - ;; We were in an assembly routine. Therefore, use the - ;; LRA as the pc. - ;; - ;; FIXME: Should this be WARN or ERROR or what? - (format t "** pc-offset ~S not in code obj ~S?~%" - pc-offset code)) - (/noshow0 "returning from FIND-ESCAPED-FRAME") - (return - (values code pc-offset context))))))))) + (/noshow "got PC-OFFSET") + (unless (<= 0 pc-offset + (* (code-header-ref code sb!vm:code-code-size-slot) + sb!vm:n-word-bytes)) + ;; We were in an assembly routine. Therefore, use the + ;; LRA as the pc. + ;; + ;; FIXME: Should this be WARN or ERROR or what? + (format t "** pc-offset ~S not in code obj ~S?~%" + pc-offset code)) + (/noshow0 "returning from FIND-ESCAPED-FRAME") + (return + (values code pc-offset context))))))))) #!-(or x86 x86-64) (defun find-escaped-frame (frame-pointer) (declare (type system-area-pointer frame-pointer)) (/noshow0 "entering FIND-ESCAPED-FRAME") (dotimes (index *free-interrupt-context-index* (values nil 0 nil)) - (/noshow0 "at head of WITH-ALIEN") (let ((scp (nth-interrupt-context index))) - (/noshow0 "got SCP") + (/noshow0 "got SCP") (when (= (sap-int frame-pointer) (sb!vm:context-register scp sb!vm::cfp-offset)) (without-gcing - (/noshow0 "in WITHOUT-GCING") - (let ((code (code-object-from-bits - (sb!vm:context-register scp sb!vm::code-offset)))) - (/noshow0 "got CODE") - (when (symbolp code) - (return (values code 0 scp))) - (let* ((code-header-len (* (get-header-data code) - sb!vm:n-word-bytes)) - (pc-offset - (- (sap-int (sb!vm:context-pc scp)) - (- (get-lisp-obj-address code) - sb!vm:other-pointer-lowtag) - code-header-len))) - (let ((code-size (* (code-header-ref code - sb!vm:code-code-size-slot) - sb!vm:n-word-bytes))) - (unless (<= 0 pc-offset code-size) - ;; We were in an assembly routine. - (multiple-value-bind (new-pc-offset computed-return) - (find-pc-from-assembly-fun code scp) - (setf pc-offset new-pc-offset) - (unless (<= 0 pc-offset code-size) - (cerror - "Set PC-OFFSET to zero and continue backtrace." - 'bug - :format-control - "~@" - :format-arguments - (list pc-offset - (sap-int (sb!vm:context-pc scp)) - code - (%code-entry-points code) - (sb!vm:context-register scp sb!vm::lra-offset) - computed-return)) - ;; We failed to pinpoint where PC is, but set - ;; pc-offset to 0 to keep the backtrace from - ;; exploding. - (setf pc-offset 0))))) - (/noshow0 "returning from FIND-ESCAPED-FRAME") - (return - (if (eq (%code-debug-info code) :bogus-lra) - (let ((real-lra (code-header-ref code - real-lra-slot))) - (values (lra-code-header real-lra) - (get-header-data real-lra) - nil)) - (values code pc-offset scp)))))))))) + :format-arguments + (list pc-offset + (sap-int (sb!vm:context-pc scp)) + code + (%code-entry-points code) + (sb!vm:context-register scp sb!vm::lra-offset) + computed-return)) + ;; We failed to pinpoint where PC is, but set + ;; pc-offset to 0 to keep the backtrace from + ;; exploding. + (setf pc-offset 0))))) + (/noshow0 "returning from FIND-ESCAPED-FRAME") + (return + (if (eq (%code-debug-info code) :bogus-lra) + (let ((real-lra (code-header-ref code + real-lra-slot))) + (values (lra-code-header real-lra) + (get-header-data real-lra) + nil)) + (values code pc-offset scp)))))))))) #!-(or x86 x86-64) (defun find-pc-from-assembly-fun (code scp) diff --git a/src/compiler/aliencomp.lisp b/src/compiler/aliencomp.lisp index 5ca25ac..513c003 100644 --- a/src/compiler/aliencomp.lisp +++ b/src/compiler/aliencomp.lisp @@ -702,7 +702,7 @@ ;; to it later regardless of how the foreign stack looks ;; like. #!+:c-stack-is-control-stack - (when (policy node (<= speed debug)) + (when (policy node (= 3 alien-funcall-saves-fp-and-pc)) (setf body `(invoke-with-saved-fp-and-pc (lambda () ,body)))) (/noshow "returning from DEFTRANSFORM ALIEN-FUNCALL" (params) body) `(lambda (function ,@(params)) diff --git a/src/compiler/policies.lisp b/src/compiler/policies.lisp index 736d287..18437bb 100644 --- a/src/compiler/policies.lisp +++ b/src/compiler/policies.lisp @@ -51,6 +51,12 @@ Enabling this option can increase heap consing of closures.") "Control conversion of &REST argments to &MORE arguments when only used as the final argument to APPLY.") +(define-optimization-quality alien-funcall-saves-fp-and-pc + (if (<= speed debug) 3 0) + ("no" "maybe" "yes" "yes") + "Control ALIEN-FUNCALL saving frame-pointer and program counter for +more reliable bactracing across foreign calls.") + (define-optimization-quality verify-arg-count (if (zerop safety) 0 3) ("no" "maybe" "yes" "yes")) diff --git a/tests/debug.impure.lisp b/tests/debug.impure.lisp index 74aec92..ec0719b 100644 --- a/tests/debug.impure.lisp +++ b/tests/debug.impure.lisp @@ -173,6 +173,18 @@ (list '(flet not-optimized)) (list '(flet test) #'not-optimized)))))) +(with-test (:name :interrupted-syscall) + (let ((m (sb-thread:make-mutex)) + (q (sb-thread:make-waitqueue))) + (assert (verify-backtrace + (lambda () + (sb-thread:with-mutex (m) + (handler-bind ((timeout (lambda (c) + (error "foo")))) + (with-timeout 0.1 + (sb-thread:condition-wait q m))))) + `((sb-thread:condition-wait ,q ,m)))))) + ;;; Division by zero was a common error on PPC. It depended on the ;;; return function either being before INTEGER-/-INTEGER in memory, ;;; or more than MOST-POSITIVE-FIXNUM bytes ahead. It also depends on