more robust backtraces for syscalls on x86
authorNikodemus Siivola <nikodemus@sb-studio.net>
Mon, 1 Aug 2011 13:46:26 +0000 (16:46 +0300)
committerNikodemus Siivola <nikodemus@sb-studio.net>
Mon, 1 Aug 2011 15:30:15 +0000 (18:30 +0300)
 * new optimization policy: ALIEN-FUNCALL-SAVES-FP-AND-PC Set to 3 for
   self-build on x86 to get reliable more backtraces there, and 0 for
   other platforms. (1 matches the old SPEED <= DEBUG behaviour.)

 * When using a saved FP, and an interrupt context has a bogus
   FP, assume it is an interrupted syscall frame.

NEWS
make-host-2.lisp
package-data-list.lisp-expr
src/code/debug-int.lisp
src/compiler/aliencomp.lisp
src/compiler/policies.lisp
tests/debug.impure.lisp

diff --git a/NEWS b/NEWS
index 8507a73..1c9327b 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,8 @@ changes relative to sbcl-1.0.50:
     (lp#811386)
   * bug fix: using GCC >= 4.6 to build SBCL on x86 no longer breaks
     backtraces. (lp#818460)
+  * bug fix: better backtraces for interrupted syscall frames on x86.
+    (lp#549673)
 
 changes in sbcl-1.0.50 relative to sbcl-1.0.49:
   * enhancement: errors from FD handlers now provide a restart to remove
index 5af5a98..72ce598 100644 (file)
@@ -27,7 +27,9 @@
        ;; sbcl-internal optimization declarations:
        ;;
        ;; never insert stepper conditions
-       (sb!c:insert-step-conditions 0)))))
+       (sb!c:insert-step-conditions 0)
+       ;; save FP and PC for alien calls -- or not
+       (sb!c:alien-funcall-saves-fp-and-pc #!+x86 3 #!-x86 0)))))
 (compile 'proclaim-target-optimization)
 
 (defun in-target-cross-compilation-mode (fun)
index a48e6d9..e5bffc9 100644 (file)
@@ -219,6 +219,7 @@ of SBCL which maintained the CMU-CL-style split into two packages.)"
                #!+x86 "SET-FPU-WORD-FOR-C"
                #!+x86 "SET-FPU-WORD-FOR-LISP"
                "ALIGN-STACK-POINTER"
+               "ALIEN-FUNCALL-SAVES-FP-AND-PC"
                "ALLOC-ALIEN-STACK-SPACE" "ALLOC-NUMBER-STACK-SPACE"
                "ALLOCATE-CODE-OBJECT" "ALLOCATE-FRAME"
                "ALLOCATE-DYNAMIC-CODE-OBJECT" "ALLOCATE-FULL-CALL-FRAME"
index f0e34f8..5059ce9 100644 (file)
     (when saved-fp
       (compute-calling-frame (descriptor-sap saved-fp)
                              (descriptor-sap saved-pc)
-                             up-frame))))
+                             up-frame
+                             t))))
 
 ;;; Return the frame immediately below FRAME on the stack; or when
 ;;; FRAME is the bottom of the stack, return NIL.
                                  escaped))))))
 
 #!+(or x86 x86-64)
-(defun compute-calling-frame (caller ra up-frame)
+(defun compute-calling-frame (caller ra up-frame &optional savedp)
   (declare (type system-area-pointer caller ra))
   (/noshow0 "entering COMPUTE-CALLING-FRAME")
   (when (control-stack-pointer-valid-p caller)
     (/noshow0 "in WHEN")
     ;; First check for an escaped frame.
-    (multiple-value-bind (code pc-offset escaped) (find-escaped-frame caller)
+    (multiple-value-bind (code pc-offset escaped off-stack)
+        (find-escaped-frame caller)
       (/noshow0 "at COND")
       (cond (code
              ;; If it's escaped it may be a function end breakpoint trap.
                              (code-location-from-pc d-fun pc-offset
                                                     escaped)
                              (if up-frame (1+ (frame-number up-frame)) 0)
-                             escaped)))))
+                             ;; If we have an interrupt-context that's not on
+                             ;; our stack at all, and we're computing the
+                             ;; from from a saved FP, we're probably looking
+                             ;; at an interrupted syscall.
+                             (or escaped (and savedp off-stack)))))))
 
 (defun nth-interrupt-context (n)
   (declare (type (unsigned-byte 32) n)
   (declare (type system-area-pointer frame-pointer))
   (/noshow0 "entering FIND-ESCAPED-FRAME")
   (dotimes (index *free-interrupt-context-index* (values nil 0 nil))
-      (/noshow0 "at head of WITH-ALIEN")
-    (let ((context (nth-interrupt-context index)))
-        (/noshow0 "got CONTEXT")
-        (when (= (sap-int frame-pointer)
-                 (sb!vm:context-register context sb!vm::cfp-offset))
-          (without-gcing
-           (/noshow0 "in WITHOUT-GCING")
-           (let* ((component-ptr (component-ptr-from-pc
-                                  (sb!vm:context-pc context)))
-                  (code (unless (sap= component-ptr (int-sap #x0))
-                          (component-from-component-ptr component-ptr))))
-             (/noshow0 "got CODE")
-             (when (null code)
-               (return (values code 0 context)))
-             (let* ((code-header-len (* (get-header-data code)
-                                        sb!vm:n-word-bytes))
-                    (pc-offset
+    (let* ((context (nth-interrupt-context index))
+           (cfp (int-sap (sb!vm:context-register context sb!vm::cfp-offset))))
+      (/noshow0 "got CONTEXT")
+      (unless (control-stack-pointer-valid-p cfp)
+        (return (values nil nil nil t)))
+      (when (sap= frame-pointer cfp)
+        (without-gcing
+          (/noshow0 "in WITHOUT-GCING")
+          (let* ((component-ptr (component-ptr-from-pc
+                                 (sb!vm:context-pc context)))
+                 (code (unless (sap= component-ptr (int-sap #x0))
+                         (component-from-component-ptr component-ptr))))
+            (/noshow0 "got CODE")
+            (when (null code)
+              (return (values code 0 context)))
+            (let* ((code-header-len (* (get-header-data code)
+                                       sb!vm:n-word-bytes))
+                   (pc-offset
                      (- (sap-int (sb!vm:context-pc context))
                         (- (get-lisp-obj-address code)
                            sb!vm:other-pointer-lowtag)
                         code-header-len)))
-               (/noshow "got PC-OFFSET")
-               (unless (<= 0 pc-offset
-                           (* (code-header-ref code sb!vm:code-code-size-slot)
-                              sb!vm:n-word-bytes))
-                 ;; We were in an assembly routine. Therefore, use the
-                 ;; LRA as the pc.
-                 ;;
-                 ;; FIXME: Should this be WARN or ERROR or what?
-                 (format t "** pc-offset ~S not in code obj ~S?~%"
-                         pc-offset code))
-               (/noshow0 "returning from FIND-ESCAPED-FRAME")
-               (return
-               (values code pc-offset context)))))))))
+              (/noshow "got PC-OFFSET")
+              (unless (<= 0 pc-offset
+                          (* (code-header-ref code sb!vm:code-code-size-slot)
+                             sb!vm:n-word-bytes))
+                ;; We were in an assembly routine. Therefore, use the
+                ;; LRA as the pc.
+                ;;
+                ;; FIXME: Should this be WARN or ERROR or what?
+                (format t "** pc-offset ~S not in code obj ~S?~%"
+                        pc-offset code))
+              (/noshow0 "returning from FIND-ESCAPED-FRAME")
+              (return
+                (values code pc-offset context)))))))))
 
 #!-(or x86 x86-64)
 (defun find-escaped-frame (frame-pointer)
   (declare (type system-area-pointer frame-pointer))
   (/noshow0 "entering FIND-ESCAPED-FRAME")
   (dotimes (index *free-interrupt-context-index* (values nil 0 nil))
-      (/noshow0 "at head of WITH-ALIEN")
     (let ((scp (nth-interrupt-context index)))
-        (/noshow0 "got SCP")
+      (/noshow0 "got SCP")
       (when (= (sap-int frame-pointer)
                (sb!vm:context-register scp sb!vm::cfp-offset))
         (without-gcing
-         (/noshow0 "in WITHOUT-GCING")
-         (let ((code (code-object-from-bits
-                      (sb!vm:context-register scp sb!vm::code-offset))))
-           (/noshow0 "got CODE")
-           (when (symbolp code)
-             (return (values code 0 scp)))
-           (let* ((code-header-len (* (get-header-data code)
-                                      sb!vm:n-word-bytes))
-                  (pc-offset
-                   (- (sap-int (sb!vm:context-pc scp))
-                      (- (get-lisp-obj-address code)
-                         sb!vm:other-pointer-lowtag)
-                      code-header-len)))
-             (let ((code-size (* (code-header-ref code
-                                                  sb!vm:code-code-size-slot)
-                                 sb!vm:n-word-bytes)))
-               (unless (<= 0 pc-offset code-size)
-                 ;; We were in an assembly routine.
-                 (multiple-value-bind (new-pc-offset computed-return)
-                     (find-pc-from-assembly-fun code scp)
-                   (setf pc-offset new-pc-offset)
-                   (unless (<= 0 pc-offset code-size)
-                     (cerror
-                      "Set PC-OFFSET to zero and continue backtrace."
-                      'bug
-                      :format-control
-                      "~@<PC-OFFSET (~D) not in code object. Frame details:~
+          (/noshow0 "in WITHOUT-GCING")
+          (let ((code (code-object-from-bits
+                       (sb!vm:context-register scp sb!vm::code-offset))))
+            (/noshow0 "got CODE")
+            (when (symbolp code)
+              (return (values code 0 scp)))
+            (let* ((code-header-len (* (get-header-data code)
+                                       sb!vm:n-word-bytes))
+                   (pc-offset
+                     (- (sap-int (sb!vm:context-pc scp))
+                        (- (get-lisp-obj-address code)
+                           sb!vm:other-pointer-lowtag)
+                        code-header-len)))
+              (let ((code-size (* (code-header-ref code
+                                                   sb!vm:code-code-size-slot)
+                                  sb!vm:n-word-bytes)))
+                (unless (<= 0 pc-offset code-size)
+                  ;; We were in an assembly routine.
+                  (multiple-value-bind (new-pc-offset computed-return)
+                      (find-pc-from-assembly-fun code scp)
+                    (setf pc-offset new-pc-offset)
+                    (unless (<= 0 pc-offset code-size)
+                      (cerror
+                       "Set PC-OFFSET to zero and continue backtrace."
+                       'bug
+                       :format-control
+                       "~@<PC-OFFSET (~D) not in code object. Frame details:~
                        ~2I~:@_PC: #X~X~:@_CODE: ~S~:@_CODE FUN: ~S~:@_LRA: ~
                        #X~X~:@_COMPUTED RETURN: #X~X.~:>"
-                      :format-arguments
-                      (list pc-offset
-                            (sap-int (sb!vm:context-pc scp))
-                            code
-                            (%code-entry-points code)
-                            (sb!vm:context-register scp sb!vm::lra-offset)
-                            computed-return))
-                     ;; We failed to pinpoint where PC is, but set
-                     ;; pc-offset to 0 to keep the backtrace from
-                     ;; exploding.
-                     (setf pc-offset 0)))))
-             (/noshow0 "returning from FIND-ESCAPED-FRAME")
-             (return
-               (if (eq (%code-debug-info code) :bogus-lra)
-                   (let ((real-lra (code-header-ref code
-                                                    real-lra-slot)))
-                     (values (lra-code-header real-lra)
-                             (get-header-data real-lra)
-                             nil))
-                   (values code pc-offset scp))))))))))
+                       :format-arguments
+                       (list pc-offset
+                             (sap-int (sb!vm:context-pc scp))
+                             code
+                             (%code-entry-points code)
+                             (sb!vm:context-register scp sb!vm::lra-offset)
+                             computed-return))
+                      ;; We failed to pinpoint where PC is, but set
+                      ;; pc-offset to 0 to keep the backtrace from
+                      ;; exploding.
+                      (setf pc-offset 0)))))
+              (/noshow0 "returning from FIND-ESCAPED-FRAME")
+              (return
+                (if (eq (%code-debug-info code) :bogus-lra)
+                    (let ((real-lra (code-header-ref code
+                                                     real-lra-slot)))
+                      (values (lra-code-header real-lra)
+                              (get-header-data real-lra)
+                              nil))
+                    (values code pc-offset scp))))))))))
 
 #!-(or x86 x86-64)
 (defun find-pc-from-assembly-fun (code scp)
index 5ca25ac..513c003 100644 (file)
             ;; to it later regardless of how the foreign stack looks
             ;; like.
             #!+:c-stack-is-control-stack
-            (when (policy node (<= speed debug))
+            (when (policy node (= 3 alien-funcall-saves-fp-and-pc))
               (setf body `(invoke-with-saved-fp-and-pc (lambda () ,body))))
             (/noshow "returning from DEFTRANSFORM ALIEN-FUNCALL" (params) body)
             `(lambda (function ,@(params))
index 736d287..18437bb 100644 (file)
@@ -51,6 +51,12 @@ Enabling this option can increase heap consing of closures.")
   "Control conversion of &REST argments to &MORE arguments when
 only used as the final argument to APPLY.")
 
+(define-optimization-quality alien-funcall-saves-fp-and-pc
+    (if (<= speed debug) 3 0)
+  ("no" "maybe" "yes" "yes")
+  "Control ALIEN-FUNCALL saving frame-pointer and program counter for
+more reliable bactracing across foreign calls.")
+
 (define-optimization-quality verify-arg-count
     (if (zerop safety) 0 3)
   ("no" "maybe" "yes" "yes"))
index 74aec92..ec0719b 100644 (file)
                    (list '(flet not-optimized))
                    (list '(flet test) #'not-optimized))))))
 
+(with-test (:name :interrupted-syscall)
+  (let ((m (sb-thread:make-mutex))
+        (q (sb-thread:make-waitqueue)))
+    (assert (verify-backtrace
+            (lambda ()
+              (sb-thread:with-mutex (m)
+                (handler-bind ((timeout (lambda (c)
+                                          (error "foo"))))
+                  (with-timeout 0.1
+                    (sb-thread:condition-wait q m)))))
+            `((sb-thread:condition-wait ,q ,m))))))
+
 ;;; Division by zero was a common error on PPC. It depended on the
 ;;; return function either being before INTEGER-/-INTEGER in memory,
 ;;; or more than MOST-POSITIVE-FIXNUM bytes ahead. It also depends on