1.0.25.33: protect against recursive gcs
authorGabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 21:52:04 +0000 (21:52 +0000)
committerGabor Melis <mega@hotpop.com>
Mon, 16 Feb 2009 21:52:04 +0000 (21:52 +0000)
... while holding the *already-in-gc* lock instead of allowing gc to
trigger and then punting.

src/code/gc.lisp
src/runtime/backtrace.c
src/runtime/gencgc.c
src/runtime/interrupt.c
version.lisp-expr

index e94a392..f3b9615 100644 (file)
@@ -196,57 +196,72 @@ run in any thread.")
 (defvar *gc-epoch* (cons nil nil))
 
 (defun sub-gc (&key (gen 0))
-  (unless (sb!thread:holding-mutex-p *already-in-gc*)
-    ;; With gencgc, unless *GC-PENDING* every allocation in this
-    ;; function triggers another gc, potentially exceeding maximum
-    ;; interrupt nesting. If *GC-INHIBIT* is not true, however,
-    ;; there is no guarantee that we would ever check for pending
-    ;; GC -- so in that case we must first disable interrupts, which
-    ;; needs to be done for GC anyways...
-    (cond (*gc-inhibit*
-           (setf *gc-pending* t))
-          (t
-           (without-interrupts
-             (setf *gc-pending* t)
-             (sb!thread:with-mutex (*already-in-gc*)
-               (let ((old-usage (dynamic-usage))
-                     (new-usage 0))
-                 (unsafe-clear-roots)
-
-                 (gc-stop-the-world)
-                 (let ((start-time (get-internal-run-time)))
-                   (collect-garbage gen)
-                   (setf *gc-epoch* (cons nil nil))
-                   (incf *gc-run-time*
-                         (- (get-internal-run-time) start-time)))
-                 (setf *gc-pending* nil
-                       new-usage (dynamic-usage))
-                 (gc-start-the-world)
-
-                 ;; In a multithreaded environment the other threads will
-                 ;; see *n-b-f-o-p* change a little late, but that's OK.
-                 (let ((freed (- old-usage new-usage)))
-                   ;; GENCGC occasionally reports negative here, but the
-                   ;; current belief is that it is part of the normal order
-                   ;; of things and not a bug.
-                   (when (plusp freed)
-                     (incf *n-bytes-freed-or-purified* freed))))))
-
-           ;; Outside the mutex, interrupts enabled: these may cause
-           ;; another GC. FIXME: it can potentially exceed maximum
-           ;; interrupt nesting by triggering GCs.
-           ;;
-           ;; Can that be avoided by having the finalizers and hooks
-           ;; run only from the outermost SUB-GC?
+  (cond (*gc-inhibit*
+         (setf *gc-pending* t))
+        (t
+         (without-interrupts
+           (setf *gc-pending* :in-progress)
+           ;; Tricks to to prevent triggerring a recursive gc. This is
+           ;; like a WITHOUT-GCING inside the lock except that we
+           ;; cannot call MAYBE-HANDLE-PENDING-GC at the end, because
+           ;; that would lead to a recursive attempt on the lock. In
+           ;; case you are wondering, wrapping the lock in a
+           ;; WITHOUT-GCING would also deadlock. The
+           ;; *IN-WITHOUT-GCING* part is used to tell the runtime that
+           ;; it's ok to have a pending gc even though *GC-INHIBIT* is
+           ;; NIL.
            ;;
-           ;; KLUDGE: Don't run the hooks in GC's triggered by dying
-           ;; threads, so that user-code never runs with
-           ;;   (thread-alive-p *current-thread*) => nil
-           ;; The long-term solution will be to keep a separate thread
-           ;; for finalizers and after-gc hooks.
-           (when (sb!thread:thread-alive-p sb!thread:*current-thread*)
-             (run-pending-finalizers)
-             (call-hooks "after-GC" *after-gc-hooks* :on-error :warn))))))
+           ;; Now, if GET-MUTEX did not cons, that would be enough.
+           ;; Because it does, we need the :IN-PROGRESS bit above to
+           ;; tell the runtime not to trigger gcs.
+           (let ((sb!impl::*in-without-gcing* t))
+             (sb!thread:with-mutex (*already-in-gc*)
+               (let ((*gc-inhibit* t))
+                 (let ((old-usage (dynamic-usage))
+                       (new-usage 0))
+                   (unsafe-clear-roots)
+                   (gc-stop-the-world)
+                   (let ((start-time (get-internal-run-time)))
+                     (collect-garbage gen)
+                     (setf *gc-epoch* (cons nil nil))
+                     (incf *gc-run-time*
+                           (- (get-internal-run-time) start-time)))
+                   (setf *gc-pending* nil
+                         new-usage (dynamic-usage))
+                   #!+sb-thread
+                   (assert (not *stop-for-gc-pending*))
+                   (gc-start-the-world)
+                   ;; In a multithreaded environment the other threads
+                   ;; will see *n-b-f-o-p* change a little late, but
+                   ;; that's OK.
+                   (let ((freed (- old-usage new-usage)))
+                     ;; GENCGC occasionally reports negative here, but
+                     ;; the current belief is that it is part of the
+                     ;; normal order of things and not a bug.
+                     (when (plusp freed)
+                       (incf *n-bytes-freed-or-purified* freed)))))))
+           ;; While holding the mutex we were protected from
+           ;; SIG_STOP_FOR_GC and recursive GCs. Now, in order to
+           ;; preserve the invariant (*GC-PENDING* ->
+           ;; pseudo-atomic-interrupted or *GC-INHIBIT*), let's check
+           ;; explicitly for a pending gc before interrupts are
+           ;; enabled again.
+           (maybe-handle-pending-gc))
+         ;; Outside the mutex, interrupts enabled: these may cause
+         ;; another GC. FIXME: it can potentially exceed maximum
+         ;; interrupt nesting by triggering GCs.
+         ;;
+         ;; Can that be avoided by having the finalizers and hooks
+         ;; run only from the outermost SUB-GC?
+         ;;
+         ;; KLUDGE: Don't run the hooks in GC's triggered by dying
+         ;; threads, so that user-code never runs with
+         ;;   (thread-alive-p *current-thread*) => nil
+         ;; The long-term solution will be to keep a separate thread
+         ;; for finalizers and after-gc hooks.
+         (when (sb!thread:thread-alive-p sb!thread:*current-thread*)
+           (run-pending-finalizers)
+           (call-hooks "after-GC" *after-gc-hooks* :on-error :warn)))))
 
 ;;; This is the user-advertised garbage collection function.
 (defun gc (&key (gen 0) (full nil) &allow-other-keys)
index 90ae58d..e0e1285 100644 (file)
@@ -548,7 +548,10 @@ describe_thread_state(void)
 #endif
     printf("Specials:\n");
     printf(" *GC-INHIBIT* = %s\n", (SymbolValue(GC_INHIBIT, thread) == T) ? "T" : "NIL");
-    printf(" *GC-PENDING* = %s\n", (SymbolValue(GC_PENDING, thread) == T) ? "T" : "NIL");
+    printf(" *GC-PENDING* = %s\n",
+           (SymbolValue(GC_PENDING, thread) == T) ?
+           "T" : ((SymbolValue(GC_PENDING, thread) == NIL) ?
+                  "NIL" : ":IN-PROGRESS"));
     printf(" *INTERRUPTS-ENABLED* = %s\n", (SymbolValue(INTERRUPTS_ENABLED, thread) == T) ? "T" : "NIL");
 #ifdef STOP_FOR_GC_PENDING
     printf(" *STOP-FOR-GC-PENDING* = %s\n", (SymbolValue(STOP_FOR_GC_PENDING, thread) == T) ? "T" : "NIL");
index 895ced2..60db5a8 100644 (file)
@@ -1172,7 +1172,9 @@ gc_heap_exhausted_error_or_lose (long available, long requested)
         fprintf(stderr, "GC control variables:\n");
         fprintf(stderr, "          *GC-INHIBIT* = %s\n          *GC-PENDING* = %s\n",
                 SymbolValue(GC_INHIBIT,thread)==NIL ? "false" : "true",
-                SymbolValue(GC_PENDING,thread)==NIL ? "false" : "true");
+                (SymbolValue(GC_PENDING, thread) == T) ?
+                "true" : ((SymbolValue(GC_PENDING, thread) == NIL) ?
+                  "false" : "in progress"));
 #ifdef LISP_FEATURE_SB_THREAD
         fprintf(stderr, " *STOP-FOR-GC-PENDING* = %s\n",
                 SymbolValue(STOP_FOR_GC_PENDING,thread)==NIL ? "false" : "true");
index b63f6fb..f63dbf9 100644 (file)
@@ -539,7 +539,10 @@ interrupt_handle_pending(os_context_t *context)
             sig_stop_for_gc_handler(SIG_STOP_FOR_GC,NULL,context);
         } else
 #endif
-        if (SymbolValue(GC_PENDING,thread) != NIL) {
+         /* Test for T and not for != NIL since the value :IN-PROGRESS
+          * is used in SUB-GC as part of the mechanism to supress
+          * recursive gcs.*/
+        if (SymbolValue(GC_PENDING,thread) == T) {
             /* GC_PENDING is cleared in SUB-GC, or if another thread
              * is doing a gc already we will get a SIG_STOP_FOR_GC and
              * that will clear it. */
index 71d5467..85ff558 100644 (file)
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.25.32"
+"1.0.25.33"