0.9.13.26: Life after heap exhaustion on GENCGC
author Nikodemus Siivola <nikodemus@random-state.net>
Mon, 5 Jun 2006 12:25:54 +0000 (12:25 +0000)
committer Nikodemus Siivola <nikodemus@random-state.net>
Mon, 5 Jun 2006 12:25:54 +0000 (12:25 +0000)
 * If gc_find_freeish_pages fails, but managed to find _some_
   memory, and we are not in GC, signal a HEAP-EXHAUSTED-ERROR.
 * A few stray s/0/FREE_PAGE_FLAG/'s.
 * Fix fprintf control string in print_generation_stats.
 * *CONTROL-STACK-EXHAUSTION-SAP* is unused, get rid of it.

NEWS
package-data-list.lisp-expr
src/code/error.lisp
src/code/gc.lisp
src/code/interr.lisp
src/compiler/generic/parms.lisp
src/runtime/gencgc.c
version.lisp-expr

diff --git a/NEWS b/NEWS
index 8a04c42..4a3386d 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,8 @@
 changes in sbcl-0.9.14 relative to sbcl-0.9.13:
   * feature: thread support on Solaris/x86, and experimental thread support
     on OS X/x86.
+  * feature: SBCL now tries to signal a STORAGE-CONDITION when running out
+    of heap.
   * minor incompatible change: prevent the user from specializing the
     new-value argument to SB-MOP:SLOT-VALUE-USING-CLASS.  It's
     somewhat counter to the intent of the protocol, I (CSR) think, and
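diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr
index f9289ef..e5a878e 100644 (file)
--- a/package-data-list.lisp-expr
+++ b/package-data-list.lisp-expr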
@@ -1171,7 +1171,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "*CURRENT-LEVEL-IN-PRINT*"
                "*EMPTY-TYPE*" "*GC-INHIBIT*" "*GC-PENDING*"
                #!+sb-thread "*STOP-FOR-GC-PENDING*"
-               "*CONTROL-STACK-EXHAUSTION-SAP*" "*UNIVERSAL-TYPE*"
+               "*UNIVERSAL-TYPE*"
                "*UNIVERSAL-FUN-TYPE*" "*UNPARSE-FUN-TYPE-SIMPLIFY*"
                "*WILD-TYPE*" "WORD-LOGICAL-AND" "WORD-LOGICAL-ANDC1"
                "WORD-LOGICAL-ANDC2" "WORD-LOGICAL-EQV"
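diff --git a/src/code/error.lisp b/src/code/error.lisp
index 3a02c24..8666181 100644 (file)
--- a/src/code/error.lisp
+++ b/src/code/error.lisp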
       (format stream
              "Control stack exhausted (no more space for function call frames).  This is probably due to heavily nested or infinitely recursive function calls, or a tail call that SBCL cannot or has not optimized away."))))
 
+(define-condition heap-exhausted-error (storage-condition)
+  ()
+  (:report
+   (lambda (condition stream)
+     (declare (special *heap-exhausted-error-available-bytes*
+                       *heap-exhausted-error-requested-bytes*))
+     ;; See comments in interr.lisp -- there is a method to this madness.
+     (if (and (boundp '*heap-exhausted-error-available-bytes*)
+              (boundp '*heap-exhausted-error-requested-bytes*))
+         (format stream
+                 "Heap exhausted: ~D bytes available, ~D requested. PROCEED WITH CAUTION!"
+                 *heap-exhausted-error-available-bytes*
+                 *heap-exhausted-error-requested-bytes*)
+         (print-unreadable-object (condition stream))))))
+
 (define-condition memory-fault-error (error)
   ()
   (:report
    (lambda (condition stream)
      (declare (ignore condition))
-     (format stream "memory fault"))))
\ No newline at end of file
+     (format stream "memory fault"))))
diff --git a/src/code/gc.lisp b/src/code/gc.lisp
index cf5451e..04562f0 100644 (file)
--- a/src/code/gc.lisp
+++ b/src/code/gc.lisp
 (defun gc-reinit ()
   (gc-on)
   (gc)
-  (setf *n-bytes-freed-or-purified* 0))
+  (setf *n-bytes-freed-or-purified* 0
+        ;; See comment in interr.lisp
+        *heap-exhausted-error-condition* (make-condition 'heap-exhausted-error)))
 
 (declaim (ftype (function () unsigned-byte) get-bytes-consed))
 (defun get-bytes-consed ()
@@ -211,6 +213,9 @@ environment these hooks may run in any thread.")
       ;; Outside the mutex, these may cause another GC. FIXME: it can
       ;; potentially exceed maximum interrupt nesting by triggering
       ;; GCs.
+      ;;
+      ;; Can that be avoided by having the finalizers and hooks run only
+      ;; from the outermost SUB-GC?
       (run-pending-finalizers)
       (dolist (hook *after-gc-hooks*)
         (handler-case
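diff --git a/src/code/interr.lisp b/src/code/interr.lisp
index 5bb783f..f5b46bc 100644 (file)
--- a/src/code/interr.lisp
+++ b/src/code/interr.lisp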
              "Control stack guard page temporarily disabled: proceed with caution~%")
      (error 'control-stack-exhausted))))
 
+;;; KLUDGE: we keep a single HEAP-EXHAUSTED-ERROR object around, so
+;;; that we don't need to allocate it when running out of memory. Similarly
+;;; we pass the amounts in special variables as there may be multiple threads
+;;; running into trouble at the same time. The condition is created by GC-REINIT.
+(defvar *heap-exhausted-error-condition*)
+(defvar *heap-exhausted-error-available-bytes*)
+(defvar *heap-exhausted-error-requested-bytes*)
+
+(defun heap-exhausted-error (available requested)
+  (infinite-error-protect
+   (let ((*heap-exhausted-error-available-bytes* available)
+         (*heap-exhausted-error-requested-bytes* requested))
+     (error *heap-exhausted-error-condition*))))
+
 (defun undefined-alien-variable-error ()
   (error 'undefined-alien-variable-error))
 
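diff --git a/src/compiler/generic/parms.lisp b/src/compiler/generic/parms.lisp
index b75954a..79ebe65 100644 (file)
--- a/src/compiler/generic/parms.lisp
+++ b/src/compiler/generic/parms.lisp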
@@ -16,6 +16,7 @@
   '(sub-gc
     sb!kernel::internal-error
     sb!kernel::control-stack-exhausted-error
+    sb!kernel::heap-exhausted-error
     sb!kernel::undefined-alien-variable-error
     sb!kernel::undefined-alien-function-error
     sb!kernel::memory-fault-error
@@ -52,4 +53,4 @@
     sb!unix::*interrupts-enabled*
     sb!unix::*interrupt-pending*
     *gc-inhibit*
-    *gc-pending*))
\ No newline at end of file
+    *gc-pending*))
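diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c
index 66f181b..c8c6919 100644 (file)
--- a/src/runtime/gencgc.c
+++ b/src/runtime/gencgc.c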
@@ -43,6 +43,7 @@
 #include "thread.h"
 #include "genesis/vector.h"
 #include "genesis/weak-pointer.h"
+#include "genesis/fdefn.h"
 #include "genesis/simple-fun.h"
 #include "save.h"
 #include "genesis/hash-table.h"
@@ -151,6 +152,9 @@ unsigned long auto_gc_trigger = 0;
 generation_index_t from_space;
 generation_index_t new_space;
 
+/* Set to 1 when in GC */
+boolean gc_active_p = 0;
+
 /* should the GC be conservative on stack. If false (only right before
  * saving a core), don't scan the stack / mark pages dont_move. */
 static boolean conservative_stack = 1;
@@ -313,7 +317,7 @@ count_generation_pages(generation_index_t generation)
     long count = 0;
 
     for (i = 0; i < last_free_page; i++)
-        if ((page_table[i].allocated != 0)
+        if ((page_table[i].allocated != FREE_PAGE_FLAG)
             && (page_table[i].gen == generation))
             count++;
     return count;
@@ -326,7 +330,8 @@ count_dont_move_pages(void)
     page_index_t i;
     long count = 0;
     for (i = 0; i < last_free_page; i++) {
-        if ((page_table[i].allocated != 0) && (page_table[i].dont_move != 0)) {
+        if ((page_table[i].allocated != FREE_PAGE_FLAG)
+            && (page_table[i].dont_move != 0)) {
             ++count;
         }
     }
@@ -342,7 +347,8 @@ count_generation_bytes_allocated (generation_index_t gen)
     page_index_t i;
     long result = 0;
     for (i = 0; i < last_free_page; i++) {
-        if ((page_table[i].allocated != 0) && (page_table[i].gen == gen))
+        if ((page_table[i].allocated != FREE_PAGE_FLAG)
+            && (page_table[i].gen == gen))
             result += page_table[i].bytes_used;
     }
     return result;
@@ -422,17 +428,19 @@ print_generation_stats(int verbose) /* FIXME: should take FILE argument */
         gc_assert(generations[i].bytes_allocated
                   == count_generation_bytes_allocated(i));
         fprintf(stderr,
-                "   %1d: %5ld %5ld %5ld %5ld %5ld %5ld %5ld %5ld %8ld %5ld %8ld %4ld %3d %7.4f\n",
+                "   %1d: %5ld %5ld %5ld %5ld %5ld %5ld %5ld %5ld %5ld %8ld %5ld %8ld %4ld %3d %7.4f\n",
                 i,
                 generations[i].alloc_start_page,
                 generations[i].alloc_unboxed_start_page,
                 generations[i].alloc_large_start_page,
                 generations[i].alloc_large_unboxed_start_page,
-                boxed_cnt, unboxed_cnt, large_boxed_cnt, large_unboxed_cnt,
+                boxed_cnt,
+                unboxed_cnt,
+                large_boxed_cnt,
+                large_unboxed_cnt,
                 pinned_cnt,
                 generations[i].bytes_allocated,
-                (count_generation_pages(i)*PAGE_BYTES
-                 - generations[i].bytes_allocated),
+                (count_generation_pages(i)*PAGE_BYTES - generations[i].bytes_allocated),
                 generations[i].gc_trigger,
                 count_write_protect_generation_pages(i),
                 generations[i].num_gc,
@@ -1064,6 +1072,32 @@ gc_alloc_large(long nbytes, int unboxed, struct alloc_region *alloc_region)
 
 static page_index_t gencgc_alloc_start_page = -1;
 
+void
+gc_heap_exhausted_error_or_lose (long available, long requested)
+{
+    /* Write basic information before doing anything else: if we don't
+     * call to lisp this is a must, and even if we do there is always the
+     * danger that we bounce back here before the error has been handled,
+     * or indeed even printed.
+     */
+    fprintf(stderr, "Heap exhausted during %s: %ld bytes available, %ld requested.\n",
+            gc_active_p ? "garbage collection" : "allocation", available, requested);
+    if (gc_active_p || (available == 0)) {
+        /* If we are in GC, or totally out of memory there is no way
+         * to sanely transfer control to the lisp-side of things.
+         */
+        print_generation_stats(1);
+        lose("Heap exhausted, game over.");
+    }
+    else {
+        /* FIXME: assert free_pages_lock held */
+        thread_mutex_unlock(&free_pages_lock);
+        funcall2(SymbolFunction(HEAP_EXHAUSTED_ERROR),
+                 make_fixnum(available), make_fixnum(requested));
+        lose("HEAP-EXHAUSTED-ERROR fell through");
+    }
+}
+
 page_index_t
 gc_find_freeish_pages(page_index_t *restart_page_ptr, long nbytes, int unboxed)
 {
@@ -1106,13 +1140,8 @@ gc_find_freeish_pages(page_index_t *restart_page_ptr, long nbytes, int unboxed)
                 first_page++;
             }
 
-        if (first_page >= NUM_PAGES) {
-            fprintf(stderr,
-                    "Argh! gc_find_free_space failed (first_page), nbytes=%ld.\n",
-                    nbytes);
-            print_generation_stats(1);
-            lose("\n");
-        }
+        if (first_page >= NUM_PAGES)
+            gc_heap_exhausted_error_or_lose(0, nbytes);
 
         gc_assert(page_table[first_page].write_protected == 0);
 
@@ -1137,13 +1166,9 @@ gc_find_freeish_pages(page_index_t *restart_page_ptr, long nbytes, int unboxed)
     } while ((restart_page < NUM_PAGES) && (bytes_found < nbytes));
 
     /* Check for a failure */
-    if ((restart_page >= NUM_PAGES) && (bytes_found < nbytes)) {
-        fprintf(stderr,
-                "Argh! gc_find_freeish_pages failed (restart_page), nbytes=%ld.\n",
-                nbytes);
-        print_generation_stats(1);
-        lose("\n");
-    }
+    if ((restart_page >= NUM_PAGES) && (bytes_found < nbytes))
+        gc_heap_exhausted_error_or_lose(bytes_found, nbytes);
+
     *restart_page_ptr=first_page;
 
     return last_page;
@@ -4386,6 +4411,8 @@ collect_garbage(generation_index_t last_gen)
 
     FSHOW((stderr, "/entering collect_garbage(%d)\n", last_gen));
 
+    gc_active_p = 1;
+
     if (last_gen > HIGHEST_NORMAL_GENERATION+1) {
         FSHOW((stderr,
                "/collect_garbage: last_gen = %d, doing a level 0 GC\n",
@@ -4505,6 +4532,8 @@ collect_garbage(generation_index_t last_gen)
         high_water_mark = 0;
     }
 
+    gc_active_p = 0;
+
     SHOW("returning from collect_garbage");
 }
 
diff --git a/version.lisp-expr b/version.lisp-expr
index a843b22..7ba8bbe 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.13.25"
+"0.9.13.26"