gencgc: bogus memory fault handling
diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c
index fea3194..c143054 100644
--- a/src/runtime/gencgc.c
+++ b/src/runtime/gencgc.c
@@ -1454,8 +1454,9 @@ general_copy_large_object(lispobj object, long nwords, boolean boxedp)
         page_index_t next_page;
         page_bytes_t old_bytes_used;
 
-        /* BOXED ONLY? */
-        /* Note: Any page write-protection must be removed, else a
+        /* FIXME: This comment is somewhat stale.
+         *
+         * Note: Any page write-protection must be removed, else a
          * later scavenge_newspace may incorrectly not scavenge these
          * pages. This would not be necessary if they are added to the
          * new areas, but let's do it for them all (they'll probably
@@ -1511,13 +1512,17 @@ general_copy_large_object(lispobj object, long nwords, boolean boxedp)
         next_page++;
         while ((old_bytes_used == GENCGC_CARD_BYTES) &&
                (page_table[next_page].gen == from_space) &&
+               /* FIXME: It is not obvious to me why this is necessary
+                * as a loop condition: it seems to me that the
+                * region_start_offset test should be sufficient, but
+                * experimentally that is not the case. --NS
+                * 2011-11-28 */
+               (boxedp ?
+                page_boxed_p(next_page) :
+                page_allocated_no_region_p(next_page)) &&
                page_table[next_page].large_object &&
                (page_table[next_page].region_start_offset ==
                 npage_bytes(next_page - first_page))) {
-            if (boxedp)
-                gc_assert(page_boxed_p(next_page));
-            else
-                gc_assert(page_allocated_no_region_p(next_page));
             /* Checks out OK, free the page. Don't need to bother zeroing
              * pages as this should have been done before shrinking the
              * object. These pages shouldn't be write-protected, even if
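The hunk above folds what used to be per-page assertions into the loop guard itself: the boxedp/page_allocated_no_region_p test now decides whether the page walk continues instead of aborting the runtime when it fails. A minimal sketch of that pattern, with illustrative identifiers that are not from gencgc.c:

    /* Before: walk continuation pages, abort if one is the wrong kind. */
    while (continuation_page_p(p)) {
        gc_assert(right_kind_p(p));   /* lose()s on a non-conforming page */
        release_page(p++);
    }

    /* After: a non-conforming page simply terminates the walk. */
    while (continuation_page_p(p) && right_kind_p(p)) {
        release_page(p++);
    }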
@@ -4242,7 +4247,17 @@ void unhandled_sigmemoryfault(void* addr);
  *
  * Return true if this signal is a normal generational GC thing that
  * we were able to handle, or false if it was abnormal and control
- * should fall through to the general SIGSEGV/SIGBUS/whatever logic. */
+ * should fall through to the general SIGSEGV/SIGBUS/whatever logic.
+ *
+ * We have two control flags for this: one causes us to ignore faults
+ * on unprotected pages completely, and the second complains to stderr
+ * but allows us to continue without losing.
+ */
+extern boolean ignore_memoryfaults_on_unprotected_pages;
+boolean ignore_memoryfaults_on_unprotected_pages = 0;
+
+extern boolean continue_after_memoryfault_on_unprotected_pages;
+boolean continue_after_memoryfault_on_unprotected_pages = 0;
 
 int
 gencgc_handle_wp_violation(void* fault_addr)
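Both new flags are plain C globals with external linkage, so they can be flipped at run time, for instance from a debugger or from code embedding the runtime. A minimal sketch, assuming a hypothetical debugging helper that is not part of this patch (boolean is the runtime's own typedef):

    extern boolean ignore_memoryfaults_on_unprotected_pages;
    extern boolean continue_after_memoryfault_on_unprotected_pages;

    /* Hypothetical helper: report bogus faults to stderr but keep
     * running instead of calling lose(). */
    static void
    enable_lenient_fault_handling(void)
    {
        ignore_memoryfaults_on_unprotected_pages = 0;
        continue_after_memoryfault_on_unprotected_pages = 1;
    }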
@@ -4273,17 +4288,39 @@ gencgc_handle_wp_violation(void* fault_addr)
             os_protect(page_address(page_index), GENCGC_CARD_BYTES, OS_VM_PROT_ALL);
             page_table[page_index].write_protected_cleared = 1;
             page_table[page_index].write_protected = 0;
-        } else {
+        } else if (!ignore_memoryfaults_on_unprotected_pages) {
             /* The only acceptable reason for this signal on a heap
              * access is that GENCGC write-protected the page.
              * However, if two CPUs hit a wp page near-simultaneously,
              * we had better not have the second one lose here if it
              * does this test after the first one has already set wp=0
              */
-            if(page_table[page_index].write_protected_cleared != 1)
-                lose("fault in heap page %d not marked as write-protected\nboxed_region.first_page: %d, boxed_region.last_page %d\n",
-                     page_index, boxed_region.first_page,
-                     boxed_region.last_page);
+            if(page_table[page_index].write_protected_cleared != 1) {
+                void lisp_backtrace(int frames);
+                lisp_backtrace(10);
+                fprintf(stderr,
+                        "Fault @ %p, page %"PAGE_INDEX_FMT" not marked as write-protected:\n"
+                        "  boxed_region.first_page: %"PAGE_INDEX_FMT","
+                        "  boxed_region.last_page %"PAGE_INDEX_FMT"\n"
+                        "  page.region_start_offset: %"OS_VM_SIZE_FMT"\n"
+                        "  page.bytes_used: %"PAGE_BYTES_FMT"\n"
+                        "  page.allocated: %d\n"
+                        "  page.write_protected: %d\n"
+                        "  page.write_protected_cleared: %d\n"
+                        "  page.generation: %d\n",
+                        fault_addr,
+                        page_index,
+                        boxed_region.first_page,
+                        boxed_region.last_page,
+                        page_table[page_index].region_start_offset,
+                        page_table[page_index].bytes_used,
+                        page_table[page_index].allocated,
+                        page_table[page_index].write_protected,
+                        page_table[page_index].write_protected_cleared,
+                        page_table[page_index].gen);
+                if (!continue_after_memoryfault_on_unprotected_pages)
+                    lose("Feh.\n");
+            }
         }
         ret = thread_mutex_unlock(&free_pages_lock);
         gc_assert(ret == 0);
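Putting the hunk above together, the handler's behaviour for a fault on a page that is not marked write-protected now depends on the two flags roughly as follows (a condensed restatement of the code above, not additional patch content):

    /* ignore_memoryfaults_on_unprotected_pages: when set, the branch
     * below is skipped entirely and the fault is reported as handled
     * without any diagnostics. */
    if (!ignore_memoryfaults_on_unprotected_pages) {
        if (page_table[page_index].write_protected_cleared != 1) {
            lisp_backtrace(10);   /* Lisp backtrace plus page state on stderr */
            /* continue_after_memoryfault_on_unprotected_pages: when set,
             * keep running after the diagnostics instead of losing. */
            if (!continue_after_memoryfault_on_unprotected_pages)
                lose("Feh.\n");
        }
    }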