1.0.41.40: ppc: Shorten the gencgc allocation sequence.

author Alastair Bridgewater <lisphacker@users.sourceforge.net>

Sun, 8 Aug 2010 01:14:04 +0000 (01:14 +0000)

committer Alastair Bridgewater <lisphacker@users.sourceforge.net>

Sun, 8 Aug 2010 01:14:04 +0000 (01:14 +0000)
author Alastair Bridgewater <lisphacker@users.sourceforge.net>
Sun, 8 Aug 2010 01:14:04 +0000 (01:14 +0000)
committer Alastair Bridgewater <lisphacker@users.sourceforge.net>
Sun, 8 Aug 2010 01:14:04 +0000 (01:14 +0000)
diff --git a/src/compiler/ppc/macros.lisp b/src/compiler/ppc/macros.lisp

index 7921078..c52d4d5 100644 (file)
--- a/src/compiler/ppc/macros.lisp
+++ b/src/compiler/ppc/macros.lisp
@@ -201,67 +201,68 @@
               (inst addi alloc-tn alloc-tn ,alloc-size)
               (inst add alloc-tn alloc-tn ,alloc-size))))
      #!+gencgc
-    (let ((fix-addr (gensym))
-          (inline-alloc (gensym)))
-      `(let ((,fix-addr (gen-label))
-             (,inline-alloc (gen-label)))
-         ;; Make temp-tn be the size
-         (cond ((numberp ,size)
-                (inst lr ,temp-tn ,size))
-               (t
-                (move ,temp-tn ,size)))
-
-         #!-sb-thread
-         (inst lr ,flag-tn (make-fixup "boxed_region" :foreign))
-         #!-sb-thread
-         (inst lwz ,result-tn ,flag-tn 0)
-         #!+sb-thread
-         (inst lwz ,result-tn thread-base-tn (* thread-alloc-region-slot
-                                                n-word-bytes))
-
-         ;; we can optimize this to only use one fixup here, once we get
-         ;; it working
-         ;; (inst lr ,flag-tn (make-fixup "boxed_region" :foreign 4))
-         ;; (inst lwz ,flag-tn ,flag-tn 0)
-         #!-sb-thread
-         (inst lwz ,flag-tn ,flag-tn 4)
-         #!+sb-thread
-         (inst lwz ,flag-tn thread-base-tn (* (1+ thread-alloc-region-slot)
+    `(progn
+       ;; Make temp-tn be the size
+       (cond ((numberp ,size)
+              (inst lr ,temp-tn ,size))
+             (t
+              (move ,temp-tn ,size)))
+
+       #!-sb-thread
+       (inst lr ,flag-tn (make-fixup "boxed_region" :foreign))
+       #!-sb-thread
+       (inst lwz ,result-tn ,flag-tn 0)
+       #!+sb-thread
+       (inst lwz ,result-tn thread-base-tn (* thread-alloc-region-slot
                                                n-word-bytes))
  
-         (without-scheduling ()
-           ;; CAUTION: The C code depends on the exact order of
-           ;; instructions here.  In particular, three instructions before
-           ;; the TW instruction must be an ADD or ADDI instruction, so it
-           ;; can figure out the size of the desired allocation.
-           ;; Now make result-tn point at the end of the object, to
-           ;; figure out if we overflowed the current region.
-           (inst add ,result-tn ,result-tn ,temp-tn)
-           ;; result-tn points to the new end of the region.  Did we go past
-           ;; the actual end of the region?  If so, we need a full alloc.
-           ;; The C code depends on this exact form of instruction.  If
-           ;; either changes, you have to change the other appropriately!
-           (inst cmpw ,result-tn ,flag-tn)
-
-           (inst bng ,inline-alloc)
-           (inst tw :lge ,result-tn ,flag-tn))
-         (inst b ,fix-addr)
-
-         (emit-label ,inline-alloc)
+       ;; we can optimize this to only use one fixup here, once we get
+       ;; it working
+       ;; (inst lr ,flag-tn (make-fixup "boxed_region" :foreign 4))
+       ;; (inst lwz ,flag-tn ,flag-tn 0)
+       #!-sb-thread
+       (inst lwz ,flag-tn ,flag-tn 4)
+       #!+sb-thread
+       (inst lwz ,flag-tn thread-base-tn (* (1+ thread-alloc-region-slot)
+                                            n-word-bytes))
+
+       (without-scheduling ()
+         ;; CAUTION: The C code depends on the exact order of
+         ;; instructions here.  In particular, the instruction just
+         ;; before the TW must be an ADD or ADDI, so the C code can
+         ;; figure out the size of the desired allocation, and storing
+         ;; the new free pointer back to the allocation region must
+         ;; take three instructions (one on threaded targets).
+
+         ;; Now make result-tn point at the end of the object, to
+         ;; figure out if we overflowed the current region.
+         (inst add ,result-tn ,result-tn ,temp-tn)
+         ;; result-tn points to the new end of the region.  Did we go past
+         ;; the actual end of the region?  If so, we need a full alloc.
+         ;; The C code depends on this exact form of instruction.  If
+         ;; either changes, you have to change the other appropriately!
+         (inst tw :lge ,result-tn ,flag-tn)
+
+         ;; The C code depends on this instruction sequence taking up
+         ;; #!-sb-thread three #!+sb-thread one machine instruction.
+         ;; The lr of a fixup counts as two instructions.
           #!-sb-thread
           (inst lr ,flag-tn (make-fixup "boxed_region" :foreign))
           #!-sb-thread
           (inst stw ,result-tn ,flag-tn 0)
           #!+sb-thread
           (inst stw ,result-tn thread-base-tn (* thread-alloc-region-slot
-                                                n-word-bytes))
-
-         (emit-label ,fix-addr)
-         ;; At this point, result-tn points at the end of the object.
-         ;; Adjust to point to the beginning.
-         (inst sub ,result-tn ,result-tn ,temp-tn)
-         ;; Set the lowtag appropriately
-         (inst ori ,result-tn ,result-tn ,lowtag))))
+                                                n-word-bytes)))
+
+       ;; Should the allocation trap above have fired, the runtime
+       ;; arranges for execution to resume here, just after where we
+       ;; would have updated the free pointer in the alloc region.
+
+       ;; At this point, result-tn points at the end of the object.
+       ;; Adjust to point to the beginning.
+       (inst sub ,result-tn ,result-tn ,temp-tn)
+       ;; Set the lowtag appropriately
+       (inst ori ,result-tn ,result-tn ,lowtag)))
  
  (defmacro with-fixed-allocation ((result-tn flag-tn temp-tn type-code size
                                              &key (lowtag other-pointer-lowtag))
diff --git a/src/runtime/ppc-arch.c b/src/runtime/ppc-arch.c

index a4df6f5..281de3c 100644 (file)
--- a/src/runtime/ppc-arch.c
+++ b/src/runtime/ppc-arch.c
@@ -300,15 +300,15 @@ allocation_trap_p(os_context_t * context)
          && (4 == ((inst >> 1) & 0x3ff))) {
          /*
           * We got the instruction.  Now, look back to make sure it was
-         * proceeded by what we expected.  2 instructions back should be
-         * an ADD or ADDI instruction.
+         * preceded by what we expected.  The previous instruction
+         * should be an ADD or ADDI instruction.
           */
          unsigned int add_inst;
  
-        add_inst = pc[-3];
+        add_inst = pc[-1];
  #if 0
          fprintf(stderr, "   add inst at %p:  inst = 0x%08x\n",
-                pc - 3, add_inst);
+                pc - 1, add_inst);
  #endif
          opcode = add_inst >> 26;
          if ((opcode == 31) && (266 == ((add_inst >> 1) & 0x1ff))) {
@@ -389,7 +389,7 @@ handle_allocation_trap(os_context_t * context)
       * is the size of the allocation.  Get it and call alloc to allocate
       * new space.
       */
-    inst = pc[-3];
+    inst = pc[-1];
      opcode = inst >> 26;
  #if 0
      fprintf(stderr, "  add inst  = 0x%08x, opcode = %d\n", inst, opcode);
@@ -491,6 +491,15 @@ handle_allocation_trap(os_context_t * context)
          undo_fake_foreign_function_call(context);
      }
  
+    /* Skip the allocation trap and the write of the updated free
+     * pointer back to the allocation region.  This is two
+     * instructions when threading is enabled and four instructions
+     * otherwise. */
+#ifdef LISP_FEATURE_SB_THREAD
+    (*os_context_pc_addr(context)) = pc + 2;
+#else
+    (*os_context_pc_addr(context)) = pc + 4;
+#endif
  
  }
  #endif
@@ -550,7 +559,6 @@ sigtrap_handler(int signal, siginfo_t *siginfo, os_context_t *context)
      /* Is this an allocation trap? */
      if (allocation_trap_p(context)) {
          handle_allocation_trap(context);
-        arch_skip_instruction(context);
          return;
      }
  #endif
diff --git a/version.lisp-expr b/version.lisp-expr

index c7105bd..a57a651 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
  ;;; checkins which aren't released. (And occasionally for internal
  ;;; versions, especially for internal versions off the main CVS
  ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.41.39"
+"1.0.41.40"
author	Alastair Bridgewater <lisphacker@users.sourceforge.net>
	Sun, 8 Aug 2010 01:14:04 +0000 (01:14 +0000)
committer	Alastair Bridgewater <lisphacker@users.sourceforge.net>
	Sun, 8 Aug 2010 01:14:04 +0000 (01:14 +0000)
src/compiler/ppc/macros.lisp		patch \| blob \| history
src/runtime/ppc-arch.c		patch \| blob \| history
version.lisp-expr		patch \| blob \| history