0.9.6.38:

author Gabor Melis <mega@hotpop.com>

Sat, 12 Nov 2005 19:50:48 +0000 (19:50 +0000)

committer Gabor Melis <mega@hotpop.com>

Sat, 12 Nov 2005 19:50:48 +0000 (19:50 +0000)
author Gabor Melis <mega@hotpop.com>
Sat, 12 Nov 2005 19:50:48 +0000 (19:50 +0000)
committer Gabor Melis <mega@hotpop.com>
Sat, 12 Nov 2005 19:50:48 +0000 (19:50 +0000)
diff --git a/src/compiler/x86/macros.lisp b/src/compiler/x86/macros.lisp

index e61732b..0b8f234 100644 (file)
--- a/src/compiler/x86/macros.lisp
+++ b/src/compiler/x86/macros.lisp
@@ -125,19 +125,17 @@
  \f
  ;;;; allocation helpers
  
-;;; All allocation is done by calls to assembler routines that
-;;; eventually invoke the C alloc() function.  Once upon a time
-;;; (before threads) allocation within an alloc_region could also be
-;;; done inline, with the aid of two C symbols storing the current
-;;; allocation region boundaries; however, C symbols are global.
+;;; Allocation within alloc_region (which is thread local) can be done
+;;; inline.  If the alloc_region overflows, allocation is done by
+;;; calling the C alloc() function instead.
  
  ;;; C calls for allocation don't /seem/ to make an awful lot of
-;;; difference to speed.  Guessing from historical context, it looks
-;;; like inline allocation was introduced before pseudo-atomic, at
-;;; which time all calls to alloc() would have needed a syscall to
-;;; mask signals for the duration.  Now we have pseudoatomic there's
-;;; no need for that overhead.  Still, inline alloc would be a neat
-;;; addition someday (except see below).
+;;; difference to speed. On pure consing, inline allocation is
+;;; about a 25% gain. Guessing from historical context, it looks like
+;;; inline allocation was introduced before pseudo-atomic, at which
+;;; time all calls to alloc() would have needed a syscall to mask
+;;; signals for the duration.  Now that we have pseudo-atomic there's
+;;; no need for that overhead.
  
  (defun allocation-dynamic-extent (alloc-tn size)
    (inst sub esp-tn size)
@@ -175,6 +173,7 @@
  
  (defun allocation-inline (alloc-tn size)
    (let ((ok (gen-label))
+        (done (gen-label))
          (free-pointer
           (make-ea :dword :disp
                    #!+sb-thread (* n-word-bytes thread-alloc-region-slot)
@@ -191,7 +190,7 @@
      (inst add alloc-tn free-pointer)
      #!+sb-thread (inst fs-segment-prefix)
      (inst cmp alloc-tn end-addr)
-    (inst jmp :be OK)
+    (inst jmp :be ok)
      (let ((dst (ecase (tn-offset alloc-tn)
                   (#.eax-offset "alloc_overflow_eax")
                   (#.ecx-offset "alloc_overflow_ecx")
@@ -200,9 +199,23 @@
                   (#.esi-offset "alloc_overflow_esi")
                   (#.edi-offset "alloc_overflow_edi"))))
        (inst call (make-fixup dst :foreign)))
+    (inst jmp-short done)
      (emit-label ok)
-    #!+sb-thread (inst fs-segment-prefix)
-    (inst xchg free-pointer alloc-tn))
+    ;; Swap ALLOC-TN and FREE-POINTER
+    (cond ((and (tn-p size) (location= alloc-tn size))
+           ;; XCHG is extremely slow, use the xor swap trick
+           #!+sb-thread (inst fs-segment-prefix)
+           (inst xor alloc-tn free-pointer)
+           #!+sb-thread (inst fs-segment-prefix)
+           (inst xor free-pointer alloc-tn)
+           #!+sb-thread (inst fs-segment-prefix)
+           (inst xor alloc-tn free-pointer))
+          (t
+           ;; It's easier if SIZE is still available.
+           #!+sb-thread (inst fs-segment-prefix)
+           (inst mov free-pointer alloc-tn)
+           (inst sub alloc-tn size)))
+    (emit-label done))
    (values))
  
  
@@ -219,13 +232,7 @@
  (defun allocation (alloc-tn size &optional inline dynamic-extent)
    (cond
      (dynamic-extent (allocation-dynamic-extent alloc-tn size))
-    ;; FIXME: for reasons unknown, inline allocation is a speed win on
-    ;; non-P4s, and a speed loss on P4s (and probably other such
-    ;; high-spec high-cache machines).  :INLINE-ALLOCATION-IS-GOOD is
-    ;; a bit of a KLUDGE, really.  -- CSR, 2004-08-05 (following
-    ;; observations made by ASF and Juho Snellman)
-    ((and (member :inline-allocation-is-good *backend-subfeatures*)
-          (or (null inline) (policy inline (>= speed space))))
+    ((or (null inline) (policy inline (>= speed space)))
       (allocation-inline alloc-tn size))
      (t (allocation-notinline alloc-tn size)))
    (values))
diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S

index c16b3ef..ad9e089 100644 (file)
--- a/src/runtime/x86-assem.S
+++ b/src/runtime/x86-assem.S
@@ -669,10 +669,8 @@ GNAME(alloc_16_to_edi):
  
  #ifdef LISP_FEATURE_SB_THREAD
  #define START_REGION %fs:THREAD_ALLOC_REGION_OFFSET
-#define DISPLACEMENT $7
  #else
  #define START_REGION boxed_region
-#define DISPLACEMENT $6
  #endif
                 
  /* This routine handles an overflow with eax=crfp+size. So the
@@ -690,7 +688,6 @@ GNAME(alloc_overflow_eax):
          addl    $4,%esp # pop the size arg.
          popl    %edx    # Restore edx.
          popl    %ecx    # Restore ecx.
-       addl    DISPLACEMENT,(%esp) # Adjust the return address to skip the next inst.
          ret
          .size    GNAME(alloc_overflow_eax),.-GNAME(alloc_overflow_eax)
  
@@ -708,7 +705,6 @@ GNAME(alloc_overflow_ecx):
          movl    %eax,%ecx       # setup the destination.
          popl    %edx    # Restore edx.
          popl    %eax    # Restore eax.
-       addl    DISPLACEMENT,(%esp) # Adjust the return address to skip the next inst.
          ret
          .size    GNAME(alloc_overflow_ecx),.-GNAME(alloc_overflow_ecx)
  
@@ -726,7 +722,6 @@ GNAME(alloc_overflow_edx):
          movl    %eax,%edx       # setup the destination.
          popl    %ecx    # Restore ecx.
          popl    %eax    # Restore eax.
-       addl    DISPLACEMENT,(%esp) # Adjust the return address to skip the next inst.
          ret
          .size    GNAME(alloc_overflow_edx),.-GNAME(alloc_overflow_edx)
  
@@ -748,7 +743,6 @@ GNAME(alloc_overflow_ebx):
          popl    %edx    # Restore edx.
          popl    %ecx    # Restore ecx.
          popl    %eax    # Restore eax.
-       addl    DISPLACEMENT,(%esp) # Adjust the return address to skip the next inst.
          ret
          .size    GNAME(alloc_overflow_ebx),.-GNAME(alloc_overflow_ebx)
  
@@ -770,7 +764,6 @@ GNAME(alloc_overflow_esi):
          popl    %edx    # Restore edx.
          popl    %ecx    # Restore ecx.
          popl    %eax    # Restore eax.
-       addl    DISPLACEMENT,(%esp) # Adjust the return address to skip the next inst.
          ret
          .size    GNAME(alloc_overflow_esi),.-GNAME(alloc_overflow_esi)
  
@@ -790,7 +783,6 @@ GNAME(alloc_overflow_edi):
          popl    %edx    # Restore edx.
          popl    %ecx    # Restore ecx.
          popl    %eax    # Restore eax.
-       addl    DISPLACEMENT,(%esp) # Adjust the return address to skip the next inst.
          ret
          .size    GNAME(alloc_overflow_edi),.-GNAME(alloc_overflow_edi)
  
diff --git a/version.lisp-expr b/version.lisp-expr

index cdd8eaf..f2961b1 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
  ;;; checkins which aren't released. (And occasionally for internal
  ;;; versions, especially for internal versions off the main CVS
  ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.6.37"
+"0.9.6.38"
author	Gabor Melis <mega@hotpop.com>
	Sat, 12 Nov 2005 19:50:48 +0000 (19:50 +0000)
committer	Gabor Melis <mega@hotpop.com>
	Sat, 12 Nov 2005 19:50:48 +0000 (19:50 +0000)
src/compiler/x86/macros.lisp		patch \| blob \| history
src/runtime/x86-assem.S		patch \| blob \| history
version.lisp-expr		patch \| blob \| history