Micro-optimize calling alloc_tramp on x86_64.

[sbcl.git] / src / compiler / x86-64 / call.lisp
diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp

index e8fdb8a..645f745 100644 (file)
--- a/src/compiler/x86-64/call.lisp
+++ b/src/compiler/x86-64/call.lisp
@@ -15,7 +15,7 @@
  
  ;;; Return a wired TN describing the N'th full call argument passing
  ;;; location.
-(!def-vm-support-routine standard-arg-location (n)
+(defun standard-arg-location (n)
    (declare (type unsigned-byte n))
    (if (< n register-arg-count)
        (make-wired-tn *backend-t-primitive-type* descriptor-reg-sc-number
@@ -26,7 +26,7 @@
  ;;;
  ;;; Always wire the return PC location to the stack in its standard
  ;;; location.
-(!def-vm-support-routine make-return-pc-passing-location (standard)
+(defun make-return-pc-passing-location (standard)
    (declare (ignore standard))
    (make-wired-tn (primitive-type-or-lose 'system-area-pointer)
                   sap-stack-sc-number return-pc-save-offset))
@@ -38,7 +38,7 @@
  ;;; because we want to be able to assume it's always there. Besides,
  ;;; the x86 doesn't have enough registers to really make it profitable
  ;;; to pass it in a register.
-(!def-vm-support-routine make-old-fp-passing-location (standard)
+(defun make-old-fp-passing-location (standard)
    (declare (ignore standard))
    (make-wired-tn *fixnum-primitive-type* control-stack-sc-number
                   ocfp-save-offset))
@@ -49,12 +49,12 @@
  ;;;
  ;;; Without using a save-tn - which does not make much sense if it is
  ;;; wired to the stack?
-(!def-vm-support-routine make-old-fp-save-location (physenv)
+(defun make-old-fp-save-location (physenv)
    (physenv-debug-live-tn (make-wired-tn *fixnum-primitive-type*
                                          control-stack-sc-number
                                          ocfp-save-offset)
                           physenv))
-(!def-vm-support-routine make-return-pc-save-location (physenv)
+(defun make-return-pc-save-location (physenv)
    (physenv-debug-live-tn
     (make-wired-tn (primitive-type-or-lose 'system-area-pointer)
                    sap-stack-sc-number return-pc-save-offset)
@@ -63,23 +63,23 @@
  ;;; Make a TN for the standard argument count passing location. We only
  ;;; need to make the standard location, since a count is never passed when we
  ;;; are using non-standard conventions.
-(!def-vm-support-routine make-arg-count-location ()
+(defun make-arg-count-location ()
    (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rcx-offset))
  
  ;;; Make a TN to hold the number-stack frame pointer. This is allocated
  ;;; once per component, and is component-live.
-(!def-vm-support-routine make-nfp-tn ()
+(defun make-nfp-tn ()
    (make-restricted-tn *fixnum-primitive-type* ignore-me-sc-number))
  
-(!def-vm-support-routine make-stack-pointer-tn ()
+(defun make-stack-pointer-tn ()
    (make-normal-tn *fixnum-primitive-type*))
  
-(!def-vm-support-routine make-number-stack-pointer-tn ()
+(defun make-number-stack-pointer-tn ()
    (make-restricted-tn *fixnum-primitive-type* ignore-me-sc-number))
  
  ;;; Return a list of TNs that can be used to represent an unknown-values
  ;;; continuation within a function.
-(!def-vm-support-routine make-unknown-values-locations ()
+(defun make-unknown-values-locations ()
    (list (make-stack-pointer-tn)
          (make-normal-tn *fixnum-primitive-type*)))
  
@@ -87,19 +87,9 @@
  ;;; VM-dependent initialization of the IR2-COMPONENT structure. We
  ;;; push placeholder entries in the CONSTANTS to leave room for
  ;;; additional noise in the code object header.
-(!def-vm-support-routine select-component-format (component)
+(defun select-component-format (component)
    (declare (type component component))
-  ;; The 1+ here is because for the x86 the first constant is a
-  ;; pointer to a list of fixups, or NIL if the code object has none.
-  ;; (If I understand correctly, the fixups are needed at GC copy
-  ;; time because the X86 code isn't relocatable.)
-  ;;
-  ;; KLUDGE: It'd be cleaner to have the fixups entry be a named
-  ;; element of the CODE (aka component) primitive object. However,
-  ;; it's currently a large, tricky, error-prone chore to change
-  ;; the layout of any primitive object, so for the foreseeable future
-  ;; we'll just live with this ugliness. -- WHN 2002-01-02
-  (dotimes (i (1+ code-constants-offset))
+  (dotimes (i code-constants-offset)
      (vector-push-extend nil
                          (ir2-component-constants (component-info component))))
    (values))
@@ -575,7 +565,7 @@
      (emit-label trampoline-label)
      (popw rbp-tn (frame-word-offset return-pc-save-offset)))
    (when alignp
-    (emit-alignment n-lowtag-bits #x90))
+    (emit-alignment n-lowtag-bits :long-nop))
    (emit-label start-label))
  
  ;;; Non-TR local call for a fixed number of values passed according to
@@ -951,9 +941,7 @@
      (move rsi args)
      (move rax function)
      ;; And jump to the assembly routine.
-    (inst lea call-target
-          (make-ea :qword
-                   :disp (make-fixup 'tail-call-variable :assembly-routine)))
+    (inst mov call-target (make-fixup 'tail-call-variable :assembly-routine))
      (inst jmp call-target)))
  \f
  ;;;; unknown values return
@@ -1099,9 +1087,7 @@
          (emit-label not-single)))
      (move rsi vals)
      (move rcx nvals)
-    (inst lea return-asm
-          (make-ea :qword :disp (make-fixup 'return-multiple
-                                            :assembly-routine)))
+    (inst mov return-asm (make-fixup 'return-multiple :assembly-routine))
      (inst jmp return-asm)
      (trace-table-entry trace-table-normal)))
  \f
@@ -1162,12 +1148,12 @@
             ;; We must stop when we run out of stack args, not when we
             ;; run out of more args.
             ;; Number to copy = nargs-3
-           (inst sub rcx-tn (fixnumize register-arg-count))
+           (inst sub rbx-tn (fixnumize register-arg-count))
             ;; Everything of interest in registers.
             (inst jmp :be DO-REGS))
            (t
             ;; Number to copy = nargs-fixed
-           (inst sub rcx-tn (fixnumize fixed))))
+           (inst sub rbx-tn (fixnumize fixed))))
  
      ;; Initialize R8 to be the end of args.
      (inst lea source (make-ea :qword :base rbp-tn
@@ -1176,7 +1162,7 @@
  
      ;; We need to copy from downwards up to avoid overwriting some of
      ;; the yet uncopied args. So we need to use R9 as the copy index
-    ;; and RCX as the loop counter, rather than using RCX for both.
+    ;; and RBX as the loop counter, rather than using RBX for both.
      (zeroize copy-index)
  
      ;; We used to use REP MOVS here, but on modern x86 it performs
@@ -1185,14 +1171,11 @@
      (inst mov temp (make-ea :qword :base source :index copy-index))
      (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp)
      (inst add copy-index n-word-bytes)
-    (inst sub rcx-tn (fixnumize 1))
+    (inst sub rbx-tn (fixnumize 1))
      (inst jmp :nz COPY-LOOP)
  
      DO-REGS
  
-    ;; Restore RCX
-    (inst mov rcx-tn rbx-tn)
-
      ;; Here: nargs>=1 && nargs>fixed
      (when (< fixed register-arg-count)
        ;; Now we have to deposit any more args that showed up in
@@ -1247,7 +1230,7 @@
                                  :disp n-word-bytes))))
  
  (define-vop (more-arg)
-    (:translate sb!c::%more-arg)
+  (:translate sb!c::%more-arg)
    (:policy :fast-safe)
    (:args (object :scs (descriptor-reg) :to (:result 1))
           (index :scs (any-reg) :to (:result 1) :target value))
@@ -1286,8 +1269,6 @@
        (inst lea dst (make-ea :qword :index rcx :scale (ash 2 (- word-shift n-fixnum-tag-bits))))
        (maybe-pseudo-atomic stack-allocate-p
         (allocation dst dst node stack-allocate-p list-pointer-lowtag)
-       ;; Set decrement mode (successive args at lower addresses)
-       (inst std)
         ;; Set up the result.
         (move result dst)
         ;; Jump into the middle of the loop, 'cause that's where we want
@@ -1307,8 +1288,7 @@
         (inst sub rcx (fixnumize 1))
         (inst jmp :nz loop)
         ;; NIL out the last cons.
-       (storew nil-value dst 1 list-pointer-lowtag)
-       (inst cld))
+       (storew nil-value dst 1 list-pointer-lowtag))
        (emit-label done))))
  
  ;;; Return the location and size of the &MORE arg glob created by