X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86-64%2Fcall.lisp;h=4c4fdab84d4715e56d81f4d2ae3cd304dc16d911;hb=69e6aef5e6fb3bd682c7a2cbf774034d2ea58ee8;hp=58538fe9798ba75843b64180347b3ccb620888a6;hpb=c8617f57d0413beb2890e94dabe227cef9c5ddad;p=sbcl.git

diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp
index 58538fe..4c4fdab 100644
--- a/src/compiler/x86-64/call.lisp
+++ b/src/compiler/x86-64/call.lisp
@@ -15,7 +15,7 @@
 
 ;;; Return a wired TN describing the N'th full call argument passing
 ;;; location.
-(!def-vm-support-routine standard-arg-location (n)
+(defun standard-arg-location (n)
   (declare (type unsigned-byte n))
   (if (< n register-arg-count)
       (make-wired-tn *backend-t-primitive-type* descriptor-reg-sc-number
@@ -26,7 +26,7 @@
 ;;;
 ;;; Always wire the return PC location to the stack in its standard
 ;;; location.
-(!def-vm-support-routine make-return-pc-passing-location (standard)
+(defun make-return-pc-passing-location (standard)
   (declare (ignore standard))
   (make-wired-tn (primitive-type-or-lose 'system-area-pointer)
                  sap-stack-sc-number return-pc-save-offset))
@@ -38,7 +38,7 @@
 ;;; because we want to be able to assume it's always there. Besides,
 ;;; the x86 doesn't have enough registers to really make it profitable
 ;;; to pass it in a register.
-(!def-vm-support-routine make-old-fp-passing-location (standard)
+(defun make-old-fp-passing-location (standard)
   (declare (ignore standard))
   (make-wired-tn *fixnum-primitive-type* control-stack-sc-number
                  ocfp-save-offset))
@@ -49,12 +49,12 @@
 ;;;
 ;;; Without using a save-tn - which does not make much sense if it is
 ;;; wired to the stack?
-(!def-vm-support-routine make-old-fp-save-location (physenv)
+(defun make-old-fp-save-location (physenv)
   (physenv-debug-live-tn (make-wired-tn *fixnum-primitive-type*
                                         control-stack-sc-number
                                         ocfp-save-offset)
                          physenv))
-(!def-vm-support-routine make-return-pc-save-location (physenv)
+(defun make-return-pc-save-location (physenv)
   (physenv-debug-live-tn
    (make-wired-tn (primitive-type-or-lose 'system-area-pointer)
                   sap-stack-sc-number return-pc-save-offset)
@@ -63,23 +63,23 @@
 ;;; Make a TN for the standard argument count passing location. We only
 ;;; need to make the standard location, since a count is never passed when we
 ;;; are using non-standard conventions.
-(!def-vm-support-routine make-arg-count-location ()
+(defun make-arg-count-location ()
   (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rcx-offset))
 
 ;;; Make a TN to hold the number-stack frame pointer. This is allocated
 ;;; once per component, and is component-live.
-(!def-vm-support-routine make-nfp-tn ()
+(defun make-nfp-tn ()
   (make-restricted-tn *fixnum-primitive-type* ignore-me-sc-number))
 
-(!def-vm-support-routine make-stack-pointer-tn ()
+(defun make-stack-pointer-tn ()
   (make-normal-tn *fixnum-primitive-type*))
 
-(!def-vm-support-routine make-number-stack-pointer-tn ()
+(defun make-number-stack-pointer-tn ()
   (make-restricted-tn *fixnum-primitive-type* ignore-me-sc-number))
 
 ;;; Return a list of TNs that can be used to represent an unknown-values
 ;;; continuation within a function.
-(!def-vm-support-routine make-unknown-values-locations ()
+(defun make-unknown-values-locations ()
   (list (make-stack-pointer-tn)
         (make-normal-tn *fixnum-primitive-type*)))
 
@@ -87,7 +87,7 @@
 ;;; VM-dependent initialization of the IR2-COMPONENT structure. We
 ;;; push placeholder entries in the CONSTANTS to leave room for
 ;;; additional noise in the code object header.
-(!def-vm-support-routine select-component-format (component)
+(defun select-component-format (component)
   (declare (type component component))
   (dotimes (i code-constants-offset)
     (vector-push-extend nil
@@ -941,9 +941,7 @@
     (move rsi args)
     (move rax function)
     ;; And jump to the assembly routine.
-    (inst lea call-target
-          (make-ea :qword
-                   :disp (make-fixup 'tail-call-variable :assembly-routine)))
+    (inst mov call-target (make-fixup 'tail-call-variable :assembly-routine))
     (inst jmp call-target)))
 
 ;;;; unknown values return
@@ -1089,9 +1087,7 @@
         (emit-label not-single)))
     (move rsi vals)
     (move rcx nvals)
-    (inst lea return-asm
-          (make-ea :qword :disp (make-fixup 'return-multiple
-                                            :assembly-routine)))
+    (inst mov return-asm (make-fixup 'return-multiple :assembly-routine))
     (inst jmp return-asm)
     (trace-table-entry trace-table-normal)))
 
@@ -1136,12 +1132,18 @@
 
     ;; Allocate the space on the stack.
     ;; stack = rbp + sp->fp-offset - (max 3 frame-size) - (nargs - fixed)
-    (inst lea rsp-tn
+    ;; if we'd move SP backward, swap the meaning of rsp and source;
+    ;; otherwise, we'd be accessing values below SP, and that's no good
+    ;; if a signal interrupts this code sequence.  In that case, store
+    ;; the final value in rsp after the stack-stack memmove loop.
+    (inst lea (if (<= fixed (max 3 (sb-allocated-size 'stack)))
+                  rsp-tn
+                  source)
           (make-ea :qword :base rbp-tn
-                   :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
-                   :disp (* n-word-bytes
-                            (- (+ sp->fp-offset fixed)
-                               (max 3 (sb-allocated-size 'stack))))))
+                          :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
+                          :disp (* n-word-bytes
+                                   (- (+ sp->fp-offset fixed)
+                                      (max 3 (sb-allocated-size 'stack))))))
 
     ;; Now: nargs>=1 && nargs>fixed
 
@@ -1149,40 +1151,64 @@
     (inst mov rbx-tn rcx-tn)
 
     (cond ((< fixed register-arg-count)
+           ;; the code above only moves the final value of rsp in
+           ;; rsp directly if that condition is satisfied.  Currently,
+           ;; r-a-c is 3, so the aver is OK. If the calling convention
+           ;; ever changes, the logic above with LEA will have to be
+           ;; adjusted.
+           (aver (<= fixed (max 3 (sb-allocated-size 'stack))))
            ;; We must stop when we run out of stack args, not when we
            ;; run out of more args.
            ;; Number to copy = nargs-3
-           (inst sub rcx-tn (fixnumize register-arg-count))
+           (inst sub rbx-tn (fixnumize register-arg-count))
            ;; Everything of interest in registers.
            (inst jmp :be DO-REGS))
           (t
            ;; Number to copy = nargs-fixed
-           (inst sub rcx-tn (fixnumize fixed))))
+           (inst sub rbx-tn (fixnumize fixed))))
 
     ;; Initialize R8 to be the end of args.
-    (inst lea source (make-ea :qword :base rbp-tn
-                              :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
-                              :disp (* sp->fp-offset n-word-bytes)))
-
-    ;; We need to copy from downwards up to avoid overwriting some of
-    ;; the yet uncopied args. So we need to use R9 as the copy index
-    ;; and RCX as the loop counter, rather than using RCX for both.
-    (zeroize copy-index)
-
-    ;; We used to use REP MOVS here, but on modern x86 it performs
-    ;; much worse than an explicit loop for small blocks.
-    COPY-LOOP
-    (inst mov temp (make-ea :qword :base source :index copy-index))
-    (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp)
-    (inst add copy-index n-word-bytes)
-    (inst sub rcx-tn (fixnumize 1))
-    (inst jmp :nz COPY-LOOP)
+    ;; Swap with SP if necessary to mirror the previous condition
+    (inst lea (if (<= fixed (max 3 (sb-allocated-size 'stack)))
+                  source
+                  rsp-tn)
+          (make-ea :qword :base rbp-tn
+                          :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
+                          :disp (* sp->fp-offset n-word-bytes)))
 
+    ;; src: rbp + temp + sp->fp
+    ;; dst: rbp + temp + sp->fp + (fixed - (max 3 [stack-size]))
+    (let ((delta (- fixed (max 3 (sb-allocated-size 'stack))))
+          (loop (gen-label))
+          (fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits))))
+      (cond ((zerop delta)) ; no-op move
+            ((minusp delta)
+             ;; dst is lower than src, copy forward
+             (zeroize copy-index)
+             ;; We used to use REP MOVS here, but on modern x86 it performs
+             ;; much worse than an explicit loop for small blocks.
+
+             (emit-label loop)
+             (inst mov temp (make-ea :qword :base source :index copy-index))
+             (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp)
+             (inst add copy-index n-word-bytes)
+             (inst sub rbx-tn (fixnumize 1))
+             (inst jmp :nz loop))
+            ((plusp delta)
+             ;; dst is higher than src; copy backward
+             (emit-label loop)
+             (inst sub rbx-tn (fixnumize 1))
+             (inst mov temp (make-ea :qword :base rsp-tn
+                                     :index rbx-tn :scale fixnum->word))
+             (inst mov (make-ea :qword :base source
+                                :index rbx-tn :scale fixnum->word)
+                   temp)
+             (inst jmp :nz loop)
+             ;; done with the stack--stack copy. Reset RSP to its final
+             ;; value
+             (inst mov rsp-tn source))))
     DO-REGS
 
-    ;; Restore RCX
-    (inst mov rcx-tn rbx-tn)
-
     ;; Here: nargs>=1 && nargs>fixed
     (when (< fixed register-arg-count)
       ;; Now we have to deposit any more args that showed up in
@@ -1237,7 +1263,7 @@
                                 :disp n-word-bytes))))
 
 (define-vop (more-arg)
-    (:translate sb!c::%more-arg)
+  (:translate sb!c::%more-arg)
   (:policy :fast-safe)
   (:args (object :scs (descriptor-reg) :to (:result 1))
          (index :scs (any-reg) :to (:result 1) :target value))
@@ -1276,8 +1302,6 @@
       (inst lea dst (make-ea :qword :index rcx :scale (ash 2 (- word-shift n-fixnum-tag-bits))))
       (maybe-pseudo-atomic stack-allocate-p
        (allocation dst dst node stack-allocate-p list-pointer-lowtag)
-       ;; Set decrement mode (successive args at lower addresses)
-       (inst std)
        ;; Set up the result.
        (move result dst)
        ;; Jump into the middle of the loop, 'cause that's where we want
@@ -1297,8 +1321,7 @@
        (inst sub rcx (fixnumize 1))
        (inst jmp :nz loop)
        ;; NIL out the last cons.
-       (storew nil-value dst 1 list-pointer-lowtag)
-       (inst cld))
+       (storew nil-value dst 1 list-pointer-lowtag))
       (emit-label done))))
 
 ;;; Return the location and size of the &MORE arg glob created by