;; Allocate the space on the stack.
;; stack = rbp + sp->fp-offset - (max 3 frame-size) - (nargs - fixed)
- (inst lea rsp-tn
+ ;; if we'd move SP backward, swap the meaning of rsp and source;
+ ;; otherwise, we'd be accessing values below SP, and that's no good
+ ;; if a signal interrupts this code sequence. In that case, store
+ ;; the final value in rsp after the stack-stack memmove loop.
+ (inst lea (if (<= fixed (max 3 (sb-allocated-size 'stack)))
+ rsp-tn
+ source)
(make-ea :qword :base rbp-tn
- :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
- :disp (* n-word-bytes
- (- (+ sp->fp-offset fixed)
- (max 3 (sb-allocated-size 'stack))))))
+ :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
+ :disp (* n-word-bytes
+ (- (+ sp->fp-offset fixed)
+ (max 3 (sb-allocated-size 'stack))))))
;; Now: nargs>=1 && nargs>fixed
(inst mov rbx-tn rcx-tn)
(cond ((< fixed register-arg-count)
+ ;; the code above only moves the final value of rsp in
+ ;; rsp directly if that condition is satisfied. Currently,
+ ;; r-a-c is 3, so the aver is OK. If the calling convention
+ ;; ever changes, the logic above with LEA will have to be
+ ;; adjusted.
+ (aver (<= fixed (max 3 (sb-allocated-size 'stack))))
;; We must stop when we run out of stack args, not when we
;; run out of more args.
;; Number to copy = nargs-3
- (inst sub rcx-tn (fixnumize register-arg-count))
+ (inst sub rbx-tn (fixnumize register-arg-count))
;; Everything of interest in registers.
(inst jmp :be DO-REGS))
(t
;; Number to copy = nargs-fixed
- (inst sub rcx-tn (fixnumize fixed))))
+ (inst sub rbx-tn (fixnumize fixed))))
;; Initialize R8 to be the end of args.
- (inst lea source (make-ea :qword :base rbp-tn
- :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
- :disp (* sp->fp-offset n-word-bytes)))
-
- ;; We need to copy from downwards up to avoid overwriting some of
- ;; the yet uncopied args. So we need to use R9 as the copy index
- ;; and RCX as the loop counter, rather than using RCX for both.
- (zeroize copy-index)
-
- ;; We used to use REP MOVS here, but on modern x86 it performs
- ;; much worse than an explicit loop for small blocks.
- COPY-LOOP
- (inst mov temp (make-ea :qword :base source :index copy-index))
- (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp)
- (inst add copy-index n-word-bytes)
- (inst sub rcx-tn (fixnumize 1))
- (inst jmp :nz COPY-LOOP)
+ ;; Swap with SP if necessary to mirror the previous condition
+ (inst lea (if (<= fixed (max 3 (sb-allocated-size 'stack)))
+ source
+ rsp-tn)
+ (make-ea :qword :base rbp-tn
+ :index temp :scale (ash 1 (- word-shift n-fixnum-tag-bits))
+ :disp (* sp->fp-offset n-word-bytes)))
+ ;; src: rbp + temp + sp->fp
+ ;; dst: rbp + temp + sp->fp + (fixed - (max 3 [stack-size]))
+ (let ((delta (- fixed (max 3 (sb-allocated-size 'stack))))
+ (loop (gen-label))
+ (fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits))))
+ (cond ((zerop delta)) ; no-op move
+ ((minusp delta)
+ ;; dst is lower than src, copy forward
+ (zeroize copy-index)
+ ;; We used to use REP MOVS here, but on modern x86 it performs
+ ;; much worse than an explicit loop for small blocks.
+
+ (emit-label loop)
+ (inst mov temp (make-ea :qword :base source :index copy-index))
+ (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp)
+ (inst add copy-index n-word-bytes)
+ (inst sub rbx-tn (fixnumize 1))
+ (inst jmp :nz loop))
+ ((plusp delta)
+ ;; dst is higher than src; copy backward
+ (emit-label loop)
+ (inst sub rbx-tn (fixnumize 1))
+ (inst mov temp (make-ea :qword :base rsp-tn
+ :index rbx-tn :scale fixnum->word))
+ (inst mov (make-ea :qword :base source
+ :index rbx-tn :scale fixnum->word)
+ temp)
+ (inst jmp :nz loop)
+ ;; done with the stack--stack copy. Reset RSP to its final
+ ;; value
+ (inst mov rsp-tn source))))
DO-REGS
- ;; Restore RCX
- (inst mov rcx-tn rbx-tn)
-
;; Here: nargs>=1 && nargs>fixed
(when (< fixed register-arg-count)
;; Now we have to deposit any more args that showed up in
:disp n-word-bytes))))
(define-vop (more-arg)
- (:translate sb!c::%more-arg)
+ (:translate sb!c::%more-arg)
(:policy :fast-safe)
(:args (object :scs (descriptor-reg) :to (:result 1))
(index :scs (any-reg) :to (:result 1) :target value))
(inst lea dst (make-ea :qword :index rcx :scale (ash 2 (- word-shift n-fixnum-tag-bits))))
(maybe-pseudo-atomic stack-allocate-p
(allocation dst dst node stack-allocate-p list-pointer-lowtag)
- ;; Set decrement mode (successive args at lower addresses)
- (inst std)
;; Set up the result.
(move result dst)
;; Jump into the middle of the loop, 'cause that's where we want
(inst sub rcx (fixnumize 1))
(inst jmp :nz loop)
;; NIL out the last cons.
- (storew nil-value dst 1 list-pointer-lowtag)
- (inst cld))
+ (storew nil-value dst 1 list-pointer-lowtag))
(emit-label done))))
;;; Return the location and size of the &MORE arg glob created by