+ ;; Emit the stack-to-stack copy loop.  src: rbp + temp + sp->fp
+ ;;                                     dst: rbp + temp + sp->fp + (fixed - (max 3 [stack-size]))
+ (let ((delta (- fixed (max 3 (sb-allocated-size 'stack)))) ; dst - src per the formulas above; its sign selects which loop (if any) is emitted
+ (loop (gen-label)) ; a single label suffices: at most one of the two loops below is emitted
+ (fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits)))) ; EA scale applied to RBX-TN below; presumably converts a fixnum-tagged count to a byte offset -- confirm
+ (cond ((zerop delta)) ; no-op move: src and dst coincide, so no instructions are emitted
+ ((minusp delta)
+ ;; dst is lower than src, copy forward: words are read via SOURCE and written via RSP-TN
+ (zeroize copy-index) ; running offset into both areas, starts at 0
+ ;; We used to use REP MOVS here, but on modern x86 it performs
+ ;; much worse than an explicit loop for small blocks.
+ ;; NOTE(review): the loop body runs before the count is tested, so RBX-TN is assumed nonzero on entry -- confirm against the setup code (not visible here).
+ (emit-label loop)
+ (inst mov temp (make-ea :qword :base source :index copy-index)) ; load one word from [source + copy-index]
+ (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp) ; store it at [rsp + copy-index]
+ (inst add copy-index n-word-bytes) ; step the offset to the next word
+ (inst sub rbx-tn (fixnumize 1)) ; one fewer word remaining; RBX-TN is stepped by (fixnumize 1)
+ (inst jmp :nz loop)) ; repeat until the count reaches zero
+ ((plusp delta)
+ ;; dst is higher than src; copy backward.  The register roles are reversed relative to the forward case: words are read via RSP-TN and written via SOURCE.
+ (emit-label loop)
+ (inst sub rbx-tn (fixnumize 1)) ; pre-decrement: RBX-TN serves as both the remaining count and the index of the word to move
+ (inst mov temp (make-ea :qword :base rsp-tn
+ :index rbx-tn :scale fixnum->word)) ; load the word at index RBX-TN of the RSP-TN-based area
+ (inst mov (make-ea :qword :base source
+ :index rbx-tn :scale fixnum->word)
+ temp) ; store it at the same index of the SOURCE-based area
+ (inst jmp :nz loop) ; tests the flags from the SUB above; the intervening MOVs leave the flags untouched
+ ;; done with the stack--stack copy. Reset RSP to its final
+ ;; value, which this branch takes from SOURCE
+ (inst mov rsp-tn source))))