+ ;; We need to copy from the lowest address upwards to avoid overwriting
+ ;; args that have not been copied yet. So we need to use EBX as the copy
+ ;; index and ECX as the loop counter, rather than using ECX for both.
+ (inst xor ebx-tn ebx-tn)
+
+ ;; We used to use REP MOVS here, but on modern x86 it performs
+ ;; much worse than an explicit loop for small blocks.
+ COPY-LOOP
+ (inst mov edi-tn (make-ea :dword :base esi-tn :index ebx-tn))
+ ;; The :DISP is to account for the registers saved on the stack
+ (inst mov (make-ea :dword :base esp-tn :disp (* 3 n-word-bytes)
+ :index ebx-tn)
+ edi-tn)
+ (inst add ebx-tn n-word-bytes)
+ (inst sub ecx-tn n-word-bytes)
+ (inst jmp :nz COPY-LOOP)