-#ifdef WANT_CGC
-/*
- * fastcopy16 -- copy a block of memory in 8/16-byte units.
- *
- * This is a copy function which is optimized for the Pentium and
- * works OK on 486 as well.  This assumes (does not check) that the
- * input byte count is a multiple of 8 bytes (one Lisp object).
- * This code takes advantage of pairing in the Pentium as well
- * as the 128-bit cache line.
- *
- * Arguments (on the stack, addressed through %ebp after the prologue):
- *    8(%ebp)   dst    destination address
- *   12(%ebp)   src    source address
- *   16(%ebp)   bytes  byte count, treated as unsigned; only bytes/8
- *                     whole units are copied, any remainder is ignored
- *
- * Method: if the number of 8-byte units is odd, one unit is copied
- * first; the rest is copied 16 bytes per loop iteration.
- *
- * Registers: %ebx, %esi, %edi and %ebp are saved and restored.
- * %eax, %ecx, %edx and the flags are used as scratch; no result is
- * returned.
- *
- * All labels use the .L prefix so they stay local to the assembler
- * and cannot collide with other labels in this file.
- */
-        .global GNAME(fastcopy16)
-        .type   GNAME(fastcopy16),@function
-        .align  align_4byte,0x90
-GNAME(fastcopy16):
-        pushl   %ebp
-        movl    %esp,%ebp
-        pushl   %ebx
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%edi            # dst
-        movl    12(%ebp),%esi           # src
-        movl    16(%ebp),%ecx           # bytes
-        shrl    $3,%ecx                 # number of 8-byte units (unsigned)
-        testl   $1,%ecx                 # odd?
-        jz      .Lfastcopy16_quad
-        movl    (%esi),%eax             # copy the one leading 8-byte unit
-        movl    4(%esi),%ebx
-        movl    %eax,(%edi)
-        movl    %ebx,4(%edi)
-        leal    8(%esi),%esi
-        leal    8(%edi),%edi
-.Lfastcopy16_quad:
-        shrl    $1,%ecx                 # count 16-byte units
-        jz      .Lfastcopy16_end        # nothing left to copy
-        movl    %ecx,%ebp               # use ebp for loop counter
-        .align  align_16byte,0x90
-.Lfastcopy16_top:
-        movl    (%edi),%eax             # prefetch! MAJOR Pentium win..
-        movl    (%esi),%eax
-        movl    4(%esi),%ebx
-        movl    8(%esi),%ecx
-        movl    12(%esi),%edx
-        movl    %eax, (%edi)
-        movl    %ebx, 4(%edi)
-        movl    %ecx, 8(%edi)
-        movl    %edx,12(%edi)
-        leal    16(%esi),%esi
-        leal    16(%edi),%edi
-        decl    %ebp
-        jnz     .Lfastcopy16_top        # non-prefixed jump saves cycles
-.Lfastcopy16_end:
-        popl    %edi
-        popl    %esi
-        popl    %ebx
-        popl    %ebp                    # restores the caller frame pointer
-        ret
-        .size   GNAME(fastcopy16),.-GNAME(fastcopy16)
-#endif
-\f
-#ifdef GENCGC
-/*
- * i586_bzero -- fast bzero using the FPU.
- *
- * Arguments (on the stack, no frame is set up):
- *   4(%esp)   start address, which needs to be aligned on an 8 byte
- *             boundary
- *   8(%esp)   number of bytes, which must be a multiple of 8 bytes
- *
- * A byte count of zero returns immediately without touching memory
- * or the FPU.  A count that is not a multiple of 8 is still a caller
- * error: the remaining-byte counter would step past zero and the loop
- * would keep storing.
- *
- * Method: +0.0 is pushed on the x87 stack with fldz, stored as an
- * 8-byte double once per iteration, and popped again before return,
- * so the x87 stack depth is unchanged on exit.
- *
- * Registers: %eax, %edx and the flags are used as scratch.
- */
-        .text
-        .globl  GNAME(i586_bzero)
-        .type   GNAME(i586_bzero),@function
-        .align  align_4byte,0x90
-GNAME(i586_bzero):
-        movl    4(%esp),%edx            # Load the start address.
-        movl    8(%esp),%eax            # Load the number of bytes.
-        testl   %eax,%eax
-        jz      .Li586_bzero_done       # zero bytes: nothing to clear
-        fldz                            # st(0) = +0.0
-.Li586_bzero_loop:
-        fstl    0(%edx)                 # store 8 zero bytes, keep st(0)
-        addl    $8,%edx
-        subl    $8,%eax                 # bytes remaining
-        jnz     .Li586_bzero_loop
-        fstp    %st(0)                  # pop the zero pushed by fldz
-.Li586_bzero_done:
-        ret
-        .size   GNAME(i586_bzero),.-GNAME(i586_bzero)
-#endif
-\f