/*
 * very-low-level utilities for runtime support
 */

/*
 * This software is part of the SBCL system. See the README file for
 * more information.
 *
 * This software is derived from the CMU CL system, which was
 * written at Carnegie Mellon University and released into the
 * public domain. The software is in the public domain and is
 * provided with absolutely no warranty. See the COPYING and CREDITS
 * files for more information.
 */
#define LANGUAGE_ASSEMBLY
#include "genesis/closure.h"
#include "genesis/fdefn.h"
#include "genesis/static-symbols.h"
#include "genesis/symbol.h"
#include "genesis/thread.h"
/* Minimize conditionalization for different OS naming schemes.
 *
 * (As of sbcl-0.8.10, this seems no longer to be much of an issue,
 * since everyone has converged on ELF. If this generality really
 * turns out not to matter, perhaps it's just clutter we could get
 * rid of? -- WHN 2004-04-18)
 *
 * (Except Win32, which is unlikely ever to be ELF, sorry. -- AB 2005-12-08)
 */
#if defined __linux__ || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ || defined __sun
#define GNAME(var) var
#else
#define GNAME(var) _##var
#endif
/* Get the right type of alignment. Linux, FreeBSD and NetBSD (but not
 * OpenBSD) want alignment in bytes.
 *
 * (As in the GNAME() definitions above, as of sbcl-0.8.10, this seems
 * no longer to be much of an issue, since everyone has converged on
 * the same value. If this generality really turns out not to
 * matter any more, perhaps it's just clutter we could get
 * rid of? -- WHN 2004-04-18)
 */
#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__sun) || defined(LISP_FEATURE_WIN32)
#define align_4byte	4
#define align_8byte	8
#define align_16byte	16
#else
#define align_4byte	2
#define align_8byte	3
#define align_16byte	4
#endif
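/* (The #else values are exponents rather than byte counts: on those
 * platforms .align takes a power of two, so 2, 3 and 4 request 4-, 8-
 * and 16-byte alignment respectively.) */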
/*
 * The assembler used for win32 doesn't like .type or .size directives,
 * so we conditionally suppress them by wrapping them in macros that
 * expand to nothing on win32 (and Darwin). Hopefully this still works
 * on other platforms.
 */
#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
#define TYPE(name) .type name,@function
#define SIZE(name) .size name,.-name
#else
#define TYPE(name)
#define SIZE(name)
#endif
/*
 * x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
 * handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
 * to use ud2 instead. ud2 is an undefined opcode, the byte sequence
 * 0F 0B (#x0b0f when read as a little-endian word), that causes
 * SIGILL to fire. We check for this instruction in the SIGILL handler
 * and if we see it, we advance the EIP by two bytes to skip over the
 * ud2 instruction and call sigtrap_handler. */
#if defined(LISP_FEATURE_DARWIN)
#define TRAP ud2
#else
#define TRAP int3
#endif
	.text
	.globl	GNAME(foreign_function_call_active)
	.globl	GNAME(all_threads)
/*
 * A call to call_into_c preserves esi, edi, and ebp.
 * (The C function will preserve ebx, esi, edi, and ebp across its
 * function call, but we trash ebx ourselves by using it to save the
 * return Lisp address.)
 *
 * Return values are in eax and maybe edx for quads, or st(0) for
 * floats.
 *
 * This should work for Lisp calls C calls Lisp calls C ...
 */
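/* (Entry state assumed by the code below, though not stated above: the
 * Lisp caller leaves the address of the C function in eax, with the
 * Lisp return address on top of the stack, which we pop into ebx.) */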
	.align	align_16byte,0x90
	.globl	GNAME(call_into_c)
	TYPE(GNAME(call_into_c))
GNAME(call_into_c):
	movl	$1,GNAME(foreign_function_call_active)
/* Save the return Lisp address in ebx. */
	popl	%ebx

/* Set up the NPX for C. */
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

#ifdef LISP_FEATURE_WIN32
	cld			# Win32 code expects the direction flag clear.
#endif

#ifdef LISP_FEATURE_DARWIN
	andl	$0xfffffff0,%esp	# align stack to 16-byte boundary before calling C
#endif
	call	*%eax		# normal callout using Lisp stack

	movl	%eax,%ecx	# remember integer return value

/* Check for a return FP value. */
	fxam
	fnstsw	%ax
	andl	$0x4500,%eax
	cmpl	$0x4100,%eax	# st(0) empty, i.e. no FP value?
	jne	Lfp_rtn_value
/* The return value is in eax, or eax,edx? */
/* Set up the NPX stack for Lisp. */
	fldz			# Ensure no regs are empty.
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz

/* Restore the return value. */
	movl	%ecx,%eax	# maybe return value

	movl	$0,GNAME(foreign_function_call_active)
/* Return. */
	jmp	*%ebx

Lfp_rtn_value:
/* The return result is in st(0). */
/* Set up the NPX stack for Lisp, placing the result in st(0). */
	fldz			# Ensure no regs are empty.
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz
	fxch	%st(7)		# Move the result back to st(0).

/* We don't need to restore eax, because the result is in st(0). */

	movl	$0,GNAME(foreign_function_call_active)
/* Return. */
	jmp	*%ebx

	SIZE(GNAME(call_into_c))
	.globl	GNAME(call_into_lisp_first_time)
	TYPE(GNAME(call_into_lisp_first_time))

/* The *ALIEN-STACK* pointer is set up on the first call_into_lisp when
 * the stack changes. We don't worry too much about saving registers
 * here, because we never expect to return from the initial call to lisp
 * anyway. */

	.align	align_16byte,0x90
GNAME(call_into_lisp_first_time):
	pushl	%ebp		# Save old frame pointer.
	movl	%esp,%ebp	# Establish new frame.
#ifndef LISP_FEATURE_WIN32
	movl	%esp,ALIEN_STACK + SYMBOL_VALUE_OFFSET
	movl	GNAME(all_threads),%eax
	movl	THREAD_CONTROL_STACK_START_OFFSET(%eax),%esp
	/* don't think too hard about what happens if we get interrupted
	 * here */
	addl	$(THREAD_CONTROL_STACK_SIZE),%esp
#else
/* Win32 -really- doesn't like you switching stacks out from under it. */
	movl	GNAME(all_threads),%eax
#endif
	jmp	Lstack
	.globl	GNAME(call_into_lisp)
	TYPE(GNAME(call_into_lisp))

/* The C conventions require that ebx, esi, edi, and ebp be preserved
 * across function calls. */
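/* (For reference, a sketch of the C-side declaration this entry point
 * is assumed to match, given the cdecl argument layout used below:
 *
 *     extern lispobj call_into_lisp(lispobj fn, lispobj *args, int nargs);
 *
 * so fn arrives at 8(%ebp), args at 12(%ebp) and nargs at 16(%ebp).) */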
	.align	align_16byte,0x90
GNAME(call_into_lisp):
	pushl	%ebp		# Save old frame pointer.
	movl	%esp,%ebp	# Establish new frame.

Lstack:
/* Save the NPX state */
	fwait			# Catch any pending NPX exceptions.
	subl	$108,%esp	# Make room for the NPX state.
	fnsave	(%esp)		# save and reset NPX

	movl	(%esp),%eax	# Load NPX control word.
	andl	$0xfffff2ff,%eax	# Set rounding mode to nearest.
	orl	$0x00000200,%eax	# Set precision to double (53-bit mantissa).
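	/* (In the x87 control word, bits 8-9 are precision control and
	 * bits 10-11 are rounding control. The andl clears bits 8, 10
	 * and 11; the orl sets bit 9, leaving PC = 10 (double precision)
	 * and RC = 00 (round to nearest).) */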
	pushl	%eax
	fldcw	(%esp)		# Recover modes.
	popl	%eax

	fldz			# Ensure no FP regs are empty.
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz
	fldz
/* Save C regs: ebx esi edi. */
	pushl	%ebx
	pushl	%esi
	pushl	%edi
/* Clear descriptor regs. */
	xorl	%eax,%eax	# lexenv
	xorl	%ebx,%ebx	# available
	xorl	%ecx,%ecx	# arg count
	xorl	%edx,%edx	# first arg
	xorl	%edi,%edi	# second arg
	xorl	%esi,%esi	# third arg

/* no longer in function call */
	movl	%eax, GNAME(foreign_function_call_active)
	movl	%esp,%ebx	# remember current stack
	pushl	%ebx		# Save entry stack on (maybe) new stack.

/* Establish Lisp args. */
	movl	8(%ebp),%eax	# lexenv?
	movl	12(%ebp),%ebx	# address of arg vec
	movl	16(%ebp),%ecx	# num args
	shll	$2,%ecx		# Make num args into fixnum.
	cmpl	$0,%ecx
	je	Ldone
	movl	(%ebx),%edx	# arg0
	cmpl	$4,%ecx
	je	Ldone
	movl	4(%ebx),%edi	# arg1
	cmpl	$8,%ecx
	je	Ldone
	movl	8(%ebx),%esi	# arg2
Ldone:
/* Registers eax, ecx, edx, edi, and esi are now live. */

/* Alloc new frame. */
	mov	%esp,%ebx	# The current sp marks start of new frame.
	push	%ebp		# fp in save location S0
	sub	$8,%esp		# Ensure 3 slots are allocated, one above.
	mov	%ebx,%ebp	# Switch to new frame.

	call	*CLOSURE_FUN_OFFSET(%eax)
/* If the function returned multiple values, it will return to
   this point. Lose them. */
	jnc	LsingleValue
	mov	%ebx,%esp
LsingleValue:
/* A single value function returns here. */

/* Restore the stack, in case there was a stack change. */
	popl	%esp		# c-sp

/* Restore C regs: ebx esi edi. */
	popl	%edi
	popl	%esi
	popl	%ebx

/* Restore the NPX state. */
	frstor	(%esp)
	addl	$108, %esp

	popl	%ebp		# c-sp
	movl	%edx,%eax	# c-val
	ret
	SIZE(GNAME(call_into_lisp))
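/* (A sketch of the corresponding C-side call, with hypothetical
 * variables, under the prototype assumed above:
 *
 *     lispobj args[2] = { arg0, arg1 };
 *     lispobj result = call_into_lisp(fun, args, 2);
 *
 * fun is a tagged function object; its code is entered through
 * CLOSURE_FUN_OFFSET, as in the call instruction above.) */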
/* support for saving and restoring the NPX state from C */
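/* (Assumed C-side declarations, matching the single pointer argument
 * fetched from 4(%esp) below:
 *
 *     extern void fpu_save(void *state);
 *     extern void fpu_restore(void *state);
 *
 * where state points to a 108-byte FNSAVE area, the same size made
 * room for in call_into_lisp above.) */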
	.globl	GNAME(fpu_save)
	TYPE(GNAME(fpu_save))
	.align	align_4byte,0x90
GNAME(fpu_save):
	movl	4(%esp),%eax
	fnsave	(%eax)		# Save the NPX state. (resets NPX)
	ret
	SIZE(GNAME(fpu_save))

	.globl	GNAME(fpu_restore)
	TYPE(GNAME(fpu_restore))
	.align	align_4byte,0x90
GNAME(fpu_restore):
	movl	4(%esp),%eax
	frstor	(%eax)		# Restore the NPX state.
	ret
	SIZE(GNAME(fpu_restore))
/*
 * the undefined-function trampoline
 */
	.text
	.align	align_4byte,0x90
	.globl	GNAME(undefined_tramp)
	TYPE(GNAME(undefined_tramp))
	.byte	0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG
GNAME(undefined_tramp):
	TRAP
	.byte	trap_Error
	.byte	2
	.byte	UNDEFINED_FUN_ERROR
	.byte	sc_DescriptorReg # eax in the Descriptor-reg SC
	ret
	SIZE(GNAME(undefined_tramp))
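/* (The byte sequence after TRAP is decoded by the internal-error
 * handler: a length byte (2) covering the error code and operand that
 * follow, then UNDEFINED_FUN_ERROR with a single operand, eax, in the
 * descriptor-register storage class. The widetag bytes above make the
 * trampoline's entry point look like a simple-fun header.) */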
/*
 * the closure trampoline
 */
	.text
	.align	align_4byte,0x90
	.globl	GNAME(closure_tramp)
	TYPE(GNAME(closure_tramp))
	.byte	0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG
GNAME(closure_tramp):
	movl	FDEFN_FUN_OFFSET(%eax),%eax
	/* FIXME: The '*' after "jmp" in the next line is from PVE's
	 * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
	 * reasonable, and it certainly seems as though if CMU CL needs it,
	 * SBCL needs it too, but I haven't actually verified that it's
	 * right. It would be good to find a way to force the flow of
	 * control through here to test it. */
	jmp	*CLOSURE_FUN_OFFSET(%eax)
	SIZE(GNAME(closure_tramp))
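/* (What the two instructions above imply: this trampoline is entered
 * with an fdefn in eax; it loads the fdefn's function -- a closure --
 * into eax and jumps to that closure's underlying function. The '*'
 * makes the jmp indirect through the function slot rather than jumping
 * to the slot's own address, which is why the patch looks right.) */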
/*
 * fun-end breakpoint magic
 */
	.text
	.globl	GNAME(fun_end_breakpoint_guts)
	.align	align_4byte
GNAME(fun_end_breakpoint_guts):
	/* Multiple Value return */
	jc	multiple_value_return
	/* Single value return: The eventual return will now use the
	   multiple values return convention but with a return values
	   count of one. */
	movl	%esp,%ebx	# Setup ebx - the ofp.
	subl	$4,%esp		# Allocate one stack slot for the return value
	movl	$4,%ecx		# Setup ecx for one return value.
	movl	$(NIL),%edi	# default second value
	movl	$(NIL),%esi	# default third value

multiple_value_return:

	.globl	GNAME(fun_end_breakpoint_trap)
GNAME(fun_end_breakpoint_trap):
	TRAP
	.byte	trap_FunEndBreakpoint
	hlt			# We should never return here.

	.globl	GNAME(fun_end_breakpoint_end)
GNAME(fun_end_breakpoint_end):
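/* (The guts..end symbol pair above delimits a code template rather
 * than a routine that is called in place: the debugger copies the
 * range between the two symbols and installs the copy as a return
 * address, so that returning through it normalizes the values into the
 * multiple-values convention and raises trap_FunEndBreakpoint -- a
 * summary inferred from the symbols, not spelled out in this file.) */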
	.globl	GNAME(do_pending_interrupt)
	TYPE(GNAME(do_pending_interrupt))
	.align	align_4byte,0x90
GNAME(do_pending_interrupt):
	TRAP
	.byte	trap_PendingInterrupt
	ret
	SIZE(GNAME(do_pending_interrupt))
/*
 * Allocate bytes and return the start of the allocated space
 * in the specified destination register.
 *
 * In the general case the size will be in the destination register.
 *
 * All registers must be preserved except the destination.
 * The C conventions will preserve ebx, esi, edi, and ebp.
 * So only eax, ecx, and edx need special care here.
 */
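/* (Assumed C-side allocator that the stubs below call, taking the size
 * pushed as its single argument and returning the start of the
 * allocated space in eax per the cdecl convention:
 *
 *     extern lispobj *alloc(int nbytes);
 *
 * The exact prototype lives in the GC sources; the stubs only depend
 * on eax holding the result.) */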
	.globl	GNAME(alloc_to_eax)
	TYPE(GNAME(alloc_to_eax))
	.align	align_4byte,0x90
GNAME(alloc_to_eax):
	pushl	%ecx		# Save ecx and edx as C could destroy them.
	pushl	%edx
	pushl	%eax		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	popl	%edx		# Restore ecx and edx.
	popl	%ecx
	ret
	SIZE(GNAME(alloc_to_eax))
	.globl	GNAME(alloc_8_to_eax)
	TYPE(GNAME(alloc_8_to_eax))
	.align	align_4byte,0x90
GNAME(alloc_8_to_eax):
	pushl	%ecx		# Save ecx and edx as C could destroy them.
	pushl	%edx
	pushl	$8		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	popl	%edx		# Restore ecx and edx.
	popl	%ecx
	ret
	SIZE(GNAME(alloc_8_to_eax))
	.globl	GNAME(alloc_16_to_eax)
	TYPE(GNAME(alloc_16_to_eax))
	.align	align_4byte,0x90
GNAME(alloc_16_to_eax):
	pushl	%ecx		# Save ecx and edx as C could destroy them.
	pushl	%edx
	pushl	$16		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	popl	%edx		# Restore ecx and edx.
	popl	%ecx
	ret
	SIZE(GNAME(alloc_16_to_eax))
	.globl	GNAME(alloc_to_ecx)
	TYPE(GNAME(alloc_to_ecx))
	.align	align_4byte,0x90
GNAME(alloc_to_ecx):
	pushl	%eax		# Save eax and edx as C could destroy them.
	pushl	%edx
	pushl	%ecx		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ecx	# Set up the destination.
	popl	%edx		# Restore eax and edx.
	popl	%eax
	ret
	SIZE(GNAME(alloc_to_ecx))
	.globl	GNAME(alloc_8_to_ecx)
	TYPE(GNAME(alloc_8_to_ecx))
	.align	align_4byte,0x90
GNAME(alloc_8_to_ecx):
	pushl	%eax		# Save eax and edx as C could destroy them.
	pushl	%edx
	pushl	$8		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ecx	# Set up the destination.
	popl	%edx		# Restore eax and edx.
	popl	%eax
	ret
	SIZE(GNAME(alloc_8_to_ecx))
	.globl	GNAME(alloc_16_to_ecx)
	TYPE(GNAME(alloc_16_to_ecx))
	.align	align_4byte,0x90
GNAME(alloc_16_to_ecx):
	pushl	%eax		# Save eax and edx as C could destroy them.
	pushl	%edx
	pushl	$16		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ecx	# Set up the destination.
	popl	%edx		# Restore eax and edx.
	popl	%eax
	ret
	SIZE(GNAME(alloc_16_to_ecx))
	.globl	GNAME(alloc_to_edx)
	TYPE(GNAME(alloc_to_edx))
	.align	align_4byte,0x90
GNAME(alloc_to_edx):
	pushl	%eax		# Save eax and ecx as C could destroy them.
	pushl	%ecx
	pushl	%edx		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edx	# Set up the destination.
	popl	%ecx		# Restore eax and ecx.
	popl	%eax
	ret
	SIZE(GNAME(alloc_to_edx))
	.globl	GNAME(alloc_8_to_edx)
	TYPE(GNAME(alloc_8_to_edx))
	.align	align_4byte,0x90
GNAME(alloc_8_to_edx):
	pushl	%eax		# Save eax and ecx as C could destroy them.
	pushl	%ecx
	pushl	$8		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edx	# Set up the destination.
	popl	%ecx		# Restore eax and ecx.
	popl	%eax
	ret
	SIZE(GNAME(alloc_8_to_edx))
	.globl	GNAME(alloc_16_to_edx)
	TYPE(GNAME(alloc_16_to_edx))
	.align	align_4byte,0x90
GNAME(alloc_16_to_edx):
	pushl	%eax		# Save eax and ecx as C could destroy them.
	pushl	%ecx
	pushl	$16		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edx	# Set up the destination.
	popl	%ecx		# Restore eax and ecx.
	popl	%eax
	ret
	SIZE(GNAME(alloc_16_to_edx))
	.globl	GNAME(alloc_to_ebx)
	TYPE(GNAME(alloc_to_ebx))
	.align	align_4byte,0x90
GNAME(alloc_to_ebx):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	%ebx		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ebx	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_to_ebx))
	.globl	GNAME(alloc_8_to_ebx)
	TYPE(GNAME(alloc_8_to_ebx))
	.align	align_4byte,0x90
GNAME(alloc_8_to_ebx):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	$8		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ebx	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_8_to_ebx))
	.globl	GNAME(alloc_16_to_ebx)
	TYPE(GNAME(alloc_16_to_ebx))
	.align	align_4byte,0x90
GNAME(alloc_16_to_ebx):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	$16		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ebx	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_16_to_ebx))
	.globl	GNAME(alloc_to_esi)
	TYPE(GNAME(alloc_to_esi))
	.align	align_4byte,0x90
GNAME(alloc_to_esi):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	%esi		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%esi	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_to_esi))
	.globl	GNAME(alloc_8_to_esi)
	TYPE(GNAME(alloc_8_to_esi))
	.align	align_4byte,0x90
GNAME(alloc_8_to_esi):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	$8		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%esi	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_8_to_esi))
	.globl	GNAME(alloc_16_to_esi)
	TYPE(GNAME(alloc_16_to_esi))
	.align	align_4byte,0x90
GNAME(alloc_16_to_esi):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	$16		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%esi	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_16_to_esi))
	.globl	GNAME(alloc_to_edi)
	TYPE(GNAME(alloc_to_edi))
	.align	align_4byte,0x90
GNAME(alloc_to_edi):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	%edi		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edi	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_to_edi))
	.globl	GNAME(alloc_8_to_edi)
	TYPE(GNAME(alloc_8_to_edi))
	.align	align_4byte,0x90
GNAME(alloc_8_to_edi):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	$8		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edi	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_8_to_edi))
	.globl	GNAME(alloc_16_to_edi)
	TYPE(GNAME(alloc_16_to_edi))
	.align	align_4byte,0x90
GNAME(alloc_16_to_edi):
	pushl	%eax		# Save eax, ecx, and edx as C could destroy them.
	pushl	%ecx
	pushl	%edx
	pushl	$16		# Push the size.
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edi	# Set up the destination.
	popl	%edx		# Restore eax, ecx and edx.
	popl	%ecx
	popl	%eax
	ret
	SIZE(GNAME(alloc_16_to_edi))
/* Called from lisp when an inline allocation overflows.
   Every register except the result needs to be preserved.
   We depend on C to preserve ebx, esi, edi, and ebp.
   But where necessary must save eax, ecx, edx. */

#ifdef LISP_FEATURE_SB_THREAD
#define START_REGION %fs:THREAD_ALLOC_REGION_OFFSET
#else
#define START_REGION GNAME(boxed_region)
#endif
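/* (How the overflow protocol below works: the overflowing register
 * arrives holding crfp+size, i.e. the current region free pointer plus
 * the requested size. Subtracting the free pointer stored at
 * START_REGION recovers the size, which is then passed to alloc().
 * With hypothetical numbers: a 16-byte request against a free pointer
 * of 0x1000 arrives as 0x1010, and 0x1010 - 0x1000 = 16 is pushed.) */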
/* This routine handles an overflow with eax=crfp+size. So the
   size is eax-crfp. */
	.globl	GNAME(alloc_overflow_eax)
	TYPE(GNAME(alloc_overflow_eax))
GNAME(alloc_overflow_eax):
	pushl	%ecx		# Save ecx
	pushl	%edx		# Save edx
	/* Calculate the size for the allocation. */
	subl	START_REGION,%eax
	pushl	%eax		# Push the size
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	popl	%edx		# Restore edx.
	popl	%ecx		# Restore ecx.
	ret
	SIZE(GNAME(alloc_overflow_eax))
	.globl	GNAME(alloc_overflow_ecx)
	TYPE(GNAME(alloc_overflow_ecx))
GNAME(alloc_overflow_ecx):
	pushl	%eax		# Save eax
	pushl	%edx		# Save edx
	/* Calculate the size for the allocation. */
	subl	START_REGION,%ecx
	pushl	%ecx		# Push the size
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ecx	# Set up the destination.
	popl	%edx		# Restore edx.
	popl	%eax		# Restore eax.
	ret
	SIZE(GNAME(alloc_overflow_ecx))
	.globl	GNAME(alloc_overflow_edx)
	TYPE(GNAME(alloc_overflow_edx))
GNAME(alloc_overflow_edx):
	pushl	%eax		# Save eax
	pushl	%ecx		# Save ecx
	/* Calculate the size for the allocation. */
	subl	START_REGION,%edx
	pushl	%edx		# Push the size
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edx	# Set up the destination.
	popl	%ecx		# Restore ecx.
	popl	%eax		# Restore eax.
	ret
	SIZE(GNAME(alloc_overflow_edx))
/* This routine handles an overflow with ebx=crfp+size. So the
   size is ebx-crfp. */
	.globl	GNAME(alloc_overflow_ebx)
	TYPE(GNAME(alloc_overflow_ebx))
GNAME(alloc_overflow_ebx):
	pushl	%eax		# Save eax
	pushl	%ecx		# Save ecx
	pushl	%edx		# Save edx
	/* Calculate the size for the allocation. */
	subl	START_REGION,%ebx
	pushl	%ebx		# Push the size
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%ebx	# Set up the destination.
	popl	%edx		# Restore edx.
	popl	%ecx		# Restore ecx.
	popl	%eax		# Restore eax.
	ret
	SIZE(GNAME(alloc_overflow_ebx))
/* This routine handles an overflow with esi=crfp+size. So the
   size is esi-crfp. */
	.globl	GNAME(alloc_overflow_esi)
	TYPE(GNAME(alloc_overflow_esi))
GNAME(alloc_overflow_esi):
	pushl	%eax		# Save eax
	pushl	%ecx		# Save ecx
	pushl	%edx		# Save edx
	/* Calculate the size for the allocation. */
	subl	START_REGION,%esi
	pushl	%esi		# Push the size
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%esi	# Set up the destination.
	popl	%edx		# Restore edx.
	popl	%ecx		# Restore ecx.
	popl	%eax		# Restore eax.
	ret
	SIZE(GNAME(alloc_overflow_esi))
	.globl	GNAME(alloc_overflow_edi)
	TYPE(GNAME(alloc_overflow_edi))
GNAME(alloc_overflow_edi):
	pushl	%eax		# Save eax
	pushl	%ecx		# Save ecx
	pushl	%edx		# Save edx
	/* Calculate the size for the allocation. */
	subl	START_REGION,%edi
	pushl	%edi		# Push the size
	call	GNAME(alloc)
	addl	$4,%esp		# Pop the size arg.
	movl	%eax,%edi	# Set up the destination.
	popl	%edx		# Restore edx.
	popl	%ecx		# Restore ecx.
	popl	%eax		# Restore eax.
	ret
	SIZE(GNAME(alloc_overflow_edi))
	.align	align_4byte,0x90
	.globl	GNAME(post_signal_tramp)
	TYPE(GNAME(post_signal_tramp))
GNAME(post_signal_tramp):
	/* this is notionally the second half of a function whose first half
	 * doesn't exist. This is where call_into_lisp returns when called
	 * using return_to_lisp_function */
	addl	$12,%esp	# clear call_into_lisp args from stack
	popal			# restore registers
	popfl
	leal	4(%esp),%esp
	ret
	SIZE(GNAME(post_signal_tramp))
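/* (Background for the comment above: the signal machinery builds a
 * fake frame -- saved registers, flags, and the three call_into_lisp
 * arguments -- and points the return address here, so this code
 * unwinds that frame once the Lisp function returns; see
 * arrange_return_to_lisp_function in interrupt.c.) */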
#ifdef LISP_FEATURE_WIN32
	/*
	 * This is part of the funky magic for exception handling on win32.
	 * See sigtrap_emulator() in win32-os.c for details.
	 */
	.globl	GNAME(sigtrap_trampoline)
GNAME(sigtrap_trampoline):
	call	GNAME(sigtrap_wrapper)
	int3
	.byte	trap_ContextRestore
	hlt			# We should never return here.

	/*
	 * This is part of the funky magic for exception handling on win32.
	 * See handle_exception() in win32-os.c for details.
	 */
	.globl	GNAME(exception_trampoline)
GNAME(exception_trampoline):
	call	GNAME(handle_win32_exception_wrapper)
	int3
	.byte	trap_ContextRestore
	hlt			# We should never return here.
#endif
/* fast_bzero implementations and code to detect which implementation
 * to use.
 */

	.globl	GNAME(fast_bzero_pointer)
	.data
	.align	align_4byte
GNAME(fast_bzero_pointer):
	/* Variable containing a pointer to the bzero function to use.
	 * Initially points to a basic function. Change this variable
	 * to fast_bzero_detect if OS supports SSE. */
	.long	GNAME(fast_bzero_base)
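/* (This is a function-pointer dispatch. Restated as a C sketch with
 * the same names, the signature being assumed from the argument layout
 * used below:
 *
 *     void (*fast_bzero_pointer)(void *start, size_t bytes)
 *         = fast_bzero_base;
 *
 * The C runtime re-points this at fast_bzero_detect when the OS
 * supports SSE; the first call through it then settles the pointer on
 * fast_bzero_sse or fast_bzero_base for good.) */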
	.text
	.align	align_8byte,0x90
	.globl	GNAME(fast_bzero)
	TYPE(GNAME(fast_bzero))
GNAME(fast_bzero):
	/* Indirect function call */
	jmp	*GNAME(fast_bzero_pointer)
	SIZE(GNAME(fast_bzero))
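/* (Assumed usage from C, with hypothetical arguments; per the comments
 * below, callers must pass a block that starts and ends on a 4096-byte
 * aligned address:
 *
 *     fast_bzero(page_start, n_bytes);
 * ) */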
	.align	align_8byte,0x90
	.globl	GNAME(fast_bzero_detect)
	TYPE(GNAME(fast_bzero_detect))
GNAME(fast_bzero_detect):
	/* Decide whether to use SSE, MMX or REP version */
	push	%eax		/* CPUID uses EAX-EDX */
	push	%ebx
	push	%ecx
	push	%edx
	mov	$1, %eax
	cpuid
	test	$0x04000000, %edx	/* SSE2 needed for MOVNTDQ */
	jnz	Lsse2
	/* Originally there was another case here for using the
	 * MOVNTQ instruction for processors that supported MMX but
	 * not SSE2. This turned out to be a loss especially on
	 * Athlons (where this instruction is apparently microcoded
	 * somewhat slowly). So for simplicity revert to REP STOSL
	 * for all non-SSE2 processors.
	 */
	movl	$(GNAME(fast_bzero_base)), GNAME(fast_bzero_pointer)
	jmp	Lrestore
Lsse2:
	movl	$(GNAME(fast_bzero_sse)), GNAME(fast_bzero_pointer)
Lrestore:
	pop	%edx
	pop	%ecx
	pop	%ebx
	pop	%eax
	jmp	*GNAME(fast_bzero_pointer)

	SIZE(GNAME(fast_bzero_detect))
	.align	align_8byte,0x90
	.globl	GNAME(fast_bzero_sse)
	TYPE(GNAME(fast_bzero_sse))

GNAME(fast_bzero_sse):
	/* A fast routine for zero-filling blocks of memory that are
	 * guaranteed to start and end at a 4096-byte aligned address.
	 */
	push	%esi		/* Save temporary registers */
	push	%edi
	mov	16(%esp), %esi	/* Parameter: amount of bytes to fill */
	mov	12(%esp), %edi	/* Parameter: start address */
	shr	$6, %esi	/* Amount of 64-byte blocks to copy */
	jz	Lend_sse	/* If none, stop */
	movups	%xmm7, -16(%esp) /* Save XMM register */
	xorps	%xmm7, %xmm7	/* Zero the XMM register */
	jmp	Lloop_sse
	.align	align_16byte
Lloop_sse:

	/* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
	 * non-caching double-quadword moving variant, i.e. the memory areas
	 * we're touching are not fetched into the L1 cache, since we're just
	 * going to overwrite the memory soon anyway.
	 */
	movntdq	%xmm7, 0(%edi)
	movntdq	%xmm7, 16(%edi)
	movntdq	%xmm7, 32(%edi)
	movntdq	%xmm7, 48(%edi)

	add	$64, %edi	/* Advance pointer */
	dec	%esi		/* Decrement 64-byte block count */
	jnz	Lloop_sse
	movups	-16(%esp), %xmm7 /* Restore the XMM register */
	sfence			/* Ensure that weakly ordered writes are flushed. */
Lend_sse:
	mov	12(%esp), %esi	/* Parameter: start address */
	prefetcht0 0(%esi)	/* Prefetch the start of the block into cache,
				 * since it's likely to be used immediately. */
	pop	%edi		/* Restore temp registers */
	pop	%esi
	ret
	SIZE(GNAME(fast_bzero_sse))
	.align	align_8byte,0x90
	.globl	GNAME(fast_bzero_base)
	TYPE(GNAME(fast_bzero_base))

GNAME(fast_bzero_base):
	/* A fast routine for zero-filling blocks of memory that are
	 * guaranteed to start and end at a 4096-byte aligned address.
	 */
	push	%eax		/* Save temporary registers */
	push	%ecx
	push	%edi
	mov	20(%esp), %ecx	/* Parameter: amount of bytes to fill */
	mov	16(%esp), %edi	/* Parameter: start address */
	xor	%eax, %eax	/* Zero EAX */
	shr	$2, %ecx	/* Amount of 4-byte blocks to copy */
	jz	Lend_base

	cld			/* Set direction of STOSL to increment */
	rep
	stosl			/* Store EAX to *EDI, ECX times, incrementing
				 * EDI by 4 after each store */

Lend_base:
	pop	%edi		/* Restore temp registers */
	pop	%ecx
	pop	%eax
	ret
	SIZE(GNAME(fast_bzero_base))