1.0.19.33: Improved interrupt handling on darwin/x86[-64]
[sbcl.git] / src / runtime / x86-darwin-os.c
1
2
3 #ifdef LISP_FEATURE_SB_THREAD
4 #include <architecture/i386/table.h>
5 #include <i386/user_ldt.h>
6 #include <mach/mach_init.h>
7 #endif
8
9 #include "thread.h"
10 #include "validate.h"
11 #include "runtime.h"
12 #include "interrupt.h"
13 #include "x86-darwin-os.h"
14 #include "genesis/fdefn.h"
15
16 #include <mach/mach.h>
17 #include <mach/mach_error.h>
18 #include <mach/mach_types.h>
19 #include <mach/sync_policy.h>
20 #include <mach/vm_region.h>
21 #include <mach/machine/thread_state.h>
22 #include <mach/machine/thread_status.h>
23 #include <sys/_types.h>
24 #include <sys/ucontext.h>
25 #include <pthread.h>
26 #include <assert.h>
27 #include <stdlib.h>
28
29 #ifdef LISP_FEATURE_SB_THREAD
30
31 pthread_mutex_t modify_ldt_lock = PTHREAD_MUTEX_INITIALIZER;
32
33 void set_data_desc_size(data_desc_t* desc, unsigned long size)
34 {
35     desc->limit00 = (size - 1) & 0xffff;
36     desc->limit16 = ((size - 1) >> 16) &0xf;
37 }
38
39 void set_data_desc_addr(data_desc_t* desc, void* addr)
40 {
41     desc->base00 = (unsigned int)addr & 0xffff;
42     desc->base16 = ((unsigned int)addr & 0xff0000) >> 16;
43     desc->base24 = ((unsigned int)addr & 0xff000000) >> 24;
44 }
45
46 #endif
47
48 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
49 kern_return_t mach_thread_init(mach_port_t thread_exception_port);
50 #endif
51
/* Architecture/OS specific initialization for THREAD.
 *
 * With SB_THREAD: allocates an LDT entry describing the
 * dynamic_values_bytes bytes starting at THREAD, loads the matching
 * selector into %fs, remembers the LDT index in thread->tls_cookie
 * and stores THREAD under the `specials' pthread key.
 * With MACH_EXCEPTION_HANDLER: registers the thread's exception port.
 * With C_STACK_IS_CONTROL_STACK: installs an alternate signal stack.
 *
 * Returns 1 on success.  Failure to allocate the LDT entry or to
 * install the alternate signal stack is fatal (lose()). */
int arch_os_thread_init(struct thread *thread) {
#ifdef LISP_FEATURE_SB_THREAD
    int n;
    sel_t sel;

    data_desc_t ldt_entry = { 0, 0, 0, DESC_DATA_WRITE,
                              3, 1, 0, DESC_DATA_32B, DESC_GRAN_BYTE, 0 };

    set_data_desc_addr(&ldt_entry, thread);
    set_data_desc_size(&ldt_entry, dynamic_values_bytes);

    /* LDT modifications are serialized across threads. */
    thread_mutex_lock(&modify_ldt_lock);
    n = i386_set_ldt(LDT_AUTO_ALLOC, (union ldt_entry*) &ldt_entry, 1);

    if (n < 0) {
        perror("i386_set_ldt");
        lose("unexpected i386_set_ldt(..) failure\n");
    }
    thread_mutex_unlock(&modify_ldt_lock);

    FSHOW_SIGNAL((stderr, "/ TLS: Allocated LDT %x\n", n));
    sel.index = n;
    sel.rpl = USER_PRIV;
    sel.ti = SEL_LDT;

    __asm__ __volatile__ ("mov %0, %%fs" : : "r"(sel));

    /* arch_os_thread_cleanup() needs the index to free the entry. */
    thread->tls_cookie=n;
    pthread_setspecific(specials,thread);
#endif
#ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
    mach_thread_init(THREAD_STRUCT_TO_EXCEPTION_PORT(thread));
#endif

#ifdef LISP_FEATURE_C_STACK_IS_CONTROL_STACK
    stack_t sigstack;

    /* Signal handlers are run on the control stack, so if it is exhausted
     * we had better use an alternate stack for whatever signal tells us
     * we've exhausted it */
    sigstack.ss_sp = (char *) thread + dynamic_values_bytes;
    sigstack.ss_flags=0;
    sigstack.ss_size = 32*SIGSTKSZ;
    /* Without the alternate stack a control stack overflow could not
     * be reported, so do not carry on silently if this fails. */
    if (sigaltstack(&sigstack,0) < 0) {
        perror("sigaltstack");
        lose("unexpected sigaltstack(..) failure\n");
    }
#endif
    return 1;                  /* success */
}
99
/* Undo the per-thread setup done by arch_os_thread_init(): with
 * SB_THREAD, load 0 into %fs and release the LDT entry whose index
 * was saved in thread->tls_cookie.  Always returns 1. */
int arch_os_thread_cleanup(struct thread *thread) {
#if defined(LISP_FEATURE_SB_THREAD)
    int ldt_index = thread->tls_cookie;

    FSHOW_SIGNAL((stderr, "/ TLS: Freeing LDT %x\n", ldt_index));

    /* Stop referring to the LDT entry before giving it up. */
    __asm__ __volatile__ ("mov %0, %%fs" : : "r"(0));

    /* Passing a NULL descriptor frees the entry. */
    thread_mutex_lock(&modify_ldt_lock);
    i386_set_ldt(ldt_index, NULL, 1);
    thread_mutex_unlock(&modify_ldt_lock);
#endif
    return 1;                  /* success */
}
116
117 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
118
119 void sigill_handler(int signal, siginfo_t *siginfo, void *void_context);
120 void sigtrap_handler(int signal, siginfo_t *siginfo, void *void_context);
121 void memory_fault_handler(int signal, siginfo_t *siginfo, void *void_context);
122
123 /* exc_server handles mach exception messages from the kernel and
124  * calls catch exception raise. We use the system-provided
125  * mach_msg_server, which, I assume, calls exc_server in a loop.
126  *
127  */
128 extern boolean_t exc_server();
129
130 /* This executes in the faulting thread as part of the signal
131  * emulation.  It is passed a context with the uc_mcontext field
132  * pointing to a valid block of memory. */
133 void build_fake_signal_context(os_context_t *context,
134                                x86_thread_state32_t *thread_state,
135                                x86_float_state32_t *float_state) {
136     pthread_sigmask(0, NULL, &context->uc_sigmask);
137     context->uc_mcontext->SS = *thread_state;
138     context->uc_mcontext->FS = *float_state;
139 }
140
141 /* This executes in the faulting thread as part of the signal
142  * emulation.  It is effectively the inverse operation from above. */
143 void update_thread_state_from_context(x86_thread_state32_t *thread_state,
144                                       x86_float_state32_t *float_state,
145                                       os_context_t *context) {
146     *thread_state = context->uc_mcontext->SS;
147     *float_state = context->uc_mcontext->FS;
148     pthread_sigmask(SIG_SETMASK, &context->uc_sigmask, NULL);
149 }
150
151 /* Modify a context to push new data on its stack. */
152 void push_context(u32 data, x86_thread_state32_t *thread_state)
153 {
154     u32 *stack_pointer;
155
156     stack_pointer = (u32*) thread_state->ESP;
157     *(--stack_pointer) = data;
158     thread_state->ESP = (unsigned int) stack_pointer;
159 }
160
161 void align_context_stack(x86_thread_state32_t *thread_state)
162 {
163     /* 16byte align the stack (provided that the stack is, as it
164      * should be, 4byte aligned. */
165     while (thread_state->ESP & 15) push_context(0, thread_state);
166 }
167
168 /* Stack allocation starts with a context that has a mod-4 ESP value
169  * and needs to leave a context with a mod-16 ESP that will restore
170  * the old ESP value and other register state when activated.  The
171  * first part of this is the recovery trampoline, which loads ESP from
172  * EBP, pops EBP, and returns. */
173 asm("_stack_allocation_recover: movl %ebp, %esp; popl %ebp; ret;");
174
175 void open_stack_allocation(x86_thread_state32_t *thread_state)
176 {
177     void stack_allocation_recover(void);
178
179     push_context(thread_state->EIP, thread_state);
180     push_context(thread_state->EBP, thread_state);
181     thread_state->EBP = thread_state->ESP;
182     thread_state->EIP = (unsigned int) stack_allocation_recover;
183
184     align_context_stack(thread_state);
185 }
186
187 /* Stack allocation of data starts with a context with a mod-16 ESP
188  * value and reserves some space on it by manipulating the ESP
189  * register. */
190 void *stack_allocate(x86_thread_state32_t *thread_state, size_t size)
191 {
192     /* round up size to 16byte multiple */
193     size = (size + 15) & -16;
194
195     thread_state->ESP = ((u32)thread_state->ESP) - size;
196
197     return (void *)thread_state->ESP;
198 }
199
200 /* Arranging to invoke a C function is tricky, as we have to assume
201  * cdecl calling conventions (caller removes args) and x86/darwin
202  * alignment requirements.  The simplest way to arrange this,
203  * actually, is to open a new stack allocation.
204  * WARNING!!! THIS DOES NOT PRESERVE REGISTERS! */
205 void call_c_function_in_context(x86_thread_state32_t *thread_state,
206                                 void *function,
207                                 int nargs,
208                                 ...)
209 {
210     va_list ap;
211     int i;
212     u32 *stack_pointer;
213
214     /* Set up to restore stack on exit. */
215     open_stack_allocation(thread_state);
216
217     /* Have to keep stack 16byte aligned on x86/darwin. */
218     for (i = (3 & -nargs); i; i--) {
219         push_context(0, thread_state);
220     }
221
222     thread_state->ESP = ((u32)thread_state->ESP) - nargs * 4;
223     stack_pointer = (u32 *)thread_state->ESP;
224
225     va_start(ap, nargs);
226     for (i = 0; i < nargs; i++) {
227         //push_context(va_arg(ap, u32), thread_state);
228         stack_pointer[i] = va_arg(ap, u32);
229     }
230     va_end(ap);
231
232     push_context(thread_state->EIP, thread_state);
233     thread_state->EIP = (unsigned int) function;
234 }
235
236 void signal_emulation_wrapper(x86_thread_state32_t *thread_state,
237                               x86_float_state32_t *float_state,
238                               int signal,
239                               siginfo_t *siginfo,
240                               void (*handler)(int, siginfo_t *, void *))
241 {
242
243     /* CLH: FIXME **NOTE: HACK ALERT!** Ideally, we would allocate
244      * context and regs on the stack as local variables, but this
245      * causes problems for the lisp debugger. When it walks the stack
246      * for a back trace, it sees the 1) address of the local variable
247      * on the stack and thinks that is a frame pointer to a lisp
248      * frame, and, 2) the address of the sap that we alloc'ed in
249      * dynamic space and thinks that is a return address, so it,
250      * heuristicly (and wrongly), chooses that this should be
251      * interpreted as a lisp frame instead of as a C frame.
252      * We can work around this in this case by os_validating the
253      * context (and regs just for symmetry).
254      */
255
256     os_context_t *context;
257 #if MAC_OS_X_VERSION_10_5
258     struct __darwin_mcontext32 *regs;
259 #else
260     struct mcontext *regs;
261 #endif
262
263     context = (os_context_t*) os_validate(0, sizeof(os_context_t));
264 #if MAC_OS_X_VERSION_10_5
265     regs = (struct __darwin_mcontext32*) os_validate(0, sizeof(struct __darwin_mcontext32));
266 #else
267     regs = (struct mcontext*) os_validate(0, sizeof(struct mcontext));
268 #endif
269     context->uc_mcontext = regs;
270
271     /* when BSD signals are fired, they mask they signals in sa_mask
272        which always seem to be the blockable_sigset, for us, so we
273        need to:
274        1) save the current sigmask
275        2) block blockable signals
276        3) call the signal handler
277        4) restore the sigmask */
278
279     build_fake_signal_context(context, thread_state, float_state);
280
281     block_blockable_signals();
282
283     handler(signal, siginfo, context);
284
285     update_thread_state_from_context(thread_state, float_state, context);
286
287     os_invalidate((os_vm_address_t)context, sizeof(os_context_t));
288 #if MAC_OS_X_VERSION_10_5
289     os_invalidate((os_vm_address_t)regs, sizeof(struct __darwin_mcontext32));
290 #else
291     os_invalidate((os_vm_address_t)regs, sizeof(struct mcontext));
292 #endif
293
294     /* Trap to restore the signal context. */
295     asm volatile ("movl %0, %%eax; movl %1, %%ebx; .long 0xffff0b0f"
296                   : : "r" (thread_state), "r" (float_state));
297 }
298
299 /* Convenience wrapper for the above */
300 void call_handler_on_thread(mach_port_t thread,
301                             x86_thread_state32_t *thread_state,
302                             int signal,
303                             siginfo_t *siginfo,
304                             void (*handler)(int, siginfo_t *, void *))
305 {
306     x86_thread_state32_t new_state;
307     x86_thread_state32_t *save_thread_state;
308     x86_float_state32_t *save_float_state;
309     mach_msg_type_number_t state_count;
310     siginfo_t *save_siginfo;
311     kern_return_t ret;
312     /* Initialize the new state */
313     new_state = *thread_state;
314     open_stack_allocation(&new_state);
315     stack_allocate(&new_state, 256);
316     /* Save old state */
317     save_thread_state = (x86_thread_state32_t *)stack_allocate(&new_state, sizeof(*save_thread_state));
318     *save_thread_state = *thread_state;
319     /* Save float state */
320     save_float_state = (x86_float_state32_t *)stack_allocate(&new_state, sizeof(*save_float_state));
321     state_count = x86_FLOAT_STATE32_COUNT;
322     if ((ret = thread_get_state(thread,
323                                 x86_FLOAT_STATE32,
324                                 (thread_state_t)save_float_state,
325                                 &state_count)) != KERN_SUCCESS)
326         lose("thread_get_state (x86_THREAD_STATE32) failed %d\n", ret);
327     /* Set up siginfo */
328     save_siginfo = stack_allocate(&new_state, sizeof(*siginfo));
329     if (siginfo == NULL)
330         save_siginfo = siginfo;
331     else
332         *save_siginfo = *siginfo;
333     /* Prepare to call */
334     call_c_function_in_context(&new_state,
335                                signal_emulation_wrapper,
336                                5,
337                                save_thread_state,
338                                save_float_state,
339                                signal,
340                                save_siginfo,
341                                handler);
342     /* Update the thread state */
343     state_count = x86_THREAD_STATE32_COUNT;
344     if ((ret = thread_set_state(thread,
345                                 x86_THREAD_STATE32,
346                                 (thread_state_t)&new_state,
347                                 state_count)) != KERN_SUCCESS)
348         lose("thread_set_state (x86_FLOAT_STATE32) failed %d\n", ret);
349
350 }
351
352 #if defined DUMP_CONTEXT
353 void dump_context(x86_thread_state32_t *thread_state)
354 {
355     int i;
356     u32 *stack_pointer;
357
358     printf("eax: %08lx  ecx: %08lx  edx: %08lx  ebx: %08lx\n",
359            thread_state->EAX, thread_state->ECX, thread_state->EDX, thread_state->EAX);
360     printf("esp: %08lx  ebp: %08lx  esi: %08lx  edi: %08lx\n",
361            thread_state->ESP, thread_state->EBP, thread_state->ESI, thread_state->EDI);
362     printf("eip: %08lx  eflags: %08lx\n",
363            thread_state->EIP, thread_state->EFLAGS);
364     printf("cs: %04hx  ds: %04hx  es: %04hx  "
365            "ss: %04hx  fs: %04hx  gs: %04hx\n",
366            thread_state->CS,
367            thread_state->DS,
368            thread_state->ES,
369            thread_state->SS,
370            thread_state->FS,
371            thread_state->GS);
372
373     stack_pointer = (u32 *)thread_state->ESP;
374     for (i = 0; i < 48; i+=4) {
375         printf("%08x:  %08x %08x %08x %08x\n",
376                thread_state->ESP + (i * 4),
377                stack_pointer[i],
378                stack_pointer[i+1],
379                stack_pointer[i+2],
380                stack_pointer[i+3]);
381     }
382 }
383 #endif
384
385 void
386 control_stack_exhausted_handler(int signal, siginfo_t *siginfo, void *void_context) {
387     os_context_t *context = arch_os_get_context(&void_context);
388
389     arrange_return_to_lisp_function
390         (context, StaticSymbolFunction(CONTROL_STACK_EXHAUSTED_ERROR));
391 }
392
393 void
394 undefined_alien_handler(int signal, siginfo_t *siginfo, void *void_context) {
395     os_context_t *context = arch_os_get_context(&void_context);
396
397     arrange_return_to_lisp_function
398         (context, StaticSymbolFunction(UNDEFINED_ALIEN_VARIABLE_ERROR));
399 }
400
401 kern_return_t
402 catch_exception_raise(mach_port_t exception_port,
403                       mach_port_t thread,
404                       mach_port_t task,
405                       exception_type_t exception,
406                       exception_data_t code_vector,
407                       mach_msg_type_number_t code_count)
408 {
409     struct thread *th = (struct thread*) exception_port;
410     x86_thread_state32_t thread_state;
411     mach_msg_type_number_t state_count;
412     vm_address_t region_addr;
413     vm_size_t region_size;
414     vm_region_basic_info_data_t region_info;
415     mach_msg_type_number_t info_count;
416     mach_port_t region_name;
417     void *addr = NULL;
418     int signal = 0;
419     void (*handler)(int, siginfo_t *, void *) = NULL;
420     siginfo_t siginfo;
421     kern_return_t ret;
422
423     /* Get state and info */
424     state_count = x86_THREAD_STATE32_COUNT;
425     if ((ret = thread_get_state(thread,
426                                 x86_THREAD_STATE32,
427                                 (thread_state_t)&thread_state,
428                                 &state_count)) != KERN_SUCCESS)
429         lose("thread_get_state (x86_THREAD_STATE32) failed %d\n", ret);
430     switch (exception) {
431     case EXC_BAD_ACCESS:
432         signal = SIGBUS;
433         /* Check if write protection fault */
434         if ((code_vector[0] & OS_VM_PROT_ALL) == 0) {
435             ret = KERN_INVALID_RIGHT;
436             break;
437         }
438         addr = (void*)code_vector[1];
439         /* Undefined alien */
440         if (os_trunc_to_page(addr) == undefined_alien_address) {
441             handler = undefined_alien_handler;
442             break;
443         }
444         /* At stack guard */
445         if (os_trunc_to_page(addr) == CONTROL_STACK_GUARD_PAGE(th)) {
446             protect_control_stack_guard_page_thread(0, th);
447             protect_control_stack_return_guard_page_thread(1, th);
448             handler = control_stack_exhausted_handler;
449             break;
450         }
451         /* Return from stack guard */
452         if (os_trunc_to_page(addr) == CONTROL_STACK_RETURN_GUARD_PAGE(th)) {
453             protect_control_stack_guard_page_thread(1, th);
454             protect_control_stack_return_guard_page_thread(0, th);
455             break;
456         }
457         /* Regular memory fault */
458         handler = memory_fault_handler;
459         break;
460     case EXC_BAD_INSTRUCTION:
461         signal = SIGTRAP;
462         /* Check if illegal instruction trap */
463         if (code_vector[0] != EXC_I386_INVOP) {
464             ret = KERN_INVALID_RIGHT;
465             break;
466         }
467         /* Check if UD2 instruction */
468         if (*(unsigned short *)thread_state.EIP != 0x0b0f) {
469             /* KLUDGE: There are two ways we could get here:
470              * 1) We're executing data and we've hit some truly
471              *    illegal opcode, of which there are a few, see
472              *    Intel 64 and IA-32 Architectures
473              *    Sofware Developer's Manual
474              *    Volume 3A page 5-34)
475              * 2) The kernel started an unrelated signal handler
476              *    before we got a chance to run. The context that
477              *    caused the exception is saved in a stack frame
478              *    somewhere down below.
479              * In either case we rely on the exception to retrigger,
480              * eventually bailing out if we're spinning on case 2).
481              */
482             static mach_port_t last_thread;
483             static unsigned int last_eip;
484             if (last_thread == thread && last_eip == thread_state.EIP)
485                 ret = KERN_INVALID_RIGHT;
486             else
487                 ret = KERN_SUCCESS;
488             last_thread = thread;
489             last_eip = thread_state.EIP;
490             break;
491         }
492         /* Skip the trap code */
493         thread_state.EIP += 2;
494         /* Return from handler? */
495         if (*(unsigned short *)thread_state.EIP == 0xffff) {
496             if ((ret = thread_set_state(thread,
497                                         x86_THREAD_STATE32,
498                                         (thread_state_t)thread_state.EAX,
499                                         x86_THREAD_STATE32_COUNT)) != KERN_SUCCESS)
500                 lose("thread_set_state (x86_THREAD_STATE32) failed %d\n", ret);
501             if ((ret = thread_set_state(thread,
502                                         x86_FLOAT_STATE32,
503                                         (thread_state_t)thread_state.EBX,
504                                         x86_FLOAT_STATE32_COUNT)) != KERN_SUCCESS)
505                 lose("thread_set_state (x86_FLOAT_STATE32) failed %d\n", ret);
506             break;
507         }
508         /* Trap call */
509         handler = sigtrap_handler;
510         break;
511     default:
512         ret = KERN_INVALID_RIGHT;
513     }
514     /* Call handler */
515     if (handler != 0) {
516       siginfo.si_signo = signal;
517       siginfo.si_addr = addr;
518       call_handler_on_thread(thread, &thread_state, signal, &siginfo, handler);
519     }
520     return ret;
521 }
522
523 void *
524 mach_exception_handler(void *port)
525 {
526   mach_msg_server(exc_server, 2048, (mach_port_t) port, 0);
527   /* mach_msg_server should never return, but it should dispatch mach
528    * exceptions to our catch_exception_raise function
529    */
530   abort();
531 }
532
533 #endif
534
535 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
536
537 /* Sets up the thread that will listen for mach exceptions. note that
538    the exception handlers will be run on this thread. This is
539    different from the BSD-style signal handling situation in which the
540    signal handlers run in the relevant thread directly. */
541
542 mach_port_t mach_exception_handler_port_set = MACH_PORT_NULL;
543
544 pthread_t
545 setup_mach_exception_handling_thread()
546 {
547     kern_return_t ret;
548     pthread_t mach_exception_handling_thread = NULL;
549     pthread_attr_t attr;
550
551     /* allocate a mach_port for this process */
552     ret = mach_port_allocate(mach_task_self(),
553                              MACH_PORT_RIGHT_PORT_SET,
554                              &mach_exception_handler_port_set);
555
556     /* create the thread that will receive the mach exceptions */
557
558     FSHOW((stderr, "Creating mach_exception_handler thread!\n"));
559
560     pthread_attr_init(&attr);
561     pthread_create(&mach_exception_handling_thread,
562                    &attr,
563                    mach_exception_handler,
564                    (void*) mach_exception_handler_port_set);
565     pthread_attr_destroy(&attr);
566
567     return mach_exception_handling_thread;
568 }
569
570 /* tell the kernel that we want EXC_BAD_ACCESS exceptions sent to the
571    exception port (which is being listened to do by the mach
572    exception handling thread). */
573 kern_return_t
574 mach_thread_init(mach_port_t thread_exception_port)
575 {
576     kern_return_t ret;
577     /* allocate a named port for the thread */
578
579     FSHOW((stderr, "Allocating mach port %x\n", thread_exception_port));
580
581     ret = mach_port_allocate_name(mach_task_self(),
582                                   MACH_PORT_RIGHT_RECEIVE,
583                                   thread_exception_port);
584     if (ret) {
585         lose("mach_port_allocate_name failed with return_code %d\n", ret);
586     }
587
588     /* establish the right for the thread_exception_port to send messages */
589     ret = mach_port_insert_right(mach_task_self(),
590                                  thread_exception_port,
591                                  thread_exception_port,
592                                  MACH_MSG_TYPE_MAKE_SEND);
593     if (ret) {
594         lose("mach_port_insert_right failed with return_code %d\n", ret);
595     }
596
597     ret = thread_set_exception_ports(mach_thread_self(),
598                                      EXC_MASK_BAD_ACCESS | EXC_MASK_BAD_INSTRUCTION,
599                                      thread_exception_port,
600                                      EXCEPTION_DEFAULT,
601                                      THREAD_STATE_NONE);
602     if (ret) {
603         lose("thread_set_exception_port failed with return_code %d\n", ret);
604     }
605
606     ret = mach_port_move_member(mach_task_self(),
607                                 thread_exception_port,
608                                 mach_exception_handler_port_set);
609     if (ret) {
610         lose("mach_port_ failed with return_code %d\n", ret);
611     }
612
613     return ret;
614 }
615
616 void
617 setup_mach_exceptions() {
618     setup_mach_exception_handling_thread();
619     mach_thread_init(THREAD_STRUCT_TO_EXCEPTION_PORT(all_threads));
620 }
621
/* fork() wrapper that calls setup_mach_exceptions() again in the
 * child.  Returns whatever fork() returned: 0 in the child, the
 * child's pid in the parent, -1 on failure. */
pid_t
mach_fork() {
    pid_t child = fork();

    if (child == 0) {
        /* Child only: parent and error paths fall straight through. */
        setup_mach_exceptions();
    }
    return child;
}
632
633 #endif