e61ecbe9997a062398db4c468eda24c4bc811622
[sbcl.git] / src / runtime / thread.c
1 /*
2  * This software is part of the SBCL system. See the README file for
3  * more information.
4  *
5  * This software is derived from the CMU CL system, which was
6  * written at Carnegie Mellon University and released into the
7  * public domain. The software is in the public domain and is
8  * provided with absolutely no warranty. See the COPYING and CREDITS
9  * files for more information.
10  */
11
12 #include "sbcl.h"
13
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #ifndef LISP_FEATURE_WIN32
18 #include <sched.h>
19 #endif
20 #include <signal.h>
21 #include <stddef.h>
22 #include <errno.h>
23 #include <sys/types.h>
24 #ifndef LISP_FEATURE_WIN32
25 #include <sys/wait.h>
26 #endif
27
28 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
29 #include <mach/mach.h>
30 #include <mach/mach_error.h>
31 #include <mach/mach_types.h>
32 #endif
33
34 #include "runtime.h"
35 #include "validate.h"           /* for CONTROL_STACK_SIZE etc */
36 #include "alloc.h"
37 #include "thread.h"
38 #include "arch.h"
39 #include "target-arch-os.h"
40 #include "os.h"
41 #include "globals.h"
42 #include "dynbind.h"
43 #include "genesis/cons.h"
44 #include "genesis/fdefn.h"
45 #include "interr.h"             /* for lose() */
46 #include "gc-internal.h"
47
48 #ifdef LISP_FEATURE_WIN32
49 /*
50  * Win32 doesn't have SIGSTKSZ, and we're not switching stacks anyway,
51  * so define it arbitrarily
52  */
53 #define SIGSTKSZ 1024
54 #endif
55
56 #if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_SB_THREAD)
57 #define QUEUE_FREEABLE_THREAD_STACKS
58 #define LOCK_CREATE_THREAD
59 #endif
60
61 #ifdef LISP_FEATURE_FREEBSD
62 #define CREATE_CLEANUP_THREAD
63 #define LOCK_CREATE_THREAD
64 #endif
65
66 #define ALIEN_STACK_SIZE (1*1024*1024) /* 1Mb size chosen at random */
67
/* Record of an exited thread whose memory region has not been released
 * yet: the thread that has to be joined and the base address of the
 * region that is later handed to os_invalidate(). */
struct freeable_stack {
#ifdef QUEUE_FREEABLE_THREAD_STACKS
    /* next (younger) entry; the queue is appended at the tail and
     * consumed from the head */
    struct freeable_stack *next;
#endif
    /* thread to pthread_join() before the region is unmapped */
    os_thread_t os_thread;
    /* copied from the thread's control_stack_start */
    os_vm_address_t stack;
};
75
76
#ifdef QUEUE_FREEABLE_THREAD_STACKS
/* Head of the queue of exited threads awaiting stack release, the
 * number of entries in it, and the mutex the queueing code takes
 * while modifying either. */
static struct freeable_stack * volatile freeable_stack_queue = 0;
static int freeable_stack_count = 0;
pthread_mutex_t freeable_stack_lock = PTHREAD_MUTEX_INITIALIZER;
#else
/* Single pending entry; exchanged via swap_lispobjs() in
 * free_thread_stack_later(). */
static struct freeable_stack * volatile freeable_stack = 0;
#endif

int dynamic_values_bytes=4096*sizeof(lispobj);  /* same for all threads */
/* Head of the doubly-linked list of threads (see link_thread()). */
struct thread * volatile all_threads;
extern struct interrupt_data * global_interrupt_data;

#ifdef LISP_FEATURE_SB_THREAD
/* Taken around link_thread()/unlink_thread() and held from
 * gc_stop_the_world() until gc_start_the_world(). */
pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER;
#ifdef LOCK_CREATE_THREAD
/* Held during create_os_thread() and while the world is stopped, so
 * that GC never stops a thread in the middle of pthread_create(). */
static pthread_mutex_t create_thread_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
#endif

#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
/* Defined outside this file; used by initial_thread_trampoline(). */
extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs);
#endif
99
100 static void
101 link_thread(struct thread *th)
102 {
103     if (all_threads) all_threads->prev=th;
104     th->next=all_threads;
105     th->prev=0;
106     all_threads=th;
107 }
108
109 #ifdef LISP_FEATURE_SB_THREAD
110 static void
111 unlink_thread(struct thread *th)
112 {
113     if (th->prev)
114         th->prev->next = th->next;
115     else
116         all_threads = th->next;
117     if (th->next)
118         th->next->prev = th->prev;
119 }
120 #endif
121
122 static int
123 initial_thread_trampoline(struct thread *th)
124 {
125     lispobj function;
126 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
127     lispobj *args = NULL;
128 #endif
129     function = th->no_tls_value_marker;
130     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
131     if(arch_os_thread_init(th)==0) return 1;
132     link_thread(th);
133     th->os_thread=thread_self();
134 #ifndef LISP_FEATURE_WIN32
135     protect_control_stack_guard_page(1);
136 #endif
137
138 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
139     return call_into_lisp_first_time(function,args,0);
140 #else
141     return funcall0(function);
142 #endif
143 }
144
/* Number of bytes in one thread's memory region: control stack, binding
 * stack, alien stack, the dynamic values area and a further
 * 32 * SIGSTKSZ bytes.  This is not a compile-time constant, since it
 * reads the dynamic_values_bytes variable. */
#define THREAD_STRUCT_SIZE (THREAD_CONTROL_STACK_SIZE + BINDING_STACK_SIZE + \
                            ALIEN_STACK_SIZE + dynamic_values_bytes + \
                            32 * SIGSTKSZ)
148
149 #ifdef LISP_FEATURE_SB_THREAD
150
151 #ifdef QUEUE_FREEABLE_THREAD_STACKS
152
153 static void
154 queue_freeable_thread_stack(struct thread *thread_to_be_cleaned_up)
155 {
156      if (thread_to_be_cleaned_up) {
157         pthread_mutex_lock(&freeable_stack_lock);
158         if (freeable_stack_queue) {
159             struct freeable_stack *new_freeable_stack = 0, *next;
160             next = freeable_stack_queue;
161             while (next->next) {
162                 next = next->next;
163             }
164             new_freeable_stack = (struct freeable_stack *)
165                 os_validate(0, sizeof(struct freeable_stack));
166             new_freeable_stack->next = NULL;
167             new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
168             new_freeable_stack->stack = (os_vm_address_t)
169                 thread_to_be_cleaned_up->control_stack_start;
170             next->next = new_freeable_stack;
171             freeable_stack_count++;
172         } else {
173             struct freeable_stack *new_freeable_stack = 0;
174             new_freeable_stack = (struct freeable_stack *)
175                 os_validate(0, sizeof(struct freeable_stack));
176             new_freeable_stack->next = NULL;
177             new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
178             new_freeable_stack->stack = (os_vm_address_t)
179                 thread_to_be_cleaned_up->control_stack_start;
180             freeable_stack_queue = new_freeable_stack;
181             freeable_stack_count++;
182         }
183         pthread_mutex_unlock(&freeable_stack_lock);
184     }
185 }
186
187 #define FREEABLE_STACK_QUEUE_SIZE 4
188
189 static void
190 free_freeable_stacks() {
191     if (freeable_stack_queue && (freeable_stack_count > FREEABLE_STACK_QUEUE_SIZE)) {
192         struct freeable_stack* old;
193         pthread_mutex_lock(&freeable_stack_lock);
194         old = freeable_stack_queue;
195         freeable_stack_queue = old->next;
196         freeable_stack_count--;
197         gc_assert(pthread_join(old->os_thread, NULL) == 0);
198         FSHOW((stderr, "freeing thread %x stack\n", old->os_thread));
199         os_invalidate(old->stack, THREAD_STRUCT_SIZE);
200         os_invalidate((os_vm_address_t)old, sizeof(struct freeable_stack));
201         pthread_mutex_unlock(&freeable_stack_lock);
202     }
203 }
204
205 #elif defined(CREATE_CLEANUP_THREAD)
206 static void *
207 cleanup_thread(void *arg)
208 {
209     struct freeable_stack *freeable = arg;
210     pthread_t self = pthread_self();
211
212     FSHOW((stderr, "/cleaner thread(%p): joining %p\n",
213            self, freeable->os_thread));
214     gc_assert(pthread_join(freeable->os_thread, NULL) == 0);
215     FSHOW((stderr, "/cleaner thread(%p): free stack %p\n",
216            self, freeable->stack));
217     os_invalidate(freeable->stack, THREAD_STRUCT_SIZE);
218     free(freeable);
219
220     pthread_detach(self);
221
222     return NULL;
223 }
224
225 static void
226 create_cleanup_thread(struct thread *thread_to_be_cleaned_up)
227 {
228     pthread_t thread;
229     int result;
230
231     if (thread_to_be_cleaned_up) {
232         struct freeable_stack *freeable =
233             malloc(sizeof(struct freeable_stack));
234         gc_assert(freeable != NULL);
235         freeable->os_thread = thread_to_be_cleaned_up->os_thread;
236         freeable->stack =
237             (os_vm_address_t) thread_to_be_cleaned_up->control_stack_start;
238         result = pthread_create(&thread, NULL, cleanup_thread, freeable);
239         gc_assert(result == 0);
240         sched_yield();
241     }
242 }
243
244 #else
245 static void
246 free_thread_stack_later(struct thread *thread_to_be_cleaned_up)
247 {
248     struct freeable_stack *new_freeable_stack = 0;
249     if (thread_to_be_cleaned_up) {
250         new_freeable_stack = (struct freeable_stack *)
251             os_validate(0, sizeof(struct freeable_stack));
252         new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
253         new_freeable_stack->stack = (os_vm_address_t)
254             thread_to_be_cleaned_up->control_stack_start;
255     }
256     new_freeable_stack = (struct freeable_stack *)
257         swap_lispobjs((lispobj *)(void *)&freeable_stack,
258                       (lispobj)new_freeable_stack);
259     if (new_freeable_stack) {
260         FSHOW((stderr,"/reaping %p\n", (void*) new_freeable_stack->os_thread));
261         /* Under NPTL pthread_join really waits until the thread
262          * exists and the stack can be safely freed. This is sadly not
263          * mandated by the pthread spec. */
264         gc_assert(pthread_join(new_freeable_stack->os_thread, NULL) == 0);
265         os_invalidate(new_freeable_stack->stack, THREAD_STRUCT_SIZE);
266         os_invalidate((os_vm_address_t) new_freeable_stack,
267                       sizeof(struct freeable_stack));
268     }
269 }
270 #endif
271
272 /* this is the first thing that runs in the child (which is why the
273  * silly calling convention).  Basically it calls the user's requested
274  * lisp function after doing arch_os_thread_init and whatever other
275  * bookkeeping needs to be done
276  */
277 int
278 new_thread_trampoline(struct thread *th)
279 {
280     lispobj function;
281     int result, lock_ret;
282
283     FSHOW((stderr,"/creating thread %lu\n", thread_self()));
284     function = th->no_tls_value_marker;
285     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
286     if(arch_os_thread_init(th)==0) {
287         /* FIXME: handle error */
288         lose("arch_os_thread_init failed\n");
289     }
290
291     th->os_thread=thread_self();
292     protect_control_stack_guard_page(1);
293     /* Since GC can only know about this thread from the all_threads
294      * list and we're just adding this thread to it there is no danger
295      * of deadlocking even with SIG_STOP_FOR_GC blocked (which it is
296      * not). */
297     lock_ret = pthread_mutex_lock(&all_threads_lock);
298     gc_assert(lock_ret == 0);
299     link_thread(th);
300     lock_ret = pthread_mutex_unlock(&all_threads_lock);
301     gc_assert(lock_ret == 0);
302
303     result = funcall0(function);
304
305     /* Block GC */
306     block_blockable_signals();
307     th->state=STATE_DEAD;
308
309     /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
310      * thread, but since we are already dead it won't wait long. */
311     lock_ret = pthread_mutex_lock(&all_threads_lock);
312     gc_assert(lock_ret == 0);
313
314     gc_alloc_update_page_tables(0, &th->alloc_region);
315     unlink_thread(th);
316     pthread_mutex_unlock(&all_threads_lock);
317     gc_assert(lock_ret == 0);
318
319     if(th->tls_cookie>=0) arch_os_thread_cleanup(th);
320     os_invalidate((os_vm_address_t)th->interrupt_data,
321                   (sizeof (struct interrupt_data)));
322
323 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
324     FSHOW((stderr, "Deallocating mach port %x\n", THREAD_STRUCT_TO_EXCEPTION_PORT(th)));
325     mach_port_move_member(mach_task_self(),
326                           THREAD_STRUCT_TO_EXCEPTION_PORT(th),
327                           MACH_PORT_NULL);
328     mach_port_deallocate(mach_task_self(),
329                          THREAD_STRUCT_TO_EXCEPTION_PORT(th));
330     mach_port_destroy(mach_task_self(),
331                       THREAD_STRUCT_TO_EXCEPTION_PORT(th));
332 #endif
333
334 #ifdef QUEUE_FREEABLE_THREAD_STACKS
335     queue_freeable_thread_stack(th);
336 #elif defined(CREATE_CLEANUP_THREAD)
337     create_cleanup_thread(th);
338 #else
339     free_thread_stack_later(th);
340 #endif
341
342     FSHOW((stderr,"/exiting thread %p\n", thread_self()));
343     return result;
344 }
345
346 #endif /* LISP_FEATURE_SB_THREAD */
347
348 static void
349 free_thread_struct(struct thread *th)
350 {
351     if (th->interrupt_data)
352         os_invalidate((os_vm_address_t) th->interrupt_data,
353                       (sizeof (struct interrupt_data)));
354     os_invalidate((os_vm_address_t) th->control_stack_start,
355                   THREAD_STRUCT_SIZE);
356 }
357
/* this is called from any other thread to create the new one, and
 * initialize all parts of it that can be initialized from another
 * thread
 *
 * Maps one region holding the control stack, binding stack, alien
 * stack and per-thread data, fills in the struct thread inside it,
 * establishes the initial per-thread bindings and allocates the
 * interrupt_data block.  initial_function is stashed in
 * th->no_tls_value_marker, where the trampolines pick it up.
 *
 * Returns the new struct thread, or a null pointer if either
 * allocation fails.
 */

static struct thread *
create_thread_struct(lispobj initial_function) {
    union per_thread_data *per_thread;
    struct thread *th=0;        /*  subdue gcc */
    void *spaces=0;
#ifdef LISP_FEATURE_SB_THREAD
    int i;
#endif

#ifdef CREATE_CLEANUP_THREAD
    /* Give a chance for cleanup threads to run. */
    sched_yield();
#endif
    /* May as well allocate all the spaces at once: it saves us from
     * having to decide what to do if only some of the allocations
     * succeed.  SPACES must be page-aligned, since the GC expects the
     * control stack to start at a page boundary.  We can't rely on the
     * alignment passed from os_validate, since that might assume the
     * current (e.g. 4k) pagesize, while we calculate with the biggest
     * (e.g. 64k) pagesize allowed by the ABI.  */
    spaces=os_validate(0, THREAD_STRUCT_SIZE + BACKEND_PAGE_SIZE);
    if(!spaces)
         return NULL;
    /* Round the start address up to a multiple of BACKEND_PAGE_SIZE.
     * NOTE(review): the unrounded address returned by os_validate is
     * not kept, and free_thread_struct() unmaps THREAD_STRUCT_SIZE
     * bytes from the rounded address, so the extra padding looks like
     * it is never released -- confirm. */
    spaces = (void *)((((unsigned long)(char *)spaces)
                       + BACKEND_PAGE_SIZE - 1)
                      & ~(BACKEND_PAGE_SIZE - 1));
    /* The per-thread data sits after the three stacks. */
    per_thread=(union per_thread_data *)
        (spaces+
         THREAD_CONTROL_STACK_SIZE+
         BINDING_STACK_SIZE+
         ALIEN_STACK_SIZE);

#ifdef LISP_FEATURE_SB_THREAD
    /* Start with every dynamic value slot holding the no-value marker. */
    for(i = 0; i < (dynamic_values_bytes / sizeof(lispobj)); i++)
        per_thread->dynamic_values[i] = NO_TLS_VALUE_MARKER_WIDETAG;
    /* Done only while no thread has been linked into all_threads yet:
     * set up FREE_TLS_INDEX / TLS_INDEX_LOCK if still unbound, and give
     * the symbols below tls indexes that refer to struct thread slots. */
    if (all_threads == 0) {
        if(SymbolValue(FREE_TLS_INDEX,0)==UNBOUND_MARKER_WIDETAG) {
            SetSymbolValue
                (FREE_TLS_INDEX,
                 /* FIXME: should be MAX_INTERRUPTS -1 ? */
                 make_fixnum(MAX_INTERRUPTS+
                             sizeof(struct thread)/sizeof(lispobj)),
                 0);
            SetSymbolValue(TLS_INDEX_LOCK,make_fixnum(0),0);
        }
#define STATIC_TLS_INIT(sym,field) \
  ((struct symbol *)(sym-OTHER_POINTER_LOWTAG))->tls_index= \
  make_fixnum(THREAD_SLOT_OFFSET_WORDS(field))

        STATIC_TLS_INIT(BINDING_STACK_START,binding_stack_start);
        STATIC_TLS_INIT(BINDING_STACK_POINTER,binding_stack_pointer);
        STATIC_TLS_INIT(CONTROL_STACK_START,control_stack_start);
        STATIC_TLS_INIT(CONTROL_STACK_END,control_stack_end);
        STATIC_TLS_INIT(ALIEN_STACK,alien_stack_pointer);
#if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
        STATIC_TLS_INIT(PSEUDO_ATOMIC_BITS,pseudo_atomic_bits);
#endif
#undef STATIC_TLS_INIT
    }
#endif

    /* Carve the region up: control stack first, then the binding
     * stack, then the alien stack. */
    th=&per_thread->thread;
    th->control_stack_start = spaces;
    th->binding_stack_start=
        (lispobj*)((void*)th->control_stack_start+THREAD_CONTROL_STACK_SIZE);
    th->control_stack_end = th->binding_stack_start;
    th->alien_stack_start=
        (lispobj*)((void*)th->binding_stack_start+BINDING_STACK_SIZE);
    th->binding_stack_pointer=th->binding_stack_start;
    th->this=th;
    /* os_thread is filled in by the trampoline on the thread itself */
    th->os_thread=0;
    th->state=STATE_RUNNING;
#ifdef LISP_FEATURE_STACK_GROWS_DOWNWARD_NOT_UPWARD
    /* start at the last word of the alien stack */
    th->alien_stack_pointer=((void *)th->alien_stack_start
                             + ALIEN_STACK_SIZE-N_WORD_BYTES);
#else
    th->alien_stack_pointer=((void *)th->alien_stack_start);
#endif
#if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
    th->pseudo_atomic_bits=0;
#endif
#ifdef LISP_FEATURE_GENCGC
    gc_set_region_empty(&th->alloc_region);
#endif

#ifndef LISP_FEATURE_SB_THREAD
    /* the tls-points-into-struct-thread trick is only good for threaded
     * sbcl, because unithread sbcl doesn't have tls.  So, we copy the
     * appropriate values from struct thread here, and make sure that
     * we use the appropriate SymbolValue macros to access any of the
     * variable quantities from the C runtime.  It's not quite OAOOM,
     * it just feels like it */
    SetSymbolValue(BINDING_STACK_START,(lispobj)th->binding_stack_start,th);
    SetSymbolValue(CONTROL_STACK_START,(lispobj)th->control_stack_start,th);
    SetSymbolValue(CONTROL_STACK_END,(lispobj)th->control_stack_end,th);
#if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
    SetSymbolValue(BINDING_STACK_POINTER,(lispobj)th->binding_stack_pointer,th);
    SetSymbolValue(ALIEN_STACK,(lispobj)th->alien_stack_pointer,th);
    SetSymbolValue(PSEUDO_ATOMIC_BITS,(lispobj)th->pseudo_atomic_bits,th);
#else
    current_binding_stack_pointer=th->binding_stack_pointer;
    current_control_stack_pointer=th->control_stack_start;
#endif
#endif
    /* Initial per-thread bindings for the new thread. */
    bind_variable(CURRENT_CATCH_BLOCK,make_fixnum(0),th);
    bind_variable(CURRENT_UNWIND_PROTECT_BLOCK,make_fixnum(0),th);
    bind_variable(FREE_INTERRUPT_CONTEXT_INDEX,make_fixnum(0),th);
    bind_variable(INTERRUPT_PENDING, NIL,th);
    bind_variable(INTERRUPTS_ENABLED,T,th);
    bind_variable(ALLOW_WITH_INTERRUPTS,T,th);
    bind_variable(GC_PENDING,NIL,th);
#ifdef LISP_FEATURE_SB_THREAD
    bind_variable(STOP_FOR_GC_PENDING,NIL,th);
#endif

    /* interrupt_data lives in its own mapping; on failure undo the
     * main allocation and report failure to the caller. */
    th->interrupt_data = (struct interrupt_data *)
        os_validate(0,(sizeof (struct interrupt_data)));
    if (!th->interrupt_data) {
        free_thread_struct(th);
        return 0;
    }
    th->interrupt_data->pending_handler = 0;
    /* Stash the function to run; the trampolines read it back from
     * this slot and then restore the real marker. */
    th->no_tls_value_marker=initial_function;

    th->stepping = NIL;
    return th;
}
490
491 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
492 mach_port_t setup_mach_exception_handling_thread();
493 kern_return_t mach_thread_init(mach_port_t thread_exception_port);
494
495 #endif
496
497 void create_initial_thread(lispobj initial_function) {
498     struct thread *th=create_thread_struct(initial_function);
499     if(th) {
500 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
501         kern_return_t ret;
502
503         setup_mach_exception_handling_thread();
504 #endif
505         initial_thread_trampoline(th); /* no return */
506     } else lose("can't create initial thread\n");
507 }
508
509 #ifdef LISP_FEATURE_SB_THREAD
510
511 #ifndef __USE_XOPEN2K
512 extern int pthread_attr_setstack (pthread_attr_t *__attr, void *__stackaddr,
513                                   size_t __stacksize);
514 #endif
515
516 boolean create_os_thread(struct thread *th,os_thread_t *kid_tid)
517 {
518     /* The new thread inherits the restrictive signal mask set here,
519      * and enables signals again when it is set up properly. */
520     pthread_attr_t attr;
521     sigset_t newset,oldset;
522     boolean r=1;
523     int retcode, initcode, sizecode, addrcode;
524
525     FSHOW_SIGNAL((stderr,"/create_os_thread: creating new thread\n"));
526
527 #ifdef LOCK_CREATE_THREAD
528     retcode = pthread_mutex_lock(&create_thread_lock);
529     gc_assert(retcode == 0);
530     FSHOW_SIGNAL((stderr,"/create_os_thread: got lock\n"));
531 #endif
532     sigemptyset(&newset);
533     /* Blocking deferrable signals is enough, no need to block
534      * SIG_STOP_FOR_GC because the child process is not linked onto
535      * all_threads until it's ready. */
536     sigaddset_deferrable(&newset);
537     thread_sigmask(SIG_BLOCK, &newset, &oldset);
538
539 #if defined(LISP_FEATURE_DARWIN)
540 #define CONTROL_STACK_ADJUST 8192 /* darwin wants page-aligned stacks */
541 #else
542 #define CONTROL_STACK_ADJUST 16
543 #endif
544
545     if((initcode = pthread_attr_init(&attr)) ||
546        /* FIXME: why do we even have this in the first place? */
547        (pthread_attr_setstack(&attr,th->control_stack_start,
548                               THREAD_CONTROL_STACK_SIZE-CONTROL_STACK_ADJUST)) ||
549 #undef CONTROL_STACK_ADJUST
550        (retcode = pthread_create
551         (kid_tid,&attr,(void *(*)(void *))new_thread_trampoline,th))) {
552         FSHOW_SIGNAL((stderr, "init, size, addr = %d, %d, %d\n", initcode, sizecode, addrcode));
553         FSHOW_SIGNAL((stderr, printf("pthread_create returned %d, errno %d\n", retcode, errno)));
554         FSHOW_SIGNAL((stderr, "wanted stack size %d, min stack size %d\n",
555                       THREAD_CONTROL_STACK_SIZE-16, PTHREAD_STACK_MIN));
556         if(retcode < 0) {
557             perror("create_os_thread");
558         }
559         r=0;
560     }
561
562 #ifdef QUEUE_FREEABLE_THREAD_STACKS
563     free_freeable_stacks();
564 #endif
565     thread_sigmask(SIG_SETMASK,&oldset,0);
566 #ifdef LOCK_CREATE_THREAD
567     retcode = pthread_mutex_unlock(&create_thread_lock);
568     gc_assert(retcode == 0);
569     FSHOW_SIGNAL((stderr,"/create_os_thread: released lock\n"));
570 #endif
571     return r;
572 }
573
574 os_thread_t create_thread(lispobj initial_function) {
575     struct thread *th;
576     os_thread_t kid_tid;
577
578     /* Assuming that a fresh thread struct has no lisp objects in it,
579      * linking it to all_threads can be left to the thread itself
580      * without fear of gc lossage. initial_function violates this
581      * assumption and must stay pinned until the child starts up. */
582     th = create_thread_struct(initial_function);
583     if(th==0) return 0;
584
585     if (create_os_thread(th,&kid_tid)) {
586         return kid_tid;
587     } else {
588         free_thread_struct(th);
589         return 0;
590     }
591 }
592
593 /* Send the signo to os_thread, retry if the rt signal queue is
594  * full. */
595 int
596 kill_thread_safely(os_thread_t os_thread, int signo)
597 {
598     int r;
599     /* The man page does not mention EAGAIN as a valid return value
600      * for either pthread_kill or kill. But that's theory, this is
601      * practice. By waiting here we assume that the delivery of this
602      * signal is not necessary for the delivery of the signals in the
603      * queue. In other words, we _assume_ there are no deadlocks. */
604     while ((r=pthread_kill(os_thread,signo))==EAGAIN) {
605         /* wait a bit then try again in the hope of the rt signal
606          * queue not being full */
607         FSHOW_SIGNAL((stderr,"/rt signal queue full\n"));
608         /* FIXME: some kind of backoff (random, exponential) would be
609          * nice. */
610         sleep(1);
611     }
612     return r;
613 }
614
615 int signal_interrupt_thread(os_thread_t os_thread)
616 {
617     int status = kill_thread_safely(os_thread, SIG_INTERRUPT_THREAD);
618     if (status == 0) {
619         return 0;
620     } else if (status == ESRCH) {
621         return -1;
622     } else {
623         lose("cannot send SIG_INTERRUPT_THREAD to thread=%lu: %d, %s\n",
624              os_thread, status, strerror(status));
625     }
626 }
627
628 /* stopping the world is a two-stage process.  From this thread we signal
629  * all the others with SIG_STOP_FOR_GC.  The handler for this signal does
630  * the usual pseudo-atomic checks (we don't want to stop a thread while
631  * it's in the middle of allocation) then waits for another SIG_STOP_FOR_GC.
632  */
633
634 /* To avoid deadlocks when gc stops the world all clients of each
635  * mutex must enable or disable SIG_STOP_FOR_GC for the duration of
636  * holding the lock, but they must agree on which. */
637 void gc_stop_the_world()
638 {
639     struct thread *p,*th=arch_os_get_current_thread();
640     int status, lock_ret;
641 #ifdef LOCK_CREATE_THREAD
642     /* KLUDGE: Stopping the thread during pthread_create() causes deadlock
643      * on FreeBSD. */
644     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on create_thread_lock, thread=%lu\n",
645                   th->os_thread));
646     lock_ret = pthread_mutex_lock(&create_thread_lock);
647     gc_assert(lock_ret == 0);
648     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got create_thread_lock, thread=%lu\n",
649                   th->os_thread));
650 #endif
651     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on lock, thread=%lu\n",
652                   th->os_thread));
653     /* keep threads from starting while the world is stopped. */
654     lock_ret = pthread_mutex_lock(&all_threads_lock);      \
655     gc_assert(lock_ret == 0);
656
657     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got lock, thread=%lu\n",
658                   th->os_thread));
659     /* stop all other threads by sending them SIG_STOP_FOR_GC */
660     for(p=all_threads; p; p=p->next) {
661         gc_assert(p->os_thread != 0);
662         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: p->state: %x\n", p->state));
663         if((p!=th) && ((p->state==STATE_RUNNING))) {
664             FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending %x, os_thread %x\n",
665                           p, p->os_thread));
666             status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
667             if (status==ESRCH) {
668                 /* This thread has exited. */
669                 gc_assert(p->state==STATE_DEAD);
670             } else if (status) {
671                 lose("cannot send suspend thread=%lu: %d, %s\n",
672                      p->os_thread,status,strerror(status));
673             }
674         }
675     }
676     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:signals sent\n"));
677     /* wait for the running threads to stop or finish */
678     for(p=all_threads;p;) {
679         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: th: %p, p: %p\n", th, p));
680         if((p!=th) && (p->state==STATE_RUNNING)) {
681             sched_yield();
682         } else {
683             p=p->next;
684         }
685     }
686     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:end\n"));
687 }
688
689 void gc_start_the_world()
690 {
691     struct thread *p,*th=arch_os_get_current_thread();
692     int status, lock_ret;
693     /* if a resumed thread creates a new thread before we're done with
694      * this loop, the new thread will get consed on the front of
695      * all_threads, but it won't have been stopped so won't need
696      * restarting */
697     FSHOW_SIGNAL((stderr,"/gc_start_the_world:begin\n"));
698     for(p=all_threads;p;p=p->next) {
699         gc_assert(p->os_thread!=0);
700         if((p!=th) && (p->state!=STATE_DEAD)) {
701             if(p->state!=STATE_SUSPENDED) {
702                 lose("gc_start_the_world: wrong thread state is %d\n",
703                      fixnum_value(p->state));
704             }
705             FSHOW_SIGNAL((stderr, "/gc_start_the_world: resuming %lu\n",
706                           p->os_thread));
707             p->state=STATE_RUNNING;
708
709 #if defined(SIG_RESUME_FROM_GC)
710             status=kill_thread_safely(p->os_thread,SIG_RESUME_FROM_GC);
711 #else
712             status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
713 #endif
714             if (status) {
715                 lose("cannot resume thread=%lu: %d, %s\n",
716                      p->os_thread,status,strerror(status));
717             }
718         }
719     }
720     /* If we waited here until all threads leave STATE_SUSPENDED, then
721      * SIG_STOP_FOR_GC wouldn't need to be a rt signal. That has some
722      * performance implications, but does away with the 'rt signal
723      * queue full' problem. */
724
725     lock_ret = pthread_mutex_unlock(&all_threads_lock);
726     gc_assert(lock_ret == 0);
727 #ifdef LOCK_CREATE_THREAD
728     lock_ret = pthread_mutex_unlock(&create_thread_lock);
729     gc_assert(lock_ret == 0);
730 #endif
731
732     FSHOW_SIGNAL((stderr,"/gc_start_the_world:end\n"));
733 }
734 #endif