5b8ec4ac475f9b4c8ed26a6f38fa97ee411423af
[sbcl.git] / src / runtime / thread.c
1 /*
2  * This software is part of the SBCL system. See the README file for
3  * more information.
4  *
5  * This software is derived from the CMU CL system, which was
6  * written at Carnegie Mellon University and released into the
7  * public domain. The software is in the public domain and is
8  * provided with absolutely no warranty. See the COPYING and CREDITS
9  * files for more information.
10  */
11
12 #include "sbcl.h"
13
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #ifndef LISP_FEATURE_WIN32
18 #include <sched.h>
19 #endif
20 #include <signal.h>
21 #include <stddef.h>
22 #include <errno.h>
23 #include <sys/types.h>
24 #ifndef LISP_FEATURE_WIN32
25 #include <sys/wait.h>
26 #endif
27
28 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
29 #include <mach/mach.h>
30 #include <mach/mach_error.h>
31 #include <mach/mach_types.h>
32 #endif
33
34 #include "runtime.h"
35 #include "validate.h"           /* for CONTROL_STACK_SIZE etc */
36 #include "alloc.h"
37 #include "thread.h"
38 #include "arch.h"
39 #include "target-arch-os.h"
40 #include "os.h"
41 #include "globals.h"
42 #include "dynbind.h"
43 #include "genesis/cons.h"
44 #include "genesis/fdefn.h"
45 #include "interr.h"             /* for lose() */
46 #include "gc-internal.h"
47
48 #ifdef LISP_FEATURE_WIN32
49 /*
50  * Win32 doesn't have SIGSTKSZ, and we're not switching stacks anyway,
51  * so define it arbitrarily
52  */
53 #define SIGSTKSZ 1024
54 #endif
55
56 #if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_SB_THREAD)
57 #define QUEUE_FREEABLE_THREAD_STACKS
58 #define LOCK_CREATE_THREAD
59 #endif
60
61 #ifdef LISP_FEATURE_FREEBSD
62 #define CREATE_CLEANUP_THREAD
63 #define LOCK_CREATE_THREAD
64 #endif
65
66 #define ALIEN_STACK_SIZE (1*1024*1024) /* 1Mb size chosen at random */
67
68 struct freeable_stack {
69 #ifdef QUEUE_FREEABLE_THREAD_STACKS
70     struct freeable_stack *next;
71 #endif
72     os_thread_t os_thread;
73     os_vm_address_t os_address;
74 };
75
76
77 #ifdef QUEUE_FREEABLE_THREAD_STACKS
78 static struct freeable_stack * volatile freeable_stack_queue = 0;
79 static int freeable_stack_count = 0;
80 pthread_mutex_t freeable_stack_lock = PTHREAD_MUTEX_INITIALIZER;
81 #else
82 static struct freeable_stack * volatile freeable_stack = 0;
83 #endif
84
85 int dynamic_values_bytes=4096*sizeof(lispobj);  /* same for all threads */
86 struct thread * volatile all_threads;
87 extern struct interrupt_data * global_interrupt_data;
88
89 #ifdef LISP_FEATURE_SB_THREAD
90 pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER;
91 #ifdef LOCK_CREATE_THREAD
92 static pthread_mutex_t create_thread_lock = PTHREAD_MUTEX_INITIALIZER;
93 #endif
94 #ifdef LISP_FEATURE_GCC_TLS
95 __thread struct thread *current_thread;
96 #endif
97 #endif
98
99 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
100 extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs);
101 #endif
102
103 static void
104 link_thread(struct thread *th)
105 {
106     if (all_threads) all_threads->prev=th;
107     th->next=all_threads;
108     th->prev=0;
109     all_threads=th;
110 }
111
112 #ifdef LISP_FEATURE_SB_THREAD
113 static void
114 unlink_thread(struct thread *th)
115 {
116     if (th->prev)
117         th->prev->next = th->next;
118     else
119         all_threads = th->next;
120     if (th->next)
121         th->next->prev = th->prev;
122 }
123 #endif
124
125 static int
126 initial_thread_trampoline(struct thread *th)
127 {
128     lispobj function;
129 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
130     lispobj *args = NULL;
131 #endif
132     function = th->no_tls_value_marker;
133     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
134     if(arch_os_thread_init(th)==0) return 1;
135     link_thread(th);
136     th->os_thread=thread_self();
137 #ifndef LISP_FEATURE_WIN32
138     protect_control_stack_guard_page(1);
139 #endif
140
141 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
142     return call_into_lisp_first_time(function,args,0);
143 #else
144     return funcall0(function);
145 #endif
146 }
147
/* Size in bytes of the single mapping made for each thread: control
 * stack, binding stack, alien stack and the dynamic values area, plus
 * 32 * SIGSTKSZ bytes and one BACKEND_PAGE_SIZE of slack that lets
 * create_thread_struct() align the start upward.  Not a compile-time
 * constant, since it reads the variable dynamic_values_bytes. */
#define THREAD_STRUCT_SIZE \
    (THREAD_CONTROL_STACK_SIZE + BINDING_STACK_SIZE + ALIEN_STACK_SIZE + \
     dynamic_values_bytes + 32 * SIGSTKSZ + BACKEND_PAGE_SIZE)
152
153 #ifdef LISP_FEATURE_SB_THREAD
154
155 #ifdef QUEUE_FREEABLE_THREAD_STACKS
156
157 static void
158 queue_freeable_thread_stack(struct thread *thread_to_be_cleaned_up)
159 {
160      struct freeable_stack *new_freeable_stack = 0;
161      if (thread_to_be_cleaned_up) {
162         /* FIXME: os_validate is mmap -- for small things like these
163          * malloc would probably perform better. */
164         new_freeable_stack = (struct freeable_stack *)
165             os_validate(0, sizeof(struct freeable_stack));
166         new_freeable_stack->next = NULL;
167         new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
168         new_freeable_stack->os_address = thread_to_be_cleaned_up->os_address;
169         pthread_mutex_lock(&freeable_stack_lock);
170         if (freeable_stack_queue) {
171             struct freeable_stack *next;
172             next = freeable_stack_queue;
173             while (next->next) {
174                 next = next->next;
175             }
176             next->next = new_freeable_stack;
177         } else {
178             freeable_stack_queue = new_freeable_stack;
179         }
180         freeable_stack_count++;
181         pthread_mutex_unlock(&freeable_stack_lock);
182     }
183 }
184
185 #define FREEABLE_STACK_QUEUE_SIZE 4
186
187 static void
188 free_freeable_stacks() {
189     if (freeable_stack_queue && (freeable_stack_count > FREEABLE_STACK_QUEUE_SIZE)) {
190         struct freeable_stack* old;
191         pthread_mutex_lock(&freeable_stack_lock);
192         old = freeable_stack_queue;
193         freeable_stack_queue = old->next;
194         freeable_stack_count--;
195         gc_assert(pthread_join(old->os_thread, NULL) == 0);
196         FSHOW((stderr, "freeing thread %x stack\n", old->os_thread));
197         os_invalidate(old->os_address, THREAD_STRUCT_SIZE);
198         os_invalidate((os_vm_address_t)old, sizeof(struct freeable_stack));
199         pthread_mutex_unlock(&freeable_stack_lock);
200     }
201 }
202
203 #elif defined(CREATE_CLEANUP_THREAD)
204 static void *
205 cleanup_thread(void *arg)
206 {
207     struct freeable_stack *freeable = arg;
208     pthread_t self = pthread_self();
209
210     FSHOW((stderr, "/cleaner thread(%p): joining %p\n",
211            self, freeable->os_thread));
212     gc_assert(pthread_join(freeable->os_thread, NULL) == 0);
213     FSHOW((stderr, "/cleaner thread(%p): free stack %p\n",
214            self, freeable->stack));
215     os_invalidate(freeable->os_address, THREAD_STRUCT_SIZE);
216     free(freeable);
217
218     pthread_detach(self);
219
220     return NULL;
221 }
222
223 static void
224 create_cleanup_thread(struct thread *thread_to_be_cleaned_up)
225 {
226     pthread_t thread;
227     int result;
228
229     if (thread_to_be_cleaned_up) {
230         struct freeable_stack *freeable =
231             malloc(sizeof(struct freeable_stack));
232         gc_assert(freeable != NULL);
233         freeable->os_thread = thread_to_be_cleaned_up->os_thread;
234         freeable->os_address =
235             (os_vm_address_t) thread_to_be_cleaned_up->os_address;
236         result = pthread_create(&thread, NULL, cleanup_thread, freeable);
237         gc_assert(result == 0);
238         sched_yield();
239     }
240 }
241
242 #else
243 static void
244 free_thread_stack_later(struct thread *thread_to_be_cleaned_up)
245 {
246     struct freeable_stack *new_freeable_stack = 0;
247     if (thread_to_be_cleaned_up) {
248         new_freeable_stack = (struct freeable_stack *)
249             os_validate(0, sizeof(struct freeable_stack));
250         new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
251         new_freeable_stack->os_address = (os_vm_address_t)
252             thread_to_be_cleaned_up->os_address;
253     }
254     new_freeable_stack = (struct freeable_stack *)
255         swap_lispobjs((lispobj *)(void *)&freeable_stack,
256                       (lispobj)new_freeable_stack);
257     if (new_freeable_stack) {
258         FSHOW((stderr,"/reaping %p\n", (void*) new_freeable_stack->os_thread));
259         /* Under NPTL pthread_join really waits until the thread
260          * exists and the stack can be safely freed. This is sadly not
261          * mandated by the pthread spec. */
262         gc_assert(pthread_join(new_freeable_stack->os_thread, NULL) == 0);
263         os_invalidate(new_freeable_stack->os_address, THREAD_STRUCT_SIZE);
264         os_invalidate((os_vm_address_t) new_freeable_stack,
265                       sizeof(struct freeable_stack));
266     }
267 }
268 #endif
269
270 /* this is the first thing that runs in the child (which is why the
271  * silly calling convention).  Basically it calls the user's requested
272  * lisp function after doing arch_os_thread_init and whatever other
273  * bookkeeping needs to be done
274  */
275 int
276 new_thread_trampoline(struct thread *th)
277 {
278     lispobj function;
279     int result, lock_ret;
280
281     FSHOW((stderr,"/creating thread %lu\n", thread_self()));
282     function = th->no_tls_value_marker;
283     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
284     if(arch_os_thread_init(th)==0) {
285         /* FIXME: handle error */
286         lose("arch_os_thread_init failed\n");
287     }
288
289     th->os_thread=thread_self();
290     protect_control_stack_guard_page(1);
291     /* Since GC can only know about this thread from the all_threads
292      * list and we're just adding this thread to it there is no danger
293      * of deadlocking even with SIG_STOP_FOR_GC blocked (which it is
294      * not). */
295     lock_ret = pthread_mutex_lock(&all_threads_lock);
296     gc_assert(lock_ret == 0);
297     link_thread(th);
298     lock_ret = pthread_mutex_unlock(&all_threads_lock);
299     gc_assert(lock_ret == 0);
300
301     result = funcall0(function);
302
303     /* Block GC */
304     block_blockable_signals();
305     th->state=STATE_DEAD;
306
307     /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
308      * thread, but since we are already dead it won't wait long. */
309     lock_ret = pthread_mutex_lock(&all_threads_lock);
310     gc_assert(lock_ret == 0);
311
312     gc_alloc_update_page_tables(0, &th->alloc_region);
313     unlink_thread(th);
314     pthread_mutex_unlock(&all_threads_lock);
315     gc_assert(lock_ret == 0);
316
317     if(th->tls_cookie>=0) arch_os_thread_cleanup(th);
318     os_invalidate((os_vm_address_t)th->interrupt_data,
319                   (sizeof (struct interrupt_data)));
320
321 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
322     FSHOW((stderr, "Deallocating mach port %x\n", THREAD_STRUCT_TO_EXCEPTION_PORT(th)));
323     mach_port_move_member(mach_task_self(),
324                           THREAD_STRUCT_TO_EXCEPTION_PORT(th),
325                           MACH_PORT_NULL);
326     mach_port_deallocate(mach_task_self(),
327                          THREAD_STRUCT_TO_EXCEPTION_PORT(th));
328     mach_port_destroy(mach_task_self(),
329                       THREAD_STRUCT_TO_EXCEPTION_PORT(th));
330 #endif
331
332 #ifdef QUEUE_FREEABLE_THREAD_STACKS
333     queue_freeable_thread_stack(th);
334 #elif defined(CREATE_CLEANUP_THREAD)
335     create_cleanup_thread(th);
336 #else
337     free_thread_stack_later(th);
338 #endif
339
340     FSHOW((stderr,"/exiting thread %p\n", thread_self()));
341     return result;
342 }
343
344 #endif /* LISP_FEATURE_SB_THREAD */
345
346 static void
347 free_thread_struct(struct thread *th)
348 {
349     if (th->interrupt_data)
350         os_invalidate((os_vm_address_t) th->interrupt_data,
351                       (sizeof (struct interrupt_data)));
352     os_invalidate((os_vm_address_t) th->os_address,
353                   THREAD_STRUCT_SIZE);
354 }
355
356 /* this is called from any other thread to create the new one, and
357  * initialize all parts of it that can be initialized from another
358  * thread
359  */
360
361 static struct thread *
362 create_thread_struct(lispobj initial_function) {
363     union per_thread_data *per_thread;
364     struct thread *th=0;        /*  subdue gcc */
365     void *spaces=0;
366     void *aligned_spaces=0;
367 #ifdef LISP_FEATURE_SB_THREAD
368     int i;
369 #endif
370
371 #ifdef CREATE_CLEANUP_THREAD
372     /* Give a chance for cleanup threads to run. */
373     sched_yield();
374 #endif
375     /* May as well allocate all the spaces at once: it saves us from
376      * having to decide what to do if only some of the allocations
377      * succeed.  SPACES must be page-aligned, since the GC expects the
378      * control stack to start at a page boundary.  We can't rely on the
379      * alignment passed from os_validate, since that might assume the
380      * current (e.g. 4k) pagesize, while we calculate with the biggest
381      * (e.g. 64k) pagesize allowed by the ABI.  */
382     spaces=os_validate(0, THREAD_STRUCT_SIZE);
383     if(!spaces)
384         return NULL;
385     /* Aligning up is safe as THREAD_STRUCT_SIZE has BACKEND_PAGE_SIZE
386      * padding. */
387     aligned_spaces = (void *)((((unsigned long)(char *)spaces)
388                                + BACKEND_PAGE_SIZE - 1)
389                               & ~(unsigned long)(BACKEND_PAGE_SIZE - 1));
390     per_thread=(union per_thread_data *)
391         (aligned_spaces+
392          THREAD_CONTROL_STACK_SIZE+
393          BINDING_STACK_SIZE+
394          ALIEN_STACK_SIZE);
395
396 #ifdef LISP_FEATURE_SB_THREAD
397     for(i = 0; i < (dynamic_values_bytes / sizeof(lispobj)); i++)
398         per_thread->dynamic_values[i] = NO_TLS_VALUE_MARKER_WIDETAG;
399     if (all_threads == 0) {
400         if(SymbolValue(FREE_TLS_INDEX,0)==UNBOUND_MARKER_WIDETAG) {
401             SetSymbolValue
402                 (FREE_TLS_INDEX,
403                  /* FIXME: should be MAX_INTERRUPTS -1 ? */
404                  make_fixnum(MAX_INTERRUPTS+
405                              sizeof(struct thread)/sizeof(lispobj)),
406                  0);
407             SetSymbolValue(TLS_INDEX_LOCK,make_fixnum(0),0);
408         }
409 #define STATIC_TLS_INIT(sym,field) \
410   ((struct symbol *)(sym-OTHER_POINTER_LOWTAG))->tls_index= \
411   make_fixnum(THREAD_SLOT_OFFSET_WORDS(field))
412
413         STATIC_TLS_INIT(BINDING_STACK_START,binding_stack_start);
414         STATIC_TLS_INIT(BINDING_STACK_POINTER,binding_stack_pointer);
415         STATIC_TLS_INIT(CONTROL_STACK_START,control_stack_start);
416         STATIC_TLS_INIT(CONTROL_STACK_END,control_stack_end);
417         STATIC_TLS_INIT(ALIEN_STACK,alien_stack_pointer);
418 #if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
419         STATIC_TLS_INIT(PSEUDO_ATOMIC_BITS,pseudo_atomic_bits);
420 #endif
421 #undef STATIC_TLS_INIT
422     }
423 #endif
424
425     th=&per_thread->thread;
426     th->os_address = spaces;
427     th->control_stack_start = aligned_spaces;
428     th->binding_stack_start=
429         (lispobj*)((void*)th->control_stack_start+THREAD_CONTROL_STACK_SIZE);
430     th->control_stack_end = th->binding_stack_start;
431     th->alien_stack_start=
432         (lispobj*)((void*)th->binding_stack_start+BINDING_STACK_SIZE);
433     th->binding_stack_pointer=th->binding_stack_start;
434     th->this=th;
435     th->os_thread=0;
436     th->state=STATE_RUNNING;
437 #ifdef LISP_FEATURE_STACK_GROWS_DOWNWARD_NOT_UPWARD
438     th->alien_stack_pointer=((void *)th->alien_stack_start
439                              + ALIEN_STACK_SIZE-N_WORD_BYTES);
440 #else
441     th->alien_stack_pointer=((void *)th->alien_stack_start);
442 #endif
443 #if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
444     th->pseudo_atomic_bits=0;
445 #endif
446 #ifdef LISP_FEATURE_GENCGC
447     gc_set_region_empty(&th->alloc_region);
448 #endif
449
450 #ifndef LISP_FEATURE_SB_THREAD
451     /* the tls-points-into-struct-thread trick is only good for threaded
452      * sbcl, because unithread sbcl doesn't have tls.  So, we copy the
453      * appropriate values from struct thread here, and make sure that
454      * we use the appropriate SymbolValue macros to access any of the
455      * variable quantities from the C runtime.  It's not quite OAOOM,
456      * it just feels like it */
457     SetSymbolValue(BINDING_STACK_START,(lispobj)th->binding_stack_start,th);
458     SetSymbolValue(CONTROL_STACK_START,(lispobj)th->control_stack_start,th);
459     SetSymbolValue(CONTROL_STACK_END,(lispobj)th->control_stack_end,th);
460 #if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
461     SetSymbolValue(BINDING_STACK_POINTER,(lispobj)th->binding_stack_pointer,th);
462     SetSymbolValue(ALIEN_STACK,(lispobj)th->alien_stack_pointer,th);
463     SetSymbolValue(PSEUDO_ATOMIC_BITS,(lispobj)th->pseudo_atomic_bits,th);
464 #else
465     current_binding_stack_pointer=th->binding_stack_pointer;
466     current_control_stack_pointer=th->control_stack_start;
467 #endif
468 #endif
469     bind_variable(CURRENT_CATCH_BLOCK,make_fixnum(0),th);
470     bind_variable(CURRENT_UNWIND_PROTECT_BLOCK,make_fixnum(0),th);
471     bind_variable(FREE_INTERRUPT_CONTEXT_INDEX,make_fixnum(0),th);
472     bind_variable(INTERRUPT_PENDING, NIL,th);
473     bind_variable(INTERRUPTS_ENABLED,T,th);
474     bind_variable(ALLOW_WITH_INTERRUPTS,T,th);
475     bind_variable(GC_PENDING,NIL,th);
476 #ifdef LISP_FEATURE_SB_THREAD
477     bind_variable(STOP_FOR_GC_PENDING,NIL,th);
478 #endif
479
480     th->interrupt_data = (struct interrupt_data *)
481         os_validate(0,(sizeof (struct interrupt_data)));
482     if (!th->interrupt_data) {
483         free_thread_struct(th);
484         return 0;
485     }
486     th->interrupt_data->pending_handler = 0;
487     th->no_tls_value_marker=initial_function;
488
489     th->stepping = NIL;
490     return th;
491 }
492
493 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
494 mach_port_t setup_mach_exception_handling_thread();
495 kern_return_t mach_thread_init(mach_port_t thread_exception_port);
496
497 #endif
498
499 void create_initial_thread(lispobj initial_function) {
500     struct thread *th=create_thread_struct(initial_function);
501     if(th) {
502 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
503         kern_return_t ret;
504
505         setup_mach_exception_handling_thread();
506 #endif
507         initial_thread_trampoline(th); /* no return */
508     } else lose("can't create initial thread\n");
509 }
510
511 #ifdef LISP_FEATURE_SB_THREAD
512
513 #ifndef __USE_XOPEN2K
514 extern int pthread_attr_setstack (pthread_attr_t *__attr, void *__stackaddr,
515                                   size_t __stacksize);
516 #endif
517
518 boolean create_os_thread(struct thread *th,os_thread_t *kid_tid)
519 {
520     /* The new thread inherits the restrictive signal mask set here,
521      * and enables signals again when it is set up properly. */
522     pthread_attr_t attr;
523     sigset_t newset,oldset;
524     boolean r=1;
525     int retcode, initcode, sizecode, addrcode;
526
527     FSHOW_SIGNAL((stderr,"/create_os_thread: creating new thread\n"));
528
529 #ifdef LOCK_CREATE_THREAD
530     retcode = pthread_mutex_lock(&create_thread_lock);
531     gc_assert(retcode == 0);
532     FSHOW_SIGNAL((stderr,"/create_os_thread: got lock\n"));
533 #endif
534     sigemptyset(&newset);
535     /* Blocking deferrable signals is enough, no need to block
536      * SIG_STOP_FOR_GC because the child process is not linked onto
537      * all_threads until it's ready. */
538     sigaddset_deferrable(&newset);
539     thread_sigmask(SIG_BLOCK, &newset, &oldset);
540
541 #if defined(LISP_FEATURE_DARWIN)
542 #define CONTROL_STACK_ADJUST 8192 /* darwin wants page-aligned stacks */
543 #else
544 #define CONTROL_STACK_ADJUST 16
545 #endif
546
547     if((initcode = pthread_attr_init(&attr)) ||
548        /* FIXME: why do we even have this in the first place? */
549        (pthread_attr_setstack(&attr,th->control_stack_start,
550                               THREAD_CONTROL_STACK_SIZE-CONTROL_STACK_ADJUST)) ||
551 #undef CONTROL_STACK_ADJUST
552        (retcode = pthread_create
553         (kid_tid,&attr,(void *(*)(void *))new_thread_trampoline,th))) {
554         FSHOW_SIGNAL((stderr, "init, size, addr = %d, %d, %d\n", initcode, sizecode, addrcode));
555         FSHOW_SIGNAL((stderr, printf("pthread_create returned %d, errno %d\n", retcode, errno)));
556         FSHOW_SIGNAL((stderr, "wanted stack size %d, min stack size %d\n",
557                       THREAD_CONTROL_STACK_SIZE-16, PTHREAD_STACK_MIN));
558         if(retcode < 0) {
559             perror("create_os_thread");
560         }
561         r=0;
562     }
563
564 #ifdef QUEUE_FREEABLE_THREAD_STACKS
565     free_freeable_stacks();
566 #endif
567     thread_sigmask(SIG_SETMASK,&oldset,0);
568 #ifdef LOCK_CREATE_THREAD
569     retcode = pthread_mutex_unlock(&create_thread_lock);
570     gc_assert(retcode == 0);
571     FSHOW_SIGNAL((stderr,"/create_os_thread: released lock\n"));
572 #endif
573     return r;
574 }
575
576 os_thread_t create_thread(lispobj initial_function) {
577     struct thread *th;
578     os_thread_t kid_tid;
579
580     /* Assuming that a fresh thread struct has no lisp objects in it,
581      * linking it to all_threads can be left to the thread itself
582      * without fear of gc lossage. initial_function violates this
583      * assumption and must stay pinned until the child starts up. */
584     th = create_thread_struct(initial_function);
585     if(th==0) return 0;
586
587     if (create_os_thread(th,&kid_tid)) {
588         return kid_tid;
589     } else {
590         free_thread_struct(th);
591         return 0;
592     }
593 }
594
595 /* Send the signo to os_thread, retry if the rt signal queue is
596  * full. */
597 int
598 kill_thread_safely(os_thread_t os_thread, int signo)
599 {
600     int r;
601     /* The man page does not mention EAGAIN as a valid return value
602      * for either pthread_kill or kill. But that's theory, this is
603      * practice. By waiting here we assume that the delivery of this
604      * signal is not necessary for the delivery of the signals in the
605      * queue. In other words, we _assume_ there are no deadlocks. */
606     while ((r=pthread_kill(os_thread,signo))==EAGAIN) {
607         /* wait a bit then try again in the hope of the rt signal
608          * queue not being full */
609         FSHOW_SIGNAL((stderr,"/rt signal queue full\n"));
610         /* FIXME: some kind of backoff (random, exponential) would be
611          * nice. */
612         sleep(1);
613     }
614     return r;
615 }
616
617 int signal_interrupt_thread(os_thread_t os_thread)
618 {
619     int status = kill_thread_safely(os_thread, SIG_INTERRUPT_THREAD);
620     if (status == 0) {
621         return 0;
622     } else if (status == ESRCH) {
623         return -1;
624     } else {
625         lose("cannot send SIG_INTERRUPT_THREAD to thread=%lu: %d, %s\n",
626              os_thread, status, strerror(status));
627     }
628 }
629
630 /* stopping the world is a two-stage process.  From this thread we signal
631  * all the others with SIG_STOP_FOR_GC.  The handler for this signal does
632  * the usual pseudo-atomic checks (we don't want to stop a thread while
633  * it's in the middle of allocation) then waits for another SIG_STOP_FOR_GC.
634  */
635
636 /* To avoid deadlocks when gc stops the world all clients of each
637  * mutex must enable or disable SIG_STOP_FOR_GC for the duration of
638  * holding the lock, but they must agree on which. */
639 void gc_stop_the_world()
640 {
641     struct thread *p,*th=arch_os_get_current_thread();
642     int status, lock_ret;
643 #ifdef LOCK_CREATE_THREAD
644     /* KLUDGE: Stopping the thread during pthread_create() causes deadlock
645      * on FreeBSD. */
646     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on create_thread_lock, thread=%lu\n",
647                   th->os_thread));
648     lock_ret = pthread_mutex_lock(&create_thread_lock);
649     gc_assert(lock_ret == 0);
650     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got create_thread_lock, thread=%lu\n",
651                   th->os_thread));
652 #endif
653     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on lock, thread=%lu\n",
654                   th->os_thread));
655     /* keep threads from starting while the world is stopped. */
656     lock_ret = pthread_mutex_lock(&all_threads_lock);      \
657     gc_assert(lock_ret == 0);
658
659     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got lock, thread=%lu\n",
660                   th->os_thread));
661     /* stop all other threads by sending them SIG_STOP_FOR_GC */
662     for(p=all_threads; p; p=p->next) {
663         gc_assert(p->os_thread != 0);
664         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: p->state: %x\n", p->state));
665         if((p!=th) && ((p->state==STATE_RUNNING))) {
666             FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending %x, os_thread %x\n",
667                           p, p->os_thread));
668             status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
669             if (status==ESRCH) {
670                 /* This thread has exited. */
671                 gc_assert(p->state==STATE_DEAD);
672             } else if (status) {
673                 lose("cannot send suspend thread=%lu: %d, %s\n",
674                      p->os_thread,status,strerror(status));
675             }
676         }
677     }
678     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:signals sent\n"));
679     /* wait for the running threads to stop or finish */
680     for(p=all_threads;p;) {
681         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: th: %p, p: %p\n", th, p));
682         if((p!=th) && (p->state==STATE_RUNNING)) {
683             sched_yield();
684         } else {
685             p=p->next;
686         }
687     }
688     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:end\n"));
689 }
690
691 void gc_start_the_world()
692 {
693     struct thread *p,*th=arch_os_get_current_thread();
694     int status, lock_ret;
695     /* if a resumed thread creates a new thread before we're done with
696      * this loop, the new thread will get consed on the front of
697      * all_threads, but it won't have been stopped so won't need
698      * restarting */
699     FSHOW_SIGNAL((stderr,"/gc_start_the_world:begin\n"));
700     for(p=all_threads;p;p=p->next) {
701         gc_assert(p->os_thread!=0);
702         if((p!=th) && (p->state!=STATE_DEAD)) {
703             if(p->state!=STATE_SUSPENDED) {
704                 lose("gc_start_the_world: wrong thread state is %d\n",
705                      fixnum_value(p->state));
706             }
707             FSHOW_SIGNAL((stderr, "/gc_start_the_world: resuming %lu\n",
708                           p->os_thread));
709             p->state=STATE_RUNNING;
710
711 #if defined(SIG_RESUME_FROM_GC)
712             status=kill_thread_safely(p->os_thread,SIG_RESUME_FROM_GC);
713 #else
714             status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
715 #endif
716             if (status) {
717                 lose("cannot resume thread=%lu: %d, %s\n",
718                      p->os_thread,status,strerror(status));
719             }
720         }
721     }
722     /* If we waited here until all threads leave STATE_SUSPENDED, then
723      * SIG_STOP_FOR_GC wouldn't need to be a rt signal. That has some
724      * performance implications, but does away with the 'rt signal
725      * queue full' problem. */
726
727     lock_ret = pthread_mutex_unlock(&all_threads_lock);
728     gc_assert(lock_ret == 0);
729 #ifdef LOCK_CREATE_THREAD
730     lock_ret = pthread_mutex_unlock(&create_thread_lock);
731     gc_assert(lock_ret == 0);
732 #endif
733
734     FSHOW_SIGNAL((stderr,"/gc_start_the_world:end\n"));
735 }
736 #endif