650d5151059a96a753c0aa1db7695df3fc486666
[sbcl.git] / src / runtime / thread.c
1 /*
2  * This software is part of the SBCL system. See the README file for
3  * more information.
4  *
5  * This software is derived from the CMU CL system, which was
6  * written at Carnegie Mellon University and released into the
7  * public domain. The software is in the public domain and is
8  * provided with absolutely no warranty. See the COPYING and CREDITS
9  * files for more information.
10  */
11
12 #include "sbcl.h"
13
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #ifndef LISP_FEATURE_WIN32
18 #include <sched.h>
19 #endif
20 #include <signal.h>
21 #include <stddef.h>
22 #include <errno.h>
23 #include <sys/types.h>
24 #ifndef LISP_FEATURE_WIN32
25 #include <sys/wait.h>
26 #endif
27
28 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
29 #include <mach/mach.h>
30 #include <mach/mach_error.h>
31 #include <mach/mach_types.h>
32 #endif
33
34 #include "runtime.h"
35 #include "validate.h"           /* for CONTROL_STACK_SIZE etc */
36 #include "alloc.h"
37 #include "thread.h"
38 #include "arch.h"
39 #include "target-arch-os.h"
40 #include "os.h"
41 #include "globals.h"
42 #include "dynbind.h"
43 #include "genesis/cons.h"
44 #include "genesis/fdefn.h"
45 #include "interr.h"             /* for lose() */
46 #include "gc-internal.h"
47
48 #ifdef LISP_FEATURE_WIN32
49 /*
50  * Win32 doesn't have SIGSTKSZ, and we're not switching stacks anyway,
51  * so define it arbitrarily
52  */
53 #define SIGSTKSZ 1024
54 #endif
55
/* Platform-specific strategy for releasing the memory of exited threads.
 *
 * QUEUE_FREEABLE_THREAD_STACKS: exiting threads append themselves to a
 *   queue (queue_freeable_thread_stack) that is drained from
 *   create_os_thread via free_freeable_stacks.
 * CREATE_CLEANUP_THREAD: a helper thread is spawned for every exiting
 *   thread (create_cleanup_thread) to join it and free its memory.
 * LOCK_CREATE_THREAD: create_os_thread and gc_stop_the_world both take
 *   create_thread_lock, so threads are not created while the world is
 *   being stopped.
 *
 * With neither of the first two defined, free_thread_stack_later is used.
 */
#if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_SB_THREAD)
#define QUEUE_FREEABLE_THREAD_STACKS
#define LOCK_CREATE_THREAD
#endif

#ifdef LISP_FEATURE_FREEBSD
#define CREATE_CLEANUP_THREAD
#define LOCK_CREATE_THREAD
#endif

/* Size in bytes of the per-thread alien stack carved out of the thread
 * allocation (see THREAD_STRUCT_SIZE). */
#define ALIEN_STACK_SIZE (1*1024*1024) /* 1Mb size chosen at random */
67
/* Record describing an exited thread whose memory has not been released
 * yet: the thread must be joined first, then the region starting at
 * os_address is handed to os_invalidate. */
struct freeable_stack {
#ifdef QUEUE_FREEABLE_THREAD_STACKS
    struct freeable_stack *next;    /* next entry in freeable_stack_queue */
#endif
    os_thread_t os_thread;          /* thread to pthread_join before freeing */
    os_vm_address_t os_address;     /* base of the thread's allocation */
};
75
76
#ifdef QUEUE_FREEABLE_THREAD_STACKS
/* Singly-linked FIFO of exited threads awaiting cleanup (head pointer),
 * the number of entries in it, and the mutex taken around updates. */
static struct freeable_stack * volatile freeable_stack_queue = 0;
static int freeable_stack_count = 0;
pthread_mutex_t freeable_stack_lock = PTHREAD_MUTEX_INITIALIZER;
#else
/* Single-slot holder: the most recently exited thread still awaiting
 * cleanup. free_thread_stack_later swaps a new record in and reaps the
 * one it gets back. */
static struct freeable_stack * volatile freeable_stack = 0;
#endif
84
/* Size in bytes of each thread's dynamic values area (4096 lispobj
 * words); part of THREAD_STRUCT_SIZE. */
int dynamic_values_bytes=4096*sizeof(lispobj);  /* same for all threads */
/* Head of the doubly-linked list of thread structures, maintained by
 * link_thread and unlink_thread. */
struct thread * volatile all_threads;
extern struct interrupt_data * global_interrupt_data;

#ifdef LISP_FEATURE_SB_THREAD
/* Taken around all_threads updates by starting/exiting threads, and held
 * from gc_stop_the_world until gc_start_the_world. */
pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER;
#ifdef LOCK_CREATE_THREAD
/* Serializes create_os_thread against gc_stop_the_world. */
static pthread_mutex_t create_thread_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
#endif

#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
/* Defined elsewhere; used by initial_thread_trampoline to enter Lisp. */
extern lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs);
#endif
99
100 static void
101 link_thread(struct thread *th)
102 {
103     if (all_threads) all_threads->prev=th;
104     th->next=all_threads;
105     th->prev=0;
106     all_threads=th;
107 }
108
109 #ifdef LISP_FEATURE_SB_THREAD
110 static void
111 unlink_thread(struct thread *th)
112 {
113     if (th->prev)
114         th->prev->next = th->next;
115     else
116         all_threads = th->next;
117     if (th->next)
118         th->next->prev = th->prev;
119 }
120 #endif
121
122 static int
123 initial_thread_trampoline(struct thread *th)
124 {
125     lispobj function;
126 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
127     lispobj *args = NULL;
128 #endif
129     function = th->no_tls_value_marker;
130     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
131     if(arch_os_thread_init(th)==0) return 1;
132     link_thread(th);
133     th->os_thread=thread_self();
134 #ifndef LISP_FEATURE_WIN32
135     protect_control_stack_guard_page(1);
136 #endif
137
138 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
139     return call_into_lisp_first_time(function,args,0);
140 #else
141     return funcall0(function);
142 #endif
143 }
144
/* Size in bytes of the single region allocated per thread by
 * create_thread_struct: control stack, binding stack, alien stack and
 * the dynamic values area, plus 32*SIGSTKSZ bytes (purpose not visible
 * in this file -- presumably signal stack space) and one
 * BACKEND_PAGE_SIZE of padding so the start can be aligned up to a page
 * boundary. Not a compile-time constant: dynamic_values_bytes is a
 * variable. */
#define THREAD_STRUCT_SIZE (THREAD_CONTROL_STACK_SIZE + BINDING_STACK_SIZE + \
                            ALIEN_STACK_SIZE + dynamic_values_bytes + \
                            32 * SIGSTKSZ + \
                            BACKEND_PAGE_SIZE)
149
150 #ifdef LISP_FEATURE_SB_THREAD
151
152 #ifdef QUEUE_FREEABLE_THREAD_STACKS
153
154 static void
155 queue_freeable_thread_stack(struct thread *thread_to_be_cleaned_up)
156 {
157      struct freeable_stack *new_freeable_stack = 0;
158      if (thread_to_be_cleaned_up) {
159         /* FIXME: os_validate is mmap -- for small things like these
160          * malloc would probably perform better. */
161         new_freeable_stack = (struct freeable_stack *)
162             os_validate(0, sizeof(struct freeable_stack));
163         new_freeable_stack->next = NULL;
164         new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
165         new_freeable_stack->os_address = thread_to_be_cleaned_up->os_address;
166         pthread_mutex_lock(&freeable_stack_lock);
167         if (freeable_stack_queue) {
168             struct freeable_stack *next;
169             next = freeable_stack_queue;
170             while (next->next) {
171                 next = next->next;
172             }
173             next->next = new_freeable_stack;
174         } else {
175             freeable_stack_queue = new_freeable_stack;
176         }
177         freeable_stack_count++;
178         pthread_mutex_unlock(&freeable_stack_lock);
179     }
180 }
181
182 #define FREEABLE_STACK_QUEUE_SIZE 4
183
184 static void
185 free_freeable_stacks() {
186     if (freeable_stack_queue && (freeable_stack_count > FREEABLE_STACK_QUEUE_SIZE)) {
187         struct freeable_stack* old;
188         pthread_mutex_lock(&freeable_stack_lock);
189         old = freeable_stack_queue;
190         freeable_stack_queue = old->next;
191         freeable_stack_count--;
192         gc_assert(pthread_join(old->os_thread, NULL) == 0);
193         FSHOW((stderr, "freeing thread %x stack\n", old->os_thread));
194         os_invalidate(old->os_address, THREAD_STRUCT_SIZE);
195         os_invalidate((os_vm_address_t)old, sizeof(struct freeable_stack));
196         pthread_mutex_unlock(&freeable_stack_lock);
197     }
198 }
199
200 #elif defined(CREATE_CLEANUP_THREAD)
201 static void *
202 cleanup_thread(void *arg)
203 {
204     struct freeable_stack *freeable = arg;
205     pthread_t self = pthread_self();
206
207     FSHOW((stderr, "/cleaner thread(%p): joining %p\n",
208            self, freeable->os_thread));
209     gc_assert(pthread_join(freeable->os_thread, NULL) == 0);
210     FSHOW((stderr, "/cleaner thread(%p): free stack %p\n",
211            self, freeable->stack));
212     os_invalidate(freeable->os_address, THREAD_STRUCT_SIZE);
213     free(freeable);
214
215     pthread_detach(self);
216
217     return NULL;
218 }
219
220 static void
221 create_cleanup_thread(struct thread *thread_to_be_cleaned_up)
222 {
223     pthread_t thread;
224     int result;
225
226     if (thread_to_be_cleaned_up) {
227         struct freeable_stack *freeable =
228             malloc(sizeof(struct freeable_stack));
229         gc_assert(freeable != NULL);
230         freeable->os_thread = thread_to_be_cleaned_up->os_thread;
231         freeable->os_address =
232             (os_vm_address_t) thread_to_be_cleaned_up->os_address;
233         result = pthread_create(&thread, NULL, cleanup_thread, freeable);
234         gc_assert(result == 0);
235         sched_yield();
236     }
237 }
238
239 #else
240 static void
241 free_thread_stack_later(struct thread *thread_to_be_cleaned_up)
242 {
243     struct freeable_stack *new_freeable_stack = 0;
244     if (thread_to_be_cleaned_up) {
245         new_freeable_stack = (struct freeable_stack *)
246             os_validate(0, sizeof(struct freeable_stack));
247         new_freeable_stack->os_thread = thread_to_be_cleaned_up->os_thread;
248         new_freeable_stack->os_address = (os_vm_address_t)
249             thread_to_be_cleaned_up->os_address;
250     }
251     new_freeable_stack = (struct freeable_stack *)
252         swap_lispobjs((lispobj *)(void *)&freeable_stack,
253                       (lispobj)new_freeable_stack);
254     if (new_freeable_stack) {
255         FSHOW((stderr,"/reaping %p\n", (void*) new_freeable_stack->os_thread));
256         /* Under NPTL pthread_join really waits until the thread
257          * exists and the stack can be safely freed. This is sadly not
258          * mandated by the pthread spec. */
259         gc_assert(pthread_join(new_freeable_stack->os_thread, NULL) == 0);
260         os_invalidate(new_freeable_stack->os_address, THREAD_STRUCT_SIZE);
261         os_invalidate((os_vm_address_t) new_freeable_stack,
262                       sizeof(struct freeable_stack));
263     }
264 }
265 #endif
266
267 /* this is the first thing that runs in the child (which is why the
268  * silly calling convention).  Basically it calls the user's requested
269  * lisp function after doing arch_os_thread_init and whatever other
270  * bookkeeping needs to be done
271  */
272 int
273 new_thread_trampoline(struct thread *th)
274 {
275     lispobj function;
276     int result, lock_ret;
277
278     FSHOW((stderr,"/creating thread %lu\n", thread_self()));
279     function = th->no_tls_value_marker;
280     th->no_tls_value_marker = NO_TLS_VALUE_MARKER_WIDETAG;
281     if(arch_os_thread_init(th)==0) {
282         /* FIXME: handle error */
283         lose("arch_os_thread_init failed\n");
284     }
285
286     th->os_thread=thread_self();
287     protect_control_stack_guard_page(1);
288     /* Since GC can only know about this thread from the all_threads
289      * list and we're just adding this thread to it there is no danger
290      * of deadlocking even with SIG_STOP_FOR_GC blocked (which it is
291      * not). */
292     lock_ret = pthread_mutex_lock(&all_threads_lock);
293     gc_assert(lock_ret == 0);
294     link_thread(th);
295     lock_ret = pthread_mutex_unlock(&all_threads_lock);
296     gc_assert(lock_ret == 0);
297
298     result = funcall0(function);
299
300     /* Block GC */
301     block_blockable_signals();
302     th->state=STATE_DEAD;
303
304     /* SIG_STOP_FOR_GC is blocked and GC might be waiting for this
305      * thread, but since we are already dead it won't wait long. */
306     lock_ret = pthread_mutex_lock(&all_threads_lock);
307     gc_assert(lock_ret == 0);
308
309     gc_alloc_update_page_tables(0, &th->alloc_region);
310     unlink_thread(th);
311     pthread_mutex_unlock(&all_threads_lock);
312     gc_assert(lock_ret == 0);
313
314     if(th->tls_cookie>=0) arch_os_thread_cleanup(th);
315     os_invalidate((os_vm_address_t)th->interrupt_data,
316                   (sizeof (struct interrupt_data)));
317
318 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
319     FSHOW((stderr, "Deallocating mach port %x\n", THREAD_STRUCT_TO_EXCEPTION_PORT(th)));
320     mach_port_move_member(mach_task_self(),
321                           THREAD_STRUCT_TO_EXCEPTION_PORT(th),
322                           MACH_PORT_NULL);
323     mach_port_deallocate(mach_task_self(),
324                          THREAD_STRUCT_TO_EXCEPTION_PORT(th));
325     mach_port_destroy(mach_task_self(),
326                       THREAD_STRUCT_TO_EXCEPTION_PORT(th));
327 #endif
328
329 #ifdef QUEUE_FREEABLE_THREAD_STACKS
330     queue_freeable_thread_stack(th);
331 #elif defined(CREATE_CLEANUP_THREAD)
332     create_cleanup_thread(th);
333 #else
334     free_thread_stack_later(th);
335 #endif
336
337     FSHOW((stderr,"/exiting thread %p\n", thread_self()));
338     return result;
339 }
340
341 #endif /* LISP_FEATURE_SB_THREAD */
342
343 static void
344 free_thread_struct(struct thread *th)
345 {
346     if (th->interrupt_data)
347         os_invalidate((os_vm_address_t) th->interrupt_data,
348                       (sizeof (struct interrupt_data)));
349     os_invalidate((os_vm_address_t) th->os_address,
350                   THREAD_STRUCT_SIZE);
351 }
352
/* This is called from any other thread to create the new one, and
 * initialize all parts of it that can be initialized from another
 * thread.
 *
 * Allocates one THREAD_STRUCT_SIZE region laid out (from the aligned
 * base upwards) as control stack, binding stack, alien stack and then
 * the per-thread data containing struct thread, plus a separately
 * allocated struct interrupt_data.
 *
 * INITIAL_FUNCTION is stored temporarily in th->no_tls_value_marker,
 * from where the trampolines pick it up.
 *
 * Returns the new thread structure, or NULL/0 if either allocation
 * fails (in which case nothing stays allocated).
 */

static struct thread *
create_thread_struct(lispobj initial_function) {
    union per_thread_data *per_thread;
    struct thread *th=0;        /*  subdue gcc */
    void *spaces=0;             /* raw address returned by os_validate */
    void *aligned_spaces=0;     /* spaces rounded up to BACKEND_PAGE_SIZE */
#ifdef LISP_FEATURE_SB_THREAD
    int i;
#endif

#ifdef CREATE_CLEANUP_THREAD
    /* Give a chance for cleanup threads to run. */
    sched_yield();
#endif
    /* May as well allocate all the spaces at once: it saves us from
     * having to decide what to do if only some of the allocations
     * succeed.  SPACES must be page-aligned, since the GC expects the
     * control stack to start at a page boundary.  We can't rely on the
     * alignment passed from os_validate, since that might assume the
     * current (e.g. 4k) pagesize, while we calculate with the biggest
     * (e.g. 64k) pagesize allowed by the ABI.  */
    spaces=os_validate(0, THREAD_STRUCT_SIZE);
    if(!spaces)
        return NULL;
    /* Aligning up is safe as THREAD_STRUCT_SIZE has BACKEND_PAGE_SIZE
     * padding. */
    aligned_spaces = (void *)((((unsigned long)(char *)spaces)
                               + BACKEND_PAGE_SIZE - 1)
                              & ~(unsigned long)(BACKEND_PAGE_SIZE - 1));
    /* The per-thread data sits above the three stacks.  Note that this
     * (and the address computations further down) does arithmetic on
     * void pointers, which is a compiler extension. */
    per_thread=(union per_thread_data *)
        (aligned_spaces+
         THREAD_CONTROL_STACK_SIZE+
         BINDING_STACK_SIZE+
         ALIEN_STACK_SIZE);

#ifdef LISP_FEATURE_SB_THREAD
    /* Mark every dynamic value slot as having no thread-local value. */
    for(i = 0; i < (dynamic_values_bytes / sizeof(lispobj)); i++)
        per_thread->dynamic_values[i] = NO_TLS_VALUE_MARKER_WIDETAG;
    /* An empty all_threads list means this is the first thread being
     * created: set up the global TLS bookkeeping. */
    if (all_threads == 0) {
        if(SymbolValue(FREE_TLS_INDEX,0)==UNBOUND_MARKER_WIDETAG) {
            SetSymbolValue
                (FREE_TLS_INDEX,
                 /* FIXME: should be MAX_INTERRUPTS -1 ? */
                 make_fixnum(MAX_INTERRUPTS+
                             sizeof(struct thread)/sizeof(lispobj)),
                 0);
            SetSymbolValue(TLS_INDEX_LOCK,make_fixnum(0),0);
        }
/* Give symbol SYM a TLS index equal to the word offset of FIELD within
 * struct thread, so the symbol's thread-local value lives in that slot. */
#define STATIC_TLS_INIT(sym,field) \
  ((struct symbol *)(sym-OTHER_POINTER_LOWTAG))->tls_index= \
  make_fixnum(THREAD_SLOT_OFFSET_WORDS(field))

        STATIC_TLS_INIT(BINDING_STACK_START,binding_stack_start);
        STATIC_TLS_INIT(BINDING_STACK_POINTER,binding_stack_pointer);
        STATIC_TLS_INIT(CONTROL_STACK_START,control_stack_start);
        STATIC_TLS_INIT(CONTROL_STACK_END,control_stack_end);
        STATIC_TLS_INIT(ALIEN_STACK,alien_stack_pointer);
#if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
        STATIC_TLS_INIT(PSEUDO_ATOMIC_BITS,pseudo_atomic_bits);
#endif
#undef STATIC_TLS_INIT
    }
#endif

    th=&per_thread->thread;
    /* Remember the unaligned address: that is what must be passed back
     * to os_invalidate when the thread's memory is released. */
    th->os_address = spaces;
    th->control_stack_start = aligned_spaces;
    th->binding_stack_start=
        (lispobj*)((void*)th->control_stack_start+THREAD_CONTROL_STACK_SIZE);
    th->control_stack_end = th->binding_stack_start;
    th->alien_stack_start=
        (lispobj*)((void*)th->binding_stack_start+BINDING_STACK_SIZE);
    th->binding_stack_pointer=th->binding_stack_start;
    th->this=th;
    /* os_thread is filled in by the trampoline once the thread runs. */
    th->os_thread=0;
    th->state=STATE_RUNNING;
#ifdef LISP_FEATURE_STACK_GROWS_DOWNWARD_NOT_UPWARD
    /* start at the last word of the alien stack area */
    th->alien_stack_pointer=((void *)th->alien_stack_start
                             + ALIEN_STACK_SIZE-N_WORD_BYTES);
#else
    th->alien_stack_pointer=((void *)th->alien_stack_start);
#endif
#if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
    th->pseudo_atomic_bits=0;
#endif
#ifdef LISP_FEATURE_GENCGC
    gc_set_region_empty(&th->alloc_region);
#endif

#ifndef LISP_FEATURE_SB_THREAD
    /* the tls-points-into-struct-thread trick is only good for threaded
     * sbcl, because unithread sbcl doesn't have tls.  So, we copy the
     * appropriate values from struct thread here, and make sure that
     * we use the appropriate SymbolValue macros to access any of the
     * variable quantities from the C runtime.  It's not quite OAOOM,
     * it just feels like it */
    SetSymbolValue(BINDING_STACK_START,(lispobj)th->binding_stack_start,th);
    SetSymbolValue(CONTROL_STACK_START,(lispobj)th->control_stack_start,th);
    SetSymbolValue(CONTROL_STACK_END,(lispobj)th->control_stack_end,th);
#if defined(LISP_FEATURE_X86) || defined (LISP_FEATURE_X86_64)
    SetSymbolValue(BINDING_STACK_POINTER,(lispobj)th->binding_stack_pointer,th);
    SetSymbolValue(ALIEN_STACK,(lispobj)th->alien_stack_pointer,th);
    SetSymbolValue(PSEUDO_ATOMIC_BITS,(lispobj)th->pseudo_atomic_bits,th);
#else
    current_binding_stack_pointer=th->binding_stack_pointer;
    current_control_stack_pointer=th->control_stack_start;
#endif
#endif
    /* Initial per-thread bindings for the new thread. */
    bind_variable(CURRENT_CATCH_BLOCK,make_fixnum(0),th);
    bind_variable(CURRENT_UNWIND_PROTECT_BLOCK,make_fixnum(0),th);
    bind_variable(FREE_INTERRUPT_CONTEXT_INDEX,make_fixnum(0),th);
    bind_variable(INTERRUPT_PENDING, NIL,th);
    bind_variable(INTERRUPTS_ENABLED,T,th);
    bind_variable(ALLOW_WITH_INTERRUPTS,T,th);
    bind_variable(GC_PENDING,NIL,th);
#ifdef LISP_FEATURE_SB_THREAD
    bind_variable(STOP_FOR_GC_PENDING,NIL,th);
#endif

    th->interrupt_data = (struct interrupt_data *)
        os_validate(0,(sizeof (struct interrupt_data)));
    if (!th->interrupt_data) {
        /* interrupt_data is null here, so free_thread_struct releases
         * only the main region. */
        free_thread_struct(th);
        return 0;
    }
    th->interrupt_data->pending_handler = 0;
    /* Stash the function to run; the trampoline reads it from this slot
     * and then restores the real marker value. */
    th->no_tls_value_marker=initial_function;

    th->stepping = NIL;
    return th;
}
489
490 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
491 mach_port_t setup_mach_exception_handling_thread();
492 kern_return_t mach_thread_init(mach_port_t thread_exception_port);
493
494 #endif
495
496 void create_initial_thread(lispobj initial_function) {
497     struct thread *th=create_thread_struct(initial_function);
498     if(th) {
499 #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER
500         kern_return_t ret;
501
502         setup_mach_exception_handling_thread();
503 #endif
504         initial_thread_trampoline(th); /* no return */
505     } else lose("can't create initial thread\n");
506 }
507
508 #ifdef LISP_FEATURE_SB_THREAD
509
510 #ifndef __USE_XOPEN2K
511 extern int pthread_attr_setstack (pthread_attr_t *__attr, void *__stackaddr,
512                                   size_t __stacksize);
513 #endif
514
515 boolean create_os_thread(struct thread *th,os_thread_t *kid_tid)
516 {
517     /* The new thread inherits the restrictive signal mask set here,
518      * and enables signals again when it is set up properly. */
519     pthread_attr_t attr;
520     sigset_t newset,oldset;
521     boolean r=1;
522     int retcode, initcode, sizecode, addrcode;
523
524     FSHOW_SIGNAL((stderr,"/create_os_thread: creating new thread\n"));
525
526 #ifdef LOCK_CREATE_THREAD
527     retcode = pthread_mutex_lock(&create_thread_lock);
528     gc_assert(retcode == 0);
529     FSHOW_SIGNAL((stderr,"/create_os_thread: got lock\n"));
530 #endif
531     sigemptyset(&newset);
532     /* Blocking deferrable signals is enough, no need to block
533      * SIG_STOP_FOR_GC because the child process is not linked onto
534      * all_threads until it's ready. */
535     sigaddset_deferrable(&newset);
536     thread_sigmask(SIG_BLOCK, &newset, &oldset);
537
538 #if defined(LISP_FEATURE_DARWIN)
539 #define CONTROL_STACK_ADJUST 8192 /* darwin wants page-aligned stacks */
540 #else
541 #define CONTROL_STACK_ADJUST 16
542 #endif
543
544     if((initcode = pthread_attr_init(&attr)) ||
545        /* FIXME: why do we even have this in the first place? */
546        (pthread_attr_setstack(&attr,th->control_stack_start,
547                               THREAD_CONTROL_STACK_SIZE-CONTROL_STACK_ADJUST)) ||
548 #undef CONTROL_STACK_ADJUST
549        (retcode = pthread_create
550         (kid_tid,&attr,(void *(*)(void *))new_thread_trampoline,th))) {
551         FSHOW_SIGNAL((stderr, "init, size, addr = %d, %d, %d\n", initcode, sizecode, addrcode));
552         FSHOW_SIGNAL((stderr, printf("pthread_create returned %d, errno %d\n", retcode, errno)));
553         FSHOW_SIGNAL((stderr, "wanted stack size %d, min stack size %d\n",
554                       THREAD_CONTROL_STACK_SIZE-16, PTHREAD_STACK_MIN));
555         if(retcode < 0) {
556             perror("create_os_thread");
557         }
558         r=0;
559     }
560
561 #ifdef QUEUE_FREEABLE_THREAD_STACKS
562     free_freeable_stacks();
563 #endif
564     thread_sigmask(SIG_SETMASK,&oldset,0);
565 #ifdef LOCK_CREATE_THREAD
566     retcode = pthread_mutex_unlock(&create_thread_lock);
567     gc_assert(retcode == 0);
568     FSHOW_SIGNAL((stderr,"/create_os_thread: released lock\n"));
569 #endif
570     return r;
571 }
572
573 os_thread_t create_thread(lispobj initial_function) {
574     struct thread *th;
575     os_thread_t kid_tid;
576
577     /* Assuming that a fresh thread struct has no lisp objects in it,
578      * linking it to all_threads can be left to the thread itself
579      * without fear of gc lossage. initial_function violates this
580      * assumption and must stay pinned until the child starts up. */
581     th = create_thread_struct(initial_function);
582     if(th==0) return 0;
583
584     if (create_os_thread(th,&kid_tid)) {
585         return kid_tid;
586     } else {
587         free_thread_struct(th);
588         return 0;
589     }
590 }
591
592 /* Send the signo to os_thread, retry if the rt signal queue is
593  * full. */
594 int
595 kill_thread_safely(os_thread_t os_thread, int signo)
596 {
597     int r;
598     /* The man page does not mention EAGAIN as a valid return value
599      * for either pthread_kill or kill. But that's theory, this is
600      * practice. By waiting here we assume that the delivery of this
601      * signal is not necessary for the delivery of the signals in the
602      * queue. In other words, we _assume_ there are no deadlocks. */
603     while ((r=pthread_kill(os_thread,signo))==EAGAIN) {
604         /* wait a bit then try again in the hope of the rt signal
605          * queue not being full */
606         FSHOW_SIGNAL((stderr,"/rt signal queue full\n"));
607         /* FIXME: some kind of backoff (random, exponential) would be
608          * nice. */
609         sleep(1);
610     }
611     return r;
612 }
613
614 int signal_interrupt_thread(os_thread_t os_thread)
615 {
616     int status = kill_thread_safely(os_thread, SIG_INTERRUPT_THREAD);
617     if (status == 0) {
618         return 0;
619     } else if (status == ESRCH) {
620         return -1;
621     } else {
622         lose("cannot send SIG_INTERRUPT_THREAD to thread=%lu: %d, %s\n",
623              os_thread, status, strerror(status));
624     }
625 }
626
627 /* stopping the world is a two-stage process.  From this thread we signal
628  * all the others with SIG_STOP_FOR_GC.  The handler for this signal does
629  * the usual pseudo-atomic checks (we don't want to stop a thread while
630  * it's in the middle of allocation) then waits for another SIG_STOP_FOR_GC.
631  */
632
633 /* To avoid deadlocks when gc stops the world all clients of each
634  * mutex must enable or disable SIG_STOP_FOR_GC for the duration of
635  * holding the lock, but they must agree on which. */
636 void gc_stop_the_world()
637 {
638     struct thread *p,*th=arch_os_get_current_thread();
639     int status, lock_ret;
640 #ifdef LOCK_CREATE_THREAD
641     /* KLUDGE: Stopping the thread during pthread_create() causes deadlock
642      * on FreeBSD. */
643     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on create_thread_lock, thread=%lu\n",
644                   th->os_thread));
645     lock_ret = pthread_mutex_lock(&create_thread_lock);
646     gc_assert(lock_ret == 0);
647     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got create_thread_lock, thread=%lu\n",
648                   th->os_thread));
649 #endif
650     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:waiting on lock, thread=%lu\n",
651                   th->os_thread));
652     /* keep threads from starting while the world is stopped. */
653     lock_ret = pthread_mutex_lock(&all_threads_lock);      \
654     gc_assert(lock_ret == 0);
655
656     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:got lock, thread=%lu\n",
657                   th->os_thread));
658     /* stop all other threads by sending them SIG_STOP_FOR_GC */
659     for(p=all_threads; p; p=p->next) {
660         gc_assert(p->os_thread != 0);
661         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: p->state: %x\n", p->state));
662         if((p!=th) && ((p->state==STATE_RUNNING))) {
663             FSHOW_SIGNAL((stderr,"/gc_stop_the_world: suspending %x, os_thread %x\n",
664                           p, p->os_thread));
665             status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
666             if (status==ESRCH) {
667                 /* This thread has exited. */
668                 gc_assert(p->state==STATE_DEAD);
669             } else if (status) {
670                 lose("cannot send suspend thread=%lu: %d, %s\n",
671                      p->os_thread,status,strerror(status));
672             }
673         }
674     }
675     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:signals sent\n"));
676     /* wait for the running threads to stop or finish */
677     for(p=all_threads;p;) {
678         FSHOW_SIGNAL((stderr,"/gc_stop_the_world: th: %p, p: %p\n", th, p));
679         if((p!=th) && (p->state==STATE_RUNNING)) {
680             sched_yield();
681         } else {
682             p=p->next;
683         }
684     }
685     FSHOW_SIGNAL((stderr,"/gc_stop_the_world:end\n"));
686 }
687
688 void gc_start_the_world()
689 {
690     struct thread *p,*th=arch_os_get_current_thread();
691     int status, lock_ret;
692     /* if a resumed thread creates a new thread before we're done with
693      * this loop, the new thread will get consed on the front of
694      * all_threads, but it won't have been stopped so won't need
695      * restarting */
696     FSHOW_SIGNAL((stderr,"/gc_start_the_world:begin\n"));
697     for(p=all_threads;p;p=p->next) {
698         gc_assert(p->os_thread!=0);
699         if((p!=th) && (p->state!=STATE_DEAD)) {
700             if(p->state!=STATE_SUSPENDED) {
701                 lose("gc_start_the_world: wrong thread state is %d\n",
702                      fixnum_value(p->state));
703             }
704             FSHOW_SIGNAL((stderr, "/gc_start_the_world: resuming %lu\n",
705                           p->os_thread));
706             p->state=STATE_RUNNING;
707
708 #if defined(SIG_RESUME_FROM_GC)
709             status=kill_thread_safely(p->os_thread,SIG_RESUME_FROM_GC);
710 #else
711             status=kill_thread_safely(p->os_thread,SIG_STOP_FOR_GC);
712 #endif
713             if (status) {
714                 lose("cannot resume thread=%lu: %d, %s\n",
715                      p->os_thread,status,strerror(status));
716             }
717         }
718     }
719     /* If we waited here until all threads leave STATE_SUSPENDED, then
720      * SIG_STOP_FOR_GC wouldn't need to be a rt signal. That has some
721      * performance implications, but does away with the 'rt signal
722      * queue full' problem. */
723
724     lock_ret = pthread_mutex_unlock(&all_threads_lock);
725     gc_assert(lock_ret == 0);
726 #ifdef LOCK_CREATE_THREAD
727     lock_ret = pthread_mutex_unlock(&create_thread_lock);
728     gc_assert(lock_ret == 0);
729 #endif
730
731     FSHOW_SIGNAL((stderr,"/gc_start_the_world:end\n"));
732 }
733 #endif