0.8.5.12
[sbcl.git] / contrib / experimental-thread.patch
1
2 The attached changes are supposed to fix bugs in SBCL when used for
3 gc-intensive multithreaded applications.  They haven't had sufficient
4 testing to be commited in time for SBCL 0.8.5 (may even make things
5 worse), but if you run into problems with deadlock or spinning on CPU,
6 you may want to apply this and rebuild.    -dan 2003.10.23
7
8
9
10 Index: src/code/gc.lisp
11 ===================================================================
12 RCS file: /cvsroot/sbcl/sbcl/src/code/gc.lisp,v
13 retrieving revision 1.52
14 diff -u -r1.52 gc.lisp
15 --- src/code/gc.lisp    2 Oct 2003 23:13:09 -0000       1.52
16 +++ src/code/gc.lisp    23 Oct 2003 19:22:19 -0000
17 @@ -236,22 +236,26 @@
18  (defvar *already-in-gc* nil "System is running SUB-GC")
19  (defvar *gc-mutex* (sb!thread:make-mutex :name "GC Mutex"))
20  
21 +
22 +
23  (defun sub-gc (&key (gen 0) &aux (pre-gc-dynamic-usage (dynamic-usage)))
24    ;; catch attempts to gc recursively or during post-hooks and ignore them
25 -  (when (sb!thread::mutex-value *gc-mutex*)  (return-from sub-gc nil))
26 -  (sb!thread:with-mutex (*gc-mutex* :wait-p nil)
27 -    (setf *need-to-collect-garbage* t)
28 -    (when (zerop *gc-inhibit*)
29 -      (without-interrupts
30 -       (gc-stop-the-world)
31 -       (collect-garbage gen)
32 -       (incf *n-bytes-freed-or-purified*
33 -            (max 0 (- pre-gc-dynamic-usage (dynamic-usage))))
34 -       (setf *need-to-collect-garbage* nil)
35 -       (gc-start-the-world))
36 -      (scrub-control-stack)
37 -      (setf *need-to-collect-garbage* nil)
38 -      (dolist (h *after-gc-hooks*) (carefully-funcall h))))
39 +  (let ((value (sb!thread::mutex-value *gc-mutex*))) 
40 +    (when (eql value (sb!thread:current-thread-id)) (return-from sub-gc nil))
41 +    (sb!thread:with-mutex (*gc-mutex*)
42 +      (when value (return-from sub-gc nil))
43 +      (setf *need-to-collect-garbage* t)
44 +      (when (zerop *gc-inhibit*)
45 +       (without-interrupts
46 +        (gc-stop-the-world)
47 +        (collect-garbage gen)
48 +        (incf *n-bytes-freed-or-purified*
49 +              (max 0 (- pre-gc-dynamic-usage (dynamic-usage))))
50 +        (setf *need-to-collect-garbage* nil)
51 +        (gc-start-the-world))
52 +       (scrub-control-stack)
53 +       (setf *need-to-collect-garbage* nil)
54 +       (dolist (h *after-gc-hooks*) (carefully-funcall h)))))
55    (values))
56         
57  
58 Index: src/runtime/thread.c
59 ===================================================================
60 RCS file: /cvsroot/sbcl/sbcl/src/runtime/thread.c,v
61 retrieving revision 1.18
62 diff -u -r1.18 thread.c
63 --- src/runtime/thread.c        7 Oct 2003 21:41:27 -0000       1.18
64 +++ src/runtime/thread.c        23 Oct 2003 19:22:26 -0000
65 @@ -53,6 +53,8 @@
66         fprintf(stderr, "/continue\n");
67      }
68      th->unbound_marker = UNBOUND_MARKER_WIDETAG;
69 +    if(arch_os_thread_init(th)==0) 
70 +       return 1;               /* failure.  no, really */
71  #ifdef LISP_FEATURE_SB_THREAD
72      /* wait here until our thread is linked into all_threads: see below */
73      while(th->pid<1) sched_yield();
74 @@ -61,8 +63,7 @@
75         lose("th->pid not set up right");
76  #endif
77  
78 -    if(arch_os_thread_init(th)==0) 
79 -       return 1;               /* failure.  no, really */
80 +    th->state=STATE_RUNNING;
81  #if !defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_X86)
82      return call_into_lisp_first_time(function,args,0);
83  #else
84 @@ -139,7 +140,7 @@
85      th->binding_stack_pointer=th->binding_stack_start;
86      th->this=th;
87      th->pid=0;
88 -    th->state=STATE_RUNNING;
89 +    th->state=STATE_STOPPED;
90  #ifdef LISP_FEATURE_STACK_GROWS_DOWNWARD_NOT_UPWARD
91      th->alien_stack_pointer=((void *)th->alien_stack_start
92                              + ALIEN_STACK_SIZE-4); /* naked 4.  FIXME */
93 @@ -312,39 +313,36 @@
94  {
95      /* stop all other threads by sending them SIG_STOP_FOR_GC */
96      struct thread *p,*th=arch_os_get_current_thread();
97 -    struct thread *tail=0;
98 +    pid_t old_pid;
99      int finished=0;
100      do {
101         get_spinlock(&all_threads_lock,th->pid);
102 -       if(tail!=all_threads) {
103 -           /* new threads always get consed onto the front of all_threads,
104 -            * and may be created by any thread that we haven't signalled
105 -            * yet or hasn't received our signal and stopped yet.  So, check
106 -            * for them on each time around */
107 -           for(p=all_threads;p!=tail;p=p->next) {
108 -               if(p==th) continue;
109 -               /* if the head of all_threads is removed during
110 -                * gc_stop_the_world, we may take a second trip through the 
111 -                * list and end up counting twice as many threads to wait for
112 -                * as actually exist */
113 -               if(p->state!=STATE_RUNNING) continue;
114 -               countdown_to_gc++;
115 -               p->state=STATE_STOPPING;
116 -               /* Note no return value check from kill().  If the
117 -                * thread had been reaped already, we kill it and
118 -                * increment countdown_to_gc anyway.  This is to avoid
119 -                * complicating the logic in destroy_thread, which would 
120 -                * otherwise have to know whether the thread died before or
121 -                * after it was killed
122 -                */
123 -               kill(p->pid,SIG_STOP_FOR_GC);
124 -           }
125 -           tail=all_threads;
126 -       } else {
127 -           finished=(countdown_to_gc==0);
128 +       for(p=all_threads,old_pid=p->pid; p; p=p->next) {
129 +           if(p==th) continue;
130 +           if(p->state!=STATE_RUNNING) continue;
131 +           countdown_to_gc++;
132 +           p->state=STATE_STOPPING;
133 +           /* Note no return value check from kill().  If the
134 +            * thread had been reaped already, we kill it and
135 +            * increment countdown_to_gc anyway.  This is to avoid
136 +            * complicating the logic in destroy_thread, which would 
137 +            * otherwise have to know whether the thread died before or
138 +            * after it was killed
139 +            */
140 +           kill(p->pid,SIG_STOP_FOR_GC);
141         }
142         release_spinlock(&all_threads_lock);
143         sched_yield();
144 +       /* if everything has stopped, and there is no possibility that
145 +        * a new thread has been created, we're done.  Otherwise go
146 +        * round again and signal anything that sprang up since last
147 +        * time  */
148 +       if(old_pid==all_threads->pid) {
149 +           finished=1;
150 +           for_each_thread(p) 
151 +               finished = finished &&
152 +               ((p==th) || (p->state==STATE_STOPPED));
153 +       }
154      } while(!finished);
155  }
156