Fix the threading problems revealed by Edi's CL-PPCRE tests ...
If target-arch.h is included before genesis/config.h, some
runtime files that inline get_spinlock silently get the wrong
version of it (see the first sketch below)
Significantly change the handling of thread exit: the
SIG_THREAD_EXIT handler now just sets th->state=STATE_DEAD; it
does no all_threads surgery, calls no Lisp code, and is no
longer deferrable.  This eliminates a problem where threads
that die at the start of GC become zombies that cannot be
stopped for GC (the new lifecycle is sketched below)
Instead we call thread exit handlers from reap_dead_threads(),
necessitating further changes in HANDLE-THREAD-EXIT, as it
can now be called from threads other than the parent of the
dead one
stop_the_world doesn't actually need to hold
all_threads_lock, as it doesn't modify the all_threads
list. Likewise sig_stop_for_gc_handler(), which means the
sched_yield() kludge can go away
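
For the include-order issue, a minimal sketch of the ordering the
reshuffled #includes establish in the affected runtime files.  This is
illustrative only; it assumes, as the new #error message suggests, that
including sbcl.h makes SBCL_GENESIS_CONFIG visible via genesis/config.h:

    /* illustrative only: the include order the patch enforces */
    #include "sbcl.h"      /* pulls in genesis/config.h, so the LISP_FEATURE_*
                            * macros are defined before any arch header */
    #include "runtime.h"
    #include "arch.h"      /* x86-arch.h now #errors if genesis/config.h hasn't
                            * been seen, so the inline get_spinlock can no longer
                            * silently expand to the wrong (presumably
                            * non-threaded) version */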
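
In outline, the new exit path works as sketched below.  This is a
summary of the code in this patch, not additional runtime source;
thread_is_quiescent is a hypothetical helper that just mirrors the
test gc_stop_the_world now makes:

    /* Sketch of the revised lifecycle: thread_exit_handler() only marks a
     * reaped child STATE_DEAD; reap_dead_threads(), called from Lisp after
     * GC (and from TERMINATE-SESSION), runs HANDLE-THREAD-EXIT, unlinks the
     * struct from all_threads and frees its stacks. */
    static int
    thread_is_quiescent(struct thread *p)   /* hypothetical helper */
    {
        /* a thread is out of the GC's way once it is either parked in
         * sig_stop_for_gc_handler() or already dead */
        return (p->state == STATE_STOPPED) || (p->state == STATE_DEAD);
    }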
(scrub-control-stack)
(setf *need-to-collect-garbage* nil)
(dolist (h *after-gc-hooks*) (carefully-funcall h))
- (gc-start-the-world)))))))
+ (gc-start-the-world))
+ (sb!thread::reap-dead-threads))))))
;;; This is the user-advertised garbage collection function.
(defun gc (&key (gen 0) (full nil) &allow-other-keys)
unsigned-int
(thread-id unsigned-long))
+(define-alien-routine reap-dead-threads void)
+
(defvar *session* nil)
;;;; queues, locks
;;; i suspect there may be a race still in this: the futex version requires
;;; the old mutex value before sleeping, so how do we get away without it
(defun get-mutex (lock &optional new-value (wait-p t))
+ "Acquire LOCK, setting it to NEW-VALUE or some suitable default value
+if NIL. If WAIT-P is non-NIL and the lock is in use, sleep until it
+is available"
(declare (type mutex lock) (optimize (speed 3)))
(let ((pid (current-thread-id)))
(unless new-value (setf new-value pid))
(let ((pid (current-thread-id))
old)
(unless new-value (setf new-value pid))
- (assert (not (eql new-value (mutex-value lock))))
+ (when (eql new-value (mutex-value lock))
+ (warn "recursive lock attempt ~S~%" lock))
(loop
(unless
(setf old (sb!vm::%instance-set-conditional lock 4 nil new-value))
until (sb!sys:sap= thread (sb!sys:int-sap 0))
collect (funcall function thread))))
+(defun thread-sap-from-id (id)
+ (let ((thread (alien-sap (extern-alien "all_threads" (* t)))))
+ (loop
+ (when (sb!sys:sap= thread (sb!sys:int-sap 0)) (return nil))
+ (let ((pid (sb!sys:sap-ref-32 thread (* 4 sb!vm::thread-pid-slot))))
+ (when (= pid id) (return thread))
+ (setf thread (sb!sys:sap-ref-sap thread (* 4 sb!vm::thread-next-slot)))))))
+
+;;; internal use only. If you think you need to use this, either you
+;;; are an SBCL developer, are doing something that you should discuss
+;;; with an SBCL developer first, or are doing something that you
+;;; should probably discuss with a professional psychiatrist first
+(defun symbol-value-in-thread (symbol thread-id)
+ (let ((thread (thread-sap-from-id thread-id)))
+ (when thread
+ (let* ((index (sb!vm::symbol-tls-index symbol))
+ (tl-val (sb!sys:sap-ref-32 thread (* 4 index))))
+ (if (eql tl-val sb!vm::unbound-marker-widetag)
+ (sb!vm::symbol-global-value symbol)
+ (sb!kernel:make-lisp-obj tl-val))))))
+
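
For reference, a C-side sketch of the layout SYMBOL-VALUE-IN-THREAD
relies on; tls_value_sketch is illustrative only and not part of this
patch: the tls-index stored in a symbol is a word offset from the start
of the owning thread structure.

    /* illustrative only: how the Lisp code above finds a thread-local
     * value.  tls_index is the word index from the symbol's tls-index
     * slot; the TLS area starts at the thread structure itself, which is
     * why the Lisp code multiplies the index by 4 (bytes per word). */
    static lispobj
    tls_value_sketch(struct thread *th, unsigned long tls_index)
    {
        lispobj val = ((lispobj *)th)[tls_index];
        /* an unbound-marker here means there is no thread-local binding,
         * and the caller falls back to the symbol's global value */
        return val;
    }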
;;;; job control, independent listeners
(defstruct session
(defun init-job-control ()
(setf *session* (new-session)))
-(defun %delete-thread-from-session (tid)
- (with-mutex ((session-lock *session*))
- (setf (session-threads *session*)
- (delete tid (session-threads *session*))
- (session-interactive-threads *session*)
- (delete tid (session-interactive-threads *session*)))))
+(defun %delete-thread-from-session (tid session)
+ (with-mutex ((session-lock session))
+ (setf (session-threads session)
+ (delete tid (session-threads session))
+ (session-interactive-threads session)
+ (delete tid (session-interactive-threads session)))))
(defun call-with-new-session (fn)
- (%delete-thread-from-session (current-thread-id))
+ (%delete-thread-from-session (current-thread-id) *session*)
(let ((*session* (new-session))) (funcall fn)))
(defmacro with-new-session (args &body forms)
`(labels ((,fb-name () ,@forms))
(call-with-new-session (function ,fb-name)))))
-;;; this is called from a C signal handler: some signals may be masked
+;;; Remove thread id TID from its session, if it has one. This is
+;;; called from C reap_dead_threads(), so it runs in the context of
+;;; whichever thread called that (usually after a GC), which may not have
+;;; any meaningful parent/child/sibling relationship with the dead thread
(defun handle-thread-exit (tid)
- "Remove thread id TID from the session, if it's there"
- (%delete-thread-from-session tid))
+ (let ((session (symbol-value-in-thread '*session* tid)))
+ (and session (%delete-thread-from-session tid session))))
(defun terminate-session ()
- "Kill all threads in session exept for this one. Does nothing if current
+ "Kill all threads in session except for this one. Does nothing if current
thread is not the foreground thread"
+ (reap-dead-threads)
(let* ((tid (current-thread-id))
(to-kill
(with-mutex ((session-lock *session*))
(sb!sys:sap-ref-32 (alien-sap (extern-alien "all_threads" (* t)))
(* sb!vm::thread-pid-slot 4)))
+(defun reap-dead-threads ())
+
;;;; queues, locks
;; spinlocks use 0 as "free" value: higher-level locks use NIL
;; first data slot, and if you subtract 7 you get a symbol header.
;; also the CAR of NIL-as-end-of-list
- (value :init :unbound)
+ (value :init :unbound :ref-known (flushable) :ref-trans symbol-global-value)
;; also the CDR of NIL-as-end-of-list. Its reffer needs special
;; care for this reason, as hash values must be fixnums.
(hash :set-trans %set-symbol-hash)
(package :ref-trans symbol-package
:set-trans %set-symbol-package
:init :null)
- #!+sb-thread (tls-index))
+ #!+sb-thread (tls-index :ref-known (flushable) :ref-trans symbol-tls-index))
(define-primitive-object (complex-single-float
:lowtag other-pointer-lowtag
(tls-cookie) ; on x86, the LDT index
(this :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1)
(next :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1)
- (state) ; running, stopping, stopped
+ (state) ; running, stopping, stopped, dead
#!+x86 (pseudo-atomic-atomic)
#!+x86 (pseudo-atomic-interrupted)
(interrupt-data :c-type "struct interrupt_data *"
#include <stdio.h>
#include <string.h>
+#include "sbcl.h"
#include "genesis/config.h"
#include "runtime.h"
#include "os.h"
-#include "sbcl.h"
#include "alloc.h"
#include "globals.h"
#include "gc.h"
#ifdef LISP_FEATURE_SB_THREAD
sigaddset(s, SIG_STOP_FOR_GC);
sigaddset(s, SIG_INTERRUPT_THREAD);
- sigaddset(s, SIG_THREAD_EXIT);
#endif
}
sigset_t ss;
int i;
- /* KLUDGE: at least on Linux, the kernel apparently schedules a
- thread immediately it is signalled. However, we signal
- SIG_STOP_FOR_GC while holding the spinlock, and consequently we
- can easily end up with a kind of thundering herd of threads all
- wanting to acquire the lock at the same time so that they can
- tell the system that they've gone to sleep. So we yield here.
- Whether this is the right fix or not is unknown. -- CSR,
- 2004-07-16 */
- sched_yield();
-
if(maybe_defer_handler(sig_stop_for_gc_handler,data,
signal,info,context)) {
return;
for(i=1;i<NSIG;i++) sigaddset(&ss,i); /* Block everything. */
sigprocmask(SIG_BLOCK,&ss,0);
- get_spinlock(&all_threads_lock,thread->pid);
thread->state=STATE_STOPPED;
- release_spinlock(&all_threads_lock);
sigemptyset(&ss); sigaddset(&ss,SIG_STOP_FOR_GC);
sigwaitinfo(&ss,0);
void thread_exit_handler(int num, siginfo_t *info, void *v_context)
{ /* called when a child thread exits */
- os_context_t *context = (os_context_t*)arch_os_get_context(&v_context);
- struct thread *th=arch_os_get_current_thread();
pid_t kid;
- int *status;
- struct interrupt_data *data=
- th ? th->interrupt_data : global_interrupt_data;
- if(maybe_defer_handler(thread_exit_handler,data,num,info,context)){
- return ;
- }
+ int status;
+
while(1) {
kid=waitpid(-1,&status,__WALL|WNOHANG);
- if(kid<1) break;
+ if(kid<=0) break;
if(WIFEXITED(status) || WIFSIGNALED(status)) {
struct thread *th=find_thread_by_pid(kid);
- if(!th) continue;
- funcall1(SymbolFunction(HANDLE_THREAD_EXIT),make_fixnum(kid));
- destroy_thread(th);
+ if(th) th->state=STATE_DEAD;
}
}
}
+
+
#endif
boolean handle_control_stack_guard_triggered(os_context_t *context,void *addr){
#include <signal.h>
#include <stddef.h>
#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
#include "runtime.h"
#include "sbcl.h"
#include "validate.h" /* for CONTROL_STACK_SIZE etc */
#include "globals.h"
#include "dynbind.h"
#include "genesis/cons.h"
+#include "genesis/fdefn.h"
#define ALIEN_STACK_SIZE (1*1024*1024) /* 1Mb size chosen at random */
int dynamic_values_bytes=4096*sizeof(lispobj); /* same for all threads */
volatile lispobj all_threads_lock;
extern struct interrupt_data * global_interrupt_data;
-void get_spinlock(lispobj *word,int value);
+void get_spinlock(volatile lispobj *word,int value);
int
initial_thread_trampoline(struct thread *th)
void link_thread(struct thread *th,pid_t kid_pid)
{
+ sigset_t newset,oldset;
+ sigemptyset(&newset);
+ sigaddset_blockable(&newset);
+ sigprocmask(SIG_BLOCK, &newset, &oldset);
+
get_spinlock(&all_threads_lock,kid_pid);
th->next=all_threads;
all_threads=th;
*/
protect_control_stack_guard_page(th->pid,1);
release_spinlock(&all_threads_lock);
+
+ sigprocmask(SIG_SETMASK,&oldset,0);
th->pid=kid_pid; /* child will not start until this is set */
}
#ifdef LISP_FEATURE_SB_THREAD
pid_t create_thread(lispobj initial_function) {
struct thread *th=create_thread_struct(initial_function);
- pid_t kid_pid=clone(new_thread_trampoline,
- (((void*)th->control_stack_start)+
- THREAD_CONTROL_STACK_SIZE-4),
- CLONE_FILES|SIG_THREAD_EXIT|CLONE_VM,th);
-
- if(th && kid_pid>0) {
+ pid_t kid_pid=0;
+
+ if(th==0) return 0;
+ kid_pid=clone(new_thread_trampoline,
+ (((void*)th->control_stack_start)+
+ THREAD_CONTROL_STACK_SIZE-4),
+ CLONE_FILES|SIG_THREAD_EXIT|CLONE_VM,th);
+
+ if(kid_pid>0) {
link_thread(th,kid_pid);
return th->pid;
} else {
- destroy_thread(th);
+ os_invalidate((os_vm_address_t) th->control_stack_start,
+ ((sizeof (lispobj))
+ * (th->control_stack_end-th->control_stack_start)) +
+ BINDING_STACK_SIZE+ALIEN_STACK_SIZE+dynamic_values_bytes+
+ 32*SIGSTKSZ);
return 0;
}
}
#endif
+/* unused */
void destroy_thread (struct thread *th)
{
/* precondition: the unix task has already been killed and exited.
- * This is called by the parent */
+ * This is called by the parent or some other thread */
#ifdef LISP_FEATURE_GENCGC
gc_alloc_update_page_tables(0, &th->alloc_region);
#endif
get_spinlock(&all_threads_lock,th->pid);
- th->state=STATE_STOPPED;
+ th->unbound_marker=0; /* for debugging */
if(th==all_threads)
all_threads=th->next;
else {
32*SIGSTKSZ);
}
+void reap_dead_threads()
+{
+ struct thread *th,*next,*prev=0;
+ th=all_threads;
+ while(th) {
+ next=th->next;
+ if(th->state==STATE_DEAD) {
+ funcall1(SymbolFunction(HANDLE_THREAD_EXIT),make_fixnum(th->pid));
+#ifdef LISP_FEATURE_GENCGC
+ gc_alloc_update_page_tables(0, &th->alloc_region);
+#endif
+ get_spinlock(&all_threads_lock,th->pid);
+ if(prev) prev->next=next;
+ else all_threads=next;
+ release_spinlock(&all_threads_lock);
+ if(th->tls_cookie>=0) arch_os_thread_cleanup(th);
+ os_invalidate((os_vm_address_t) th->control_stack_start,
+ ((sizeof (lispobj))
+ * (th->control_stack_end-th->control_stack_start)) +
+ BINDING_STACK_SIZE+ALIEN_STACK_SIZE+dynamic_values_bytes+
+ 32*SIGSTKSZ);
+ } else
+ prev=th;
+ th=next;
+ }
+}
+
struct thread *find_thread_by_pid(pid_t pid)
{
/* stopping the world is a two-stage process. From this thread we signal
- * all the others with SIG_STOP_FOR_GC. The handler for this thread does
+ * all the others with SIG_STOP_FOR_GC. The handler for this signal does
* the usual pseudo-atomic checks (we don't want to stop a thread while
* it's in the middle of allocation) then kills _itself_ with SIGSTOP.
*/
/* stop all other threads by sending them SIG_STOP_FOR_GC */
struct thread *p,*th=arch_os_get_current_thread();
pid_t old_pid;
- int finished=0;
+ int finished;
do {
- get_spinlock(&all_threads_lock,th->pid);
+ finished=1;
for(p=all_threads,old_pid=p->pid; p; p=p->next) {
if(p==th) continue;
- if(p->state!=STATE_RUNNING) continue;
- p->state=STATE_STOPPING;
- kill(p->pid,SIG_STOP_FOR_GC);
+ if(p->state==STATE_RUNNING) {
+ p->state=STATE_STOPPING;
+ kill(p->pid,SIG_STOP_FOR_GC);
+ }
+ if((p->state!=STATE_STOPPED) &&
+ (p->state!=STATE_DEAD)) {
+ finished=0;
+ }
}
- release_spinlock(&all_threads_lock);
- sched_yield();
- /* if everything has stopped, and there is no possibility that
- * a new thread has been created, we're done. Otherwise go
- * round again and signal anything that sprang up since last
- * time */
- if(old_pid==all_threads->pid) {
- finished=1;
- for_each_thread(p)
- finished = finished &&
- ((p==th) || (p->state==STATE_STOPPED));
+ if(old_pid!=all_threads->pid) {
+ finished=0;
}
} while(!finished);
}
void gc_start_the_world()
{
struct thread *p,*th=arch_os_get_current_thread();
- get_spinlock(&all_threads_lock,th->pid);
+    /* if a resumed thread creates a new thread before we're done with
+     * this loop, the new thread will get consed onto the front of
+     * all_threads, but it won't have been stopped so won't need
+     * restarting */
for(p=all_threads;p;p=p->next) {
- if(p==th) continue;
+ if((p==th) || (p->state==STATE_DEAD)) continue;
p->state=STATE_RUNNING;
kill(p->pid,SIG_STOP_FOR_GC);
}
- release_spinlock(&all_threads_lock);
}
#endif
#define STATE_RUNNING (make_fixnum(0))
#define STATE_STOPPING (make_fixnum(1))
#define STATE_STOPPED (make_fixnum(2))
+#define STATE_DEAD (make_fixnum(3))
#define THREAD_SLOT_OFFSET_WORDS(c) \
(offsetof(struct thread,c)/(sizeof (struct thread *)))
* namespace that we control. */
#ifndef _X86_ARCH_H
#define _X86_ARCH_H
+#ifndef SBCL_GENESIS_CONFIG
+#error genesis/config.h (or sbcl.h) must be included before this file
+#endif
#define ARCH_HAS_STACK_POINTER
* 2002-02-15) */
#ifdef LISP_FEATURE_SB_THREAD
+
+extern never_returns lose(char *fmt, ...);
+
static inline void
-get_spinlock(lispobj *word,int value)
+get_spinlock(volatile lispobj *word,int value)
{
u32 eax=0;
+ if(*word==value)
+ lose("recursive get_spinlock: 0x%x,%d\n",word,value);
do {
asm ("xor %0,%0\n\
lock cmpxchg %1,%2"
}
static inline void
-release_spinlock(lispobj *word)
+release_spinlock(volatile lispobj *word)
{
*word=0;
}
#include <unistd.h>
#include <errno.h>
+#include "sbcl.h"
#include "./signal.h"
#include "os.h"
#include "arch.h"
#include "interrupt.h"
#include "interr.h"
#include "lispregs.h"
-#include "sbcl.h"
#include <sys/socket.h>
#include <sys/utsname.h>
printf("%d bytes in ldt: print/x local_ldt_copy\n", n);
}
-lispobj modify_ldt_lock; /* protect all calls to modify_ldt */
+volatile lispobj modify_ldt_lock; /* protect all calls to modify_ldt */
int arch_os_thread_init(struct thread *thread) {
stack_t sigstack;
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.8.12.41"
+"0.8.12.42"