From 2675adcb29d689ee6d270f52658af17f2deeaf77 Mon Sep 17 00:00:00 2001 From: Daniel Barlow Date: Thu, 2 Oct 2003 23:13:08 +0000 Subject: [PATCH] 0.8.4.1 Merge most of atropos-branch: miscellaneous (mostly threading) fixes that were probably a little too risky for late in 0.8.4 development. doc/ - fix up some of the sgml errors that sourceforge keeps mailing me about New function release-spinlock that only changes the lock value if we owned the spinlock, so good for unwind-protect cleanups when lock acquisition failed get-spinlock release-spinlock current-thread-id could all win from being inlinable Use a RT signal (SIG_DEQUEUE) for resuming threads that were on queues, instead of having SIGCONT do both this and the resume-after-gc task. Scattered commentary describing the state of the signal mask in various interesting places In gencgc alloc, only install a deferred handler for GC if there was no previous handler for anything else. This fixes a longstanding bug where the GC thread would eat all cpu while waiting indefinitely for othr threads to stop. Add SIG_STOP_FOR_GC to the blockable list interrupt_maybe_gc_int: enable signals before calling SUB-GC, or the locking that sub-gc does is going to interact badly. Minor rearrangement to parent thread to stop it having to wake up on every GC Add grovel_headers line for SIG-DEQUEUE. OAOOM alert... --- doc/beyond-ansi.sgml | 7 +++--- src/code/gc.lisp | 34 +++++++++++++---------------- src/code/target-thread.lisp | 44 ++++++++++++++++++++++++-------------- src/runtime/alloc.c | 1 + src/runtime/breakpoint.c | 5 ++++- src/runtime/cheneygc.c | 2 ++ src/runtime/gencgc.c | 10 ++++++--- src/runtime/interrupt.c | 26 ++++++++++++---------- src/runtime/linux-os.c | 2 +- src/runtime/linux-os.h | 1 + src/runtime/runtime.c | 6 +++--- src/runtime/thread.c | 12 +++++++++-- tests/threads.impure.lisp | 2 +- tools-for-build/grovel_headers.c | 5 ++++- version.lisp-expr | 2 +- 15 files changed, 97 insertions(+), 62 deletions(-) diff --git a/doc/beyond-ansi.sgml b/doc/beyond-ansi.sgml index afc3962..388ebdf 100644 --- a/doc/beyond-ansi.sgml +++ b/doc/beyond-ansi.sgml @@ -125,7 +125,7 @@ implements a subset of the Franz Allegro simple-streams proposal. &SBCL; supports a MetaObject Protocol which is intended to be compatible with &AMOP;; present exceptions to this (as distinct from -current bugs) are: +current bugs) are: the abstract metaobject class is not present in the class hierarchy; @@ -137,7 +137,8 @@ current bugs) are: the system-supplied :around method for compute-slots specialized on funcallable-standard-class does not respect the - requested order from a user-supplied primary method. + requested order from a user-supplied primary method. + @@ -146,7 +147,7 @@ current bugs) are: &SBCL; (as of version 0.8.3, on Linux x86 only) supports a fairly low-level threading interface that maps onto the host operating -system's concept of threads or lightweight processes. +system's concept of threads or lightweight processes. Lisp-level view diff --git a/src/code/gc.lisp b/src/code/gc.lisp index 684e926..e048d8d 100644 --- a/src/code/gc.lisp +++ b/src/code/gc.lisp @@ -237,25 +237,21 @@ and submit it as a patch." (defvar *gc-mutex* (sb!thread:make-mutex :name "GC Mutex")) (defun sub-gc (&key (gen 0) &aux (pre-gc-dynamic-usage (dynamic-usage))) - (when *already-in-gc* (return-from sub-gc nil)) - (setf *need-to-collect-garbage* t) - (when (zerop *gc-inhibit*) - (sb!thread:with-recursive-lock (*gc-mutex*) - (let ((*already-in-gc* t)) - (without-interrupts - (gc-stop-the-world) - #+nil - (dolist (h *before-gc-hooks*) - (carefully-funcall h)) - (collect-garbage gen) - (incf *n-bytes-freed-or-purified* - (max 0 (- pre-gc-dynamic-usage (dynamic-usage)))) - (setf *need-to-collect-garbage* nil) - - (gc-start-the-world))) - (scrub-control-stack)) - (dolist (h *after-gc-hooks*) - (carefully-funcall h))) + ;; catch attempts to gc recursively or during post-hooks and ignore them + (when (sb!thread::mutex-value *gc-mutex*) (return-from sub-gc nil)) + (sb!thread:with-mutex (*gc-mutex* :wait-p nil) + (setf *need-to-collect-garbage* t) + (when (zerop *gc-inhibit*) + (without-interrupts + (gc-stop-the-world) + (collect-garbage gen) + (incf *n-bytes-freed-or-purified* + (max 0 (- pre-gc-dynamic-usage (dynamic-usage)))) + (setf *need-to-collect-garbage* nil) + (gc-start-the-world)) + (scrub-control-stack) + (setf *need-to-collect-garbage* nil) + (dolist (h *after-gc-hooks*) (carefully-funcall h)))) (values)) diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index 093f50f..2601a46 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -27,8 +27,8 @@ (funcall real-function)) 0)))))))) -;;; Conventional wisdom says that it's a bad idea to use these unless -;;; you really need to. Use a lock or a waitqueue instead +;;; Really, you don't want to use these: they'll get into trouble with +;;; garbage collection. Use a lock or a waitqueue instead (defun suspend-thread (thread-id) (sb!unix:unix-kill thread-id sb!unix:sigstop)) (defun resume-thread (thread-id) @@ -60,10 +60,13 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" (interrupt-thread thread-id 'sb!ext:quit)) - +(declaim (inline current-thread-id)) (defun current-thread-id () - (sb!sys:sap-int - (sb!vm::current-thread-offset-sap sb!vm::thread-pid-slot))) + (logand + (sb!sys:sap-int + (sb!vm::current-thread-offset-sap sb!vm::thread-pid-slot)) + ;; KLUDGE pids are 16 bit really. Avoid boxing the return value + (1- (ash 1 16)))) ;;;; iterate over the in-memory threads @@ -78,18 +81,28 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" ;;;; queues, locks ;; spinlocks use 0 as "free" value: higher-level locks use NIL +(declaim (inline get-spinlock release-spinlock)) + (defun get-spinlock (lock offset new-value) (declare (optimize (speed 3) (safety 0))) (loop until (eql (sb!vm::%instance-set-conditional lock offset 0 new-value) 0))) +;; this should do nothing if we didn't own the lock, so safe to use in +;; unwind-protect cleanups when lock acquisition failed for some reason +(defun release-spinlock (lock offset our-value) + (declare (optimize (speed 3) (safety 0))) + (sb!vm::%instance-set-conditional lock offset our-value 0)) + (defmacro with-spinlock ((queue) &body body) (with-unique-names (pid) - `(unwind-protect - (let ((,pid (current-thread-id))) - (get-spinlock ,queue 2 ,pid) - ,@body) - (setf (waitqueue-lock ,queue) 0)))) + `(let ((,pid (current-thread-id))) + (unwind-protect + (progn + (get-spinlock ,queue 2 ,pid) + ,@body) + (release-spinlock ,queue 2 ,pid))))) + ;;;; the higher-level locking operations are based on waitqueues @@ -104,12 +117,11 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" (sb!alien:define-alien-routine "block_sigcont" void) (sb!alien:define-alien-routine "unblock_sigcont_and_sleep" void) + ;;; this should only be called while holding the queue spinlock. ;;; it releases the spinlock before sleeping (defun wait-on-queue (queue &optional lock) (let ((pid (current-thread-id))) - ;; FIXME what should happen if we get interrupted when we've blocked - ;; the sigcont? For that matter, can we get interrupted? (block-sigcont) (when lock (release-mutex lock)) (sb!sys:without-interrupts @@ -128,12 +140,13 @@ SB-EXT:QUIT - the usual cleanup forms will be evaluated" ;;; this should only be called while holding the queue spinlock. (defun signal-queue-head (queue) (let ((p (car (waitqueue-data queue)))) - (when p (sb!unix:unix-kill p sb!unix:sigcont)))) + (when p (sb!unix:unix-kill p sb!unix::sig-dequeue)))) ;;;; mutex (defun get-mutex (lock &optional new-value (wait-p t)) - (declare (type mutex lock)) + (declare (type mutex lock) + (optimize (speed 3))) (let ((pid (current-thread-id))) (unless new-value (setf new-value pid)) (assert (not (eql new-value (mutex-value lock)))) @@ -257,8 +270,7 @@ restart if *BACKGROUND-THREADS-WAIT-FOR-DEBUGGER* says to do that instead" (sb!impl::repl-prompt-fun out-stream)))) (defun resume-stopped-thread (id) - (let ((pid (current-thread-id)) - (lock *session-lock*)) + (let ((lock *session-lock*)) (with-spinlock (lock) (setf (waitqueue-data lock) (cons id (delete id (waitqueue-data lock))))) diff --git a/src/runtime/alloc.c b/src/runtime/alloc.c index 8add020..ed6ec54 100644 --- a/src/runtime/alloc.c +++ b/src/runtime/alloc.c @@ -17,6 +17,7 @@ #include #include +#include "genesis/config.h" #include "runtime.h" #include "os.h" #include "sbcl.h" diff --git a/src/runtime/breakpoint.c b/src/runtime/breakpoint.c index 7aeb14b..878ada4 100644 --- a/src/runtime/breakpoint.c +++ b/src/runtime/breakpoint.c @@ -145,7 +145,8 @@ void handle_breakpoint(int signal, siginfo_t *info, os_context_t *context) fake_foreign_function_call(context); code = find_code(context); - + /* FIXME we're calling into Lisp with signals masked here. Is this + * the right thing to do? */ funcall3(SymbolFunction(HANDLE_BREAKPOINT), compute_offset(context, code), code, @@ -187,6 +188,8 @@ void *handle_fun_end_breakpoint(int signal, siginfo_t *info, code = find_code(context); codeptr = (struct code *)native_pointer(code); + /* FIXME again, calling into Lisp with signals masked. Is this + * sensible? */ funcall3(SymbolFunction(HANDLE_BREAKPOINT), compute_offset(context, code), code, diff --git a/src/runtime/cheneygc.c b/src/runtime/cheneygc.c index 53930cf..745c61b 100644 --- a/src/runtime/cheneygc.c +++ b/src/runtime/cheneygc.c @@ -136,6 +136,8 @@ collect_garbage(unsigned ignore) gettimeofday(&start_tv, (struct timezone *) 0); #endif + /* it's possible that signals are blocked already if this was called + * from a signal handler (e.g. with the sigsegv gc_trigger stuff) */ sigemptyset(&tmp); sigaddset_blockable(&tmp); sigprocmask(SIG_BLOCK, &tmp, &old); diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index 4061ae6..b6917ab 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -687,7 +687,7 @@ add_new_area(int first_page, int offset, int size) max_new_areas = new_areas_index; } -/* Update the tables for the alloc_region. The region maybe added to +/* Update the tables for the alloc_region. The region may be added to * the new_areas. * * When done the alloc_region is set up so that the next quick alloc @@ -4249,9 +4249,13 @@ alloc(int nbytes) */ if (auto_gc_trigger && bytes_allocated > auto_gc_trigger) { /* set things up so that GC happens when we finish the PA - * section. */ + * section. We only do this if there wasn't a pending handler + * already, in case it was a gc. If it wasn't a GC, the next + * allocation will get us back to this point anyway, so no harm done + */ struct interrupt_data *data=th->interrupt_data; - maybe_defer_handler(interrupt_maybe_gc_int,data,0,0,0); + if(!data->pending_handler) + maybe_defer_handler(interrupt_maybe_gc_int,data,0,0,0); } new_obj = gc_alloc_with_region(nbytes,0,region,0); return (new_obj); diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index fdbbb9d..3250875 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -103,8 +103,7 @@ void sigaddset_blockable(sigset_t *s) sigaddset(s, SIGUSR1); sigaddset(s, SIGUSR2); #ifdef LISP_FEATURE_SB_THREAD - /* don't block STOP_FOR_GC, we need to be able to interrupt threads - * for GC purposes even when they are blocked on queues etc */ + sigaddset(s, SIG_STOP_FOR_GC); sigaddset(s, SIG_INTERRUPT_THREAD); #endif } @@ -276,7 +275,7 @@ interrupt_internal_error(int signal, siginfo_t *info, os_context_t *context, * before the Lisp error handling mechanism is set up. */ lose("internal error too early in init, can't recover"); } - undo_fake_foreign_function_call(context); + undo_fake_foreign_function_call(context); /* blocks signals again */ if (continuable) { arch_skip_instruction(context); } @@ -290,6 +289,8 @@ interrupt_handle_pending(os_context_t *context) thread=arch_os_get_current_thread(); data=thread->interrupt_data; + /* FIXME I'm not altogether sure this is appropriate if we're + * here as the result of a pseudo-atomic */ SetSymbolValue(INTERRUPT_PENDING, NIL,thread); /* restore the saved signal mask from the original signal (the @@ -407,7 +408,7 @@ interrupt_handle_now(int signal, siginfo_t *info, void *void_context) if (were_in_lisp) #endif { - undo_fake_foreign_function_call(context); + undo_fake_foreign_function_call(context); /* block signals again */ } #ifdef QSHOW_SIGNALS @@ -427,6 +428,7 @@ void run_deferred_handler(struct interrupt_data *data, void *v_context) { (*(data->pending_handler)) (data->pending_signal,&(data->pending_info), v_context); + data->pending_handler=0; } boolean @@ -505,23 +507,19 @@ sig_stop_for_gc_handler(int signal, siginfo_t *info, void *void_context) os_context_t *context = arch_os_get_context(&void_context); struct thread *thread=arch_os_get_current_thread(); struct interrupt_data *data=thread->interrupt_data; - sigset_t block; + if(maybe_defer_handler(sig_stop_for_gc_handler,data, signal,info,context)){ return; } - sigemptyset(&block); - sigaddset_blockable(&block); - sigprocmask(SIG_BLOCK, &block, 0); - /* need the context stored so it can have registers scavenged */ fake_foreign_function_call(context); get_spinlock(&all_threads_lock,thread->pid); countdown_to_gc--; release_spinlock(&all_threads_lock); - kill(getpid(),SIGSTOP); + kill(thread->pid,SIGSTOP); undo_fake_foreign_function_call(context); } @@ -680,16 +678,22 @@ interrupt_maybe_gc(int signal, siginfo_t *info, void *void_context) #endif -/* this is also used by from gencgc.c alloc() */ +/* this is also used by gencgc, in alloc() */ boolean interrupt_maybe_gc_int(int signal, siginfo_t *info, void *void_context) { + sigset_t new; os_context_t *context=(os_context_t *) void_context; fake_foreign_function_call(context); /* SUB-GC may return without GCing if *GC-INHIBIT* is set, in * which case we will be running with no gc trigger barrier * thing for a while. But it shouldn't be long until the end * of WITHOUT-GCING. */ + + sigemptyset(&new); + sigaddset_blockable(&new); + /* enable signals before calling into Lisp */ + sigprocmask(SIG_UNBLOCK,&new,0); funcall0(SymbolFunction(SUB_GC)); undo_fake_foreign_function_call(context); return 1; diff --git a/src/runtime/linux-os.c b/src/runtime/linux-os.c index 1256cd9..6e391f2 100644 --- a/src/runtime/linux-os.c +++ b/src/runtime/linux-os.c @@ -268,7 +268,7 @@ os_install_interrupt_handlers(void) undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC, sig_stop_for_gc_handler); #endif - undoably_install_low_level_interrupt_handler(SIGCONT, + undoably_install_low_level_interrupt_handler(SIG_DEQUEUE, sigcont_handler); } diff --git a/src/runtime/linux-os.h b/src/runtime/linux-os.h index cec30a5..1055b2d 100644 --- a/src/runtime/linux-os.h +++ b/src/runtime/linux-os.h @@ -39,5 +39,6 @@ typedef int os_vm_prot_t; #define SIG_MEMORY_FAULT SIGSEGV #define SIG_INTERRUPT_THREAD SIGRTMIN #define SIG_STOP_FOR_GC (SIGRTMIN+1) +#define SIG_DEQUEUE (SIGRTMIN+2) diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c index 32cad8a..ae360ae 100644 --- a/src/runtime/runtime.c +++ b/src/runtime/runtime.c @@ -407,7 +407,7 @@ static void /* noreturn */ parent_loop(void) while(!all_threads) { sched_yield(); } - while(all_threads && (pid=waitpid(-1,&status,__WALL|WUNTRACED))) { + while(all_threads && (pid=waitpid(-1,&status,__WALL))) { struct thread *th; int real_errno=errno; if(pid==-1) { @@ -418,9 +418,9 @@ static void /* noreturn */ parent_loop(void) fprintf(stderr,"waitpid: %s\n",strerror(real_errno)); continue; } - th=find_thread_by_pid(pid); - if(!th) continue; if(WIFEXITED(status) || WIFSIGNALED(status)) { + th=find_thread_by_pid(pid); + if(!th) continue; fprintf(stderr,"waitpid : child %d %x exited \n", pid,th); destroy_thread(th); if(!all_threads) break; diff --git a/src/runtime/thread.c b/src/runtime/thread.c index ebdb2e4..eb8f241 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -270,7 +270,7 @@ void block_sigcont(void) */ sigset_t newset; sigemptyset(&newset); - sigaddset(&newset,SIGCONT); + sigaddset(&newset,SIG_DEQUEUE); sigprocmask(SIG_BLOCK, &newset, 0); } @@ -282,7 +282,7 @@ void unblock_sigcont_and_sleep(void) { sigset_t set; sigemptyset(&set); - sigaddset(&set,SIGCONT); + sigaddset(&set,SIG_DEQUEUE); do { errno=0; sigwaitinfo(&set,0); @@ -298,6 +298,14 @@ int interrupt_thread(pid_t pid, lispobj function) return sigqueue(pid, SIG_INTERRUPT_THREAD, sigval); } +/* stopping the world is a two-stage process. From this thread we signal + * all the others with SIG_STOP_FOR_GC. The handler for this thread does + * the usual pseudo-atomic checks (we don't want to stop a thread while + * it's in the middle of allocation) then kills _itself_ with SIGSTOP. + * At any given time, countdown_to_gc should reflect the number of threads + * signalled but which haven't yet come to rest + */ + void gc_stop_the_world() { /* stop all other threads by sending them SIG_STOP_FOR_GC */ diff --git a/tests/threads.impure.lisp b/tests/threads.impure.lisp index 9e4fc6b..e0973ff 100644 --- a/tests/threads.impure.lisp +++ b/tests/threads.impure.lisp @@ -154,4 +154,4 @@ ;; overall exit status is 0, not 104 (sleep 2) -;(sb-ext:quit :unix-status 104) +(sb-ext:quit :unix-status 104) diff --git a/tools-for-build/grovel_headers.c b/tools-for-build/grovel_headers.c index af38bdc..2254aa3 100644 --- a/tools-for-build/grovel_headers.c +++ b/tools-for-build/grovel_headers.c @@ -188,6 +188,9 @@ main(int argc, char *argv[]) DEFSIGNAL(SIGXCPU); DEFSIGNAL(SIGXFSZ); #endif - +#ifdef LISP_FEATURE_SB_THREAD + /* FIXME OAOOM alert: this information is duplicated in linux-os.h */ + defconstant("sig-dequeue",SIGRTMIN+2); +#endif return 0; } diff --git a/version.lisp-expr b/version.lisp-expr index bc8ad3b..93d9703 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.4" +"0.8.4.1" -- 1.7.10.4