From: David Lichteblau Date: Tue, 18 Sep 2012 15:12:09 +0000 (+0200) Subject: Preliminary work towards threads on win32 X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=7aef55b130d95c384b63422807f1848faa9aba5a;p=sbcl.git Preliminary work towards threads on win32 * Implement SB-THREAD * Support WITH-TIMEOUT, etc. Implementation details: * Implement pthreads, futex API on top of Win32. * Adds support for the timer facility using sb-wtimer. * Implement an interruptable `nanosleep' using waitable timers. * Threading on Windows uses safepoints to stop the world. On this platform, either all or none of :SB-THREAD, :SB-SAFEPOINT, :SB-THRUPT, and :SB-WTIMER need to be enabled together. * On this platform, INTERRUPT-THREAD will not run interruptions in a target thread that is executing foreign code, even though the POSIX version of sb-thrupt still allows this (potentially unsafe) form of signalling by default. Does not yet include interruptible I/O, which will be made available separately. Slime users are requested to build SBCL without threads until then. Note that these changes alone are not yet sufficient to make SBCL on Windows an ideal backend. Users looking for a particularly stable or thread-enabled version of SBCL for Windows are still advised to use the well-known Windows branch instead. This is a merge of features developed earlier by Dmitry Kalyanov and Anton Kovalenko. --- diff --git a/src/code/cold-init.lisp b/src/code/cold-init.lisp index c343f4a..48a2027 100644 --- a/src/code/cold-init.lisp +++ b/src/code/cold-init.lisp @@ -230,7 +230,8 @@ (show-and-call stream-cold-init-or-reset) (show-and-call !loader-cold-init) (show-and-call !foreign-cold-init) - #!-win32 (show-and-call signal-cold-init-or-reinit) + #!-(and win32 (not sb-thread)) + (show-and-call signal-cold-init-or-reinit) (/show0 "enabling internal errors") (setf (sb!alien:extern-alien "internal_errors_enabled" boolean) t) @@ -351,7 +352,7 @@ process to continue normally." (os-cold-init-or-reinit) (thread-init-or-reinit) (stream-reinit t) - #!-win32 + #!-(and win32 (not sb-thread)) (signal-cold-init-or-reinit) (setf (sb!alien:extern-alien "internal_errors_enabled" boolean) t) (float-cold-init-or-reinit)) diff --git a/src/code/run-program.lisp b/src/code/run-program.lisp index 97d6d55..fee0dd7 100644 --- a/src/code/run-program.lisp +++ b/src/code/run-program.lisp @@ -145,7 +145,6 @@ #+sb-doc "List of process structures for all active processes.") -#-win32 (defvar *active-processes-lock* (sb-thread:make-mutex :name "Lock for active processes.")) @@ -153,11 +152,8 @@ ;;; mutex is needed. More importantly the sigchld signal handler also ;;; accesses it, that's why we need without-interrupts. (defmacro with-active-processes-lock (() &body body) - #-win32 `(sb-thread::with-system-mutex (*active-processes-lock*) - ,@body) - #+win32 - `(progn ,@body)) + ,@body)) (defstruct (process (:copier nil)) pid ; PID of child process diff --git a/src/code/target-exception.lisp b/src/code/target-exception.lisp index 803a758..0c5e7b7 100644 --- a/src/code/target-exception.lisp +++ b/src/code/target-exception.lisp @@ -101,3 +101,103 @@ ;;; I don't know if we still need this or not. Better safe for now. (defun receive-pending-interrupt () (receive-pending-interrupt)) + +(in-package "SB!UNIX") + +#!+sb-thread +(progn + (defun receive-pending-interrupt () + (receive-pending-interrupt)) + + (defmacro with-interrupt-bindings (&body body) + `(let* + ;; KLUDGE: Whatever is on the PCL stacks before the interrupt + ;; handler runs doesn't really matter, since we're not on the + ;; same call stack, really -- and if we don't bind these (esp. + ;; the cache one) we can get a bogus metacircle if an interrupt + ;; handler calls a GF that was being computed when the interrupt + ;; hit. + ((sb!pcl::*cache-miss-values-stack* nil) + (sb!pcl::*dfun-miss-gfs-on-stack* nil)) + ,@body)) + +;;; Evaluate CLEANUP-FORMS iff PROTECTED-FORM does a non-local exit. + (defmacro nlx-protect (protected-form &rest cleanup-froms) + (with-unique-names (completep) + `(let ((,completep nil)) + (without-interrupts + (unwind-protect + (progn + (allow-with-interrupts + ,protected-form) + (setq ,completep t)) + (unless ,completep + ,@cleanup-froms)))))) + + (declaim (inline %unblock-deferrable-signals)) + (sb!alien:define-alien-routine ("unblock_deferrable_signals" + %unblock-deferrable-signals) + sb!alien:void + (where sb!alien:unsigned) + (old sb!alien:unsigned)) + + (defun block-deferrable-signals () + (%block-deferrable-signals 0 0)) + + (defun unblock-deferrable-signals () + (%unblock-deferrable-signals 0 0)) + + (declaim (inline %block-deferrables-and-return-mask %apply-sigmask)) + (sb!alien:define-alien-routine ("block_deferrables_and_return_mask" + %block-deferrables-and-return-mask) + sb!alien:unsigned) + (sb!alien:define-alien-routine ("apply_sigmask" + %apply-sigmask) + sb!alien:void + (mask sb!alien:unsigned)) + + (defmacro without-interrupts/with-deferrables-blocked (&body body) + (let ((mask-var (gensym))) + `(without-interrupts + (let ((,mask-var (%block-deferrables-and-return-mask))) + (unwind-protect + (progn ,@body) + (%apply-sigmask ,mask-var)))))) + + (defun invoke-interruption (function) + (without-interrupts + ;; Reset signal mask: the C-side handler has blocked all + ;; deferrable signals before funcalling into lisp. They are to be + ;; unblocked the first time interrupts are enabled. With this + ;; mechanism there are no extra frames on the stack from a + ;; previous signal handler when the next signal is delivered + ;; provided there is no WITH-INTERRUPTS. + (let ((sb!unix::*unblock-deferrables-on-enabling-interrupts-p* t)) + (with-interrupt-bindings + (let ((sb!debug:*stack-top-hint* + (nth-value 1 (sb!kernel:find-interrupted-name-and-frame)))) + (allow-with-interrupts + (nlx-protect + (funcall function) + ;; We've been running with deferrables + ;; blocked in Lisp called by a C signal + ;; handler. If we return normally the sigmask + ;; in the interrupted context is restored. + ;; However, if we do an nlx the operating + ;; system will not restore it for us. + (when sb!unix::*unblock-deferrables-on-enabling-interrupts-p* + ;; This means that storms of interrupts + ;; doing an nlx can still run out of stack. + (unblock-deferrable-signals))))))))) + + (defmacro in-interruption ((&key) &body body) + #!+sb-doc + "Convenience macro on top of INVOKE-INTERRUPTION." + `(dx-flet ((interruption () ,@body)) + (invoke-interruption #'interruption))) + + (defun sb!kernel:signal-cold-init-or-reinit () + #!+sb-doc + "Enable all the default signals that Lisp knows how to deal with." + (unblock-deferrable-signals) + (values))) diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp index 5f61722..773fe9c 100644 --- a/src/code/target-thread.lisp +++ b/src/code/target-thread.lisp @@ -1605,12 +1605,12 @@ the state of a thread: (interrupt-thread thread #'break) Short version: be careful out there." - #!+win32 + #!+(and (not sb-thread) win32) + #!+(and (not sb-thread) win32) (declare (ignore thread)) - #!+win32 (with-interrupt-bindings (with-interrupts (funcall function))) - #!-win32 + #!-(and (not sb-thread) win32) (let ((os-thread (thread-os-thread thread))) (cond ((not os-thread) (error 'interrupt-thread-error :thread thread)) diff --git a/src/code/toplevel.lisp b/src/code/toplevel.lisp index f3f226b..a72e064 100644 --- a/src/code/toplevel.lisp +++ b/src/code/toplevel.lisp @@ -202,7 +202,7 @@ any non-negative real number." :format-arguments (list seconds) :datum seconds :expected-type '(real 0))) - #!-win32 + #!-(and win32 (not sb-thread)) (multiple-value-bind (sec nsec) (if (integerp seconds) (values seconds 0) @@ -216,7 +216,7 @@ any non-negative real number." do (decf sec (expt 10 8)) (sb!unix:nanosleep (expt 10 8) 0)) (sb!unix:nanosleep sec nsec)) - #!+win32 + #!+(and win32 (not sb-thread)) (sb!win32:millisleep (truncate (* seconds 1000))) nil) diff --git a/src/code/win32.lisp b/src/code/win32.lisp index 25b00ad..1f8e7ed 100644 --- a/src/code/win32.lisp +++ b/src/code/win32.lisp @@ -154,10 +154,21 @@ ;;;; System Functions -;;; Sleep for MILLISECONDS milliseconds. +#!-sb-thread (define-alien-routine ("Sleep@4" millisleep) void (milliseconds dword)) +#!+sb-thread +(defun sb!unix:nanosleep (sec nsec) + (let ((*allow-with-interrupts* *interrupts-enabled*)) + (without-interrupts + (let ((timer (sb!impl::os-create-wtimer))) + (sb!impl::os-set-wtimer timer sec nsec) + (unwind-protect + (do () ((with-local-interrupts + (zerop (sb!impl::os-wait-for-wtimer timer))))) + (sb!impl::os-close-wtimer timer)))))) + #!+sb-unicode (progn (defvar *ansi-codepage* nil) diff --git a/src/compiler/generic/objdef.lisp b/src/compiler/generic/objdef.lisp index bfc0281..35e7cb5 100644 --- a/src/compiler/generic/objdef.lisp +++ b/src/compiler/generic/objdef.lisp @@ -398,6 +398,7 @@ (control-stack-guard-page-protected) (alien-stack-start :c-type "lispobj *" :length #!+alpha 2 #!-alpha 1) (alien-stack-pointer :c-type "lispobj *" :length #!+alpha 2 #!-alpha 1) + #!+win32 (private-events :c-type "struct private_events" :length 2) (this :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1) (prev :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1) (next :c-type "struct thread *" :length #!+alpha 2 #!-alpha 1) diff --git a/src/runtime/Config.x86-win32 b/src/runtime/Config.x86-win32 index f77b20a..a047e63 100644 --- a/src/runtime/Config.x86-win32 +++ b/src/runtime/Config.x86-win32 @@ -14,7 +14,8 @@ TARGET=sbcl.exe ASSEM_SRC = x86-assem.S ARCH_SRC = x86-arch.c -OS_SRC = win32-os.c x86-win32-os.c os-common.c +OS_SRC = win32-os.c x86-win32-os.c os-common.c pthreads_win32.c + # The "--Wl,--export-dynamic" flags are here to help people # experimenting with callbacks from C to SBCL, by allowing linkage to # SBCL src/runtime/*.c symbols from C. Work on this is good, but it's @@ -35,7 +36,7 @@ endif GC_SRC = gencgc.c -CFLAGS = -g -Wall -O3 -fno-omit-frame-pointer +CFLAGS = -g -Wall -O3 -fno-omit-frame-pointer -mno-cygwin -march=i686 -DWINVER=0x0501 ASFLAGS = $(CFLAGS) CPP = cpp diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index 823410c..a89d59c 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -26,10 +26,14 @@ #include #include -#include #include #include #include "sbcl.h" +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +#include "pthreads_win32.h" +#else +#include +#endif #include "runtime.h" #include "os.h" #include "interr.h" @@ -3321,7 +3325,7 @@ preserve_context_registers (os_context_t *c) /* On Darwin the signal context isn't a contiguous block of memory, * so just preserve_pointering its contents won't be sufficient. */ -#if defined(LISP_FEATURE_DARWIN) +#if defined(LISP_FEATURE_DARWIN)||defined(LISP_FEATURE_WIN32) #if defined LISP_FEATURE_X86 preserve_pointer((void*)*os_context_register_addr(c,reg_EAX)); preserve_pointer((void*)*os_context_register_addr(c,reg_ECX)); @@ -3350,9 +3354,11 @@ preserve_context_registers (os_context_t *c) #error "preserve_context_registers needs to be tweaked for non-x86 Darwin" #endif #endif +#if !defined(LISP_FEATURE_WIN32) for(ptr = ((void **)(c+1))-1; ptr>=(void **)c; ptr--) { preserve_pointer(*ptr); } +#endif } #endif diff --git a/src/runtime/interrupt.c b/src/runtime/interrupt.c index fe09ec2..9b82a58 100644 --- a/src/runtime/interrupt.c +++ b/src/runtime/interrupt.c @@ -102,7 +102,7 @@ union interrupt_handler interrupt_handlers[NSIG]; * work for SIGSEGV and similar. It is good enough for timers, and * maybe all deferrables. */ -#ifdef LISP_FEATURE_SB_THREAD +#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_WIN32) static void add_handled_signals(sigset_t *sigset) { @@ -121,7 +121,7 @@ void block_signals(sigset_t *what, sigset_t *where, sigset_t *old); static boolean maybe_resignal_to_lisp_thread(int signal, os_context_t *context) { -#ifdef LISP_FEATURE_SB_THREAD +#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_WIN32) if (!pthread_getspecific(lisp_thread)) { if (!(sigismember(&deferrable_sigset,signal))) { corruption_warning_and_maybe_lose @@ -175,7 +175,7 @@ maybe_resignal_to_lisp_thread(int signal, os_context_t *context) static void run_deferred_handler(struct interrupt_data *data, os_context_t *context); -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) static void store_signal_data_for_later (struct interrupt_data *data, void *handler, int signal, siginfo_t *info, @@ -240,7 +240,7 @@ boolean all_signals_blocked_p(sigset_t *sigset, sigset_t *sigset2, const char *name) { -#if !defined(LISP_FEATURE_WIN32) +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) int i; boolean has_blocked = 0, has_unblocked = 0; sigset_t current; @@ -314,7 +314,7 @@ sigset_t gc_sigset; #endif -#if !defined(LISP_FEATURE_WIN32) +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) boolean deferrables_blocked_p(sigset_t *sigset) { @@ -325,7 +325,7 @@ deferrables_blocked_p(sigset_t *sigset) void check_deferrables_unblocked_or_lose(sigset_t *sigset) { -#if !defined(LISP_FEATURE_WIN32) +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) if (deferrables_blocked_p(sigset)) lose("deferrables blocked\n"); #endif @@ -334,13 +334,13 @@ check_deferrables_unblocked_or_lose(sigset_t *sigset) void check_deferrables_blocked_or_lose(sigset_t *sigset) { -#if !defined(LISP_FEATURE_WIN32) +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) if (!deferrables_blocked_p(sigset)) lose("deferrables unblocked\n"); #endif } -#if !defined(LISP_FEATURE_WIN32) +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) boolean blockables_blocked_p(sigset_t *sigset) { @@ -351,7 +351,7 @@ blockables_blocked_p(sigset_t *sigset) void check_blockables_unblocked_or_lose(sigset_t *sigset) { -#if !defined(LISP_FEATURE_WIN32) +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) if (blockables_blocked_p(sigset)) lose("blockables blocked\n"); #endif @@ -361,6 +361,24 @@ void check_blockables_blocked_or_lose(sigset_t *sigset) { #if !defined(LISP_FEATURE_WIN32) + /* On Windows, there are no actual signals, but since the win32 port + * tracks the sigmask and checks it explicitly, some functions are + * still required to keep the mask set up properly. (After all, the + * goal of the sigmask emulation is to not have to change all the + * call sites in the first place.) + * + * However, this does not hold for all signals equally: While + * deferrables matter ("is interrupt-thread okay?"), it is not worth + * having to set up blockables properly (which include the + * non-existing GC signals). + * + * Yet, as the original comment explains it: + * Adjusting FREE-INTERRUPT-CONTEXT-INDEX* and other aspecs of + * fake_foreign_function_call machinery are sometimes useful here[...]. + * + * So we merely skip this assertion. + * -- DFL, trying to expand on a comment by AK. + */ if (!blockables_blocked_p(sigset)) lose("blockables unblocked\n"); #endif @@ -397,7 +415,7 @@ check_gc_signals_blocked_or_lose(sigset_t *sigset) void block_deferrable_signals(sigset_t *where, sigset_t *old) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) block_signals(&deferrable_sigset, where, old); #endif } @@ -405,7 +423,7 @@ block_deferrable_signals(sigset_t *where, sigset_t *old) void block_blockable_signals(sigset_t *where, sigset_t *old) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) block_signals(&blockable_sigset, where, old); #endif } @@ -414,7 +432,7 @@ block_blockable_signals(sigset_t *where, sigset_t *old) void block_gc_signals(sigset_t *where, sigset_t *old) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) block_signals(&gc_sigset, where, old); #endif } @@ -423,7 +441,7 @@ block_gc_signals(sigset_t *where, sigset_t *old) void unblock_deferrable_signals(sigset_t *where, sigset_t *old) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) if (interrupt_handler_pending_p()) lose("unblock_deferrable_signals: losing proposition\n"); #ifndef LISP_FEATURE_SB_SAFEPOINT @@ -436,7 +454,7 @@ unblock_deferrable_signals(sigset_t *where, sigset_t *old) void unblock_blockable_signals(sigset_t *where, sigset_t *old) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) unblock_signals(&blockable_sigset, where, old); #endif } @@ -454,7 +472,7 @@ unblock_gc_signals(sigset_t *where, sigset_t *old) void unblock_signals_in_context_and_maybe_warn(os_context_t *context) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) sigset_t *sigset = os_context_sigmask_addr(context); #ifndef LISP_FEATURE_SB_SAFEPOINT if (all_signals_blocked_p(sigset, &gc_sigset, "gc")) { @@ -548,7 +566,7 @@ in_leaving_without_gcing_race_p(struct thread *thread) void check_interrupt_context_or_lose(os_context_t *context) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) struct thread *thread = arch_os_get_current_thread(); struct interrupt_data *data = thread->interrupt_data; int interrupt_deferred_p = (data->pending_handler != 0); @@ -793,7 +811,7 @@ interrupt_internal_error(os_context_t *context, boolean continuable) #endif context_sap = alloc_sap(context); -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) thread_sigmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); #endif @@ -1048,7 +1066,7 @@ interrupt_handle_now(int signal, siginfo_t *info, os_context_t *context) check_blockables_blocked_or_lose(0); -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) if (sigismember(&deferrable_sigset,signal)) check_interrupts_enabled_or_lose(context); #endif @@ -1115,11 +1133,11 @@ interrupt_handle_now(int signal, siginfo_t *info, os_context_t *context) FSHOW_SIGNAL((stderr,"/calling C-level handler\n")); -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) /* Allow signals again. */ thread_sigmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); -#endif (*handler.c)(signal, info, context); +#endif } if (were_in_lisp) @@ -1928,17 +1946,21 @@ sigabrt_handler(int signal, siginfo_t *info, os_context_t *context) void interrupt_init(void) { -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) int i; SHOW("entering interrupt_init()"); +#ifndef LISP_FEATURE_WIN32 see_if_sigaction_nodefer_works(); +#endif sigemptyset(&deferrable_sigset); sigemptyset(&blockable_sigset); sigemptyset(&gc_sigset); sigaddset_deferrable(&deferrable_sigset); sigaddset_blockable(&blockable_sigset); sigaddset_gc(&gc_sigset); +#endif +#ifndef LISP_FEATURE_WIN32 /* Set up high level handler information. */ for (i = 0; i < NSIG; i++) { interrupt_handlers[i].c = @@ -1950,8 +1972,8 @@ interrupt_init(void) (void (*)(int, siginfo_t*, os_context_t*))SIG_DFL; } undoably_install_low_level_interrupt_handler(SIGABRT, sigabrt_handler); - SHOW("returning from interrupt_init()"); #endif + SHOW("returning from interrupt_init()"); } #ifndef LISP_FEATURE_WIN32 @@ -2000,7 +2022,7 @@ unhandled_trap_error(os_context_t *context) unblock_gc_signals(0, 0); #endif context_sap = alloc_sap(context); -#ifndef LISP_FEATURE_WIN32 +#if !defined(LISP_FEATURE_WIN32) || defined(LISP_FEATURE_SB_THREAD) thread_sigmask(SIG_SETMASK, os_context_sigmask_addr(context), 0); #endif funcall1(StaticSymbolFunction(UNHANDLED_TRAP_ERROR), context_sap); @@ -2014,11 +2036,13 @@ void handle_trap(os_context_t *context, int trap) { switch(trap) { +#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD)) case trap_PendingInterrupt: FSHOW((stderr, "/\n")); arch_skip_instruction(context); interrupt_handle_pending(context); break; +#endif case trap_Error: case trap_Cerror: FSHOW((stderr, "/\n", trap)); diff --git a/src/runtime/interrupt.h b/src/runtime/interrupt.h index 3a6a51b..83514d7 100644 --- a/src/runtime/interrupt.h +++ b/src/runtime/interrupt.h @@ -12,7 +12,7 @@ #if !defined(_INCLUDE_INTERRUPT_H_) #define _INCLUDE_INTERRUPT_H_ -#include +#include "runtime.h" #include /* @@ -24,7 +24,12 @@ * stack by the kernel, so copying a libc-sized sigset_t into it will * overflow and cause other data on the stack to be corrupted */ /* FIXME: do not rely on NSIG being a multiple of 8 */ -#define REAL_SIGSET_SIZE_BYTES ((NSIG/8)) + +#ifdef LISP_FEATURE_WIN32 +# define REAL_SIGSET_SIZE_BYTES (4) +#else +# define REAL_SIGSET_SIZE_BYTES ((NSIG/8)) +#endif static inline void sigcopyset(sigset_t *new, sigset_t *old) diff --git a/src/runtime/os-common.c b/src/runtime/os-common.c index fa845bf..698d0c9 100644 --- a/src/runtime/os-common.c +++ b/src/runtime/os-common.c @@ -75,7 +75,7 @@ os_get_errno(void) } -#if defined(LISP_FEATURE_SB_THREAD) && !defined(CANNOT_USE_POSIX_SEM_T) +#if defined(LISP_FEATURE_SB_THREAD) && (!defined(CANNOT_USE_POSIX_SEM_T) || defined(LISP_FEATURE_WIN32)) void os_sem_init(os_sem_t *sem, unsigned int value) diff --git a/src/runtime/parse.c b/src/runtime/parse.c index beedc29..66292af 100644 --- a/src/runtime/parse.c +++ b/src/runtime/parse.c @@ -14,9 +14,13 @@ #include #include #include -#include #include "sbcl.h" +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +#include "pthreads_win32.h" +#else +#include +#endif #include "runtime.h" #if defined(LISP_FEATURE_SB_LDB) diff --git a/src/runtime/print.c b/src/runtime/print.c index da8744e..67a7c20 100644 --- a/src/runtime/print.c +++ b/src/runtime/print.c @@ -215,6 +215,9 @@ fshow_fun(void __attribute__((__unused__)) *ignored, #ifdef LISP_FEATURE_GENCGC #include "gencgc-alloc-region.h" /* genesis/thread.h needs this */ #endif +#if defined(LISP_FEATURE_WIN32) +# include "win32-thread-private-events.h" /* genesis/thread.h needs this */ +#endif #include "genesis/static-symbols.h" #include "genesis/primitive-objects.h" #include "genesis/static-symbols.h" diff --git a/src/runtime/pthreads_win32.c b/src/runtime/pthreads_win32.c new file mode 100644 index 0000000..c307da2 --- /dev/null +++ b/src/runtime/pthreads_win32.c @@ -0,0 +1,1582 @@ +#include "sbcl.h" +#ifdef LISP_FEATURE_SB_THREAD /* entire file */ + +#define PTHREAD_INTERNALS +#include "pthreads_win32.h" +#include +#include +#include +#include + +#ifdef PTHREAD_DEBUG_OUTPUT +#define pthshow(fmt,...) \ + do { \ + fprintf(stderr,fmt "\n", __VA_ARGS__); \ + fflush(stderr); \ + } while (0) + +#define DEBUG_OWN(cs) do {(cs)->owner=pthread_self(); } while(0) +#define DEBUG_RELEASE(cs) do {(cs)->owner=0;} while(0) + +#else +#define pthshow(fmt,...) do {} while (0) +#define DEBUG_OWN(cs) do {} while(0) +#define DEBUG_RELEASE(cs) do {} while(0) +#endif + + +struct freelist_cell { + struct freelist_cell * next; + void* data; +}; + +struct freelist { + void* (*create_fn)(); + pthread_mutex_t lock; + struct freelist_cell * empty; + struct freelist_cell * full; + unsigned int count; +}; + +#define FREELIST_INITIALIZER(create_fn) \ + { \ + event_create, PTHREAD_MUTEX_INITIALIZER, \ + NULL, NULL, 0 \ + } \ + + +static void* freelist_get(struct freelist *fl) +{ + void* result = NULL; + if (fl->full) { + pthread_mutex_lock(&fl->lock); + if (fl->full) { + struct freelist_cell *cell = fl->full; + fl->full = cell->next; + result = cell->data; + cell->next = fl->empty; + fl->empty = cell; + } + pthread_mutex_unlock(&fl->lock); + } + if (!result) { + result = fl->create_fn(); + } + return result; +} + +static void freelist_return(struct freelist *fl, void*data) +{ + struct freelist_cell* cell = NULL; + if (fl->empty) { + pthread_mutex_lock(&fl->lock); + if (fl->empty) { + cell = fl->empty; + fl->empty = cell->next; + goto add_locked; + } + pthread_mutex_unlock(&fl->lock); + } + if (!cell) { + int i,n=32; + cell = malloc(sizeof(*cell)*n); + for (i=0; i<(n-1); ++i) + cell[i].next = &cell[i+1]; + cell[i].next = NULL; + } + + pthread_mutex_lock(&fl->lock); + ++fl->count; + add_locked: + cell->data = data; + cell->next = fl->full; + fl->full = cell; + pthread_mutex_unlock(&fl->lock); +} + +int pthread_attr_init(pthread_attr_t *attr) +{ + attr->stack_size = 0; + return 0; +} + +int pthread_attr_destroy(pthread_attr_t *attr) +{ + return 0; +} + +int pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize) +{ + fprintf(stderr, "pthread_attr_setstack called\n"); + ExitProcess(1); + return 0; +} + +int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) +{ + attr->stack_size = stacksize; + return 0; +} + + +typedef unsigned char boolean; + +/* TLS management internals */ + +static DWORD thread_self_tls_index; + +static void (*tls_destructors[PTHREAD_KEYS_MAX])(void*); +static boolean tls_used[PTHREAD_KEYS_MAX]; +static pthread_key_t tls_max_used_key; +static pthread_mutex_t thread_key_lock = PTHREAD_MUTEX_INITIALIZER; +static void tls_call_destructors(); +static pthread_t tls_impersonate(pthread_t other) { + pthread_t old = pthread_self(); + TlsSetValue(thread_self_tls_index,other); + return old; +} + +static void do_nothing() {} +/* Fiber context hooks */ +void (*pthread_save_context_hook)() = do_nothing; +void (*pthread_restore_context_hook)() = do_nothing; + +/* Some parts of pthread_np API provide access to Windows NT Fibers + (cooperatively scheduled coroutines). Each fiber is wrapped in its + own pthread. + + Fibers may be entered by different threads during their lifetime, + i.e. they are orthogonal to threads. + + Contrary to the raw NT Fibers API, we will distinguish two kinds of + objects: fibers-created-as-fibers and any other thing (thread that + is not a fiber, thread converted to fiber, system thread + noticed). Consequently, though there is no "main fiber" in NT, + there _is_ a main pthread for each (wrapped) system thread, living + or dying with this system thread. It may be converted to fiber, but + its "fiberness" is incidental, only to be able to switch into + another fibers or create them. + + Any fiber that is currently running belongs to some thread + (fiber-created-as-thread, to be exact). Call it FCAT group. + + [1] Entrance lock: prevent double entry. + + [2] Suspend for fibers -> "try locking entrance lock; if failed, do + real thread suspend" + + [3] Resume for fibers -> two strategies depending on what [2] done. + + [4] Exit/death for fibers -> switch to its FCAT group. + + [2],[3],[4] doesn't apply to threads-converted-to-fibers: full + stop/resume is done on them if there is no cooperatively-accessed + published context (of which see below). +*/ +void pthread_np_suspend(pthread_t thread) +{ + pthread_mutex_lock(&thread->fiber_lock); + if (thread->fiber_group) { + CONTEXT context; + SuspendThread(thread->fiber_group->handle); + context.ContextFlags = CONTEXT_FULL; + GetThreadContext(thread->fiber_group->handle, &context); + } +} + +/* Momentary suspend/getcontext/resume without locking or preventing + fiber reentrance. This call is for asymmetric synchronization, + ensuring that the thread sees global state before doing any + globally visible stores. +*/ +void pthread_np_serialize(pthread_t thread) +{ + CONTEXT winctx; + winctx.ContextFlags = CONTEXT_INTEGER; + if (!thread->created_as_fiber) { + SuspendThread(thread->handle); + GetThreadContext(thread->handle,&winctx); + ResumeThread(thread->handle); + } +} + +int pthread_np_get_thread_context(pthread_t thread, CONTEXT* context) +{ + context->ContextFlags = CONTEXT_FULL; + return thread->fiber_group && + GetThreadContext(thread->fiber_group->handle, context) != 0; +} + +void pthread_np_resume(pthread_t thread) +{ + HANDLE host_thread = thread->fiber_group ? thread->fiber_group->handle : NULL; + /* Unlock first, _then_ resume, or we may end up accessing freed + pthread structure (e.g. at startup with CREATE_SUSPENDED) */ + pthread_mutex_unlock(&thread->fiber_lock); + if (host_thread) { + ResumeThread(host_thread); + } +} + +/* FIXME shouldn't be used. */ +void pthread_np_request_interruption(pthread_t thread) +{ + if (thread->waiting_cond) { + pthread_cond_broadcast(thread->waiting_cond); + } +} + +/* Thread identity, as much as pthreads are concerned, is determined + by pthread_t structure that is stored in TLS slot + (thread_self_tls_index). This slot is reassigned when fibers are + switched with pthread_np API. + + Two reasons for not using fiber-local storage for this purpose: (1) + Fls is too young: all other things work with Win2000, it requires + WinXP; (2) this implementation works also with threads that aren't + fibers, and it's a good thing. + + There is one more case, besides fiber switching, when pthread_self + identity migrates between system threads: for non-main system + thread that is not [pthread_create]d, thread-specific data + destructors run in a thread from a system thread pool, after the + original thread dies. In order to provide compatibility with + classic pthread TSD, the system pool thread acquires dead thread's + identity for the duration of destructor calls. +*/ +pthread_t pthread_self() +{ + return (pthread_t)TlsGetValue(thread_self_tls_index); +} + +const char * state_to_str(pthread_thread_state state) +{ + switch (state) { + case pthread_state_running: return "running"; + case pthread_state_finished: return "finished"; + case pthread_state_joined: return "joined"; + default: return "unknown"; + } +} + +/* Two kinds of threads (or fibers) are supported: (1) created by + pthread_create, (2) created independently and noticed by + pthread_np_notice_thread. The first kind is running a predefined + thread function or fiber function; thread_or_fiber_function + incorporates whatever they have in common. +*/ +static void thread_or_fiber_function(pthread_t self) +{ + pthread_t prev = tls_impersonate(self); + void* arg = self->arg; + pthread_fn fn = self->start_routine; + + if (prev) { + pthread_mutex_lock(&prev->fiber_lock); + prev->fiber_group = NULL; + /* Previous fiber, that started us, had assigned our + fiber_group. Now we clear its fiber_group. */ + pthread_mutex_unlock(&prev->fiber_lock); + } + self->retval = fn(arg); + pthread_mutex_lock(&self->lock); + self->state = pthread_state_finished; + pthread_cond_broadcast(&self->cond); + while (!self->detached && self->state != pthread_state_joined) { + if (self->created_as_fiber) { + pthread_mutex_unlock(&self->lock); + pthread_np_switch_to_fiber(self->fiber_group); + pthread_mutex_lock(&self->lock); + } else { + pthread_cond_wait(&self->cond, &self->lock); + } + } + pthread_mutex_unlock(&self->lock); + pthread_mutex_destroy(&self->lock); + pthread_mutex_destroy(&self->fiber_lock); + pthread_cond_destroy(&self->cond); + tls_call_destructors(); +} + +/* Thread function for [pthread_create]d threads. Thread may become a + fiber later, but (as stated above) it isn't supposed to be + reattached to other system thread, even after it happens. +*/ +DWORD WINAPI Thread_Function(LPVOID param) +{ + pthread_t self = (pthread_t) param; + + self->teb = NtCurrentTeb(); + thread_or_fiber_function(param); + CloseHandle(self->handle); + { + void* fiber = self->fiber; + free(self); + if (fiber) { + /* If thread was converted to fiber, deleting the fiber from + itself exits the thread. There are some rumors on possible + memory leaks if we just ExitThread or return here, hence the + statement below. However, no memory leaks on bare ExitThread + were observed yet. */ + DeleteFiber(GetCurrentFiber()); + } + } + return 0; +} + +/* Fiber can't delete itself without exiting the current thread + simultaneously. We arrange for some other fiber calling + fiber_destructor when fiber dies but doesn't want to terminate its + thread. */ +static void fiber_destructor(void* fiber) { DeleteFiber(fiber); } + +VOID CALLBACK Fiber_Function(LPVOID param) +{ + pthread_t self = (pthread_t) param; + thread_or_fiber_function(param); + { + /* fiber_group is a main thread into which we are to call */ + pthread_t group = self->fiber_group; + free(self); + /* pthread_np_run_in_fiber (see below) normally switches back to + caller. Nullify our identity, so it knows there is nothing to + switch to, and continues running instead. */ + tls_impersonate(NULL); + if (group) { + /* Every running [pthread_create]d fiber runs in some thread + that has its own pthread_self identity (that was created as + thread and later converted to fiber). `group' field of + running fiber always points to that other pthread. + + Now switch to our group ("current master fiber created as + thread"), asking it to delete our (OS) fiber data with + fiber_destructor. */ + pthread_np_run_in_fiber(group, fiber_destructor, GetCurrentFiber()); + } + /* Within current pthread API we never end up here. + + BTW, if fibers are ever pooled, to avoid stack space reallocation + etc, jumping to the beginning of Fiber_Function should be the + thing to do here. */ + DeleteFiber(GetCurrentFiber()); /* Exits. See Thread_Function for + explanation -- why not + ExitThread. */ + } +} + +/* Signals */ +struct sigaction signal_handlers[NSIG]; + +/* Never called for now */ +int sigaction(int signum, const struct sigaction* act, struct sigaction* oldact) +{ + struct sigaction newact = *act; + if (oldact) + *oldact = signal_handlers[signum]; + if (!(newact.sa_flags & SA_SIGINFO)) { + newact.sa_sigaction = (typeof(newact.sa_sigaction))newact.sa_handler; + } + signal_handlers[signum] = newact; + return 0; +} + +/* Create thread or fiber, depending on current thread's "fiber + factory mode". In the latter case, switch into newly-created fiber + immediately. +*/ +int pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine) (void *), void *arg) +{ + pthread_t pth = (pthread_t)calloc(sizeof(pthread_thread),1); + pthread_t self = pthread_self(); + int i; + HANDLE createdThread = NULL; + + if (self && self->fiber_factory) { + pth->fiber = CreateFiber (attr ? attr->stack_size : 0, Fiber_Function, pth); + if (!pth->fiber) return 1; + pth->created_as_fiber = 1; + /* Has no fiber-group until someone enters it (we will) */ + } else { + createdThread = CreateThread(NULL, attr ? attr->stack_size : 0, + Thread_Function, pth, CREATE_SUSPENDED, NULL); + if (!createdThread) return 1; + /* FCAT is its own fiber-group [initially] */ + pth->fiber_group = pth; + pth->handle = createdThread; + } + pth->start_routine = start_routine; + pth->arg = arg; + if (self) { + pth->blocked_signal_set = self->blocked_signal_set; + } else { + sigemptyset(&pth->blocked_signal_set); + } + pth->state = pthread_state_running; + pthread_mutex_init(&pth->lock, NULL); + pthread_mutex_init(&pth->fiber_lock, NULL); + pthread_cond_init(&pth->cond, NULL); + pth->detached = 0; + if (thread) *thread = pth; + if (pth->fiber) { + pthread_np_switch_to_fiber(pth); + } else { + /* Resume will unlock, so we lock here */ + pthread_mutex_lock(&pth->fiber_lock); + pthread_np_resume(pth); + } + return 0; +} + +int pthread_equal(pthread_t thread1, pthread_t thread2) +{ + return thread1 == thread2; +} + +int pthread_detach(pthread_t thread) +{ + int retval = 0; + pthread_mutex_lock(&thread->lock); + thread->detached = 1; + pthread_cond_broadcast(&thread->cond); + pthread_mutex_unlock(&thread->lock); + return retval; +} + +int pthread_join(pthread_t thread, void **retval) +{ + int fiberp = thread->created_as_fiber; + pthread_mutex_lock(&thread->lock); + while (thread->state != pthread_state_finished) { + if (fiberp) { + /* just trying */ + pthread_mutex_unlock(&thread->lock); + pthread_np_switch_to_fiber(thread); + pthread_mutex_lock(&thread->lock); + } else { + pthread_cond_wait(&thread->cond, &thread->lock); + } + } + thread->state = pthread_state_joined; + pthread_cond_broadcast(&thread->cond); + if (retval) + *retval = thread->retval; + pthread_mutex_unlock(&thread->lock); + if (fiberp) + pthread_np_switch_to_fiber(thread); + return 0; +} + +/* We manage our own TSD instead of relying on system TLS for anything + other than pthread identity itself. Reasons: (1) Windows NT TLS + slots are expensive, (2) pthread identity migration requires only + one TLS slot assignment, instead of massive copying. */ +int pthread_key_create(pthread_key_t *key, void (*destructor)(void*)) +{ + pthread_key_t index; + boolean success = 0; + pthread_mutex_lock(&thread_key_lock); + for (index = 0; index < PTHREAD_KEYS_MAX; ++index) { + if (!tls_used[index]) { + if (tls_max_used_keyspecifics[key]; +} + +/* Internal function calling destructors for current pthread */ +static void tls_call_destructors() +{ + pthread_key_t key; + int i; + int called; + + for (i = 0; iblocked_signal_set; + if (set) { + switch (how) { + case SIG_BLOCK: + self->blocked_signal_set |= *set; + break; + case SIG_UNBLOCK: + self->blocked_signal_set &= ~(*set); + break; + case SIG_SETMASK: + self->blocked_signal_set = *set; + break; + } + } + return 0; +} + +pthread_mutex_t mutex_init_lock; + +int pthread_mutex_init(pthread_mutex_t * mutex, const pthread_mutexattr_t * attr) +{ + *mutex = (struct _pthread_mutex_info*)malloc(sizeof(struct _pthread_mutex_info)); + InitializeCriticalSection(&(*mutex)->cs); + (*mutex)->file = " (free) "; + return 0; +} + +int pthread_mutexattr_init(pthread_mutexattr_t* attr) +{ + return 0; +} +int pthread_mutexattr_destroy(pthread_mutexattr_t* attr) +{ + return 0; +} + +int pthread_mutexattr_settype(pthread_mutexattr_t* attr,int mutex_type) +{ + return 0; +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + if (*mutex != PTHREAD_MUTEX_INITIALIZER) { + pthread_np_assert_live_mutex(mutex,"destroy"); + DeleteCriticalSection(&(*mutex)->cs); + free(*mutex); + *mutex = &DEAD_MUTEX; + } + return 0; +} + +/* Add pending signal to (other) thread */ +void pthread_np_add_pending_signal(pthread_t thread, int signum) +{ + /* See __sync_fetch_and_or() for gcc 4.4, at least. As some + people are still using gcc 3.x, I prefer to do this in asm. + + For win64 we'll HAVE to rewrite it. __sync_fetch_and_or() seems + to be a rational choice -- there are plenty of GCCisms in SBCL + anyway. + */ + sigset_t to_add = 1<pending_signal_set):"r"(to_add)); +} + +static void futex_interrupt(pthread_t thread); + +/* This pthread_kill doesn't do anything to notify target pthread of a + * new pending signal. + * + * DFL: ... or so the original comment claimed, but that was before + * futexes. Now that we wake up futexes, it's not entirely accurate + * anymore, is it? */ +int pthread_kill(pthread_t thread, int signum) +{ + pthread_np_add_pending_signal(thread,signum); + futex_interrupt(thread); + return 0; +} + +void pthread_np_remove_pending_signal(pthread_t thread, int signum) +{ + sigset_t to_and = ~(1<pending_signal_set):"r"(to_and)); +} + +sigset_t pthread_np_other_thread_sigpending(pthread_t thread) +{ + return + InterlockedCompareExchange((volatile LONG*)&thread->pending_signal_set, + 0, 0); +} + +/* Mutex implementation uses CRITICAL_SECTIONs. Somethings to keep in + mind: (1) uncontested locking is cheap; (2) long wait on a busy + lock causes exception, so it should never be attempted; (3) those + mutexes are recursive; (4) one thread locks, the other unlocks -> + the next one hangs. */ +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + pthread_np_assert_live_mutex(mutex,"lock"); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_lock(&mutex_init_lock); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_init(mutex, NULL); + } + pthread_mutex_unlock(&mutex_init_lock); + } + EnterCriticalSection(&(*mutex)->cs); + DEBUG_OWN(*mutex); + return 0; +} + +int pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + pthread_np_assert_live_mutex(mutex,"trylock"); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_lock(&mutex_init_lock); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_init(mutex, NULL); + } + pthread_mutex_unlock(&mutex_init_lock); + } + if (TryEnterCriticalSection(&(*mutex)->cs)) { + DEBUG_OWN(*mutex); + return 0; + } + else + return EBUSY; +} + +/* Versions of lock/trylock useful for debugging. Our header file + conditionally redefines lock/trylock to call them. */ + +int pthread_mutex_lock_annotate_np(pthread_mutex_t *mutex, const char* file, int line) +{ + int contention = 0; + pthread_np_assert_live_mutex(mutex,"lock"); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_lock(&mutex_init_lock); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_init(mutex, NULL); + pthshow("Mutex #x%p: automatic initialization; #x%p %s +%d", + mutex, *mutex, + file, line); + } + pthread_mutex_unlock(&mutex_init_lock); + } + if ((*mutex)->owner) { + pthshow("Mutex #x%p -> #x%p: contention; owned by #x%p, wanted by #x%p", + mutex, *mutex, + (*mutex)->owner, + pthread_self()); + pthshow("Mutex #x%p -> #x%p: contention notes: old %s +%d, new %s +%d", + mutex, *mutex, + (*mutex)->file,(*mutex)->line, file, line); + contention = 1; + } + EnterCriticalSection(&(*mutex)->cs); + if (contention) { + pthshow("Mutex #x%p -> #x%p: contention end; left by #x%p, taken by #x%p", + mutex, *mutex, + (*mutex)->owner, + pthread_self()); + pthshow("Mutex #x%p -> #x%p: contention notes: old %s +%d, new %s +%d", + mutex, *mutex, + (*mutex)->file,(*mutex)->line, file, line); + } + (*mutex)->owner = pthread_self(); + (*mutex)->file = file; + (*mutex)->line = line; + return 0; +} + +int pthread_mutex_trylock_annotate_np(pthread_mutex_t *mutex, const char* file, int line) +{ + int contention = 0; + pthread_np_assert_live_mutex(mutex,"trylock"); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_lock(&mutex_init_lock); + if (*mutex == PTHREAD_MUTEX_INITIALIZER) { + pthread_mutex_init(mutex, NULL); + } + pthread_mutex_unlock(&mutex_init_lock); + } + if ((*mutex)->owner) { + pthshow("Mutex #x%p -> #x%p: tried contention; owned by #x%p, wanted by #x%p", + mutex, *mutex, + (*mutex)->owner, + pthread_self()); + pthshow("Mutex #x%p -> #x%p: contention notes: old %s +%d, new %s +%d", + mutex, *mutex, + (*mutex)->file,(*mutex)->line, file, line); + contention = 1; + } + if (TryEnterCriticalSection(&(*mutex)->cs)) { + if (contention) { + pthshow("Mutex #x%p -> #x%p: contention end; left by #x%p, taken by #x%p", + mutex, *mutex, + (*mutex)->owner, + pthread_self()); + pthshow("Mutex #x%p -> #x%p: contention notes: old %s +%d, new %s +%d", + mutex, *mutex, + (*mutex)->file,(*mutex)->line, file, line); + } + (*mutex)->owner = pthread_self(); + (*mutex)->file = file; + (*mutex)->line = line; + return 0; + } + else + return EBUSY; +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + /* Owner is for debugging only; NB if mutex is used recursively, + owner field will lie. */ + pthread_np_assert_live_mutex(mutex,"unlock"); + DEBUG_RELEASE(*mutex); + LeaveCriticalSection(&(*mutex)->cs); + return 0; +} + +/* Condition variables implemented with events and wakeup queues. */ + +/* Thread-local wakeup events are kept in TSD to avoid kernel object + creation on each call to pthread_cond_[timed]wait */ +static pthread_key_t cv_event_key; + +/* .info field in wakeup record is an "opportunistic" indicator that + wakeup has happened. On timeout from WaitForSingleObject, thread + doesn't know (1) whether to reset event, (2) whether to (try) to + find and unlink wakeup record. Let's let it know (of course, + it will know for sure only under cv_wakeup_lock). */ + +#define WAKEUP_WAITING_NOTIMEOUT 0 +#define WAKEUP_WAITING_TIMEOUT 4 + +#define WAKEUP_HAPPENED 1 +#define WAKEUP_BY_INTERRUPT 2 + +static void* event_create() +{ + return (void*)CreateEvent(NULL,FALSE,FALSE,NULL); +} + +static struct freelist event_freelist = FREELIST_INITIALIZER(event_create); + + +unsigned int pthread_free_event_pool_size() +{ + return event_freelist.count; +} + +static HANDLE fe_get_event() +{ + return (HANDLE)freelist_get(&event_freelist); +} + +static void fe_return_event(HANDLE handle) +{ + freelist_return(&event_freelist, (void*)handle); +} + +static void cv_event_destroy(void* event) +{ + CloseHandle((HANDLE)event); +} + +static HANDLE cv_default_event_get_fn() +{ + HANDLE event = pthread_getspecific(cv_event_key); + if (!event) { + event = CreateEvent(NULL, FALSE, FALSE, NULL); + pthread_setspecific(cv_event_key, event); + } else { + /* ResetEvent(event); used to be here. Let's try without. It's + safe in pthread_cond_wait: if WaitForSingleObjectEx ever + returns, event is reset automatically, and the wakeup queue item + is removed by the signaller under wakeup_lock. + + pthread_cond_timedwait should reset the event if + cv_wakeup_remove failed to find its wakeup record, otherwise + it's safe too. */ + } + return event; +} + +static void cv_default_event_return_fn(HANDLE event) +{ + /* ResetEvent(event); could be here as well (and used to be). + Avoiding syscalls makes sense, however. */ +} + +static pthread_condattr_t cv_default_attr = { + 0, /* alertable */ + fe_get_event, + fe_return_event, + /* cv_default_event_get_fn, /\* get_fn *\/ */ + /* cv_default_event_return_fn /\* return_fn *\/ */ +}; + +int pthread_cond_init(pthread_cond_t * cv, const pthread_condattr_t * attr) +{ + if (!attr) + attr = &cv_default_attr; + pthread_mutex_init(&cv->wakeup_lock, NULL); + cv->first_wakeup = NULL; + cv->last_wakeup = NULL; + cv->alertable = attr->alertable; + cv->get_fn = attr->get_fn; + cv->return_fn = attr->return_fn; + return 0; +} + +int pthread_condattr_init(pthread_condattr_t *attr) +{ + *attr = cv_default_attr; + return 0; +} + +int pthread_condattr_destroy(pthread_condattr_t *attr) +{ + return 0; +} +int pthread_condattr_setevent_np(pthread_condattr_t *attr, + cv_event_get_fn get_fn, cv_event_return_fn ret_fn) +{ + attr->get_fn = get_fn ? get_fn : fe_get_event;// cv_default_event_get_fn; + attr->return_fn = ret_fn ? ret_fn : fe_return_event; // cv_default_event_return_fn; + return 0; +} + +int pthread_cond_destroy(pthread_cond_t *cv) +{ + pthread_mutex_destroy(&cv->wakeup_lock); + return 0; +} + +int pthread_cond_broadcast(pthread_cond_t *cv) +{ + int count = 0; + + HANDLE postponed[128]; + int npostponed = 0,i; + + /* No strict requirements to memory visibility model, because of + mutex unlock around waiting. */ + if (!cv->first_wakeup) + return 0; + pthread_mutex_lock(&cv->wakeup_lock); + while (cv->first_wakeup) + { + struct thread_wakeup * w = cv->first_wakeup; + HANDLE waitevent = w->event; + cv->first_wakeup = w->next; + w->info = WAKEUP_HAPPENED; + postponed[npostponed++] = waitevent; + if (/* w->info == WAKEUP_WAITING_TIMEOUT || */ npostponed == + sizeof(postponed)/sizeof(postponed[0])) { + for (i=0; ilast_wakeup = NULL; + pthread_mutex_unlock(&cv->wakeup_lock); + for (i=0; ifirst_wakeup) + return 0; + pthread_mutex_lock(&cv->wakeup_lock); + w = cv->first_wakeup; + if (w) { + HANDLE waitevent = w->event; + cv->first_wakeup = w->next; + if (!cv->first_wakeup) + cv->last_wakeup = NULL; + w->info = WAKEUP_HAPPENED; + SetEvent(waitevent); + } + pthread_mutex_unlock(&cv->wakeup_lock); + return 0; +} + +/* Return value is used for futexes: 0=ok, 1 on unexpected word change. */ +int cv_wakeup_add(struct pthread_cond_t* cv, struct thread_wakeup* w) +{ + HANDLE event; + w->next = NULL; + pthread_mutex_lock(&cv->wakeup_lock); + if (w->uaddr) { + if (w->uval != *w->uaddr) { + pthread_mutex_unlock(&cv->wakeup_lock); + return 1; + } + pthread_self()->futex_wakeup = w; + } + event = cv->get_fn(); + w->event = event; + if (cv->last_wakeup == w) { + fprintf(stderr, "cv->last_wakeup == w\n"); + fflush(stderr); + ExitProcess(0); + } + if (cv->last_wakeup != NULL) + { + cv->last_wakeup->next = w; + cv->last_wakeup = w; + } + else + { + cv->first_wakeup = w; + cv->last_wakeup = w; + } + pthread_mutex_unlock(&cv->wakeup_lock); + return 0; +} + +/* Return true if wakeup found, false if missing */ +int cv_wakeup_remove(struct pthread_cond_t* cv, struct thread_wakeup* w) +{ + int result = 0; + if (w->info == WAKEUP_HAPPENED || w->info == WAKEUP_BY_INTERRUPT) + goto finish; + pthread_mutex_lock(&cv->wakeup_lock); + { + if (w->info == WAKEUP_HAPPENED || w->info == WAKEUP_BY_INTERRUPT) + goto unlock; + if (cv->first_wakeup == w) { + cv->first_wakeup = w->next; + if (cv->last_wakeup == w) + cv->last_wakeup = NULL; + result = 1; + } else { + struct thread_wakeup * prev = cv->first_wakeup; + while (prev && prev->next != w) + prev = prev->next; + if (!prev) { + goto unlock; + } + prev->next = w->next; + if (cv->last_wakeup == w) + cv->last_wakeup = prev; + result = 1; + } + } + unlock: + pthread_mutex_unlock(&cv->wakeup_lock); + finish: + return result; +} + + +int pthread_cond_wait(pthread_cond_t * cv, pthread_mutex_t * cs) +{ + struct thread_wakeup w; + w.uaddr = 0; + w.info = WAKEUP_WAITING_NOTIMEOUT; + cv_wakeup_add(cv, &w); + if (cv->last_wakeup->next == cv->last_wakeup) { + pthread_np_lose(5,"cv->last_wakeup->next == cv->last_wakeup\n"); + } + if (cv->last_wakeup->next != NULL) { + pthread_np_lose(5,"cv->last_wakeup->next == cv->last_wakeup\n"); + } + pthread_self()->waiting_cond = cv; + DEBUG_RELEASE(*cs); + pthread_mutex_unlock(cs); + do { + if (cv->alertable) { + while (WaitForSingleObjectEx(w.event, INFINITE, TRUE) == WAIT_IO_COMPLETION); + } else { + WaitForSingleObject(w.event, INFINITE); + } + } while (w.info == WAKEUP_WAITING_NOTIMEOUT); + pthread_self()->waiting_cond = NULL; + /* Event is signalled once, wakeup is dequeued by signaller. */ + cv->return_fn(w.event); + pthread_mutex_lock(cs); + DEBUG_OWN(*cs); + return 0; +} + +int pthread_cond_timedwait(pthread_cond_t * cv, pthread_mutex_t * cs, + const struct timespec * abstime) +{ + DWORD rv; + struct thread_wakeup w; + pthread_t self = pthread_self(); + + w.info = WAKEUP_WAITING_TIMEOUT; + w.uaddr = 0; + cv_wakeup_add(cv, &w); + if (cv->last_wakeup->next == cv->last_wakeup) { + fprintf(stderr, "cv->last_wakeup->next == cv->last_wakeup\n"); + ExitProcess(0); + } + self->waiting_cond = cv; + DEBUG_RELEASE(*cs); + /* barrier (release); waiting_cond globally visible */ + pthread_mutex_unlock(cs); + { + struct timeval cur_tm; + long sec, msec; + gettimeofday(&cur_tm, NULL); + sec = abstime->tv_sec - cur_tm.tv_sec; + msec = sec * 1000 + abstime->tv_nsec / 1000000 - cur_tm.tv_usec / 1000; + if (msec < 0) + msec = 0; + do { + if (cv->alertable) { + while ((rv = WaitForSingleObjectEx(w.event, msec, TRUE)) + == WAIT_IO_COMPLETION); + } else { + rv = WaitForSingleObject(w.event, msec); + } + } while (rv == WAIT_OBJECT_0 && w.info == WAKEUP_WAITING_TIMEOUT); + } + self->waiting_cond = NULL; + + if (rv == WAIT_TIMEOUT) { + if (!cv_wakeup_remove(cv, &w)) { + /* Someone removed our wakeup record: though we got a timeout, + event was (will be) signalled before we are here. + Consume this wakeup. */ + WaitForSingleObject(w.event, INFINITE); + } + } + cv->return_fn(w.event); + pthread_mutex_lock(cs); + DEBUG_OWN(*cs); + if (rv == WAIT_TIMEOUT) + return ETIMEDOUT; + else + return 0; +} + +int sched_yield() +{ + /* http://stackoverflow.com/questions/1383943/switchtothread-vs-sleep1 + SwitchToThread(); was here. Unsure what's better for us, just trying.. */ + + if(!SwitchToThread()) + Sleep(0); + return 0; +} + +void pthread_lock_structures() +{ + pthread_mutex_lock(&mutex_init_lock); +} + +void pthread_unlock_structures() +{ + pthread_mutex_unlock(&mutex_init_lock); +} + +static int pthread_initialized = 0; + +static pthread_cond_t futex_pseudo_cond; + +void pthreads_win32_init() +{ + if (!pthread_initialized) { + thread_self_tls_index = TlsAlloc(); + pthread_mutex_init(&mutex_init_lock, NULL); + pthread_np_notice_thread(); + pthread_key_create(&cv_event_key,cv_event_destroy); + pthread_cond_init(&futex_pseudo_cond, NULL); + pthread_initialized = 1; + } +} + +static +VOID CALLBACK pthreads_win32_unnotice(void* parameter, BOOLEAN timerOrWait) +{ + pthread_t pth = parameter; + pthread_t self = tls_impersonate(pth); + + tls_call_destructors(); + CloseHandle(pth->handle); + /* + if (pth->fiber && pth->own_fiber) { + DeleteFiber(pth->fiber); + } */ + UnregisterWait(pth->wait_handle); + + tls_impersonate(self); + pthread_mutex_destroy(&pth->fiber_lock); + pthread_mutex_destroy(&pth->lock); + free(pth); +} + +int pthread_np_notice_thread() +{ + if (!pthread_self()) { + pthread_t pth = (pthread_t)calloc(sizeof(pthread_thread),1); + pth->teb = NtCurrentTeb(); + pthread_mutex_init(&pth->fiber_lock,NULL); + pthread_mutex_init(&pth->lock,NULL); + pth->state = pthread_state_running; + pth->fiber_group = pth; + + sigemptyset(&pth->blocked_signal_set); + + DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &pth->handle, 0, TRUE, + DUPLICATE_SAME_ACCESS); + tls_impersonate(pth); + + if (pthread_initialized) { + RegisterWaitForSingleObject(&pth->wait_handle, + pth->handle, + pthreads_win32_unnotice, + pth, + INFINITE, + WT_EXECUTEONLYONCE); + } + return 1; + } else { + return 0; + } +} + +int pthread_np_convert_self_to_fiber() +{ + pthread_t pth = pthread_self(); + if (!pth) + return 1; + if (!pth->fiber) { + void* fiber = GetCurrentFiber(); + /* Beware: undocumented (but widely used) method below to check if + the thread is already converted. */ + if (fiber != NULL && fiber != (void*)0x1E00) { + pth->fiber = fiber; + pth->own_fiber = 0; + } else { + pth->fiber = ConvertThreadToFiber(pth); + pth->own_fiber = 1; + } + if (!pth->fiber) + return 1; + } + return 0; +} + +int pthread_np_set_fiber_factory_mode(int on) +{ + pthread_t pth = pthread_self(); + if (on && pthread_np_convert_self_to_fiber()) { + return 1; + } + pth->fiber_factory = on; + return 0; +} + +int pthread_np_switch_to_fiber(pthread_t pth) +{ + pthread_t self = pthread_self(); + + again: + if (pth == self) { + /* Switch to itself is a successful no-op. + NB. SwitchToFiber(GetCurrentFiber()) is not(!). */ + return 0; + } + + if (!pth->fiber) { + /* Switch to not-a-fiber-at-all */ + return -1; + } + + if (!pth->created_as_fiber) { + /* Switch to main thread (group): fails if... */ + if (self && (self->fiber_group != pth)) { + /* ...trying to switch from [under] one main thread into another */ + return -1; + } + } + if (!self && pth->created_as_fiber) { + /* Switch to free fiber from non-noticed thread */ + return -1; + } + + if (self && pthread_np_convert_self_to_fiber()) { + /* Current thread can't become a fiber (and run fibers) */ + return -1; + } + + /* If target fiber is suspened, we wait here. */ + pthread_mutex_lock(&pth->fiber_lock); + if (pth->fiber_group) { + /* Reentering a running fiber */ + pthread_mutex_unlock(&pth->fiber_lock); + /* Don't wait for a running fiber here, just fail. If an + application wants to wait, it should use some separate + synchronization. */ + return -1; + } + if (self) { + /* Target fiber group is like mine */ + pth->fiber_group = self->fiber_group; + } else { + /* Switch-from-null-self (always into thread, usually from + terminating fiber) */ + pth->fiber_group = pth; + } + /* Target fiber now marked as busy */ + pthread_mutex_unlock(&pth->fiber_lock); + + if (self) { + pthread_save_context_hook(); + } + /* NB we don't set pthread TLS, let target fiber do it by itself. */ + SwitchToFiber(pth->fiber); + + /* When we return here... */ + pth = tls_impersonate(self); + + /* Now pth contains fiber that entered this one */ + pthread_restore_context_hook(); + + if (pth) { + pthread_mutex_lock(&pth->fiber_lock); + if (pth->fiber_group == self->fiber_group) { + pth->fiber_group = NULL; + } + pthread_mutex_unlock(&pth->fiber_lock); + } + /* Self surely is not NULL, or we'd never be here */ + + /* Implement call-in-fiber */ + if (self->fiber_callback) { + void (*cb)(void*) = self->fiber_callback; + void *ctx = self->fiber_callback_context; + + /* Nested callbacks and fiber switches are possible, so clean + up a cb pointer here */ + self->fiber_callback = NULL; + self->fiber_callback_context = NULL; + cb(ctx); + if (pth) { + /* Return to caller without recursive + pthread_np_switch_to_fiber. This way, an "utility fiber" + serving multiple callbacks won't grow its stack to infinity */ + goto again; + } + /* There is no `callback client' pretending to be returned + into: it means callback shouldn't yield to caller. */ + } + return 0; /* success */ +} + +int pthread_np_run_in_fiber(pthread_t pth, void (*callback)(void*), + void* context) +{ + pth->fiber_callback = callback; + pth->fiber_callback_context = context; + return pthread_np_switch_to_fiber(pth); +} + +HANDLE pthread_np_get_handle(pthread_t pth) +{ + return pth->handle; +} + +void* pthread_np_get_lowlevel_fiber(pthread_t pth) +{ + return pth->fiber; +} + +int pthread_np_delete_lowlevel_fiber(void* fiber) +{ + DeleteFiber(fiber); + return 0; +} + +int sigemptyset(sigset_t *set) +{ + *set = 0; + return 0; +} + +int sigfillset(sigset_t *set) +{ + *set = 0xfffffffful; + return 0; +} + +int sigaddset(sigset_t *set, int signum) +{ + *set |= 1 << signum; + return 0; +} + +int sigdelset(sigset_t *set, int signum) +{ + *set &= ~(1 << signum); + return 0; +} + +int sigismember(const sigset_t *set, int signum) +{ + return (*set & (1 << signum)) != 0; +} +int sigpending(sigset_t *set) +{ + int i; + *set = InterlockedCompareExchange((volatile LONG*)&pthread_self()->pending_signal_set, + 0, 0); + return 0; +} + + +#define FUTEX_EWOULDBLOCK 3 +#define FUTEX_EINTR 2 +#define FUTEX_ETIMEDOUT 1 + +int +futex_wait(volatile intptr_t *lock_word, intptr_t oldval, long sec, unsigned long usec) +{ + struct thread_wakeup w; + pthread_t self = pthread_self(); + DWORD msec = sec<0 ? INFINITE : (sec*1000 + usec/1000); + DWORD wfso; + int result; + sigset_t pendset, blocked; + int maybeINTR; + int info = sec<0 ? WAKEUP_WAITING_NOTIMEOUT: WAKEUP_WAITING_TIMEOUT; + + sigpending(&pendset); + if (pendset & ~self->blocked_signal_set) + return FUTEX_EINTR; + w.uaddr = lock_word; + w.uval = oldval; + w.info = info; + + if (cv_wakeup_add(&futex_pseudo_cond,&w)) { + return FUTEX_EWOULDBLOCK; + } + self->futex_wakeup = &w; + do { + wfso = WaitForSingleObject(w.event, msec); + } while (wfso == WAIT_OBJECT_0 && w.info == info); + self->futex_wakeup = NULL; + sigpending(&pendset); + maybeINTR = (pendset & ~self->blocked_signal_set)? FUTEX_EINTR : 0; + + switch(wfso) { + case WAIT_TIMEOUT: + if (!cv_wakeup_remove(&futex_pseudo_cond,&w)) { + /* timeout, but someone other removed wakeup. */ + result = maybeINTR; + WaitForSingleObject(w.event,INFINITE); + } else { + result = FUTEX_ETIMEDOUT; + } + break; + case WAIT_OBJECT_0: + result = maybeINTR; + break; + default: + result = -1; + break; + } + futex_pseudo_cond.return_fn(w.event); + return result; +} + +int +futex_wake(volatile intptr_t *lock_word, int n) +{ + pthread_cond_t *cv = &futex_pseudo_cond; + int result = 0; + struct thread_wakeup *w, *prev; + HANDLE postponed[128]; + int npostponed = 0,i; + + if (n==0) return 0; + + pthread_mutex_lock(&cv->wakeup_lock); + for (w = cv->first_wakeup, prev = NULL; w && n;) { + if (w->uaddr == lock_word) { + HANDLE event = w->event; + int oldinfo = w->info; + w->info = WAKEUP_HAPPENED; + if (cv->last_wakeup == w) + cv->last_wakeup = prev; + w = w->next; + if (!prev) { + cv->first_wakeup = w; + } else { + prev->next = w; + } + n--; + postponed[npostponed++] = event; + if (npostponed == sizeof(postponed)/sizeof(postponed[0])) { + for (i=0; inext; + } + } + pthread_mutex_unlock(&cv->wakeup_lock); + for (i=0; ifutex_wakeup) { + pthread_cond_t *cv = &futex_pseudo_cond; + struct thread_wakeup *w; + HANDLE event; + pthread_mutex_lock(&cv->wakeup_lock); + if ((w = thread->futex_wakeup)) { + /* we are taking wakeup_lock recursively - ok with + CRITICAL_SECTIONs */ + if (cv_wakeup_remove(&futex_pseudo_cond,w)) { + event = w->event; + w->info = WAKEUP_BY_INTERRUPT; + thread->futex_wakeup = NULL; + } else { + w = NULL; + } + } + if (w) { + SetEvent(event); + } + pthread_mutex_unlock(&cv->wakeup_lock); + } +} + +void pthread_np_lose(int trace_depth, const char* fmt, ...) +{ + va_list header; + void* frame; + int n = 0; + void** lastseh; + + va_start(header,fmt); + vfprintf(stderr,fmt,header); + for (lastseh = *(void**)NtCurrentTeb(); + lastseh && (lastseh!=(void*)0xFFFFFFFF); + lastseh = *lastseh); + + fprintf(stderr, "Backtrace: %s (pthread %p)\n", header, pthread_self()); + for (frame = __builtin_frame_address(0); frame; frame=*(void**)frame) + { + if ((n++)>trace_depth) + return; + fprintf(stderr, "[#%02d]: ebp = 0x%p, ret = 0x%p\n",n, + frame, ((void**)frame)[1]); + } + ExitProcess(0); +} + +int +sem_init(sem_t *sem, int pshared_not_implemented, unsigned int value) +{ + sem_t semh = CreateSemaphore(NULL, value, SEM_VALUE_MAX, NULL); + if (!semh) + return -1; + *sem = semh; + return 0; +} + +int +sem_post(sem_t *sem) +{ + return !ReleaseSemaphore(*sem, 1, NULL); +} + +static int +sem_wait_timeout(sem_t *sem, DWORD ms) +{ + switch (WaitForSingleObject(*sem, ms)) { + case WAIT_OBJECT_0: + return 0; + case WAIT_TIMEOUT: + /* errno = EAGAIN; */ + return -1; + default: + /* errno = EINVAL; */ + return -1; + } +} + +int +sem_wait(sem_t *sem) +{ + return sem_wait_timeout(sem, INFINITE); +} + +int +sem_trywait(sem_t *sem) +{ + return sem_wait_timeout(sem, 0); +} + +int +sem_destroy(sem_t *sem) +{ + return !CloseHandle(*sem); +} + +#endif diff --git a/src/runtime/pthreads_win32.h b/src/runtime/pthreads_win32.h new file mode 100644 index 0000000..2d4b066 --- /dev/null +++ b/src/runtime/pthreads_win32.h @@ -0,0 +1,421 @@ +#ifndef WIN32_PTHREAD_INCLUDED +#define WIN32_PTHREAD_INCLUDED + +#include +#include +#include + +#ifndef _SIGSET_T +typedef int sigset_t; +#endif + + +#define WIN32_LEAN_AND_MEAN +#include +#include + +/* 0 - Misc */ + +#define SIG_IGN ((void (*)(int, siginfo_t, void*))-1) +#define SIG_DFL ((void (*)(int, siginfo_t, void*))-2) + +#define SIGHUP 1 +#define SIGINT 2 /* Interactive attention */ +#define SIGQUIT 3 +#define SIGILL 4 /* Illegal instruction */ +#define SIGPIPE 5 +#define SIGALRM 6 +#define SIGURG 7 +#define SIGFPE 8 /* Floating point error */ +#define SIGTSTP 9 +#define SIGCHLD 10 +#define SIGSEGV 11 /* Segmentation violation */ +#define SIGIO 12 +#define SIGXCPU 13 +#define SIGXFSZ 14 +#define SIGTERM 15 /* Termination request */ +#define SIGVTALRM 16 +#define SIGPROF 17 +#define SIGWINCH 18 +#define SIGBREAK 21 /* Control-break */ +#define SIGABRT 22 /* Abnormal termination (abort) */ + +#define SIGRTMIN 23 + +#define NSIG 32 /* maximum signal number + 1 */ + +/* To avoid overusing system TLS, pthread provides its own */ +#define PTHREAD_KEYS_MAX 128 + +#define PTHREAD_DESTRUCTOR_ITERATIONS 4 + +void pthreads_win32_init(); + +/* 1 - Thread */ + +typedef struct pthread_thread* pthread_t; + +typedef struct pthread_attr_t { + unsigned int stack_size; +} pthread_attr_t; + +int pthread_attr_init(pthread_attr_t *attr); +int pthread_attr_destroy(pthread_attr_t *attr); +int pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize); +int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize); + +typedef void (*pthread_cleanup_fn)(void* arg); + +#define pthread_cleanup_push(fn, arg) { pthread_cleanup_fn __pthread_fn = fn; void *__pthread_arg = arg; +#define pthread_cleanup_pop(execute) if (execute) __pthread_fn(__pthread_arg); } + +int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg); +int pthread_equal(pthread_t thread1, pthread_t thread2); +int pthread_detach(pthread_t thread); +int pthread_join(pthread_t thread, void **retval); +int pthread_kill(pthread_t thread, int signum); + +#ifndef PTHREAD_INTERNALS +pthread_t pthread_self(void) __attribute__((__const__)); +#else +pthread_t pthread_self(void); +#endif + +typedef DWORD pthread_key_t; +int pthread_key_create(pthread_key_t *key, void (*destructor)(void*)); + +#define SIG_BLOCK 1 +#define SIG_UNBLOCK 2 +#define SIG_SETMASK 3 +#ifdef PTHREAD_INTERNALS +int pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset); +#endif + +/* 1a - Thread non-portable */ + +void pthread_np_suspend(pthread_t thread); +void pthread_np_suspend_with_signal(pthread_t thread, int signum); + +/* Momentary suspend/getcontext/resume without locking or preventing + fiber reentrance. This call is for asymmetric synchronization, + ensuring that the thread sees global state before doing any + globally visible stores. +*/ +void pthread_np_serialize(pthread_t thread); + +void pthread_np_resume(pthread_t thread); +void pthread_np_request_interruption(pthread_t thread); +CONTEXT* pthread_np_publish_context(CONTEXT* maybe_save_old_one); +void pthread_np_unpublish_context(); +void pthread_np_get_my_context_subset(CONTEXT* ctx); + +/* 2 - Mutex */ + +typedef struct _pthread_mutex_info { + char padding[64]; + CRITICAL_SECTION cs; + pthread_t owner; + const char* file; + int line; +} __attribute__((aligned(128))) *pthread_mutex_t; + +typedef int pthread_mutexattr_t; +#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t)-1) +int pthread_mutex_init(pthread_mutex_t * mutex, const pthread_mutexattr_t * attr); +int pthread_mutexattr_init(pthread_mutexattr_t*); +int pthread_mutexattr_destroy(pthread_mutexattr_t*); +int pthread_mutexattr_settype(pthread_mutexattr_t*, int); +#define PTHREAD_MUTEX_ERRORCHECK 0 +int pthread_mutex_destroy(pthread_mutex_t *mutex); +int pthread_mutex_lock(pthread_mutex_t *mutex); +int pthread_mutex_trylock(pthread_mutex_t *mutex); +int pthread_mutex_lock_annotate_np(pthread_mutex_t *mutex, const char* file, int line); +int pthread_mutex_trylock_annotate_np(pthread_mutex_t *mutex, const char* file, int line); +int pthread_mutex_unlock(pthread_mutex_t *mutex); + +/* 3 - Condition variable */ + +typedef struct thread_wakeup { + HANDLE event; + struct thread_wakeup *next; + volatile intptr_t *uaddr; + intptr_t uval; + int info; +} thread_wakeup; + +typedef HANDLE (*cv_event_get_fn)(); +typedef void (*cv_event_return_fn)(HANDLE event); + +typedef struct pthread_cond_t { + pthread_mutex_t wakeup_lock; + struct thread_wakeup *first_wakeup; + struct thread_wakeup *last_wakeup; + unsigned char alertable; + cv_event_get_fn get_fn; + cv_event_return_fn return_fn; +} pthread_cond_t; + +typedef struct pthread_condattr_t { + unsigned char alertable; + cv_event_get_fn get_fn; + cv_event_return_fn return_fn; +} pthread_condattr_t; + +#ifndef _TIMESPEC_DEFINED +typedef struct timespec { + time_t tv_sec; + long tv_nsec; +} timespec; +#endif + +// not implemented: PTHREAD_COND_INITIALIZER +int pthread_condattr_init(pthread_condattr_t *attr); +int pthread_condattr_destroy(pthread_condattr_t *attr); +int pthread_condattr_setevent_np(pthread_condattr_t *attr, + cv_event_get_fn get_fn, cv_event_return_fn ret_fn); +int pthread_cond_destroy(pthread_cond_t *cond); +int pthread_cond_init(pthread_cond_t * cond, const pthread_condattr_t * attr); +int pthread_cond_broadcast(pthread_cond_t *cond); +int pthread_cond_signal(pthread_cond_t *cond); +int pthread_cond_timedwait(pthread_cond_t * cond, pthread_mutex_t * mutex, const struct timespec * abstime); +int pthread_cond_wait(pthread_cond_t * cond, pthread_mutex_t * mutex); + +#define ETIMEDOUT 123 //Something + +int sched_yield(); + +void pthread_lock_structures(); +void pthread_unlock_structures(); + +typedef void *(*pthread_fn)(void*); + +typedef enum { + pthread_state_running, + pthread_state_finished, + pthread_state_joined +} pthread_thread_state; + +typedef struct pthread_thread { + pthread_fn start_routine; + void* arg; + HANDLE handle; + pthread_cond_t *waiting_cond; + void *futex_wakeup; + sigset_t blocked_signal_set; + volatile sigset_t pending_signal_set; + void * retval; + + pthread_mutex_t lock; + pthread_cond_t cond; + int detached; + pthread_thread_state state; + + /* Boolean flag: thread will produce fibers instead of threads with + pthread_create */ + int fiber_factory; + + /* NULL if current thread has no fibers and is not a fiber; LPVOID + returned by CreateFiber or ConvertThreadToFiber otherwise */ + void* fiber; + + /* True if pthreads_win32 created fiber, false if it was already + present and just captured. We should delete our fiber when not + needed, but external fibers should be left intact. */ + int own_fiber; + + /* True if thread was created as fiber */ + int created_as_fiber; + + /* For noticed foreign threads, wait_handle contains a result of + RegisterWaitForSingleObject. */ + HANDLE wait_handle; + + /* FCAT group of a fiber. */ + pthread_t fiber_group; + + /* Mutex preventing double-entering a fiber */ + pthread_mutex_t fiber_lock; + + /* When fiber switches to another fiber (dying or not) it makes + another's fiber_prev point to it. If it's dead, the fiber entered + should clean up. */ + pthread_t fiber_prev; + + /* For non-running fiber, this field provides context of its + last-known running state: not for jumps et al., but for + conservative stack GCing. + + With pthread_np_publish_context and pthread_np_unpublish_context + application may manage its thread context cooperatively, not + requiring real SuspendThread and ResumeThread for threads that + don't do anything interesting (as defined by application). + + Esp field of fiber_context is used as a validity flag (must not + be NULL). */ + CONTEXT fiber_context; + + /* Thread TEB base (mostly informative/debugging) */ + void* teb; + + /* For fiber-callouts (call-in-fiber) support. When switched into, + any fiber should execute fiber_callback and switch back to + fiber_prev. */ + void (*fiber_callback)(void* context); + void *fiber_callback_context; + + /* Pthread TLS, detached from windows system TLS */ + void *specifics[PTHREAD_KEYS_MAX]; +} pthread_thread; + +#define PTHREAD_ONCE_INIT 0 + +typedef int pthread_once_t; +int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)); + +static inline int pthread_setspecific(pthread_key_t key, const void *value) +{ + pthread_self()->specifics[key] = (void*)value; + return 0; +} + +typedef struct { + int bogus; +} siginfo_t; + +#define SA_SIGINFO (1u<<1) +#define SA_NODEFER (1u<<2) +#define SA_RESTART (1u<<3) +#define SA_ONSTACK (1u<<4) + +struct sigaction { + void (*sa_handler)(int); + void (*sa_sigaction)(int, siginfo_t*, void*); + sigset_t sa_mask; + int sa_flags; +}; +int sigaction(int signum, const struct sigaction* act, struct sigaction* oldact); + +int sigpending(sigset_t *set); + +void pthread_np_add_pending_signal(pthread_t thread, int signum); +void pthread_np_remove_pending_signal(pthread_t thread, int signum); +sigset_t pthread_np_other_thread_sigpending(pthread_t thread); + +int pthread_np_notice_thread(); +int pthread_np_get_thread_context(pthread_t thread, CONTEXT* context); +int pthread_np_convert_self_to_fiber(); +int pthread_np_switch_to_fiber(pthread_t fiber); +int pthread_np_run_in_fiber(pthread_t pth, void (*callback)(void*), + void* context); +int pthread_np_set_fiber_factory_mode(int on); +int pthread_np_fiber_save_tls(int slot, int enable); +HANDLE pthread_np_get_handle(pthread_t pth); +void* pthread_np_get_lowlevel_fiber(pthread_t pth); +int pthread_np_delete_lowlevel_fiber(void* ll_fiber); +int pthread_np_ack_pending_signals(void* ucontext_arg); + +/* Fiber context hooks */ +extern void (*pthread_save_context_hook)(); +extern void (*pthread_restore_context_hook)(); + +int sigemptyset(sigset_t *set); +int sigfillset(sigset_t *set); +int sigaddset(sigset_t *set, int signum); +int sigdelset(sigset_t *set, int signum); +int sigismember(const sigset_t *set, int signum); + +typedef int sig_atomic_t; + +/* Futexes */ +int futex_wait(volatile intptr_t *lock_word, intptr_t oldval, long sec, unsigned long usec); +int futex_wake(volatile intptr_t *lock_word, int n); + +/* Debugging */ +void pthread_np_lose(int trace_depth, const char* fmt, ...); +struct _pthread_mutex_info DEAD_MUTEX; + +static inline void pthread_np_assert_live_mutex(pthread_mutex_t* ptr, + const char *action) +{ + if (*ptr == &DEAD_MUTEX) { + pthread_np_lose(5,"Trying to %s dead mutex %p\n",action,ptr); + } +} + +typedef HANDLE sem_t; + +#define SEM_VALUE_MAX (int) (~0U >>1) + +int sem_init(sem_t *sem, int pshared_not_implemented, unsigned int value); +int sem_post(sem_t *sem); +int sem_wait(sem_t *sem); +int sem_trywait(sem_t *sem); +int sem_destroy(sem_t *sem); + +#ifndef PTHREAD_INTERNALS +static inline int pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset) +{ + pthread_t self = pthread_self(); + if (oldset) + *oldset = self->blocked_signal_set; + if (set) { + switch (how) { + case SIG_BLOCK: + self->blocked_signal_set |= *set; + break; + case SIG_UNBLOCK: + self->blocked_signal_set &= ~(*set); + break; + case SIG_SETMASK: + self->blocked_signal_set = *set; + break; + } + } + return 0; +} + +/* Make speed-critical TLS access inline. + + We don't check key range or validity here: (1) pthread spec is + explicit about undefined behavior for bogus keys, (2) + setspecific/getspecific should be as fast as possible. */ +#define pthread_getspecific pthread_getspecific_np_inline + +static inline void *pthread_getspecific_np_inline(pthread_key_t key) +{ + return pthread_self()->specifics[key]; +} + +#ifdef PTHREAD_DEBUG_OUTPUT +#define pthread_mutex_lock(mutex) \ + pthread_mutex_lock_annotate_np(mutex, __FILE__, __LINE__ ) +#define pthread_mutex_trylock(mutex) \ + pthread_mutex_trylock_annotate_np(mutex, __FILE__ ,__LINE__) +#else + +/* I'm not after inlinining _everything_, but those two things below are + (1) fast, (2) critical (3) short */ +static inline int pthread_mutex_lock_np_inline(pthread_mutex_t *mutex) +{ + pthread_np_assert_live_mutex(mutex,"lock"); + if ((*mutex) == PTHREAD_MUTEX_INITIALIZER) { + return pthread_mutex_lock(mutex); + } else { + EnterCriticalSection(&(*mutex)->cs); + return 0; + } +} + +static inline int pthread_mutex_unlock_np_inline(pthread_mutex_t *mutex) +{ + pthread_np_assert_live_mutex(mutex,"unlock"); + LeaveCriticalSection(&(*mutex)->cs); + return 0; +} + +#define pthread_mutex_lock pthread_mutex_lock_np_inline +#define pthread_mutex_unlock pthread_mutex_unlock_np_inline + +#endif /* !PTHREAD_DEBUG_OUTPUT */ +#endif /* !PTHREAD_INTERNALS */ +#endif /* WIN32_PTHREAD_INCLUDED */ diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c index f1c50cf..38770a6 100644 --- a/src/runtime/runtime.c +++ b/src/runtime/runtime.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include "runtime.h" #ifndef LISP_FEATURE_WIN32 #include #endif @@ -42,7 +42,9 @@ #include #endif +#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD)) #include "signal.h" +#endif #include "runtime.h" #include "vars.h" @@ -340,6 +342,10 @@ char *core_string; struct runtime_options *runtime_options; char *saved_runtime_path = NULL; +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +void pthreads_win32_init(); +#endif + int main(int argc, char *argv[], char *envp[]) @@ -364,6 +370,11 @@ main(int argc, char *argv[], char *envp[]) lispobj initial_function; const char *sbcl_home = getenv("SBCL_HOME"); +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) + os_preinit(); + pthreads_win32_init(); +#endif + interrupt_init(); block_blockable_signals(0, 0); diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 4082541..c467261 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -15,6 +15,17 @@ #ifndef _SBCL_RUNTIME_H_ #define _SBCL_RUNTIME_H_ +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +# include "pthreads_win32.h" +#else +# include +# ifdef LISP_FEATURE_SB_THREAD +# include +# endif +#endif + +#include + #if defined(LISP_FEATURE_SB_THREAD) #define thread_self() pthread_self() #define thread_kill pthread_kill @@ -29,6 +40,10 @@ #define thread_mutex_unlock(l) 0 #endif +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +void os_preinit(); +#endif + #if defined(LISP_FEATURE_SB_SAFEPOINT) void map_gc_page(); void unmap_gc_page(); @@ -125,7 +140,6 @@ void dyndebug_init(void); #if QSHOW_SIGNAL_SAFE == 1 && !defined(LISP_FEATURE_WIN32) -#include extern sigset_t blockable_sigset; #define QSHOW_BLOCK \ @@ -178,7 +192,6 @@ typedef unsigned long pointer_sized_uint_t ; #include #if defined(LISP_FEATURE_SB_THREAD) -#include typedef pthread_t os_thread_t; #else typedef pid_t os_thread_t; diff --git a/src/runtime/safepoint.c b/src/runtime/safepoint.c index 1277d24..224643d 100644 --- a/src/runtime/safepoint.c +++ b/src/runtime/safepoint.c @@ -140,7 +140,16 @@ check_pending_thruptions(os_context_t *ctx) { struct thread *p = arch_os_get_current_thread(); - gc_assert(!os_get_csp(p)); +#ifdef LISP_FEATURE_WIN32 + pthread_t pself = p->os_thread; + sigset_t oldset; + /* On Windows, wake_thread/kill_safely does not set THRUPTION_PENDING + * in the self-kill case; instead we do it here while also clearing the + * "signal". */ + if (pself->pending_signal_set) + if (__sync_fetch_and_and(&pself->pending_signal_set,0)) + SetSymbolValue(THRUPTION_PENDING, T, p); +#endif if (!thread_may_thrupt(ctx)) return 0; @@ -148,12 +157,24 @@ check_pending_thruptions(os_context_t *ctx) return 0; SetSymbolValue(THRUPTION_PENDING, NIL, p); +#ifdef LISP_FEATURE_WIN32 + oldset = pself->blocked_signal_set; + pself->blocked_signal_set = deferrable_sigset; + if (ctx) fake_foreign_function_call(ctx); +#else sigset_t oldset; block_deferrable_signals(0, &oldset); +#endif funcall0(StaticSymbolFunction(RUN_INTERRUPTION)); +#ifdef LISP_FEATURE_WIN32 + if (ctx) undo_fake_foreign_function_call(ctx); + pself->blocked_signal_set = oldset; + if (ctx) ctx->sigmask = oldset; +#else pthread_sigmask(SIG_SETMASK, &oldset, 0); +#endif return 1; } #endif @@ -640,6 +661,17 @@ static void set_csp_from_context(struct thread *self, os_context_t *ctx) { void **sp = (void **) *os_context_register_addr(ctx, reg_SP); + /* On POSIX platforms, it is sufficient to investigate only the part + * of the stack that was live before the interrupt, because in + * addition, we consider interrupt contexts explicitly. On Windows, + * however, we do not keep an explicit stack of exception contexts, + * and instead arrange for the conservative stack scan to also cover + * the context implicitly. The obvious way to do that is to start + * at the context itself: */ +#ifdef LISP_FEATURE_WIN32 + gc_assert((void **) ctx < sp); + sp = (void**) ctx; +#endif gc_assert((void **)self->control_stack_start <= sp && sp < (void **)self->control_stack_end); @@ -793,6 +825,38 @@ thread_register_gc_trigger() /* wake_thread(thread) -- ensure a thruption delivery to * `thread'. */ +# ifdef LISP_FEATURE_WIN32 + +void +wake_thread_io(struct thread * thread) +{ + SetEvent(thread->private_events.events[1]); +} + +void +wake_thread_win32(struct thread *thread) +{ + wake_thread_io(thread); + + if (SymbolTlValue(THRUPTION_PENDING,thread)==T) + return; + + SetTlSymbolValue(THRUPTION_PENDING,T,thread); + + if ((SymbolTlValue(GC_PENDING,thread)==T)|| + (SymbolTlValue(STOP_FOR_GC_PENDING,thread)==T)) + return; + + pthread_mutex_unlock(&all_threads_lock); + + if (maybe_become_stw_initiator(1) && !in_race_p()) { + gc_stop_the_world(); + gc_start_the_world(); + } + pthread_mutex_lock(&all_threads_lock); + return; +} +# else int wake_thread_posix(os_thread_t os_thread) { @@ -859,6 +923,7 @@ cleanup: pthread_sigmask(SIG_SETMASK, &oldset, 0); return found ? 0 : -1; } +#endif /* !LISP_FEATURE_WIN32 */ #endif /* LISP_FEATURE_SB_THRUPTION */ void diff --git a/src/runtime/save.c b/src/runtime/save.c index b7faa37..8520eb9 100644 --- a/src/runtime/save.c +++ b/src/runtime/save.c @@ -16,10 +16,14 @@ #include #include #include -#include #include #include "sbcl.h" +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +#include "pthreads_win32.h" +#else +#include +#endif #include "runtime.h" #include "os.h" #include "core.h" diff --git a/src/runtime/thread.c b/src/runtime/thread.c index 0fa1fe7..cc9eebd 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -17,7 +17,7 @@ #ifndef LISP_FEATURE_WIN32 #include #endif -#include +#include "runtime.h" #include #include #include @@ -49,12 +49,8 @@ #include "interrupt.h" #include "lispregs.h" -#ifdef LISP_FEATURE_WIN32 -/* - * Win32 doesn't have SIGSTKSZ, and we're not switching stacks anyway, - * so define it arbitrarily - */ -#define SIGSTKSZ 1024 +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) +# define IMMEDIATE_POST_MORTEM #endif #if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_SB_THREAD) @@ -223,12 +219,14 @@ initial_thread_trampoline(struct thread *th) th->os_thread=thread_self(); #ifndef LISP_FEATURE_WIN32 protect_control_stack_hard_guard_page(1, NULL); +#endif protect_binding_stack_hard_guard_page(1, NULL); protect_alien_stack_hard_guard_page(1, NULL); +#ifndef LISP_FEATURE_WIN32 protect_control_stack_guard_page(1, NULL); +#endif protect_binding_stack_guard_page(1, NULL); protect_alien_stack_guard_page(1, NULL); -#endif #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64) return call_into_lisp_first_time(function,args,0); @@ -238,6 +236,28 @@ initial_thread_trampoline(struct thread *th) } #ifdef LISP_FEATURE_SB_THREAD + +# if defined(IMMEDIATE_POST_MORTEM) + +/* + * If this feature is set, we are running on a stack managed by the OS, + * and no fancy delays are required for anything. Just do it. + */ +static void +schedule_thread_post_mortem(struct thread *corpse) +{ + pthread_detach(pthread_self()); + gc_assert(!pthread_attr_destroy(corpse->os_attr)); + free(corpse->os_attr); +#if defined(LISP_FEATURE_WIN32) + os_invalidate_free(corpse->os_address, THREAD_STRUCT_SIZE); +#else + os_invalidate(corpse->os_address, THREAD_STRUCT_SIZE); +#endif +} + +# else + /* THREAD POST MORTEM CLEANUP * * Memory allocated for the thread stacks cannot be reclaimed while @@ -324,6 +344,8 @@ schedule_thread_post_mortem(struct thread *corpse) } } +# endif /* !IMMEDIATE_POST_MORTEM */ + /* this is the first thing that runs in the child (which is why the * silly calling convention). Basically it calls the user's requested * lisp function after doing arch_os_thread_init and whatever other @@ -405,13 +427,27 @@ new_thread_trampoline(struct thread *th) os_sem_destroy(th->state_not_running_sem); os_sem_destroy(th->state_not_stopped_sem); +#if defined(LISP_FEATURE_WIN32) + free((os_vm_address_t)th->interrupt_data); +#else os_invalidate((os_vm_address_t)th->interrupt_data, (sizeof (struct interrupt_data))); +#endif #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER mach_lisp_thread_destroy(th); #endif +#if defined(LISP_FEATURE_WIN32) + int i; + for (i = 0; i< + (int) (sizeof(th->private_events.events)/ + sizeof(th->private_events.events[0])); ++i) { + CloseHandle(th->private_events.events[i]); + } + TlsSetValue(OUR_TLS_INDEX,NULL); +#endif + schedule_thread_post_mortem(th); FSHOW((stderr,"/exiting thread %lu\n", thread_self())); return result; @@ -422,11 +458,20 @@ new_thread_trampoline(struct thread *th) static void free_thread_struct(struct thread *th) { +#if defined(LISP_FEATURE_WIN32) + if (th->interrupt_data) { + os_invalidate_free((os_vm_address_t) th->interrupt_data, + (sizeof (struct interrupt_data))); + } + os_invalidate_free((os_vm_address_t) th->os_address, + THREAD_STRUCT_SIZE); +#else if (th->interrupt_data) os_invalidate((os_vm_address_t) th->interrupt_data, (sizeof (struct interrupt_data))); os_invalidate((os_vm_address_t) th->os_address, THREAD_STRUCT_SIZE); +#endif } #ifdef LISP_FEATURE_SB_THREAD @@ -446,7 +491,7 @@ create_thread_struct(lispobj initial_function) { struct thread *th=0; /* subdue gcc */ void *spaces=0; void *aligned_spaces=0; -#ifdef LISP_FEATURE_SB_THREAD +#if defined(LISP_FEATURE_SB_THREAD) || defined(LISP_FEATURE_WIN32) unsigned int i; #endif @@ -606,8 +651,13 @@ create_thread_struct(lispobj initial_function) { access_control_stack_pointer(th)=th->control_stack_start; #endif +#if defined(LISP_FEATURE_WIN32) + th->interrupt_data = (struct interrupt_data *) + calloc((sizeof (struct interrupt_data)),1); +#else th->interrupt_data = (struct interrupt_data *) os_validate(0,(sizeof (struct interrupt_data))); +#endif if (!th->interrupt_data) { free_thread_struct(th); return 0; @@ -619,6 +669,12 @@ create_thread_struct(lispobj initial_function) { #endif th->no_tls_value_marker=initial_function; +#if defined(LISP_FEATURE_WIN32) + for (i = 0; iprivate_events.events)/ + sizeof(th->private_events.events[0]); ++i) { + th->private_events.events[i] = CreateEvent(NULL,FALSE,FALSE,NULL); + } +#endif th->stepping = NIL; return th; } @@ -664,8 +720,12 @@ boolean create_os_thread(struct thread *th,os_thread_t *kid_tid) if((initcode = pthread_attr_init(th->os_attr)) || /* call_into_lisp_first_time switches the stack for the initial * thread. For the others, we use this. */ +#if defined(LISP_FEATURE_WIN32) + (pthread_attr_setstacksize(th->os_attr, thread_control_stack_size)) || +#else (pthread_attr_setstack(th->os_attr,th->control_stack_start, thread_control_stack_size)) || +#endif (retcode = pthread_create (kid_tid,th->os_attr,(void *(*)(void *))new_thread_trampoline,th))) { FSHOW_SIGNAL((stderr, "init = %d\n", initcode)); @@ -823,10 +883,9 @@ thread_yield() int wake_thread(os_thread_t os_thread) { -#ifdef LISP_FEATURE_WIN32 -# define SIGPIPE 1 -#endif -#if !defined(LISP_FEATURE_SB_THRUPTION) || defined(LISP_FEATURE_WIN32) +#if defined(LISP_FEATURE_WIN32) + return kill_safely(os_thread, 1); +#elif !defined(LISP_FEATURE_SB_THRUPTION) return kill_safely(os_thread, SIGPIPE); #else return wake_thread_posix(os_thread); @@ -871,6 +930,9 @@ kill_safely(os_thread_t os_thread, int signal) * :SPECIALS), especially with s/10/100/ in both loops. */ if (os_thread == pthread_self()) { pthread_kill(os_thread, signal); +#ifdef LISP_FEATURE_WIN32 + check_pending_thruptions(NULL); +#endif return 0; } @@ -883,6 +945,9 @@ kill_safely(os_thread_t os_thread, int signal) int status = pthread_kill(os_thread, signal); if (status) lose("kill_safely: pthread_kill failed with %d\n", status); +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THRUPTION) + wake_thread_win32(thread); +#endif break; } } @@ -892,6 +957,8 @@ kill_safely(os_thread_t os_thread, int signal) return 0; else return -1; +#elif defined(LISP_FEATURE_WIN32) + return 0; #else int status; if (os_thread != 0) diff --git a/src/runtime/thread.h b/src/runtime/thread.h index 2a8ea6e..f832c6b 100644 --- a/src/runtime/thread.h +++ b/src/runtime/thread.h @@ -11,6 +11,9 @@ #ifdef LISP_FEATURE_GENCGC #include "gencgc-alloc-region.h" #endif +#ifdef LISP_FEATURE_WIN32 +#include "win32-thread-private-events.h" +#endif #include "genesis/symbol.h" #include "genesis/static-symbols.h" @@ -283,6 +286,14 @@ extern __thread struct thread *current_thread; # define THREAD_CSP_PAGE_SIZE 0 #endif +#ifdef LISP_FEATURE_WIN32 +/* + * Win32 doesn't have SIGSTKSZ, and we're not switching stacks anyway, + * so define it arbitrarily + */ +#define SIGSTKSZ 1024 +#endif + #define THREAD_STRUCT_SIZE (thread_control_stack_size + BINDING_STACK_SIZE + \ ALIEN_STACK_SIZE + \ sizeof(struct nonpointer_thread_data) + \ @@ -291,6 +302,11 @@ extern __thread struct thread *current_thread; THREAD_ALIGNMENT_BYTES + \ THREAD_CSP_PAGE_SIZE) +#if defined(LISP_FEATURE_WIN32) +static inline struct thread* arch_os_get_current_thread() + __attribute__((__const__)); +#endif + /* This is clearly per-arch and possibly even per-OS code, but we can't * put it somewhere sensible like x86-linux-os.c because it needs too * much stuff like struct thread and all_threads to be defined, which @@ -301,6 +317,10 @@ static inline struct thread *arch_os_get_current_thread(void) #if defined(LISP_FEATURE_SB_THREAD) #if defined(LISP_FEATURE_X86) register struct thread *me=0; +#if defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD) + __asm__ ("movl %%fs:0xE10+(4*63), %0" : "=r"(me) :); + return me; +#endif if(all_threads) { #if defined(LISP_FEATURE_DARWIN) && defined(LISP_FEATURE_RESTORE_FS_SEGMENT_REGISTER_FROM_TLS) sel_t sel; @@ -326,7 +346,7 @@ static inline struct thread *arch_os_get_current_thread(void) #endif return th; #endif - __asm__ __volatile__ ("movl %%fs:%c1,%0" : "=r" (me) + __asm__ ("movl %%fs:%c1,%0" : "=r" (me) : "i" (offsetof (struct thread,this))); } return me; @@ -356,7 +376,11 @@ extern void thread_register_gc_trigger(); # ifdef LISP_FEATURE_SB_THRUPTION int wake_thread(os_thread_t os_thread); +# ifdef LISP_FEATURE_WIN32 +void wake_thread_win32(struct thread *thread); +# else int wake_thread_posix(os_thread_t os_thread); +# endif # endif #define thread_qrl(th) (&(th)->nonpointer_data->qrl_lock) @@ -416,7 +440,6 @@ int check_pending_thruptions(os_context_t *ctx); #endif -extern boolean is_some_thread_local_addr(os_vm_address_t addr); extern void create_initial_thread(lispobj); #ifdef LISP_FEATURE_SB_THREAD diff --git a/src/runtime/win32-os.c b/src/runtime/win32-os.c index b3e22ca..59e5473 100644 --- a/src/runtime/win32-os.c +++ b/src/runtime/win32-os.c @@ -28,11 +28,11 @@ #include #include +#include #include #include #include #include "sbcl.h" -#include "./signal.h" #include "os.h" #include "arch.h" #include "globals.h" @@ -46,7 +46,6 @@ #include "dynbind.h" #include -#include #include #include #include @@ -58,65 +57,348 @@ #include "validate.h" #include "thread.h" -size_t os_vm_page_size; +#include "cpputil.h" + +#ifndef LISP_FEATURE_SB_THREAD +/* dummy definition to reduce ifdef clutter */ +#define WITH_GC_AT_SAFEPOINTS_ONLY() if (0) ; else +#endif + +os_vm_size_t os_vm_page_size; #include "gc.h" #include "gencgc-internal.h" +#include #if 0 int linux_sparc_siginfo_bug = 0; int linux_supports_futex=0; #endif +#include +#include + +/* missing definitions for modern mingws */ +#ifndef EH_UNWINDING +#define EH_UNWINDING 0x02 +#endif +#ifndef EH_EXIT_UNWIND +#define EH_EXIT_UNWIND 0x04 +#endif + +/* Tired of writing arch_os_get_current_thread each time. */ +#define this_thread (arch_os_get_current_thread()) + +/* wrappers for winapi calls that must be successful (like SBCL's + * (aver ...) form). */ + +/* win_aver function: basic building block for miscellaneous + * ..AVER.. macrology (below) */ + +/* To do: These routines used to be "customizable" with dyndebug_init() + * and variables like dyndebug_survive_aver, dyndebug_skip_averlax based + * on environment variables. Those features got lost on the way, but + * ought to be reintroduced. */ + +static inline +intptr_t win_aver(intptr_t value, char* comment, char* file, int line, + int justwarn) +{ + if (!value) { + LPSTR errorMessage = ""; + DWORD errorCode = GetLastError(), allocated=0; + int posixerrno = errno; + const char* posixstrerror = strerror(errno); + char* report_template = + "Expression unexpectedly false: %s:%d\n" + " ... %s\n" + " ===> returned #X%p, \n" + " (in thread %p)" + " ... Win32 thinks:\n" + " ===> code %u, message => %s\n" + " ... CRT thinks:\n" + " ===> code %u, message => %s\n"; + + allocated = + FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER| + FORMAT_MESSAGE_FROM_SYSTEM, + NULL, + errorCode, + MAKELANGID(LANG_ENGLISH,SUBLANG_ENGLISH_US), + (LPSTR)&errorMessage, + 1024u, + NULL); + + if (justwarn) { + fprintf(stderr, report_template, + file, line, + comment, value, + this_thread, + (unsigned)errorCode, errorMessage, + posixerrno, posixstrerror); + } else { + lose(report_template, + file, line, + comment, value, + this_thread, + (unsigned)errorCode, errorMessage, + posixerrno, posixstrerror); + } + if (allocated) + LocalFree(errorMessage); + } + return value; +} + +/* sys_aver function: really tiny adaptor of win_aver for + * "POSIX-parody" CRT results ("lowio" and similar stuff): + * negative number means something... negative. */ +static inline +intptr_t sys_aver(long value, char* comment, char* file, int line, + int justwarn) +{ + win_aver((intptr_t)(value>=0),comment,file,line,justwarn); + return value; +} + +/* Check for (call) result being boolean true. (call) may be arbitrary + * expression now; massive attack of gccisms ensures transparent type + * conversion back and forth, so the type of AVER(expression) is the + * type of expression. Value is the same _if_ it can be losslessly + * converted to (void*) and back. + * + * Failed AVER() is normally fatal. Well, unless dyndebug_survive_aver + * flag is set. */ + +#define AVER(call) \ + ({ __typeof__(call) __attribute__((unused)) me = \ + (__typeof__(call)) \ + win_aver((intptr_t)(call), #call, __FILE__, __LINE__, 0); \ + me;}) + +/* AVERLAX(call): do the same check as AVER did, but be mild on + * failure: print an annoying unrequested message to stderr, and + * continue. With dyndebug_skip_averlax flag, AVERLAX stop even to + * check and complain. */ + +#define AVERLAX(call) \ + ({ __typeof__(call) __attribute__((unused)) me = \ + (__typeof__(call)) \ + win_aver((intptr_t)(call), #call, __FILE__, __LINE__, 1); \ + me;}) + +/* Now, when failed AVER... prints both errno and GetLastError(), two + * variants of "POSIX/lowio" style checks below are almost useless + * (they build on sys_aver like the two above do on win_aver). */ + +#define CRT_AVER_NONNEGATIVE(call) \ + ({ __typeof__(call) __attribute__((unused)) me = \ + (__typeof__(call)) \ + sys_aver((call), #call, __FILE__, __LINE__, 0); \ + me;}) + +#define CRT_AVERLAX_NONNEGATIVE(call) \ + ({ __typeof__(call) __attribute__((unused)) me = \ + (__typeof__(call)) \ + sys_aver((call), #call, __FILE__, __LINE__, 1); \ + me;}) + +/* to be removed */ +#define CRT_AVER(booly) \ + ({ __typeof__(booly) __attribute__((unused)) me = (booly); \ + sys_aver((booly)?0:-1, #booly, __FILE__, __LINE__, 0); \ + me;}) + +const char * t_nil_s(lispobj symbol); + +/* + * The following signal-mask-related alien routines are called from Lisp: + */ + +/* As of win32, deferrables _do_ matter. gc_signal doesn't. */ +unsigned long block_deferrables_and_return_mask() +{ + sigset_t sset; + block_deferrable_signals(0, &sset); + return (unsigned long)sset; +} + +#if defined(LISP_FEATURE_SB_THREAD) +void apply_sigmask(unsigned long sigmask) +{ + sigset_t sset = (sigset_t)sigmask; + pthread_sigmask(SIG_SETMASK, &sset, 0); +} +#endif + /* The exception handling function looks like this: */ EXCEPTION_DISPOSITION handle_exception(EXCEPTION_RECORD *, struct lisp_exception_frame *, CONTEXT *, void *); +/* handle_exception is defined further in this file, but since SBCL + * 1.0.1.24, it doesn't get registered as SEH handler directly anymore, + * not even by wos_install_interrupt_handlers. Instead, x86-assem.S + * provides exception_handler_wrapper; we install it here, and each + * exception frame on nested funcall()s also points to it. + */ + void *base_seh_frame; static void *get_seh_frame(void) { void* retval; - asm volatile ("movl %%fs:0,%0": "=r" (retval)); +#ifdef LISP_FEATURE_X86 + asm volatile ("mov %%fs:0,%0": "=r" (retval)); +#else + asm volatile ("mov %%gs:0,%0": "=r" (retval)); +#endif return retval; } static void set_seh_frame(void *frame) { - asm volatile ("movl %0,%%fs:0": : "r" (frame)); +#ifdef LISP_FEATURE_X86 + asm volatile ("mov %0,%%fs:0": : "r" (frame)); +#else + asm volatile ("mov %0,%%gs:0": : "r" (frame)); +#endif } -#if 0 -static struct lisp_exception_frame *find_our_seh_frame(void) +#if defined(LISP_FEATURE_SB_THREAD) + +/* Permit loads from GC_SAFEPOINT_PAGE_ADDR (NB page state change is + * "synchronized" with the memory region content/availability -- + * e.g. you won't see other CPU flushing buffered writes after WP -- + * but there is some window when other thread _seem_ to trap AFTER + * access is granted. You may think of it something like "OS enters + * SEH handler too slowly" -- what's important is there's no implicit + * synchronization between VirtualProtect caller and other thread's + * SEH handler, hence no ordering of events. VirtualProtect is + * implicitly synchronized with protected memory contents (only). + * + * The last fact may be potentially used with many benefits e.g. for + * foreign call speed, but we don't use it for now: almost the only + * fact relevant to the current signalling protocol is "sooner or + * later everyone will trap [everyone will stop trapping]". + * + * An interesting source on page-protection-based inter-thread + * communication is a well-known paper by Dave Dice, Hui Huang, + * Mingyao Yang: ``Asymmetric Dekker Synchronization''. Last time + * I checked it was available at + * http://home.comcast.net/~pjbishop/Dave/Asymmetric-Dekker-Synchronization.txt + */ +void map_gc_page() { - struct lisp_exception_frame *frame = get_seh_frame(); - - while (frame->handler != handle_exception) - frame = frame->next_frame; - - return frame; + DWORD oldProt; + AVER(VirtualProtect((void*) GC_SAFEPOINT_PAGE_ADDR, sizeof(lispobj), + PAGE_READWRITE, &oldProt)); } -inline static void *get_stack_frame(void) +void unmap_gc_page() { - void* retval; - asm volatile ("movl %%ebp,%0": "=r" (retval)); - return retval; + DWORD oldProt; + AVER(VirtualProtect((void*) GC_SAFEPOINT_PAGE_ADDR, sizeof(lispobj), + PAGE_NOACCESS, &oldProt)); } + +#endif + +#if defined(LISP_FEATURE_SB_THREAD) +/* We want to get a slot in TIB that (1) is available at constant + offset, (2) is our private property, so libraries wouldn't legally + override it, (3) contains something predefined for threads created + out of our sight. + + Low 64 TLS slots are adressable directly, starting with + FS:[#xE10]. When SBCL runtime is initialized, some of the low slots + may be already in use by its prerequisite DLLs, as DllMain()s and + TLS callbacks have been called already. But slot 63 is unlikely to + be reached at this point: one slot per DLL that needs it is the + common practice, and many system DLLs use predefined TIB-based + areas outside conventional TLS storage and don't need TLS slots. + With our current dependencies, even slot 2 is observed to be free + (as of WinXP and wine). + + Now we'll call TlsAlloc() repeatedly until slot 63 is officially + assigned to us, then TlsFree() all other slots for normal use. TLS + slot 63, alias FS:[#.(+ #xE10 (* 4 63))], now belongs to us. + + To summarize, let's list the assumptions we make: + + - TIB, which is FS segment base, contains first 64 TLS slots at the + offset #xE10 (i.e. TIB layout compatibility); + - TLS slots are allocated from lower to higher ones; + - All libraries together with CRT startup have not requested 64 + slots yet. + + All these assumptions together don't seem to be less warranted than + the availability of TIB arbitrary data slot for our use. There are + some more reasons to prefer slot 63 over TIB arbitrary data: (1) if + our assumptions for slot 63 are violated, it will be detected at + startup instead of causing some system-specific unreproducible + problems afterwards, depending on OS and loaded foreign libraries; + (2) if getting slot 63 reliably with our current approach will + become impossible for some future Windows version, we can add TLS + callback directory to SBCL binary; main image TLS callback is + started before _any_ TLS slot is allocated by libraries, and + some C compiler vendors rely on this fact. */ + +void os_preinit() +{ +#ifdef LISP_FEATURE_X86 + DWORD slots[TLS_MINIMUM_AVAILABLE]; + DWORD key; + int n_slots = 0, i; + for (i=0; i BACKEND_PAGE_BYTES? + system_info.dwPageSize : BACKEND_PAGE_BYTES; +#if defined(LISP_FEATURE_X86) + fast_bzero_pointer = fast_bzero_detect; +#endif + os_number_of_processors = system_info.dwNumberOfProcessors; base_seh_frame = get_seh_frame(); } +static inline boolean local_thread_stack_address_p(os_vm_address_t address) +{ + return this_thread && + (((((u64)address >= (u64)this_thread->os_address) && + ((u64)address < ((u64)this_thread)-THREAD_CSP_PAGE_SIZE))|| + (((u64)address >= (u64)this_thread->control_stack_start)&& + ((u64)address < (u64)this_thread->control_stack_end)))); +} /* * So we have three fun scenarios here. @@ -147,19 +429,12 @@ os_validate(os_vm_address_t addr, os_vm_size_t len) if (!addr) { /* the simple case first */ - os_vm_address_t real_addr; - if (!(real_addr = VirtualAlloc(addr, len, MEM_COMMIT, PAGE_EXECUTE_READWRITE))) { - fprintf(stderr, "VirtualAlloc: 0x%lx.\n", GetLastError()); - return 0; - } - - return real_addr; + return + AVERLAX(VirtualAlloc(addr, len, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE)); } - if (!VirtualQuery(addr, &mem_info, sizeof mem_info)) { - fprintf(stderr, "VirtualQuery: 0x%lx.\n", GetLastError()); + if (!AVERLAX(VirtualQuery(addr, &mem_info, sizeof mem_info))) return 0; - } if ((mem_info.State == MEM_RESERVE) && (mem_info.RegionSize >=len)) { /* It would be correct to return here. However, support for Wine @@ -181,12 +456,13 @@ os_validate(os_vm_address_t addr, os_vm_size_t len) if (mem_info.State == MEM_RESERVE) { fprintf(stderr, "validation of reserved space too short.\n"); fflush(stderr); + /* Oddly, we do not treat this assertion as fatal; hence also the + * provision for MEM_RESERVE in the following code, I suppose: */ } - if (!VirtualAlloc(addr, len, (mem_info.State == MEM_RESERVE)? MEM_COMMIT: MEM_RESERVE, PAGE_EXECUTE_READWRITE)) { - fprintf(stderr, "VirtualAlloc: 0x%lx.\n", GetLastError()); + if (!AVERLAX(VirtualAlloc(addr, len, (mem_info.State == MEM_RESERVE)? + MEM_COMMIT: MEM_RESERVE, PAGE_EXECUTE_READWRITE))) return 0; - } return addr; } @@ -194,24 +470,57 @@ os_validate(os_vm_address_t addr, os_vm_size_t len) /* * For os_invalidate(), we merely decommit the memory rather than * freeing the address space. This loses when freeing per-thread - * data and related memory since it leaks address space. It's not - * too lossy, however, since the two scenarios I'm aware of are - * fd-stream buffers, which are pooled rather than torched, and - * thread information, which I hope to pool (since windows creates - * threads at its own whim, and we probably want to be able to - * have them callback without funky magic on the part of the user, - * and full-on thread allocation is fairly heavyweight). Someone - * will probably shoot me down on this with some pithy comment on - * the use of (setf symbol-value) on a special variable. I'm happy - * for them. + * data and related memory since it leaks address space. + * + * So far the original comment (author unknown). It used to continue as + * follows: + * + * It's not too lossy, however, since the two scenarios I'm aware of + * are fd-stream buffers, which are pooled rather than torched, and + * thread information, which I hope to pool (since windows creates + * threads at its own whim, and we probably want to be able to have + * them callback without funky magic on the part of the user, and + * full-on thread allocation is fairly heavyweight). + * + * But: As it turns out, we are no longer content with decommitting + * without freeing, and have now grown a second function + * os_invalidate_free(), sort of a really_os_invalidate(). + * + * As discussed on #lisp, this is not a satisfactory solution, and probably + * ought to be rectified in the following way: + * + * - Any cases currently going through the non-freeing version of + * os_invalidate() are ultimately meant for zero-filling applications. + * Replace those use cases with an os_revalidate_bzero() or similarly + * named function, which explicitly takes care of that aspect of + * the semantics. + * + * - The remaining uses of os_invalidate should actually free, and once + * the above is implemented, we can rename os_invalidate_free back to + * just os_invalidate(). + * + * So far the new plan, as yet unimplemented. -- DFL */ void os_invalidate(os_vm_address_t addr, os_vm_size_t len) { - if (!VirtualFree(addr, len, MEM_DECOMMIT)) { - fprintf(stderr, "VirtualFree: 0x%lx.\n", GetLastError()); - } + AVERLAX(VirtualFree(addr, len, MEM_DECOMMIT)); +} + +void +os_invalidate_free(os_vm_address_t addr, os_vm_size_t len) +{ + AVERLAX(VirtualFree(addr, 0, MEM_RELEASE)); +} + +void +os_invalidate_free_by_any_address(os_vm_address_t addr, os_vm_size_t len) +{ + MEMORY_BASIC_INFORMATION minfo; + AVERLAX(VirtualQuery(addr, &minfo, sizeof minfo)); + AVERLAX(minfo.AllocationBase); + AVERLAX(VirtualFree(minfo.AllocationBase, 0, MEM_RELEASE)); } /* @@ -230,25 +539,14 @@ os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len) { os_vm_size_t count; -#if 0 - fprintf(stderr, "os_map: %d, 0x%x, %p, 0x%x.\n", fd, offset, addr, len); - fflush(stderr); -#endif - - if (!VirtualAlloc(addr, len, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) { - fprintf(stderr, "VirtualAlloc: 0x%lx.\n", GetLastError()); - lose("os_map: VirtualAlloc failure"); - } + AVER(VirtualAlloc(addr, len, MEM_COMMIT, PAGE_EXECUTE_READWRITE)|| + VirtualAlloc(addr, len, MEM_RESERVE|MEM_COMMIT, + PAGE_EXECUTE_READWRITE)); - if (lseek(fd, offset, SEEK_SET) == -1) { - lose("os_map: Seek failure."); - } + CRT_AVER_NONNEGATIVE(lseek(fd, offset, SEEK_SET)); count = read(fd, addr, len); - if (count != len) { - fprintf(stderr, "expected 0x%x, read 0x%x.\n", len, count); - lose("os_map: Failed to read enough bytes."); - } + CRT_AVER( count == len ); return addr; } @@ -269,10 +567,13 @@ os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot) { DWORD old_prot; - if (!VirtualProtect(address, length, os_protect_modes[prot], &old_prot)) { - fprintf(stderr, "VirtualProtect failed, code 0x%lx.\n", GetLastError()); - fflush(stderr); - } + DWORD new_prot = os_protect_modes[prot]; + AVER(VirtualProtect(address, length, new_prot, &old_prot)|| + (VirtualAlloc(address, length, MEM_COMMIT, new_prot) && + VirtualProtect(address, length, new_prot, &old_prot))); + odxprint(misc,"Protecting %p + %p vmaccess %d " + "newprot %08x oldprot %08x", + address,length,prot,new_prot,old_prot); } /* FIXME: Now that FOO_END, rather than FOO_SIZE, is the fundamental @@ -281,8 +582,8 @@ os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot) static boolean in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen) { - char* beg = (char*)((long)sbeg); - char* end = (char*)((long)sbeg) + slen; + char* beg = (char*)((uword_t)sbeg); + char* end = (char*)((uword_t)sbeg) + slen; char* adr = (char*)a; return (adr >= beg && adr < end); } @@ -293,26 +594,91 @@ is_linkage_table_addr(os_vm_address_t addr) return in_range_p(addr, LINKAGE_TABLE_SPACE_START, LINKAGE_TABLE_SPACE_END); } +static boolean is_some_thread_local_addr(os_vm_address_t addr); + boolean is_valid_lisp_addr(os_vm_address_t addr) { - struct thread *th; if(in_range_p(addr, READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE) || in_range_p(addr, STATIC_SPACE_START , STATIC_SPACE_SIZE) || - in_range_p(addr, DYNAMIC_SPACE_START , dynamic_space_size)) + in_range_p(addr, DYNAMIC_SPACE_START , dynamic_space_size) || + is_some_thread_local_addr(addr)) return 1; + return 0; +} + +/* test if an address is within thread-local space */ +static boolean +is_thread_local_addr(struct thread* th, os_vm_address_t addr) +{ + /* Assuming that this is correct, it would warrant further comment, + * I think. Based on what our call site is doing, we have been + * tasked to check for the address of a lisp object; not merely any + * foreign address within the thread's area. Indeed, this used to + * be a check for control and binding stack only, rather than the + * full thread "struct". So shouldn't the THREAD_STRUCT_SIZE rather + * be (thread_control_stack_size+BINDING_STACK_SIZE) instead? That + * would also do away with the LISP_FEATURE_SB_THREAD case. Or does + * it simply not matter? --DFL */ + ptrdiff_t diff = ((char*)th->os_address)-(char*)addr; + return diff > (ptrdiff_t)0 && diff < (ptrdiff_t)THREAD_STRUCT_SIZE +#ifdef LISP_FEATURE_SB_THREAD + && addr != (os_vm_address_t) th->csp_around_foreign_call +#endif + ; +} + +static boolean +is_some_thread_local_addr(os_vm_address_t addr) +{ + boolean result = 0; +#ifdef LISP_FEATURE_SB_THREAD + struct thread *th; + pthread_mutex_lock(&all_threads_lock); for_each_thread(th) { - if(((os_vm_address_t)th->control_stack_start <= addr) && (addr < (os_vm_address_t)th->control_stack_end)) - return 1; - if(in_range_p(addr, (unsigned long)th->binding_stack_start, BINDING_STACK_SIZE)) - return 1; + if(is_thread_local_addr(th,addr)) { + result = 1; + break; + } } - return 0; + pthread_mutex_unlock(&all_threads_lock); +#endif + return result; } + /* A tiny bit of interrupt.c state we want our paws on. */ extern boolean internal_errors_enabled; +extern void exception_handler_wrapper(); + +void +c_level_backtrace(const char* header, int depth) +{ + void* frame; + int n = 0; + void** lastseh; + + for (lastseh = get_seh_frame(); lastseh && (lastseh!=(void*)-1); + lastseh = *lastseh); + + fprintf(stderr, "Backtrace: %s (thread %p)\n", header, this_thread); + for (frame = __builtin_frame_address(0); frame; frame=*(void**)frame) + { + if ((n++)>depth) + return; + fprintf(stderr, "[#%02d]: ebp = 0x%p, ret = 0x%p\n",n, + frame, ((void**)frame)[1]); + } +} + +#ifdef LISP_FEATURE_X86 +#define voidreg(ctxptr,name) ((void*)((ctxptr)->E##name)) +#else +#define voidreg(ctxptr,name) ((void*)((ctxptr)->R##name)) +#endif + + #if defined(LISP_FEATURE_X86) static int handle_single_step(os_context_t *ctx) @@ -322,7 +688,8 @@ handle_single_step(os_context_t *ctx) /* We are doing a displaced instruction. At least function * end breakpoints use this. */ - restore_breakpoint_from_single_step(ctx); + WITH_GC_AT_SAFEPOINTS_ONLY () /* Todo: Is it really gc-unsafe? */ + restore_breakpoint_from_single_step(ctx); return 0; } @@ -337,7 +704,7 @@ handle_single_step(os_context_t *ctx) #endif static int -handle_breakpoint_trap(os_context_t *ctx) +handle_breakpoint_trap(os_context_t *ctx, struct thread* self) { #ifdef LISP_FEATURE_UD2_BREAKPOINTS if (((unsigned short *)((ctx->win32_context)->Eip))[0] != 0x0b0f) @@ -346,18 +713,39 @@ handle_breakpoint_trap(os_context_t *ctx) /* Unlike some other operating systems, Win32 leaves EIP * pointing to the breakpoint instruction. */ - ctx->Eip += TRAP_CODE_WIDTH; + (*os_context_pc_addr(ctx)) += TRAP_CODE_WIDTH; /* Now EIP points just after the INT3 byte and aims at the * 'kind' value (eg trap_Cerror). */ - unsigned char trap = *(unsigned char *)(*os_context_pc_addr(ctx)); + unsigned trap = *(unsigned char *)(*os_context_pc_addr(ctx)); + +#ifdef LISP_FEATURE_SB_THREAD + /* Before any other trap handler: gc_safepoint ensures that + inner alloc_sap for passing the context won't trap on + pseudo-atomic. */ + if (trap == trap_PendingInterrupt) { + /* Done everything needed for this trap, except EIP + adjustment */ + arch_skip_instruction(ctx); + thread_interrupted(ctx); + return 0; + } +#endif /* This is just for info in case the monitor wants to print an * approximation. */ - current_control_stack_pointer = + access_control_stack_pointer(self) = (lispobj *)*os_context_sp_addr(ctx); - handle_trap(ctx, trap); + WITH_GC_AT_SAFEPOINTS_ONLY() { +#if defined(LISP_FEATURE_SB_THREAD) + block_blockable_signals(0,&ctx->sigmask); +#endif + handle_trap(ctx, trap); +#if defined(LISP_FEATURE_SB_THREAD) + thread_sigmask(SIG_SETMASK,&ctx->sigmask,NULL); +#endif + } /* Done, we're good to go! */ return 0; @@ -366,50 +754,109 @@ handle_breakpoint_trap(os_context_t *ctx) static int handle_access_violation(os_context_t *ctx, EXCEPTION_RECORD *exception_record, - void *fault_address) + void *fault_address, + struct thread* self) { - if (!(is_valid_lisp_addr(fault_address) - || is_linkage_table_addr(fault_address))) - return -1; + CONTEXT *win32_context = ctx->win32_context; - /* Pick off GC-related memory fault next. */ - MEMORY_BASIC_INFORMATION mem_info; +#if defined(LISP_FEATURE_X86) + odxprint(pagefaults, + "SEGV. ThSap %p, Eip %p, Esp %p, Esi %p, Edi %p, " + "Addr %p Access %d\n", + self, + win32_context->Eip, + win32_context->Esp, + win32_context->Esi, + win32_context->Edi, + fault_address, + exception_record->ExceptionInformation[0]); +#else + odxprint(pagefaults, + "SEGV. ThSap %p, Eip %p, Esp %p, Esi %p, Edi %p, " + "Addr %p Access %d\n", + self, + win32_context->Rip, + win32_context->Rsp, + win32_context->Rsi, + win32_context->Rdi, + fault_address, + exception_record->ExceptionInformation[0]); +#endif - if (!VirtualQuery(fault_address, &mem_info, sizeof mem_info)) { - fprintf(stderr, "VirtualQuery: 0x%lx.\n", GetLastError()); - lose("handle_exception: VirtualQuery failure"); + /* Stack: This case takes care of our various stack exhaustion + * protect pages (with the notable exception of the control stack!). */ + if (self && local_thread_stack_address_p(fault_address)) { + if (handle_guard_page_triggered(ctx, fault_address)) + return 0; /* gc safety? */ + goto try_recommit; } - if (mem_info.State == MEM_RESERVE) { - /* First use new page, lets get some memory for it. */ - if (!VirtualAlloc(mem_info.BaseAddress, os_vm_page_size, - MEM_COMMIT, PAGE_EXECUTE_READWRITE)) { - fprintf(stderr, "VirtualAlloc: 0x%lx.\n", GetLastError()); - lose("handle_exception: VirtualAlloc failure"); + /* Safepoint pages */ +#ifdef LISP_FEATURE_SB_THREAD + if (fault_address == (void *) GC_SAFEPOINT_PAGE_ADDR) { + thread_in_lisp_raised(ctx); + return 0; + } + + if ((((u64)fault_address) == ((u64)self->csp_around_foreign_call))){ + thread_in_safety_transition(ctx); + return 0; + } +#endif + /* dynamic space */ + page_index_t index = find_page_index(fault_address); + if (index != -1) { + /* + * Now, if the page is supposedly write-protected and this + * is a write, tell the gc that it's been hit. + */ + if (page_table[index].write_protected) { + gencgc_handle_wp_violation(fault_address); } else { - /* - * Now, if the page is supposedly write-protected and this - * is a write, tell the gc that it's been hit. - * - * FIXME: Are we supposed to fall-through to the Lisp - * exception handler if the gc doesn't take the wp violation? - */ - if (exception_record->ExceptionInformation[0]) { - page_index_t index = find_page_index(fault_address); - if ((index != -1) && (page_table[index].write_protected)) { - gencgc_handle_wp_violation(fault_address); - } - } - return 0; + AVER(VirtualAlloc(PTR_ALIGN_DOWN(fault_address,os_vm_page_size), + os_vm_page_size, + MEM_COMMIT, PAGE_EXECUTE_READWRITE)); } - - } else if (gencgc_handle_wp_violation(fault_address)) { - /* gc accepts the wp violation, so resume where we left off. */ return 0; } + if (fault_address == undefined_alien_address) + return -1; + + /* linkage table or a "valid_lisp_addr" outside of dynamic space (?) */ + if (is_linkage_table_addr(fault_address) + || is_valid_lisp_addr(fault_address)) + goto try_recommit; + return -1; + +try_recommit: + /* First use of a new page, lets get some memory for it. */ + +#if defined(LISP_FEATURE_X86) + AVER(VirtualAlloc(PTR_ALIGN_DOWN(fault_address,os_vm_page_size), + os_vm_page_size, + MEM_COMMIT, PAGE_EXECUTE_READWRITE) + ||(fprintf(stderr,"Unable to recommit addr %p eip 0x%08lx\n", + fault_address, win32_context->Eip) && + (c_level_backtrace("BT",5), + fake_foreign_function_call(ctx), + lose("Lispy backtrace"), + 0))); +#else + AVER(VirtualAlloc(PTR_ALIGN_DOWN(fault_address,os_vm_page_size), + os_vm_page_size, + MEM_COMMIT, PAGE_EXECUTE_READWRITE) + ||(fprintf(stderr,"Unable to recommit addr %p eip 0x%p\n", + fault_address, (void*)win32_context->Rip) && + (c_level_backtrace("BT",5), + fake_foreign_function_call(ctx), + lose("Lispy backtrace"), + 0))); +#endif + + return 0; } static void @@ -427,37 +874,48 @@ signal_internal_error_or_lose(os_context_t *ctx, lispobj context_sap; lispobj exception_record_sap; + asm("fnclex"); /* We're making the somewhat arbitrary decision that having * internal errors enabled means that lisp has sufficient * marbles to be able to handle exceptions, but exceptions * aren't supposed to happen during cold init or reinit * anyway. */ +#if defined(LISP_FEATURE_SB_THREAD) + block_blockable_signals(0,&ctx->sigmask); +#endif fake_foreign_function_call(ctx); - /* Allocate the SAP objects while the "interrupts" are still - * disabled. */ - context_sap = alloc_sap(ctx); - exception_record_sap = alloc_sap(exception_record); - - /* The exception system doesn't automatically clear pending - * exceptions, so we lose as soon as we execute any FP - * instruction unless we do this first. */ - _clearfp(); - - /* Call into lisp to handle things. */ - funcall2(StaticSymbolFunction(HANDLE_WIN32_EXCEPTION), context_sap, - exception_record_sap); + WITH_GC_AT_SAFEPOINTS_ONLY() { + /* Allocate the SAP objects while the "interrupts" are still + * disabled. */ + context_sap = alloc_sap(ctx); + exception_record_sap = alloc_sap(exception_record); +#if defined(LISP_FEATURE_SB_THREAD) + thread_sigmask(SIG_SETMASK, &ctx->sigmask, NULL); +#endif + /* The exception system doesn't automatically clear pending + * exceptions, so we lose as soon as we execute any FP + * instruction unless we do this first. */ + /* Call into lisp to handle things. */ + funcall2(StaticSymbolFunction(HANDLE_WIN32_EXCEPTION), + context_sap, + exception_record_sap); + } /* If Lisp doesn't nlx, we need to put things back. */ undo_fake_foreign_function_call(ctx); - +#if defined(LISP_FEATURE_SB_THREAD) + thread_sigmask(SIG_SETMASK, &ctx->sigmask, NULL); +#endif /* FIXME: HANDLE-WIN32-EXCEPTION should be allowed to decline */ return; } - fprintf(stderr, "Exception Code: 0x%lx.\n", exception_record->ExceptionCode); - fprintf(stderr, "Faulting IP: 0x%lx.\n", (DWORD)exception_record->ExceptionAddress); + fprintf(stderr, "Exception Code: 0x%p.\n", + (void*)(intptr_t)exception_record->ExceptionCode); + fprintf(stderr, "Faulting IP: 0x%p.\n", + (void*)(intptr_t)exception_record->ExceptionAddress); if (exception_record->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) { MEMORY_BASIC_INFORMATION mem_info; @@ -465,9 +923,9 @@ signal_internal_error_or_lose(os_context_t *ctx, fprintf(stderr, "page status: 0x%lx.\n", mem_info.State); } - fprintf(stderr, "Was writing: %ld, where: 0x%lx.\n", - exception_record->ExceptionInformation[0], - (DWORD)fault_address); + fprintf(stderr, "Was writing: %p, where: 0x%p.\n", + (void*)exception_record->ExceptionInformation[0], + fault_address); } fflush(stderr); @@ -478,31 +936,56 @@ signal_internal_error_or_lose(os_context_t *ctx, /* * A good explanation of the exception handling semantics is - * http://win32assembly.online.fr/Exceptionhandling.html . + * http://win32assembly.online.fr/Exceptionhandling.html (possibly defunct) + * or: + * http://www.microsoft.com/msj/0197/exception/exception.aspx */ EXCEPTION_DISPOSITION handle_exception(EXCEPTION_RECORD *exception_record, struct lisp_exception_frame *exception_frame, - CONTEXT *ctx, + CONTEXT *win32_context, void *dispatcher_context) { + if (!win32_context) + /* Not certain why this should be possible, but let's be safe... */ + return ExceptionContinueSearch; + if (exception_record->ExceptionFlags & (EH_UNWINDING | EH_EXIT_UNWIND)) { /* If we're being unwound, be graceful about it. */ /* Undo any dynamic bindings. */ unbind_to_here(exception_frame->bindstack_pointer, arch_os_get_current_thread()); - return ExceptionContinueSearch; } + DWORD lastError = GetLastError(); + DWORD lastErrno = errno; DWORD code = exception_record->ExceptionCode; + struct thread* self = arch_os_get_current_thread(); + + os_context_t context, *ctx = &context; + context.win32_context = win32_context; +#if defined(LISP_FEATURE_SB_THREAD) + context.sigmask = self ? self->os_thread->blocked_signal_set : 0; +#endif /* For EXCEPTION_ACCESS_VIOLATION only. */ void *fault_address = (void *)exception_record->ExceptionInformation[1]; - /* This function will become unwieldy. Let's cut it down into + odxprint(seh, + "SEH: rec %p, ctxptr %p, rip %p, fault %p\n" + "... code %p, rcx %p, fp-tags %p\n\n", + exception_record, + win32_context, + voidreg(win32_context,ip), + fault_address, + (void*)(intptr_t)code, + voidreg(win32_context,cx), + win32_context->FloatSave.TagWord); + + /* This function had become unwieldy. Let's cut it down into * pieces based on the different exception codes. Each exception * code handler gets the chance to decline by returning non-zero if it * isn't happy: */ @@ -511,11 +994,11 @@ handle_exception(EXCEPTION_RECORD *exception_record, switch (code) { case EXCEPTION_ACCESS_VIOLATION: rc = handle_access_violation( - ctx, exception_record, fault_address); + ctx, exception_record, fault_address, self); break; case SBCL_EXCEPTION_BREAKPOINT: - rc = handle_breakpoint_trap(ctx); + rc = handle_breakpoint_trap(ctx, self); break; #if defined(LISP_FEATURE_X86) @@ -532,20 +1015,23 @@ handle_exception(EXCEPTION_RECORD *exception_record, /* All else failed, drop through to the lisp-side exception handler. */ signal_internal_error_or_lose(ctx, exception_record, fault_address); + errno = lastErrno; + SetLastError(lastError); return ExceptionContinueExecution; } void wos_install_interrupt_handlers(struct lisp_exception_frame *handler) { +#ifdef LISP_FEATURE_X86 handler->next_frame = get_seh_frame(); - handler->handler = &handle_exception; + handler->handler = (void*)exception_handler_wrapper; set_seh_frame(handler); -} - -void bcopy(const void *src, void *dest, size_t n) -{ - MoveMemory(dest, src, n); +#else + static int once = 0; + if (!once++) + AddVectoredExceptionHandler(1,veh); +#endif } /* @@ -680,4 +1166,61 @@ os_get_runtime_executable_path(int external) return copied_string(path); } +#ifdef LISP_FEATURE_SB_THREAD + +int +win32_wait_object_or_signal(HANDLE waitFor) +{ + struct thread * self = arch_os_get_current_thread(); + HANDLE handles[2]; + handles[0] = waitFor; + handles[1] = self->private_events.events[1]; + return + WaitForMultipleObjects(2,handles, FALSE,INFINITE); +} + +/* + * Portability glue for win32 waitable timers. + * + * One may ask: Why is there a wrapper in C when the calls are so + * obvious that Lisp could do them directly (as it did on Windows)? + * + * But the answer is that on POSIX platforms, we now emulate the win32 + * calls and hide that emulation behind this os_* abstraction. + */ +HANDLE +os_create_wtimer() +{ + return CreateWaitableTimer(0, 0, 0); +} + +int +os_wait_for_wtimer(HANDLE handle) +{ + return win32_wait_object_or_signal(handle); +} + +void +os_close_wtimer(HANDLE handle) +{ + CloseHandle(handle); +} + +void +os_set_wtimer(HANDLE handle, int sec, int nsec) +{ + /* in units of 100ns, i.e. 0.1us. Negative for relative semantics. */ + long long dueTime + = -(((long long) sec) * 10000000 + + ((long long) nsec + 99) / 100); + SetWaitableTimer(handle, (LARGE_INTEGER*) &dueTime, 0, 0, 0, 0); +} + +void +os_cancel_wtimer(HANDLE handle) +{ + CancelWaitableTimer(handle); +} +#endif + /* EOF */ diff --git a/src/runtime/win32-os.h b/src/runtime/win32-os.h index a09c37f..9fca92e 100644 --- a/src/runtime/win32-os.h +++ b/src/runtime/win32-os.h @@ -9,36 +9,69 @@ * files for more information. */ +#ifndef SBCL_INCLUDED_WIN32_OS_H +#define SBCL_INCLUDED_WIN32_OS_H + +#define WIN32_LEAN_AND_MEAN +#include +#include + #include +#include +#include #include #include #include #include #include -#define WIN32_LEAN_AND_MEAN -#include #include "target-arch-os.h" #include "target-arch.h" +#ifdef LISP_FEATURE_SB_THREAD +#include "pthreads_win32.h" +/* prevent inclusion of a mingw semaphore.h */ +#define CANNOT_USE_POSIX_SEM_T +typedef sem_t os_sem_t; +#else +typedef void *siginfo_t; +#endif + +/* Note: This typedef will moved to runtime.h when AMD64 changes are being + * merged. */ +typedef unsigned long uword_t; + typedef LPVOID os_vm_address_t; -typedef size_t os_vm_size_t; -typedef off_t os_vm_offset_t; +typedef uword_t os_vm_size_t; +typedef intptr_t os_vm_offset_t; typedef int os_vm_prot_t; -typedef void *siginfo_t; - /* These are used as bitfields, but Win32 doesn't work that way, so we do a translation. */ #define OS_VM_PROT_READ 1 #define OS_VM_PROT_WRITE 2 #define OS_VM_PROT_EXECUTE 4 +#define os_open_core(file,mode) win32_open_for_mmap(file) +#define HAVE_os_open_core + +#define os_fopen_runtime(file,mode) win32_fopen_runtime() +#define HAVE_os_fopen_runtime + +extern int os_number_of_processors; +#define HAVE_os_number_of_processors + +extern int win32_open_for_mmap(const char* file); +extern FILE* win32_fopen_runtime(); + +#define OUR_TLS_INDEX 63 #define SIG_MEMORY_FAULT SIGSEGV #define SIG_STOP_FOR_GC (SIGRTMIN+1) #define SIG_DEQUEUE (SIGRTMIN+2) #define SIG_THREAD_EXIT (SIGRTMIN+3) +#define FPU_STATE_SIZE 27 + struct lisp_exception_frame { struct lisp_exception_frame *next_frame; void *handler; @@ -48,3 +81,12 @@ struct lisp_exception_frame { void wos_install_interrupt_handlers(struct lisp_exception_frame *handler); char *dirname(char *path); +void os_invalidate_free(os_vm_address_t addr, os_vm_size_t len); + +#define bcopy(src,dest,n) memmove(dest,src,n) + +struct thread; +void** os_get_csp(struct thread* th); + + +#endif /* SBCL_INCLUDED_WIN32_OS_H */ diff --git a/src/runtime/win32-thread-private-events.h b/src/runtime/win32-thread-private-events.h new file mode 100644 index 0000000..56802df --- /dev/null +++ b/src/runtime/win32-thread-private-events.h @@ -0,0 +1,13 @@ +#ifndef _WIN32_THREAD_PRIVATE_EVENTS_H_ +#define _WIN32_THREAD_PRIVATE_EVENTS_H_ + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include + +struct private_events { + HANDLE events[2]; +}; + +#endif /* _WIN32_THREAD_PRIVATE_EVENTS_H_ */ diff --git a/src/runtime/wrap.c b/src/runtime/wrap.c index c17b0f1..3f174f3 100644 --- a/src/runtime/wrap.c +++ b/src/runtime/wrap.c @@ -118,7 +118,7 @@ char * sb_realpath (char *path) if ((ret = calloc(MAX_PATH, sizeof(char))) == NULL) return NULL; - if (GetFullPathName(path, MAX_PATH, ret, cp) == 0) { + if (GetFullPathName(path, MAX_PATH, ret, &cp) == 0) { errnum = errno; free(ret); errno = errnum; @@ -423,10 +423,18 @@ int select(int top_fd, DWORD *read_set, DWORD *write_set, DWORD *except_set, tim * yet, however, and the closest we can easily get to a timeval is the * seconds part. So that's what we do. */ +#define UNIX_EPOCH_FILETIME 116444736000000000ULL + int gettimeofday(long *timeval, long *timezone) { - timeval[0] = time(NULL); - timeval[1] = 0; + FILETIME ft; + ULARGE_INTEGER uft; + GetSystemTimeAsFileTime(&ft); + uft.LowPart = ft.dwLowDateTime; + uft.HighPart = ft.dwHighDateTime; + uft.QuadPart -= UNIX_EPOCH_FILETIME; + timeval[0] = uft.QuadPart / 10000000; + timeval[1] = (uft.QuadPart % 10000000)/10; return 0; } diff --git a/src/runtime/x86-arch.c b/src/runtime/x86-arch.c index f85ed07..dfbf908 100644 --- a/src/runtime/x86-arch.c +++ b/src/runtime/x86-arch.c @@ -18,7 +18,6 @@ #include "os.h" #include "arch.h" #include "lispregs.h" -#include "signal.h" #include "alloc.h" #include "interrupt.h" #include "interr.h" @@ -78,7 +77,7 @@ context_eflags_addr(os_context_t *context) #elif defined __NetBSD__ return &(context->uc_mcontext.__gregs[_REG_EFL]); #elif defined LISP_FEATURE_WIN32 - return (int *)&context->EFlags; + return (int *)&context->win32_context->EFlags; #else #error unsupported OS #endif diff --git a/src/runtime/x86-arch.h b/src/runtime/x86-arch.h index 1200ef1..582bfc7 100644 --- a/src/runtime/x86-arch.h +++ b/src/runtime/x86-arch.h @@ -21,6 +21,17 @@ #define COMPILER_BARRIER \ do { __asm__ __volatile__ ( "" : : : "memory"); } while (0) +#ifdef LISP_FEATURE_WIN32 +extern int os_number_of_processors; +#define yield_on_uniprocessor() \ + do { if (os_number_of_processors<=1) SwitchToThread(); } while(0) +#else +/* Stubs are better than ifdef EVERYWHERE. */ +#define yield_on_uniprocessor() \ + do {} while(0) +#endif + + static inline void get_spinlock(volatile lispobj *word, unsigned long value) { @@ -36,13 +47,16 @@ get_spinlock(volatile lispobj *word, unsigned long value) : "r" (value), "m" (*word) : "memory", "cc"); #else + if (eax!=0) { + asm volatile("rep; nop"); + } asm volatile ("xor %0,%0\n\ lock cmpxchg %1,%2" : "=a" (eax) : "r" (value), "m" (*word) : "memory", "cc"); #endif - + yield_on_uniprocessor(); } while(eax!=0); #else *word=value; diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index 4987db6..0171116 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -87,10 +87,10 @@ * |XXXXXXXX| e4e * TLS ends here> ,- |XXXXXXXX| e4f = TEB_STATIC_TLS_SLOTS_OFFSET+63 * / z z - * | ---------- - * | - * | big blob of SBCL-specific thread-local data - * | |----------------------------------------| + * | ---------- "os_address" ----. + * | | + * | big blob of SBCL-specific thread-local data | + * | |----------------------------------------| <--' * | | CONTROL, BINDING, ALIEN STACK | * | z z * ================== | |----------------------------------------| @@ -424,8 +424,13 @@ Ldone: #ifdef LISP_FEATURE_WIN32 /* Establish an SEH frame. */ #ifdef LISP_FEATURE_SB_THREAD - /* FIXME: need to save BSP here. */ -#error "need to save BSP here, but don't know how yet." + /* Save binding stack pointer */ + subl $4, %esp + pushl %eax + movl SBCL_THREAD_BASE_EA, %eax + movl THREAD_BINDING_STACK_POINTER_OFFSET(%eax), %eax + movl %eax, 4(%esp) + popl %eax #else pushl BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET #endif @@ -451,6 +456,7 @@ LsingleValue: #ifdef LISP_FEATURE_WIN32 /* Remove our SEH frame. */ + mov %fs:0,%esp popl %fs:0 add $8, %esp #endif diff --git a/src/runtime/x86-win32-os.c b/src/runtime/x86-win32-os.c index fc076de..aaf40d7 100644 --- a/src/runtime/x86-win32-os.c +++ b/src/runtime/x86-win32-os.c @@ -22,7 +22,6 @@ #include #include -#include "./signal.h" #include "os.h" #include "arch.h" #include "globals.h" @@ -32,15 +31,14 @@ #include "sbcl.h" #include -#include +#include "runtime.h" #include #include #include #include "thread.h" /* dynamic_values_bytes */ - +#include "cpputil.h" /* PTR_ALIGN... */ #include "validate.h" -size_t os_vm_page_size; int arch_os_thread_init(struct thread *thread) { @@ -63,7 +61,14 @@ int arch_os_thread_init(struct thread *thread) fprintf(stderr, "VirtualQuery: 0x%lx.\n", GetLastError()); lose("Could not query stack memory information."); } - cur_stack_start = stack_memory.AllocationBase; + + cur_stack_start = stack_memory.AllocationBase + /* Kludge: Elide SBCL's guard page from the range. (The + * real solution is to not establish SBCL's guard page in + * the first place. The trick will be to find a good time + * at which to re-enable the Windows guard page when + * returning from it though.) */ + + os_vm_page_size; /* We use top_exception_frame rather than cur_stack_end to * elide the last few (boring) stack entries at the bottom of @@ -86,42 +91,8 @@ int arch_os_thread_init(struct thread *thread) } #ifdef LISP_FEATURE_SB_THREAD - /* this must be called from a function that has an exclusive lock - * on all_threads - */ - struct user_desc ldt_entry = { - 1, 0, 0, /* index, address, length filled in later */ - 1, MODIFY_LDT_CONTENTS_DATA, 0, 0, 0, 1 - }; - int n; - get_spinlock(&modify_ldt_lock,thread); - n=modify_ldt(0,local_ldt_copy,sizeof local_ldt_copy); - /* get next free ldt entry */ - - if(n) { - u32 *p; - for(n=0,p=local_ldt_copy;*p;p+=LDT_ENTRY_SIZE/sizeof(u32)) - n++; - } - ldt_entry.entry_number=n; - ldt_entry.base_addr=(unsigned long) thread; - ldt_entry.limit=dynamic_values_bytes; - ldt_entry.limit_in_pages=0; - if (modify_ldt (1, &ldt_entry, sizeof (ldt_entry)) != 0) { - modify_ldt_lock=0; - /* modify_ldt call failed: something magical is not happening */ - return -1; - } - __asm__ __volatile__ ("movw %w0, %%fs" : : "q" - ((n << 3) /* selector number */ - + (1 << 2) /* TI set = LDT */ - + 3)); /* privilege level */ - thread->tls_cookie=n; - modify_ldt_lock=0; - - if(n<0) return 0; + TlsSetValue(OUR_TLS_INDEX,thread); #endif - return 1; } @@ -133,51 +104,60 @@ int arch_os_thread_cleanup(struct thread *thread) { return 0; } +#if defined(LISP_FEATURE_SB_THREAD) +sigset_t *os_context_sigmask_addr(os_context_t *context) +{ + return &context->sigmask; +} +#endif + os_context_register_t * os_context_register_addr(os_context_t *context, int offset) { - switch(offset) { - case reg_EAX: return &context->Eax; - case reg_ECX: return &context->Ecx; - case reg_EDX: return &context->Edx; - case reg_EBX: return &context->Ebx; - case reg_ESP: return &context->Esp; - case reg_EBP: return &context->Ebp; - case reg_ESI: return &context->Esi; - case reg_EDI: return &context->Edi; - default: return 0; - } + static const size_t offsets[8] = { + offsetof(CONTEXT,Eax), + offsetof(CONTEXT,Ecx), + offsetof(CONTEXT,Edx), + offsetof(CONTEXT,Ebx), + offsetof(CONTEXT,Esp), + offsetof(CONTEXT,Ebp), + offsetof(CONTEXT,Esi), + offsetof(CONTEXT,Edi), + }; + return + (offset >= 0 && offset < 16) ? + ((void*)(context->win32_context)) + offsets[offset>>1] : 0; } os_context_register_t * os_context_pc_addr(os_context_t *context) { - return &context->Eip; /* REG_EIP */ + return (void*)&context->win32_context->Eip; /* REG_EIP */ } os_context_register_t * os_context_sp_addr(os_context_t *context) { - return &context->Esp; /* REG_UESP */ + return (void*)&context->win32_context->Esp; /* REG_UESP */ } os_context_register_t * os_context_fp_addr(os_context_t *context) { - return &context->Ebp; /* REG_EBP */ + return (void*)&context->win32_context->Ebp; /* REG_EBP */ } unsigned long os_context_fp_control(os_context_t *context) { - return ((((context->FloatSave.ControlWord) & 0xffff) ^ 0x3f) | - (((context->FloatSave.StatusWord) & 0xffff) << 16)); + return ((((context->win32_context->FloatSave.ControlWord) & 0xffff) ^ 0x3f) | + (((context->win32_context->FloatSave.StatusWord) & 0xffff) << 16)); } void os_restore_fp_control(os_context_t *context) { - asm ("fldcw %0" : : "m" (context->FloatSave.ControlWord)); + asm ("fldcw %0" : : "m" (context->win32_context->FloatSave.ControlWord)); } void diff --git a/src/runtime/x86-win32-os.h b/src/runtime/x86-win32-os.h index e703338..c7ffeca 100644 --- a/src/runtime/x86-win32-os.h +++ b/src/runtime/x86-win32-os.h @@ -1,7 +1,13 @@ #ifndef _X86_WIN32_OS_H #define _X86_WIN32_OS_H -typedef CONTEXT os_context_t; +typedef struct os_context_t { + CONTEXT* win32_context; +#if defined(LISP_FEATURE_SB_THREAD) + sigset_t sigmask; +#endif +} os_context_t; + typedef long os_context_register_t; static inline os_context_t *arch_os_get_context(void **void_context) @@ -9,7 +15,15 @@ static inline os_context_t *arch_os_get_context(void **void_context) return (os_context_t *) *void_context; } +static inline DWORD NT_GetLastError() { + DWORD result; + asm("movl %%fs:0x0D,%0":"=r"(result)); + return result; +} + unsigned long os_context_fp_control(os_context_t *context); void os_restore_fp_control(os_context_t *context); +os_context_register_t * os_context_fp_addr(os_context_t *context); + #endif /* _X86_WIN32_OS_H */ diff --git a/tools-for-build/grovel-headers.c b/tools-for-build/grovel-headers.c index 9746e12..6e93419 100644 --- a/tools-for-build/grovel-headers.c +++ b/tools-for-build/grovel-headers.c @@ -205,6 +205,39 @@ main(int argc, char *argv[]) DEFTYPE("uint", UINT); DEFTYPE("ulong", ULONG); + printf(";;; File Desired Access\n"); + defconstant ("FILE_GENERIC_READ", FILE_GENERIC_READ); + defconstant ("FILE_GENERIC_WRITE", FILE_GENERIC_WRITE); + defconstant ("FILE_GENERIC_EXECUTE", FILE_GENERIC_EXECUTE); + defconstant ("FILE_SHARE_READ", FILE_SHARE_READ); + defconstant ("FILE_SHARE_WRITE", FILE_SHARE_WRITE); + defconstant ("FILE_SHARE_DELETE", FILE_SHARE_DELETE); + + printf(";;; File Creation Dispositions\n"); + defconstant("CREATE_NEW", CREATE_NEW); + defconstant("CREATE_ALWAYS", CREATE_ALWAYS); + defconstant("OPEN_EXISTING", OPEN_EXISTING); + defconstant("OPEN_ALWAYS", OPEN_ALWAYS); + defconstant("TRUNCATE_EXISTING", TRUNCATE_EXISTING); + + printf(";;; Desired Access\n"); + defconstant("ACCESS_GENERIC_READ", GENERIC_READ); + defconstant("ACCESS_GENERIC_WRITE", GENERIC_WRITE); + defconstant("ACCESS_GENERIC_EXECUTE", GENERIC_EXECUTE); + defconstant("ACCESS_GENERIC_ALL", GENERIC_ALL); + defconstant("ACCESS_FILE_APPEND_DATA", FILE_APPEND_DATA); + defconstant("ACCESS_DELETE", DELETE); + + printf(";;; Handle Information Flags\n"); + defconstant("HANDLE_FLAG_INHERIT", HANDLE_FLAG_INHERIT); + defconstant("HANDLE_FLAG_PROTECT_FROM_CLOSE", HANDLE_FLAG_PROTECT_FROM_CLOSE); + + printf(";;; Standard Handle Keys\n"); + defconstant("STD_INPUT_HANDLE", STD_INPUT_HANDLE); + defconstant("STD_OUTPUT_HANDLE", STD_OUTPUT_HANDLE); + defconstant("STD_ERROR_HANDLE", STD_ERROR_HANDLE); + + /* FIXME: SB-UNIX and SB-WIN32 really need to be untangled. */ printf("(in-package \"SB!UNIX\")\n\n"); printf(";;; Unix-like constants and types on Windows\n"); @@ -216,11 +249,13 @@ main(int argc, char *argv[]) defconstant("o_append", _O_APPEND); defconstant("o_excl", _O_EXCL); defconstant("o_binary", _O_BINARY); + defconstant("o_noinherit", _O_NOINHERIT); defconstant("enoent", ENOENT); defconstant("eexist", EEXIST); defconstant("eintr", EINTR); defconstant("eagain", EAGAIN); + defconstant("ebadf", EBADF); defconstant("s-ifmt", S_IFMT); defconstant("s-ifdir", S_IFDIR);