From: NIIMI Satoshi Date: Tue, 18 Sep 2007 06:43:42 +0000 (+0000) Subject: 1.0.9.62: Performance and stability improvement of threading on FreeBSD X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=9a19ce460a70a6c1de36095b3e2621116b91cc80;p=sbcl.git 1.0.9.62: Performance and stability improvement of threading on FreeBSD * Use GCC's Thread-Local Storage to store current thread. (Tested on Linux x86 and x86-64, too) * Restore lisp level TLS segment register at interrupt handler. --- diff --git a/make-config.sh b/make-config.sh index 605b352..020af64 100644 --- a/make-config.sh +++ b/make-config.sh @@ -198,8 +198,9 @@ case "$sbcl_os" in printf ' :elf' >> $ltf printf ' :freebsd' >> $ltf printf ' :sb-pthread-futex' >> $ltf + printf ' :gcc-tls' >> $ltf if [ $sbcl_arch = "x86" ]; then - printf ' :restore-tls-segment-register-from-tls' >> $ltf + printf ' :restore-tls-segment-register-from-context' >> $ltf fi link_or_copy Config.$sbcl_arch-freebsd Config ;; diff --git a/src/runtime/globals.c b/src/runtime/globals.c index 2b397cb..07d56b5 100644 --- a/src/runtime/globals.c +++ b/src/runtime/globals.c @@ -52,7 +52,7 @@ boolean stop_the_world=0; * is done). For the GENCGC, it always points to DYNAMIC_SPACE_START. */ lispobj *current_dynamic_space; -#if defined(LISP_FEATURE_SB_THREAD) +#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_GCC_TLS) pthread_key_t specials=0; #endif @@ -71,7 +71,7 @@ void globals_init(void) foreign_function_call_active = 1; #endif -#ifdef LISP_FEATURE_SB_THREAD +#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_GCC_TLS) pthread_key_create(&specials,0); #endif } diff --git a/src/runtime/globals.h b/src/runtime/globals.h index de4b61f..9dca797 100644 --- a/src/runtime/globals.h +++ b/src/runtime/globals.h @@ -41,7 +41,7 @@ extern size_t dynamic_space_size; #endif extern char **ENVIRON; -#if defined(LISP_FEATURE_SB_THREAD) +#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_GCC_TLS) extern pthread_key_t specials; #endif diff --git a/src/runtime/thread.c b/src/runtime/thread.c index 650d515..5b8ec4a 100644 --- a/src/runtime/thread.c +++ b/src/runtime/thread.c @@ -91,6 +91,9 @@ pthread_mutex_t all_threads_lock = PTHREAD_MUTEX_INITIALIZER; #ifdef LOCK_CREATE_THREAD static pthread_mutex_t create_thread_lock = PTHREAD_MUTEX_INITIALIZER; #endif +#ifdef LISP_FEATURE_GCC_TLS +__thread struct thread *current_thread; +#endif #endif #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64) diff --git a/src/runtime/thread.h b/src/runtime/thread.h index 5d5eba9..ca50539 100644 --- a/src/runtime/thread.h +++ b/src/runtime/thread.h @@ -109,6 +109,10 @@ os_context_t *get_interrupt_context_for_thread(struct thread *th) [fixnum_value(SymbolValue(FREE_INTERRUPT_CONTEXT_INDEX,th)-1)]; } +#if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_GCC_TLS) +extern __thread struct thread *current_thread; +#endif + /* This is clearly per-arch and possibly even per-OS code, but we can't * put it somewhere sensible like x86-linux-os.c because it needs too * much stuff like struct thread and all_threads to be defined, which @@ -127,8 +131,13 @@ static inline struct thread *arch_os_get_current_thread(void) sel.rpl = USER_PRIV; sel.ti = SEL_LDT; __asm__ __volatile__ ("movw %w0, %%fs" : : "r"(sel)); -#elif defined(LISP_FEATURE_FREEBSD) && defined(LISP_FEATURE_RESTORE_TLS_SEGMENT_REGISTER_FROM_TLS) +#elif defined(LISP_FEATURE_FREEBSD) +#ifdef LISP_FEATURE_GCC_TLS + struct thread *th = current_thread; +#else struct thread *th = pthread_getspecific(specials); +#endif +#ifdef LISP_FEATURE_RESTORE_TLS_SEGMENT_REGISTER_FROM_TLS unsigned int sel = LSEL(th->tls_cookie, SEL_UPL); unsigned int fs = rfs(); @@ -136,6 +145,7 @@ static inline struct thread *arch_os_get_current_thread(void) * causes privilege checking and it takes long time. */ if (fs != sel) load_fs(sel); +#endif return th; #endif __asm__ __volatile__ ("movl %%fs:%c1,%0" : "=r" (me) @@ -143,7 +153,11 @@ static inline struct thread *arch_os_get_current_thread(void) } return me; #else +#ifdef LISP_FEATURE_GCC_TLS + return current_thread; +#else return pthread_getspecific(specials); +#endif #endif /* x86 */ #else return all_threads; diff --git a/src/runtime/x86-64-assem.S b/src/runtime/x86-64-assem.S index 8cfd582..42d21da 100644 --- a/src/runtime/x86-64-assem.S +++ b/src/runtime/x86-64-assem.S @@ -182,6 +182,10 @@ Lstack: push %rsi # push %rdx # #ifdef LISP_FEATURE_SB_THREAD +#ifdef LISP_FEATURE_GCC_TLS + movq %fs:0, %rax + movq GNAME(current_thread)@TPOFF(%rax), %r12 +#else #ifdef LISP_FEATURE_DARWIN mov GSYM(GNAME(specials)),%rdi #else @@ -190,6 +194,7 @@ Lstack: call GNAME(pthread_getspecific) mov %rax,%r12 #endif +#endif pop %rcx # num args pop %rbx # arg vector pop %rax # function ptr/lexenv diff --git a/src/runtime/x86-64-bsd-os.c b/src/runtime/x86-64-bsd-os.c index c1e3c32..5c29b22 100644 --- a/src/runtime/x86-64-bsd-os.c +++ b/src/runtime/x86-64-bsd-os.c @@ -89,8 +89,12 @@ os_flush_icache(os_vm_address_t address, os_vm_size_t length) int arch_os_thread_init(struct thread *thread) { stack_t sigstack; #ifdef LISP_FEATURE_SB_THREAD +#ifdef LISP_FEATURE_GCC_TLS + current_thread = thread; +#else pthread_setspecific(specials,thread); #endif +#endif #ifdef LISP_FEATURE_MACH_EXCEPTION_HANDLER mach_thread_init(THREAD_STRUCT_TO_EXCEPTION_PORT(thread)); diff --git a/src/runtime/x86-64-linux-os.c b/src/runtime/x86-64-linux-os.c index d6b1cb9..12d4266 100644 --- a/src/runtime/x86-64-linux-os.c +++ b/src/runtime/x86-64-linux-os.c @@ -56,8 +56,12 @@ size_t os_vm_page_size; int arch_os_thread_init(struct thread *thread) { stack_t sigstack; #ifdef LISP_FEATURE_SB_THREAD +#ifdef LISP_FEATURE_GCC_TLS + current_thread = thread; +#else pthread_setspecific(specials,thread); #endif +#endif #ifdef LISP_FEATURE_C_STACK_IS_CONTROL_STACK /* Signal handlers are run on the control stack, so if it is exhausted * we had better use an alternate stack for whatever signal tells us diff --git a/src/runtime/x86-bsd-os.c b/src/runtime/x86-bsd-os.c index 00a7ca6..d493800 100644 --- a/src/runtime/x86-bsd-os.c +++ b/src/runtime/x86-bsd-os.c @@ -169,8 +169,12 @@ int arch_os_thread_init(struct thread *thread) { load_fs(sel); thread->tls_cookie=n; +#ifdef LISP_FEATURE_GCC_TLS + current_thread = thread; +#else pthread_setspecific(specials,thread); #endif +#endif #ifdef LISP_FEATURE_C_STACK_IS_CONTROL_STACK stack_t sigstack; @@ -207,6 +211,14 @@ int arch_os_thread_cleanup(struct thread *thread) { #endif /* !LISP_FEATURE_DARWIN */ #if defined(LISP_FEATURE_FREEBSD) +#if defined(LISP_FEATURE_RESTORE_TLS_SEGMENT_REGISTER_FROM_CONTEXT) +void +os_restore_tls_segment_register(os_context_t *context) +{ + load_fs(context->uc_mcontext.mc_fs); +} +#endif + void os_restore_fp_control(os_context_t *context) { @@ -217,5 +229,11 @@ os_restore_fp_control(os_context_t *context) struct envxmm *ex = (struct envxmm*)(&context->uc_mcontext.mc_fpstate); asm ("fldcw %0" : : "m" (ex->en_cw)); #endif +#if defined(LISP_FEATURE_RESTORE_TLS_SEGMENT_REGISTER_FROM_CONTEXT) + /* Calling this function here may not be good idea. Or rename + * function name os_restore_fp_control to os_restore_context or + * so, to match the behavior? */ + os_restore_tls_segment_register(context); +#endif } #endif diff --git a/src/runtime/x86-linux-os.c b/src/runtime/x86-linux-os.c index d68e964..c15665d 100644 --- a/src/runtime/x86-linux-os.c +++ b/src/runtime/x86-linux-os.c @@ -107,8 +107,12 @@ int arch_os_thread_init(struct thread *thread) { pthread_mutex_unlock(&modify_ldt_lock); if(n<0) return 0; +#ifdef LISP_FEATURE_GCC_TLS + current_thread = thread; +#else pthread_setspecific(specials,thread); #endif +#endif #ifdef LISP_FEATURE_C_STACK_IS_CONTROL_STACK /* Signal handlers are run on the control stack, so if it is exhausted * we had better use an alternate stack for whatever signal tells us diff --git a/version.lisp-expr b/version.lisp-expr index 18f95b3..80aeb76 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.9.61" +"1.0.9.62"