#include <stdio.h>
#include <sys/param.h>
#include <sys/file.h>
+#include "sbcl.h"
#include "./signal.h"
#include "os.h"
#include "arch.h"
#include "interrupt.h"
#include "interr.h"
#include "lispregs.h"
-#include "sbcl.h"
+#include "runtime.h"
+#include "genesis/static-symbols.h"
+#include "genesis/fdefn.h"
+
#include <sys/socket.h>
#include <sys/utsname.h>
+#include <errno.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <unistd.h>
+#include <linux/version.h>
#include "validate.h"
+#include "thread.h"
+#include "gc.h"
+#if defined LISP_FEATURE_GENCGC
+#include "gencgc-internal.h"
+#else
+#include "cheneygc-internal.h"
+#endif
+
+#ifdef LISP_FEATURE_X86
+/* Prototype for personality(2). Done inline here since the header file
+ * for this isn't available on old versions of glibc. */
+int personality (unsigned long);
+#endif
+
size_t os_vm_page_size;
-#if defined GENCGC
-#include "gencgc.h"
+#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_SB_LUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+
+/* values taken from the kernel's linux/futex.h. This header file
+ doesn't exist in userspace, which is our excuse for not grovelling
+ them automatically */
+#define FUTEX_WAIT (0)
+#define FUTEX_WAKE (1)
+#define FUTEX_FD (2)
+#define FUTEX_REQUEUE (3)
+
+#define sys_futex sbcl_sys_futex
+/* Thin wrapper around the raw futex system call: forwards the futex
+ * word, operation code, value and optional relative timeout to
+ * syscall(SYS_futex, ...) and returns the result narrowed to int. */
+static inline int sys_futex (void *futex, int op, int val, struct timespec *rel)
+{
+    long result = syscall(SYS_futex, futex, op, val, rel);
+
+    return result;
+}
+
+/* Wait on the futex word at lock_word, passing oldval as the value the
+ * FUTEX_WAIT operation compares against.
+ *
+ * sec < 0 waits without a timeout; otherwise the wait is limited to
+ * sec seconds plus usec microseconds.
+ *
+ * Returns 0 when the FUTEX_WAIT call succeeds, 1 when it fails with
+ * ETIMEDOUT, and -1 for any other error (EWOULDBLOCK and friends), in
+ * which case the caller needs to check the lock itself.  A wait that
+ * fails with EINTR is restarted with the same timeout. */
+int
+futex_wait(int *lock_word, int oldval, long sec, unsigned long usec)
+{
+    struct timespec limit;
+    struct timespec *limit_ptr = 0;
+
+    if (sec >= 0) {
+        limit.tv_sec = sec;
+        limit.tv_nsec = usec * 1000;
+        limit_ptr = &limit;
+    }
+
+    for (;;) {
+        if (sys_futex(lock_word, FUTEX_WAIT, oldval, limit_ptr) == 0)
+            return 0;
+
+        switch (errno) {
+        case ETIMEDOUT:
+            return 1;
+        case EINTR:
+            /* spurious wakeup from interrupt: wait again */
+            continue;
+        default:
+            /* EWOULDBLOCK and others, need to check the lock */
+            return -1;
+        }
+    }
+}
+
+/* Issue FUTEX_WAKE on the futex word at lock_word with n as the value
+ * argument (per futex(2), the maximum number of waiters to wake) and
+ * return whatever the system call wrapper returns. */
+int
+futex_wake(int *lock_word, int n)
+{
+    int status = sys_futex(lock_word, FUTEX_WAKE, n, 0);
+
+    return status;
+}
#endif
+
\f
+int linux_sparc_siginfo_bug = 0;
-#ifdef sparc
-int early_kernel = 0;
+/* This variable was in real use for a few months, basically for
+ * storing autodetected information about whether the Linux
+ * installation was recent enough to support SBCL threads, and make
+ * some run-time decisions based on that. But this turned out to be
+ * unstable, so now we just flat-out refuse to start on the old installations
+ * when thread support has been compiled in.
+ *
+ * Unfortunately, in the meanwhile Slime started depending on this
+ * variable for deciding which communication style to use. So even
+ * though this variable looks unused, it shouldn't be deleted until
+ * it's no longer used in the versions of Slime that people are
+ * likely to download first. -- JES, 2006-06-07
+ */
+int linux_no_threads_p = 0;
+
+#ifdef LISP_FEATURE_SB_THREAD
+/* Report whether the C library's thread implementation identifies
+ * itself as NPTL.  Queries confstr(_CS_GNU_LIBPTHREAD_VERSION) and
+ * returns 1 if the resulting string contains "NPTL", otherwise 0
+ * (including when confstr() reports that no value is available). */
+int
+isnptl (void)
+{
+    size_t needed = confstr (_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
+    char *version;
+
+    if (needed == 0)
+        return 0;
+
+    /* The first call only measured the string; fetch it now. */
+    version = alloca (needed);
+    confstr (_CS_GNU_LIBPTHREAD_VERSION, version, needed);
+
+    return strstr (version, "NPTL") ? 1 : 0;
+}
#endif
-void os_init(void)
+
+/* Linux-specific runtime initialization.
+ *
+ * Parses the running kernel's release string from uname(), refuses to
+ * run on kernels older than 2.0, enables the SPARC siginfo workaround
+ * on pre-2.4 kernels, checks for futex and NPTL support when threads
+ * are compiled in, sets os_vm_page_size from BACKEND_PAGE_BYTES and,
+ * on x86 only, tries to re-execute the runtime with the
+ * ADDR_NO_RANDOMIZE personality flag set.
+ *
+ * argv, envp: handed unchanged to execve() if the runtime is
+ * re-executed.
+ */
+void
+os_init(char *argv[], char *envp[])
{
- /* Early versions of Linux don't support the mmap(..) functionality
- * that we need. */
- {
- struct utsname name;
- int major_version;
-#ifdef sparc
- int minor_version;
+ /* Conduct various version checks: do we have enough mmap(), is
+ * this a sparc running 2.2, can we do threads? */
+#if defined(LISP_FEATURE_SB_THREAD) && !defined(LISP_FEATURE_SB_LUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
+ int *futex=0;
+#endif
+ struct utsname name;
+ int major_version;
+ int minor_version = 0;
+ int patch_version = 0;
+ char *p;
+ uname(&name);
+ /* The release string is not guaranteed to contain three
+ * dot-separated fields (e.g. "3.0-ARCH"), so treat missing fields
+ * as 0 instead of stepping past a '.' that strchr() did not find. */
+ major_version = atoi(name.release);
+ p = strchr(name.release, '.');
+ if (p != NULL) {
+     minor_version = atoi(++p);
+     p = strchr(p, '.');
+     if (p != NULL)
+         patch_version = atoi(++p);
+ }
+ if (major_version<2) {
+ lose("linux kernel version too old: major version=%d (can't run in version < 2.0.0)\n",
+ major_version);
+ }
+ if (!(major_version>2 || minor_version >= 4)) {
+#ifdef LISP_FEATURE_SPARC
+ FSHOW((stderr,"linux kernel %d.%d predates 2.4;\n enabling workarounds for SPARC kernel bugs in signal handling.\n", major_version,minor_version));
+ linux_sparc_siginfo_bug = 1;
#endif
- uname(&name);
- major_version = atoi(name.release);
- if (major_version < 2) {
- lose("linux major version=%d (can't run in version < 2.0.0)",
- major_version);
- }
-#ifdef sparc
- /* KLUDGE: This will break if Linux moves to a uname() version number
- * that has more than one digit initially -- CSR, 2002-02-12 */
- minor_version = atoi(name.release+2);
- if (minor_version < 4) {
- fprintf(stderr,"linux minor version=%d;\n enabling workarounds for SPARC kernel bugs in signal handling.\n", minor_version);
- early_kernel = 1;
- }
+ }
+#ifdef LISP_FEATURE_SB_THREAD
+#if !defined(LISP_FEATURE_SB_LUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
+ futex_wait(futex,-1,-1,0);
+ if(errno==ENOSYS) {
+ lose("This version of SBCL is compiled with threading support, but your kernel\n"
+ "is too old to support this. Please use a more recent kernel or\n"
+ "a version of SBCL without threading support.\n");
+ }
#endif
+ if(! isnptl()) {
+ lose("This version of SBCL only works correctly with the NPTL threading\n"
+ "library. Please use a newer glibc, use an older SBCL, or stop using\n"
+ "LD_ASSUME_KERNEL\n");
}
+#endif
- os_vm_page_size = getpagesize();
- /* This could just as well be in arch_init(), but it's not. */
-#ifdef __i386__
- SET_FPU_CONTROL_WORD(0x1372|4|8|16|32); /* no interrupts */
+ /* Don't use getpagesize(), since it's not constant across Linux
+ * kernel versions on some architectures (for example PPC). FIXME:
+ * possibly the same should be done on other architectures too.
+ */
+ os_vm_page_size = BACKEND_PAGE_BYTES;
+
+ /* KLUDGE: Disable memory randomization on new Linux kernels
+ * by setting a personality flag and re-executing. (We need
+ * to re-execute, since the memory maps that can conflict with
+ * the SBCL spaces have already been done at this point).
+ *
+ * Since randomization is currently implemented only on x86 kernels,
+ * don't do this trick on other platforms.
+ */
+#ifdef LISP_FEATURE_X86
+ if ((major_version == 2
+ /* Some old kernels will apparently lose unsupported personality flags
+ * on exec() */
+ && ((minor_version == 6 && patch_version >= 11)
+ || (minor_version > 6)
+ /* This is what RHEL 3 reports */
+ || (minor_version == 4 && patch_version > 20)))
+ || major_version >= 3)
+ {
+ int pers = personality(0xffffffffUL);
+ /* 0x40000 aka. ADDR_NO_RANDOMIZE */
+ if (!(pers & 0x40000)) {
+ int retval = personality(pers | 0x40000);
+ /* Allegedly some Linux kernels (the reported case was
+ * "hardened Linux 2.6.7") won't set the new personality,
+ * but nor will they return -1 for an error. So as a
+ * workaround query the new personality...
+ */
+ int newpers = personality(0xffffffffUL);
+ /* ... and don't re-execute if either the setting resulted
+ * in an error or if the value didn't change. Otherwise
+ * this might result in an infinite loop.
+ */
+ if (retval != -1 && newpers != pers) {
+ /* Use /proc/self/exe instead of trying to figure out
+ * the executable path from PATH and argv[0], since
+ * that's unreliable. We follow the symlink instead of
+ * executing the file directly in order to prevent top
+ * from displaying the name of the process as "exe". */
+ char runtime[PATH_MAX+1];
+ int i = readlink("/proc/self/exe", runtime, PATH_MAX);
+ if (i != -1) {
+ runtime[i] = '\0';
+ execve(runtime, argv, envp);
+ }
+ }
+ /* Either changing the personality or execve() failed. Either
+ * way we might as well continue, and hope that the random
+ * memory maps are ok this time around.
+ */
+ fprintf(stderr, "WARNING: Couldn't re-execute SBCL with the proper personality flags (maybe /proc isn't mounted?). Trying to continue anyway.\n");
+ }
+ }
+ /* Use SSE detector. Recent versions of Linux enable SSE support
+ * on SSE capable CPUs. */
+ /* FIXME: Are there any old versions that do not support SSE? */
+ fast_bzero_pointer = fast_bzero_detect;
#endif
}
-/* various os_context_*_addr accessors moved to {x86,alpha}-linux-os.c
- * -dan 20010125
- */
-/* In Debian CMU CL ca. 2.4.9, it was possible to get an infinite
- * cascade of errors from do_mmap(..). This variable is a counter to
- * prevent that; when it counts down to zero, an error in do_mmap
- * causes the low-level monitor to be called. */
-int n_do_mmap_ignorable_errors = 3;
+#ifdef LISP_FEATURE_ALPHA
+/* The Alpha is a 64 bit CPU. SBCL is a 32 bit application. Due to all
+ * the places that assume we can get a pointer into a fixnum with no
+ * information loss, we have to make sure it allocates all its ram in the
+ * 0-2Gb region. */
-/* Return 0 for success. */
-static int
-do_mmap(os_vm_address_t *addr, os_vm_size_t len, int flags)
-{
- /* We *must* have the memory where we expect it. */
- os_vm_address_t old_addr = *addr;
-
- *addr = mmap(*addr, len, OS_VM_PROT_ALL, flags, -1, 0);
- if (*addr == MAP_FAILED ||
- ((old_addr != NULL) && (*addr != old_addr))) {
- FSHOW((stderr,
- "/retryable error in allocating memory from the OS\n"
- "(addr=0x%lx, len=0x%lx, flags=0x%lx)\n",
- (long) addr,
- (long) len,
- (long) flags));
- if (n_do_mmap_ignorable_errors > 0) {
- --n_do_mmap_ignorable_errors;
- } else {
- lose("too many errors in allocating memory from the OS");
- }
- perror("mmap");
- return 1;
- }
- return 0;
-}
+static void * under_2gb_free_pointer=DYNAMIC_1_SPACE_END;
+#endif
+/* Map len bytes of private anonymous memory (MAP_NORESERVE) with
+ * OS_VM_PROT_ALL protection.
+ *
+ * addr is passed to mmap() without MAP_FIXED; when it is non-NULL and
+ * the mapping ends up somewhere else, a message is printed and the
+ * call is treated as a failure.  On Alpha a NULL addr is replaced by
+ * under_2gb_free_pointer, which is advanced by the page-rounded length
+ * after a successful mapping.
+ *
+ * Returns the mapped address, or 0 on failure (callers must check).
+ *
+ * NOTE(review): when the mapping lands at an unexpected address it is
+ * not unmapped before 0 is returned -- confirm callers treat this as
+ * fatal. */
os_vm_address_t
os_validate(os_vm_address_t addr, os_vm_size_t len)
{
- if (addr) {
- int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
- os_vm_address_t base_addr = addr;
- do {
- /* KLUDGE: It looks as though this code allocates memory
- * in chunks of size no larger than 'magic', but why? What
- * is the significance of 0x1000000 here? Also, can it be
- * right that if the first few 'do_mmap' calls succeed,
- * then one fails, we leave the memory allocated by the
- * first few in place even while we return a code for
- * complete failure? -- WHN 19991020
- *
- * Peter Van Eynde writes (20000211)
- * This was done because the kernel would only check for
- * overcommit for every allocation seperately. So if you
- * had 16MB of free mem+swap you could allocate 16M. And
- * again, and again, etc.
- * This in [Linux] 2.X could be bad as they changed the memory
- * system. A side effect was/is (I don't really know) that
- * programs with a lot of memory mappings run slower. But
- * of course for 2.2.2X we now have the NO_RESERVE flag that
- * helps...
- *
- * FIXME: The logic is also flaky w.r.t. failed
- * allocations. If we make one or more successful calls to
- * do_mmap(..) before one fails, then we've allocated
- * memory, and we should ensure that it gets deallocated
- * sometime somehow. If this function's response to any
- * failed do_mmap(..) is to give up and return NULL (as in
- * sbcl-0.6.7), then any failed do_mmap(..) after any
- * successful do_mmap(..) causes a memory leak. */
- int magic = 0x1000000;
- if (len <= magic) {
- if (do_mmap(&addr, len, flags)) {
- return NULL;
- }
- len = 0;
- } else {
- if (do_mmap(&addr, magic, flags)) {
- return NULL;
- }
- addr += magic;
- len = len - magic;
- }
- } while (len > 0);
- return base_addr;
- } else {
- int flags = MAP_PRIVATE | MAP_ANONYMOUS;
- if (do_mmap(&addr, len, flags)) {
- return NULL;
- } else {
- return addr;
- }
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ os_vm_address_t actual;
+
+#ifdef LISP_FEATURE_ALPHA
+ if (!addr) {
+ addr=under_2gb_free_pointer;
}
+#endif
+ actual = mmap(addr, len, OS_VM_PROT_ALL, flags, -1, 0);
+ if (actual == MAP_FAILED) {
+ perror("mmap");
+ return 0; /* caller should check this */
+ }
+
+ if (addr && (addr!=actual)) {
+ fprintf(stderr, "mmap: wanted %lu bytes at %p, actually mapped at %p\n",
+ (unsigned long) len, addr, actual);
+ return 0;
+ }
+
+#ifdef LISP_FEATURE_ALPHA
+
+ len=(len+(os_vm_page_size-1))&(~(os_vm_page_size-1));
+ under_2gb_free_pointer+=len;
+#endif
+
+ return actual;
}
+/* Unmap len bytes starting at addr.  A munmap() failure is reported
+ * with perror() and otherwise ignored. */
void
os_invalidate(os_vm_address_t addr, os_vm_size_t len)
{
if (munmap(addr,len) == -1) {
- perror("munmap");
+ perror("munmap");
}
}
+/* Map len bytes of the file open on fd, starting at byte offset,
+ * privately at the fixed address addr with OS_VM_PROT_ALL protection.
+ *
+ * If mmap() fails, or a non-NULL addr is not the address actually
+ * mapped, the error is reported with perror() and handed to lose().
+ * Otherwise returns the mapped address. */
os_vm_address_t
os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
{
- addr = mmap(addr, len,
- OS_VM_PROT_ALL,
- MAP_PRIVATE | MAP_FILE | MAP_FIXED,
- fd, (off_t) offset);
-
- if (addr == MAP_FAILED) {
- perror("mmap");
- lose("unexpected mmap(..) failure");
+ os_vm_address_t actual;
+
+ actual = mmap(addr, len, OS_VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
+ fd, (off_t) offset);
+ if (actual == MAP_FAILED || (addr && (addr != actual))) {
+ perror("mmap");
+ lose("unexpected mmap(..) failure\n");
}
- return addr;
+ return actual;
}
+/* Set the protection of the length bytes starting at address to prot
+ * using mprotect().  An ENOMEM failure is passed to lose() together
+ * with advice on raising the mapping limit; any other failure is only
+ * reported with perror(). */
void
os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
{
if (mprotect(address, length, prot) == -1) {
- perror("mprotect");
+ if (errno == ENOMEM) {
+ lose("An mprotect call failed with ENOMEM. This probably means that the maximum amount\n"
+ "of separate memory mappings was exceeded. To fix the problem, either increase\n"
+ "the maximum with e.g. 'echo 262144 > /proc/sys/vm/max_map_count' or recompile\n"
+ "SBCL with a larger value for GENCGC-PAGE-SIZE in 'src/target/parms.lisp'.");
+ } else {
+ perror("mprotect");
+ }
}
}
\f
-/* FIXME: Now that FOO_END, rather than FOO_SIZE, is the fundamental
- * description of a space, we could probably punt this and just do
- * (FOO_START <= x && x < FOO_END) everywhere it's called. */
-static boolean
-in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen)
-{
- char* beg = (char*)((long)sbeg);
- char* end = (char*)((long)sbeg) + slen;
- char* adr = (char*)a;
- return (adr >= beg && adr < end);
-}
-
+/* Return 1 if addr lies inside read-only space, static space, the
+ * dynamic space (both semispaces when not using gencgc), or inside the
+ * control stack or binding stack of any thread visited by
+ * for_each_thread(); return 0 otherwise.  Every range is half-open:
+ * start <= addr < end. */
boolean
is_valid_lisp_addr(os_vm_address_t addr)
{
- return
- in_range_p(addr, READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE) ||
- in_range_p(addr, STATIC_SPACE_START , STATIC_SPACE_SIZE) ||
- in_range_p(addr, DYNAMIC_SPACE_START , DYNAMIC_SPACE_SIZE) ||
- in_range_p(addr, CONTROL_STACK_START , CONTROL_STACK_SIZE) ||
- in_range_p(addr, BINDING_STACK_START , BINDING_STACK_SIZE);
+ struct thread *th;
+ size_t ad = (size_t) addr;
+
+ if ((READ_ONLY_SPACE_START <= ad && ad < READ_ONLY_SPACE_END)
+ || (STATIC_SPACE_START <= ad && ad < STATIC_SPACE_END)
+#if defined LISP_FEATURE_GENCGC
+ || (DYNAMIC_SPACE_START <= ad && ad < DYNAMIC_SPACE_END)
+#else
+ || (DYNAMIC_0_SPACE_START <= ad && ad < DYNAMIC_0_SPACE_END)
+ || (DYNAMIC_1_SPACE_START <= ad && ad < DYNAMIC_1_SPACE_END)
+#endif
+ )
+ return 1;
+ for_each_thread(th) {
+ if((size_t)(th->control_stack_start) <= ad
+ && ad < (size_t)(th->control_stack_end))
+ return 1;
+ if((size_t)(th->binding_stack_start) <= ad
+ && ad < (size_t)(th->binding_stack_start + BINDING_STACK_SIZE))
+ return 1;
+ }
+ return 0;
}
\f
/*
* any OS-dependent special low-level handling for signals
*/
-#if defined GENCGC
-
/*
- * The GENCGC needs to be hooked into whatever signal is raised for
+ * The GC needs to be hooked into whatever signal is raised for
* page fault on this OS.
*/
-void
+/* Handler installed for the memory-fault signal.
+ *
+ * Obtains the faulting address from arch_get_bad_addr().  On Alpha, a
+ * fault with bit 63 set in reg_ALLOC is the pseudo-atomic exit
+ * marker: the bit is cleared, pending interrupts are run and the
+ * handler returns.  Otherwise the fault is offered first to the GC
+ * write-protection handler (gencgc or cheneygc), then to
+ * handle_guard_page_triggered(); if neither claims it, it goes to
+ * lisp_memory_fault_error() when the C stack is the control stack,
+ * else to interrupt_handle_now(). */
+static void
sigsegv_handler(int signal, siginfo_t *info, void* void_context)
{
os_context_t *context = arch_os_get_context(&void_context);
- void* fault_addr = (void*)context->uc_mcontext.cr2;
- if (!gencgc_handle_wp_violation(fault_addr)) {
- interrupt_handle_now(signal, info, void_context);
- }
-}
+ os_vm_address_t addr = arch_get_bad_addr(signal, info, context);
-#else
+#ifdef LISP_FEATURE_ALPHA
+ /* Alpha stuff: This is the end of a pseudo-atomic section during
+ which a signal was received. We must deal with the pending
+ interrupt (see also interrupt.c, ../code/interrupt.lisp)
-static void
-sigsegv_handler(int signal, siginfo_t *info, void* void_context)
-{
- os_context_t *context = arch_os_get_context(&void_context);
- os_vm_address_t addr;
+ (how we got here: when interrupting, we set bit 63 in reg_ALLOC.
+ At the end of the atomic section we tried to write to reg_ALLOC,
+ got a SIGSEGV (there's nothing mapped there) so ended up here. */
+ if (addr != NULL &&
+ *os_context_register_addr(context, reg_ALLOC) & (1L<<63)) {
+ *os_context_register_addr(context, reg_ALLOC) -= (1L<<63);
+ interrupt_handle_pending(context);
+ return;
+ }
+#endif
-#ifdef __i386__
- interrupt_handle_now(signal,contextstruct);
+#ifdef LISP_FEATURE_GENCGC
+ if (!gencgc_handle_wp_violation(addr))
#else
- char *control_stack_top = (char*)CONTROL_STACK_START + CONTROL_STACK_SIZE;
-
- addr = arch_get_bad_addr(signal,info,context);
-
- if (addr != NULL &&
- *os_context_register_addr(context,reg_ALLOC) & (1L<<63)){
- /* This is the end of a pseudo-atomic section during which
- * a signal was received. We must deal with the pending interrupt
- * (see also interrupt.c, ../code/interrupt.lisp)
- */
-
- /* (how we got here: when interrupting, we set bit 63 in
- * reg_Alloc. At the end of the atomic section we tried to
- * write to reg_Alloc, got a SIGSEGV (there's nothing mapped
- * there) so ended up here
- */
- *os_context_register_addr(context,reg_ALLOC) -= (1L<<63);
- interrupt_handle_pending(context);
- } else if (addr > control_stack_top && addr < BINDING_STACK_START) {
- fprintf(stderr,
- "Possible stack overflow at 0x%016lX:\n"
- "control_stack_top=%lx, BINDING_STACK_START=%lx\n",
- addr,
- control_stack_top,
- BINDING_STACK_START);
- /* Try to fix control frame pointer. */
- while ( ! (CONTROL_STACK_START <= *current_control_frame_pointer &&
- *current_control_frame_pointer <= control_stack_top))
- ((char*)current_control_frame_pointer) -= sizeof(lispobj);
- monitor_or_something();
- } else if (!interrupt_maybe_gc(signal, info, context)) {
- interrupt_handle_now(signal, info, context);
- }
+ if (!cheneygc_handle_wp_violation(context, addr))
#endif
-}
+ if (!handle_guard_page_triggered(context, addr))
+#ifdef LISP_FEATURE_C_STACK_IS_CONTROL_STACK
+ lisp_memory_fault_error(context, addr);
+#else
+ interrupt_handle_now(signal, info, context);
#endif
+}
+/* Install the OS-specific low-level signal handlers: sigsegv_handler
+ * for SIG_MEMORY_FAULT and, when thread support is compiled in,
+ * interrupt_thread_handler for SIG_INTERRUPT_THREAD and
+ * sig_stop_for_gc_handler for SIG_STOP_FOR_GC. */
void
os_install_interrupt_handlers(void)
{
- undoably_install_low_level_interrupt_handler(SIGSEGV, sigsegv_handler);
+ undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
+ sigsegv_handler);
+#ifdef LISP_FEATURE_SB_THREAD
+ undoably_install_low_level_interrupt_handler(SIG_INTERRUPT_THREAD,
+ interrupt_thread_handler);
+ undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
+ sig_stop_for_gc_handler);
+#endif
}
+/* Resolve the /proc/self/exe symlink and return the target as a string
+ * produced by copied_string().  Returns NULL when readlink() fails. */
+char *
+os_get_runtime_executable_path()
+{
+    char target[PATH_MAX + 1];
+    int length = readlink("/proc/self/exe", target, sizeof(target)-1);
+
+    if (length < 0)
+        return NULL;
+
+    /* readlink() does not terminate the buffer; one byte was kept
+     * free for the terminator. */
+    target[length] = '\0';
+    return copied_string(target);
+}