src/runtime/linux-os.c

   1 /*
   2  * the Linux incarnation of OS-dependent routines.  See also
   3  * $(sbcl_arch)-linux-os.c
   4  *
   5  * This file (along with os.h) exports an OS-independent interface to
   6  * the operating system VM facilities. Surprise surprise, this
   7  * interface looks a lot like the Mach interface (but simpler in some
   8  * places). For some operating systems, a subset of these functions
   9  * will have to be emulated.
  10  */
  11
  12 /*
  13  * This software is part of the SBCL system. See the README file for
  14  * more information.
  15  *
  16  * This software is derived from the CMU CL system, which was
  17  * written at Carnegie Mellon University and released into the
  18  * public domain. The software is in the public domain and is
  19  * provided with absolutely no warranty. See the COPYING and CREDITS
  20  * files for more information.
  21  */
  22
  23 #include <stdio.h>
  24 #include <sys/param.h>
  25 #include <sys/file.h>
  26 #include "./signal.h"
  27 #include "os.h"
  28 #include "arch.h"
  29 #include "globals.h"
  30 #include "interrupt.h"
  31 #include "interr.h"
  32 #include "lispregs.h"
  33 #include "sbcl.h"
  34 #include <sys/socket.h>
  35 #include <sys/utsname.h>
  36
  37 #include <sys/types.h>
  38 #include <signal.h>
  39 /* #include <sys/sysinfo.h> */
  40 #include <sys/time.h>
  41 #include <sys/stat.h>
  42 #include <unistd.h>
  43
  44 #include "validate.h"
   /* The page size, in bytes, as reported by getpagesize(). It is
    * assigned in os_init() below. */
   size_t os_vm_page_size;
  46
  47 #if defined GENCGC
  48 #include "gencgc.h"
  49 #endif
  50 \f
  51 void os_init(void)
  52 {
  53     /* Early versions of Linux don't support the mmap(..) functionality
  54      * that we need. */
  55     {
  56         struct utsname name;
  57         int major_version;
  58         uname(&name);
  59         major_version = atoi(name.release);
  60         if (major_version < 2) {
  61             lose("linux major version=%d (can't run in version < 2.0.0)",
  62                  major_version);
  63         }
  64     }
  65
  66     os_vm_page_size = getpagesize();
  67    /* This could just as well be in arch_init(), but it's not. */
  68 #ifdef __i386__
  69     SET_FPU_CONTROL_WORD(0x1372|4|8|16|32); /* no interrupts */
  70 #endif
  71 }
  72
  73 /* various os_context_*_addr accessors moved to {x86,alpha}-linux-os.c
  74  * -dan 20010125
  75  */
  76
  77 /* In Debian CMU CL ca. 2.4.9, it was possible to get an infinite
  78  * cascade of errors from do_mmap(..). This variable is a counter to
  79  * prevent that; when it counts down to zero, an error in do_mmap
  80  * causes the low-level monitor to be called. */
  81 int n_do_mmap_ignorable_errors = 3;
  82
  83 /* Return 0 for success. */
  84 static int
  85 do_mmap(os_vm_address_t *addr, os_vm_size_t len, int flags)
  86 {
  87     /* We *must* have the memory where we expect it. */
  88     os_vm_address_t old_addr = *addr;
  89
  90     *addr = mmap(*addr, len, OS_VM_PROT_ALL, flags, -1, 0);
  91     if (*addr == MAP_FAILED ||
  92         ((old_addr != NULL) && (*addr != old_addr))) {
  93         FSHOW((stderr,
  94                "/retryable error in allocating memory from the OS\n"
  95                "(addr=0x%lx, len=0x%lx, flags=0x%lx)\n",
  96                (long) addr,
  97                (long) len,
  98                (long) flags));
  99         if (n_do_mmap_ignorable_errors > 0) {
 100             --n_do_mmap_ignorable_errors;
 101         } else {
 102             lose("too many errors in allocating memory from the OS");
 103         }
 104         perror("mmap");
 105         return 1;
 106     }
 107     return 0;
 108 }
 109
 110 os_vm_address_t
 111 os_validate(os_vm_address_t addr, os_vm_size_t len)
 112 {
 113     if (addr) {
 114         int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
 115         os_vm_address_t base_addr = addr;
 116         do {
 117             /* KLUDGE: It looks as though this code allocates memory
 118              * in chunks of size no larger than 'magic', but why? What
 119              * is the significance of 0x1000000 here? Also, can it be
 120              * right that if the first few 'do_mmap' calls succeed,
 121              * then one fails, we leave the memory allocated by the
 122              * first few in place even while we return a code for
 123              * complete failure? -- WHN 19991020
 124              *
 125              * Peter Van Eynde writes (20000211)
 126              *     This was done because the kernel would only check for
 127              *   overcommit for every allocation seperately. So if you
 128              *   had 16MB of free mem+swap you could allocate 16M. And
 129              *   again, and again, etc.
 130              *     This in [Linux] 2.X could be bad as they changed the memory
 131              *   system. A side effect was/is (I don't really know) that
 132              *   programs with a lot of memory mappings run slower. But
 133              *   of course for 2.2.2X we now have the NO_RESERVE flag that
 134              *   helps...
 135              *
 136              * FIXME: The logic is also flaky w.r.t. failed
 137              * allocations. If we make one or more successful calls to
 138              * do_mmap(..) before one fails, then we've allocated
 139              * memory, and we should ensure that it gets deallocated
 140              * sometime somehow. If this function's response to any
 141              * failed do_mmap(..) is to give up and return NULL (as in
 142              * sbcl-0.6.7), then any failed do_mmap(..) after any
 143              * successful do_mmap(..) causes a memory leak. */
 144             int magic = 0x1000000;
 145             if (len <= magic) {
 146                 if (do_mmap(&addr, len, flags)) {
 147                     return NULL;
 148                 }
 149                 len = 0;
 150             } else {
 151                 if (do_mmap(&addr, magic, flags)) {
 152                     return NULL;
 153                 }
 154                 addr += magic;
 155                 len = len - magic;
 156             }
 157         } while (len > 0);
 158         return base_addr;
 159     } else {
 160         int flags = MAP_PRIVATE | MAP_ANONYMOUS;
 161         if (do_mmap(&addr, len, flags)) {
 162             return NULL;
 163         } else {
 164             return addr;
 165         }
 166     }
 167 }
 168
 169 void
 170 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
 171 {
 172     if (munmap(addr,len) == -1) {
 173         perror("munmap");
 174     }
 175 }
 176
 177 os_vm_address_t
 178 os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
 179 {
 180     addr = mmap(addr, len,
 181                 OS_VM_PROT_ALL,
 182                 MAP_PRIVATE | MAP_FILE | MAP_FIXED,
 183                 fd, (off_t) offset);
 184
 185     if(addr == MAP_FAILED) {
 186         perror("mmap");
 187         lose("unexpected mmap(..) failure");
 188     }
 189
 190     return addr;
 191 }
 192
 193 void
 194 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
 195 {
 196     if (mprotect(address, length, prot) == -1) {
 197         perror("mprotect");
 198     }
 199 }
 200 \f
 201 /* FIXME: Now that FOO_END, rather than FOO_SIZE, is the fundamental
 202  * description of a space, we could probably punt this and just do
 203  * (FOO_START <= x && x < FOO_END) everywhere it's called. */
 204 static boolean
 205 in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen)
 206 {
 207     char* beg = (char*)((long)sbeg);
 208     char* end = (char*)((long)sbeg) + slen;
 209     char* adr = (char*)a;
 210     return (adr >= beg && adr < end);
 211 }
 212
 213 boolean
 214 is_valid_lisp_addr(os_vm_address_t addr)
 215 {
 216     return
 217         in_range_p(addr, READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE) ||
 218         in_range_p(addr, STATIC_SPACE_START   , STATIC_SPACE_SIZE) ||
 219         in_range_p(addr, DYNAMIC_SPACE_START  , DYNAMIC_SPACE_SIZE) ||
 220         in_range_p(addr, CONTROL_STACK_START  , CONTROL_STACK_SIZE) ||
 221         in_range_p(addr, BINDING_STACK_START  , BINDING_STACK_SIZE);
 222 }
 223 \f
 224 /*
 225  * any OS-dependent special low-level handling for signals
 226  */
 227
 228 #if defined GENCGC
 229
 230 /*
 231  * The GENCGC needs to be hooked into whatever signal is raised for
 232  * page fault on this OS.
 233  */
 234 void
 235 sigsegv_handler(int signal, siginfo_t *info, void* void_context)
 236 {
 237     os_context_t *context = (os_context_t*)void_context;
 238     void* fault_addr = (void*)context->uc_mcontext.cr2;
 239     if (!gencgc_handle_wp_violation(fault_addr)) {
 240         interrupt_handle_now(signal, info, void_context);
 241     }
 242 }
 243
 244 #else
 245
 246 static void
 247 sigsegv_handler(int signal, siginfo_t *info, void* void_context)
 248 {
 249     os_context_t *context = (os_context_t*)void_context;
 250     os_vm_address_t addr;
 251
 252 #ifdef __i386__
 253     interrupt_handle_now(signal,contextstruct);
 254 #else
 255     char *control_stack_top = (char*)CONTROL_STACK_START + CONTROL_STACK_SIZE;
 256
 257     addr = arch_get_bad_addr(signal,info,context);
 258
 259     if(addr != NULL &&
 260        *os_context_register_addr(context,reg_ALLOC) & (1L<<63)){
 261         /* This is the end of a pseudo-atomic section during which
 262          * a signal was received.  We must deal with the pending interrupt
 263          * (see also interrupt.c, ../code/interrupt.lisp)
 264          */
 265
 266         /* (how we got here: when interrupting, we set bit 63 in
 267          * reg_Alloc.  At the end of the atomic section we tried to
 268          * write to reg_Alloc, got a SIGSEGV (there's nothing mapped
 269          * there) so ended up here
 270          */
 271         *os_context_register_addr(context,reg_ALLOC) -= (1L<<63);
 272         interrupt_handle_pending(context);
 273     } else if (addr > control_stack_top && addr < BINDING_STACK_START) {
 274         fprintf(stderr,
 275                 "Possible stack overflow at 0x%016lX:\n"
 276                 "control_stack_top=%lx, BINDING_STACK_START=%lx\n",
 277                 addr,
 278                 control_stack_top,
 279                 BINDING_STACK_START);
 280         /* Try to fix control frame pointer. */
 281         while ( ! (CONTROL_STACK_START <= *current_control_frame_pointer &&
 282                    *current_control_frame_pointer <= control_stack_top))
 283             ((char*)current_control_frame_pointer) -= sizeof(lispobj);
 284         ldb_monitor();
 285     } else if (!interrupt_maybe_gc(signal, info, context)) {
 286         interrupt_handle_now(signal, info, context);
 287     }
 288 #endif
 289 }
 290 #endif
 291
 292 void
 293 os_install_interrupt_handlers(void)
 294 {
 295     undoably_install_low_level_interrupt_handler(SIGSEGV, sigsegv_handler);
 296 }
 297