2 * the Linux incarnation of OS-dependent routines. See also
3 * $(sbcl_arch)-linux-os.c
5 * This file (along with os.h) exports an OS-independent interface to
6 * the operating system VM facilities. Surprise surprise, this
7 * interface looks a lot like the Mach interface (but simpler in some
8 * places). For some operating systems, a subset of these functions
9 * will have to be emulated.
13 * This software is part of the SBCL system. See the README file for
16 * This software is derived from the CMU CL system, which was
17 * written at Carnegie Mellon University and released into the
18 * public domain. The software is in the public domain and is
19 * provided with absolutely no warranty. See the COPYING and CREDITS
20 * files for more information.
24 #include <sys/param.h>
30 #include "interrupt.h"
34 #include <sys/socket.h>
35 #include <sys/utsname.h>
37 #include <sys/types.h>
39 /* #include <sys/sysinfo.h> */
/* Size in bytes of one virtual-memory page. It is assigned from
 * getpagesize() further down in this file. */
45 size_t os_vm_page_size;
/* Startup checks and setup. The visible steps are: parse the major kernel
 * version out of name.release with atoi, refuse to run (via lose) when it
 * is below 2, record the page size in os_vm_page_size, and program the FPU
 * control word.
 * NOTE(review): atoi cannot report a parse failure; a release string that
 * does not start with a number reads as 0 and takes the lose path --
 * confirm that this is acceptable.
 * NOTE(review): the enclosing function header is not visible here;
 * presumably this is the OS initialization entry point -- confirm. */
53 /* Early versions of Linux don't support the mmap(..) functionality
59 major_version = atoi(name.release);
60 if (major_version < 2) {
61 lose("linux major version=%d (can't run in version < 2.0.0)",
66 os_vm_page_size = getpagesize();
67 /* This could just as well be in arch_init(), but it's not. */
/* The control word is 0x1372 with bits 4, 8, 16 and 32 ORed in; the
 * trailing note describes the result as raising no interrupts.
 * NOTE(review): the meaning of the individual bits is not documented
 * here -- confirm against the definition of SET_FPU_CONTROL_WORD. */
69 SET_FPU_CONTROL_WORD(0x1372|4|8|16|32); /* no interrupts */
73 /* various os_context_*_addr accessors moved to {x86,alpha}-linux-os.c
77 /* In Debian CMU CL ca. 2.4.9, it was possible to get an infinite
78 * cascade of errors from do_mmap(..). This variable is a counter to
79 * prevent that; when it counts down to zero, an error in do_mmap
80 * causes the low-level monitor to be called.
 *
 * do_mmap(..) decrements the counter on a failed allocation while it is
 * still positive, so the initial value of 3 is the number of failures
 * that are tolerated. */
81 int n_do_mmap_ignorable_errors = 3;
83 /* Return 0 for success. */
/* Map len bytes of memory with mmap(..), using *addr as the requested
 * address and writing the address actually obtained back through addr.
 * The mapping gets OS_VM_PROT_ALL protection and has no backing file
 * (fd -1, offset 0); flags is handed to mmap(..) unchanged.
 *
 * The call counts as failed when mmap(..) returns MAP_FAILED, or when a
 * non-NULL address was requested and a different one came back. On
 * failure a diagnostic is formatted, and n_do_mmap_ignorable_errors is
 * decremented while it is still positive. lose(..) is called with a
 * too-many-errors message; presumably that happens once the counter is
 * exhausted -- the branch structure is not fully visible, confirm.
 *
 * NOTE(review): on the address-mismatch path the mapping that mmap(..)
 * did return is not visibly unmapped -- confirm whether it is leaked. */
85 do_mmap(os_vm_address_t *addr, os_vm_size_t len, int flags)
87 /* We *must* have the memory where we expect it. */
88 os_vm_address_t old_addr = *addr;
90 *addr = mmap(*addr, len, OS_VM_PROT_ALL, flags, -1, 0);
/* Failure: either no mapping at all, or a mapping at the wrong place. */
91 if (*addr == MAP_FAILED ||
92 ((old_addr != NULL) && (*addr != old_addr))) {
94 "/retryable error in allocating memory from the OS\n"
95 "(addr=0x%lx, len=0x%lx, flags=0x%lx)\n",
/* Spend one of the tolerated failures, if any remain. */
99 if (n_do_mmap_ignorable_errors > 0) {
100 --n_do_mmap_ignorable_errors;
102 lose("too many errors in allocating memory from the OS");
/* Obtain len bytes of private anonymous memory through do_mmap(..).
 *
 * Two sets of mmap flags appear below: one that includes MAP_FIXED
 * (declared next to base_addr, which keeps a copy of the incoming addr)
 * and one that does not.
 * NOTE(review): presumably the MAP_FIXED variant serves a request for a
 * specific address and the other one lets the kernel choose -- the
 * condition that selects between them is not visible here, confirm.
 *
 * Every do_mmap(..) result is tested for nonzero, which means failure
 * (do_mmap returns 0 for success). */
111 os_validate(os_vm_address_t addr, os_vm_size_t len)
114 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
115 os_vm_address_t base_addr = addr;
117 /* KLUDGE: It looks as though this code allocates memory
118 * in chunks of size no larger than 'magic', but why? What
119 * is the significance of 0x1000000 here? Also, can it be
120 * right that if the first few 'do_mmap' calls succeed,
121 * then one fails, we leave the memory allocated by the
122 * first few in place even while we return a code for
123 * complete failure? -- WHN 19991020
125 * Peter Van Eynde writes (20000211)
126 * This was done because the kernel would only check for
127 * overcommit for every allocation separately. So if you
128 * had 16MB of free mem+swap you could allocate 16M. And
129 * again, and again, etc.
130 * This in [Linux] 2.X could be bad as they changed the memory
131 * system. A side effect was/is (I don't really know) that
132 * programs with a lot of memory mappings run slower. But
133 * of course for 2.2.2X we now have the NO_RESERVE flag that
136 * FIXME: The logic is also flaky w.r.t. failed
137 * allocations. If we make one or more successful calls to
138 * do_mmap(..) before one fails, then we've allocated
139 * memory, and we should ensure that it gets deallocated
140 * sometime somehow. If this function's response to any
141 * failed do_mmap(..) is to give up and return NULL (as in
142 * sbcl-0.6.7), then any failed do_mmap(..) after any
143 * successful do_mmap(..) causes a memory leak. */
/* 0x1000000 bytes is 16MB: the chunk size discussed in the note above. */
144 int magic = 0x1000000;
/* Map len bytes in a single call. */
146 if (do_mmap(&addr, len, flags)) {
/* Map one chunk of magic bytes. */
151 if (do_mmap(&addr, magic, flags)) {
/* Same flags as the first declaration, minus MAP_FIXED. */
160 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
161 if (do_mmap(&addr, len, flags)) {
/* Release the len bytes of address space starting at addr by calling
 * munmap(..). A return value of -1 from munmap(..) is detected below.
 * NOTE(review): what is done about that failure is not visible here --
 * confirm that it is reported rather than silently ignored. */
170 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
172 if (munmap(addr,len) == -1) {
/* Create a private file mapping of len bytes at addr with mmap(..), using
 * the flags MAP_PRIVATE | MAP_FILE | MAP_FIXED. With MAP_FIXED the mapping
 * is requested at exactly addr. The result of mmap(..) is stored back
 * into the local addr, and a MAP_FAILED result is fatal: lose(..) is
 * called.
 * NOTE(review): presumably fd and offset select the file region handed to
 * mmap(..); offset is an int while mmap(..) takes an off_t -- confirm
 * that large offsets cannot be truncated. */
178 os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
180 addr = mmap(addr, len,
182 MAP_PRIVATE | MAP_FILE | MAP_FIXED,
185 if(addr == MAP_FAILED) {
187 lose("unexpected mmap(..) failure");
/* Change the protection of the length bytes starting at address to prot
 * by calling mprotect(..). A return value of -1 is detected below.
 * NOTE(review): prot is passed to mprotect(..) unmodified, so values of
 * os_vm_prot_t are assumed to be valid mprotect protection masks --
 * confirm. */
194 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
196 if (mprotect(address, length, prot) == -1) {
201 /* FIXME: Now that FOO_END, rather than FOO_SIZE, is the fundamental
202 * description of a space, we could probably punt this and just do
203 * (FOO_START <= x && x < FOO_END) everywhere it's called. */
/* Report whether address a lies in the half-open range that starts at
 * sbeg and is slen bytes long: the result is nonzero when
 * sbeg <= a < sbeg + slen, and zero otherwise.
 * sbeg is turned into a pointer by way of a cast to long.
 * NOTE(review): that assumes long is at least as wide as a pointer --
 * confirm for every supported target. */
205 in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen)
207 char* beg = (char*)((long)sbeg);
/* One past the last byte of the range. */
208 char* end = (char*)((long)sbeg) + slen;
209 char* adr = (char*)a;
210 return (adr >= beg && adr < end);
/* Test addr against each of the five Lisp memory regions named below
 * (read-only space, static space, dynamic space, control stack, binding
 * stack), each described by a START address and a SIZE. The in_range_p
 * tests are ORed together, so the combined expression is true when addr
 * falls inside any one region.
 * NOTE(review): presumably this expression is the return value of the
 * function -- confirm. */
214 is_valid_lisp_addr(os_vm_address_t addr)
217 in_range_p(addr, READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE) ||
218 in_range_p(addr, STATIC_SPACE_START , STATIC_SPACE_SIZE) ||
219 in_range_p(addr, DYNAMIC_SPACE_START , DYNAMIC_SPACE_SIZE) ||
220 in_range_p(addr, CONTROL_STACK_START , CONTROL_STACK_SIZE) ||
221 in_range_p(addr, BINDING_STACK_START , BINDING_STACK_SIZE);
225 * any OS-dependent special low-level handling for signals
231 * The GENCGC needs to be hooked into whatever signal is raised for
232 * page fault on this OS.
/* SIGSEGV handler that offers the fault to the generational GC first.
 * The faulting address is read from the cr2 field of the machine context
 * and passed to gencgc_handle_wp_violation(..); only when that returns
 * zero is the signal handed on to interrupt_handle_now(..).
 * NOTE(review): the cr2 field suggests this variant is x86-specific, and
 * a second sigsegv_handler follows in this file -- confirm which build
 * configuration selects each one. */
235 sigsegv_handler(int signal, siginfo_t *info, void* void_context)
237 os_context_t *context = (os_context_t*)void_context;
238 void* fault_addr = (void*)context->uc_mcontext.cr2;
239 if (!gencgc_handle_wp_violation(fault_addr)) {
240 interrupt_handle_now(signal, info, void_context);
/* SIGSEGV handler, second variant. It obtains the faulting address from
 * arch_get_bad_addr(..) and then takes one of these actions:
 *   - bit 63 of the saved reg_ALLOC register is set: clear that bit and
 *     run interrupt_handle_pending(..); the reason is explained in the
 *     comments inside that branch;
 *   - the address is strictly between control_stack_top and
 *     BINDING_STACK_START: print a possible-stack-overflow report and
 *     try to repair current_control_frame_pointer;
 *   - otherwise: call interrupt_maybe_gc(..), and when that returns zero
 *     fall back to interrupt_handle_now(..).
 * NOTE(review): the first test may have further conditions that are not
 * visible here -- confirm.
 * NOTE(review): 1L<<63 shifts into the sign bit of a signed long, which
 * ISO C leaves undefined (and is out of range where long is 32 bits) --
 * confirm the intended targets, or consider an unsigned constant. */
247 sigsegv_handler(int signal, siginfo_t *info, void* void_context)
249 os_context_t *context = (os_context_t*)void_context;
250 os_vm_address_t addr;
/* NOTE(review): this call passes two arguments and names contextstruct,
 * which is not declared in the visible code, while the other calls to
 * interrupt_handle_now(..) pass three arguments. Presumably it sits in a
 * conditionally compiled branch -- confirm that it is never built. */
253 interrupt_handle_now(signal,contextstruct);
/* Address just past the end of the control stack region. */
255 char *control_stack_top = (char*)CONTROL_STACK_START + CONTROL_STACK_SIZE;
257 addr = arch_get_bad_addr(signal,info,context);
260 *os_context_register_addr(context,reg_ALLOC) & (1L<<63)){
261 /* This is the end of a pseudo-atomic section during which
262 * a signal was received. We must deal with the pending interrupt
263 * (see also interrupt.c, ../code/interrupt.lisp)
266 /* (how we got here: when interrupting, we set bit 63 in
267 * reg_Alloc. At the end of the atomic section we tried to
268 * write to reg_Alloc, got a SIGSEGV (there's nothing mapped
269 * there) so ended up here
271 *os_context_register_addr(context,reg_ALLOC) -= (1L<<63);
272 interrupt_handle_pending(context);
273 } else if (addr > control_stack_top && addr < BINDING_STACK_START) {
275 "Possible stack overflow at 0x%016lX:\n"
276 "control_stack_top=%lx, BINDING_STACK_START=%lx\n",
279 BINDING_STACK_START);
280 /* Try to fix control frame pointer. */
/* Step the frame pointer down by sizeof(lispobj) bytes at a time until
 * the word it points at holds a value inside the control stack. */
281 while ( ! (CONTROL_STACK_START <= *current_control_frame_pointer &&
282 *current_control_frame_pointer <= control_stack_top))
/* NOTE(review): the next line assigns through a cast. A cast does not
 * yield an lvalue in ISO C, so this relies on an old compiler extension
 * -- confirm that it still builds with the compilers in use. */
283 ((char*)current_control_frame_pointer) -= sizeof(lispobj);
285 } else if (!interrupt_maybe_gc(signal, info, context)) {
286 interrupt_handle_now(signal, info, context);
/* Register sigsegv_handler as the low-level handler for SIGSEGV.
 * NOTE(review): the exact semantics of the undoable installation are
 * defined elsewhere -- presumably the previous handler can be restored;
 * confirm. */
293 os_install_interrupt_handlers(void)
295 undoably_install_low_level_interrupt_handler(SIGSEGV, sigsegv_handler);