2 * the Linux incarnation of OS-dependent routines. See also
3 * $(sbcl_arch)-linux-os.c
5 * This file (along with os.h) exports an OS-independent interface to
6 * the operating system VM facilities. Surprise surprise, this
7 * interface looks a lot like the Mach interface (but simpler in some
8 * places). For some operating systems, a subset of these functions
9 * will have to be emulated.
13 * This software is part of the SBCL system. See the README file for
16 * This software is derived from the CMU CL system, which was
17 * written at Carnegie Mellon University and released into the
18 * public domain. The software is in the public domain and is
19 * provided with absolutely no warranty. See the COPYING and CREDITS
20 * files for more information.
24 #include <sys/param.h>
31 #include "interrupt.h"
35 #include "genesis/static-symbols.h"
36 #include "genesis/fdefn.h"
38 #include <sys/socket.h>
39 #include <sys/utsname.h>
42 #include <sys/types.h>
44 /* #include <sys/sysinfo.h> */
48 #include <linux/version.h>
53 #if defined LISP_FEATURE_GENCGC
54 #include "gencgc-internal.h"
56 #include "cheneygc-internal.h"
59 #ifdef LISP_FEATURE_SB_WTIMER
60 # include <sys/timerfd.h>
63 #ifdef LISP_FEATURE_X86
64 /* Prototype for personality(2). Done inline here since the header file
65 * for this isn't available on old versions of glibc. */
66 int personality (unsigned long);
/* Page size used by this OS layer.  A later assignment in this file sets it
 * to BACKEND_PAGE_BYTES instead of asking the kernel through getpagesize(). */
69 size_t os_vm_page_size;
71 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_FUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
72 #include <sys/syscall.h>
76 /* values taken from the kernel's linux/futex.h. This header file
77 doesn't exist in userspace, which is our excuse for not grovelling
81 /* This is also copied from linux/futex.h so that a binary compiled on
82 * a not so recent Linux system can still take advantage of private
83 * futexes when available.*/
/* Private variants of the wait and wake op codes, written as literal sums.
 * NOTE(review): presumably base op code plus a private flag of 128 --
 * confirm against linux/futex.h. */
84 #define FUTEX_WAIT_PRIVATE (0+128)
85 #define FUTEX_WAKE_PRIVATE (1+128)
/* Requeue op code; not referenced in the lines visible in this excerpt. */
87 #define FUTEX_REQUEUE (3)
89 /* Not static so that Lisp may query it.
 * Set to 1 or 0 by the startup probe further down, and consulted when
 * choosing between the private and the plain wait/wake op codes. */
90 boolean futex_private_supported_p;
95 return (futex_private_supported_p ? FUTEX_WAIT_PRIVATE : FUTEX_WAIT);
101 return (futex_private_supported_p ? FUTEX_WAKE_PRIVATE : FUTEX_WAKE);
/* Raw futex system call wrapper.  The #define makes every use of the name
 * sys_futex in this file refer to sbcl_sys_futex (presumably to avoid a
 * clash with another sys_futex symbol -- confirm).
 *
 *   futex - address of the futex word
 *   op    - futex operation code
 *   val   - operation-specific value
 *   rel   - timeout pointer; callers in this file pass either 0 or the
 *           address of a struct timespec
 *
 * Forwards the four arguments to syscall() under SYS_futex and returns the
 * result converted to int. */
104 #define sys_futex sbcl_sys_futex
105 static inline int sys_futex (void *futex, int op, int val, struct timespec *rel)
107 return syscall (SYS_futex, futex, op, val, rel);
114 sys_futex(&x, FUTEX_WAIT, 1, 0);
116 lose("This version of SBCL is compiled with threading support, but your kernel\n"
117 "is too old to support this. Please use a more recent kernel or\n"
118 "a version of SBCL without threading support.\n");
119 sys_futex(&x, FUTEX_WAIT_PRIVATE, 1, 0);
120 if (errno == EWOULDBLOCK) {
121 futex_private_supported_p = 1;
123 futex_private_supported_p = 0;
124 SHOW("No futex private suppport\n");
/* Wait on the futex word at lock_word, using the op code returned by
 * futex_wait_op().
 *
 *   lock_word - futex word to wait on
 *   oldval    - passed as the val argument of the wait operation (for
 *               futex(2) wait operations this is the expected value)
 *   sec, usec - timeout; copied into a struct timespec, with usec
 *               multiplied by 1000 to give nanoseconds
 *
 * Two sys_futex() calls are visible, one with a null timeout and one with
 * the address of the timespec; the condition choosing between them is not
 * part of this excerpt.  The result handling distinguishes ETIMEDOUT and
 * EINTR from all other errno values; the value returned in each case is
 * likewise not visible here. */
129 futex_wait(int *lock_word, int oldval, long sec, unsigned long usec)
131 struct timespec timeout;
135 t = sys_futex(lock_word, futex_wait_op(), oldval, 0);
138 timeout.tv_sec = sec;
139 timeout.tv_nsec = usec * 1000;
140 t = sys_futex(lock_word, futex_wait_op(), oldval, &timeout);
144 else if (errno==ETIMEDOUT)
146 else if (errno==EINTR)
149 /* EWOULDBLOCK and others, need to check the lock */
/* Wake waiters on the futex word at lock_word, using the op code returned
 * by futex_wake_op().
 *
 *   lock_word - futex word whose waiters are woken
 *   n         - passed as the val argument of the wake operation (for
 *               futex(2) wake operations this is the maximum number of
 *               waiters to wake)
 *
 * Returns the sys_futex() result unchanged. */
154 futex_wake(int *lock_word, int n)
156 return sys_futex(lock_word, futex_wake_op(),n,0);
/* Set to 1 during startup on SPARC builds when the running kernel predates
 * 2.4, to enable workarounds for kernel bugs in signal handling. */
161 int linux_sparc_siginfo_bug = 0;
163 /* This variable was in real use for a few months, basically for
164 * storing autodetected information about whether the Linux
165 * installation was recent enough to support SBCL threads, and make
166 * some run-time decisions based on that. But this turned out to be
167 * unstable, so now we just flat-out refuse to start on the old installations
168 * when thread support has been compiled in.
170 * Unfortunately, in the meanwhile Slime started depending on this
171 * variable for deciding which communication style to use. So even
172 * though this variable looks unused, it shouldn't be deleted until
173 * it's no longer used in the versions of Slime that people are
174 * likely to download first. -- JES, 2006-06-07
176 int linux_no_threads_p = 0;
178 #ifdef LISP_FEATURE_SB_THREAD
182 size_t n = confstr (_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
184 char *buf = alloca (n);
185 confstr (_CS_GNU_LIBPTHREAD_VERSION, buf, n);
186 if (strstr (buf, "NPTL")) {
/* One-time OS-level initialisation for Linux.
 *
 *   argv - argument vector; handed to execv() when the runtime re-executes
 *          itself (see below)
 *   envp - not referenced in the lines visible in this excerpt
 *
 * Steps that can be read off the visible lines:
 *   - parse the major, minor and patch numbers of the kernel version with
 *     atoi() from a string p (where p comes from is not visible here);
 *   - abort through lose() when the major version is below 2;
 *   - on kernels older than 2.4, SPARC builds set linux_sparc_siginfo_bug;
 *   - threaded futex builds abort through lose() when NPTL is missing (the
 *     test guarding that call is not visible here);
 *   - set os_vm_page_size to BACKEND_PAGE_BYTES;
 *   - on x86 and x86-64 with a recent enough kernel, turn on the
 *     ADDR_NO_RANDOMIZE personality flag and re-execute through
 *     /proc/self/exe, using the SBCL_IS_RESTARTING environment variable to
 *     avoid re-executing a second time;
 *   - on x86, point fast_bzero_pointer at fast_bzero_detect. */
195 os_init(char *argv[], char *envp[])
197 /* Conduct various version checks: do we have enough mmap(), is
198 * this a sparc running 2.2, can we do threads? */
/* atoi() stops at the first non-digit, so each call picks up one numeric
 * component; minor and patch default to 0 when not parsed. */
207 major_version = atoi(p);
208 minor_version = patch_version = 0;
211 minor_version = atoi(++p);
214 patch_version = atoi(++p);
217 if (major_version<2) {
218 lose("linux kernel version too old: major version=%d (can't run in version < 2.0.0)\n",
221 if (!(major_version>2 || minor_version >= 4)) {
222 #ifdef LISP_FEATURE_SPARC
223 FSHOW((stderr,"linux kernel %d.%d predates 2.4;\n enabling workarounds for SPARC kernel bugs in signal handling.\n", major_version,minor_version));
224 linux_sparc_siginfo_bug = 1;
227 #ifdef LISP_FEATURE_SB_THREAD
228 #if defined(LISP_FEATURE_SB_FUTEX) && !defined(LISP_FEATURE_SB_PTHREAD_FUTEX)
232 lose("This version of SBCL only works correctly with the NPTL threading\n"
233 "library. Please use a newer glibc, use an older SBCL, or stop using\n"
234 "LD_ASSUME_KERNEL\n");
238 /* Don't use getpagesize(), since it's not constant across Linux
239 * kernel versions on some architectures (for example PPC). FIXME:
240 * possibly the same should be done on other architectures too.
242 os_vm_page_size = BACKEND_PAGE_BYTES;
244 /* KLUDGE: Disable memory randomization on new Linux kernels
245 * by setting a personality flag and re-executing. (We need
246 * to re-execute, since the memory maps that can conflict with
247 * the SBCL spaces have already been done at this point).
249 * Since randomization is currently implemented only on x86 kernels,
250 * don't do this trick on other platforms.
252 #if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
253 if ((major_version == 2
254 /* Some old kernels will apparently lose unsupported personality flags
256 && ((minor_version == 6 && patch_version >= 11)
257 || (minor_version > 6)
258 /* This is what RHEL 3 reports */
259 || (minor_version == 4 && patch_version > 20)))
260 || major_version >= 3)
/* personality(0xffffffff) is used here, and again below, purely as a
 * query of the current personality value. */
262 int pers = personality(0xffffffffUL);
263 /* 0x40000 aka. ADDR_NO_RANDOMIZE */
264 if (!(pers & 0x40000)) {
265 int retval = personality(pers | 0x40000);
266 /* Allegedly some Linux kernels (the reported case was
267 * "hardened Linux 2.6.7") won't set the new personality,
268 * but nor will they return -1 for an error. So as a
269 * workaround query the new personality...
271 int newpers = personality(0xffffffffUL);
272 /* ... and don't re-execute if either the setting resulted
273 * in an error or if the value didn't change. Otherwise
274 * this might result in an infinite loop.
277 if (!getenv("SBCL_IS_RESTARTING") &&
278 retval != -1 && newpers != pers) {
279 /* Use /proc/self/exe instead of trying to figure out
280 * the executable path from PATH and argv[0], since
281 * that's unreliable. We follow the symlink instead of
282 * executing the file directly in order to prevent top
283 * from displaying the name of the process as "exe". */
/* readlink() does not NUL-terminate its output.  The buffer is one byte
 * larger than the length passed in, so a terminator always fits; the store
 * that writes it is not visible in this excerpt. */
284 char runtime[PATH_MAX+1];
285 int i = readlink("/proc/self/exe", runtime, PATH_MAX);
288 setenv("SBCL_IS_RESTARTING", "T", 1);
290 execv(runtime, argv);
293 /* Either changing the personality or execve() failed. Either
294 * way we might as well continue, and hope that the random
295 * memory maps are ok this time around.
297 fprintf(stderr, "WARNING:\
298 \nCouldn't re-execute SBCL with proper personality flags (/proc isn't mounted? setuid?)\
299 \nTrying to continue anyway.\n");
301 unsetenv("SBCL_IS_RESTARTING");
304 #ifdef LISP_FEATURE_X86
305 /* Use SSE detector. Recent versions of Linux enable SSE support
306 * on SSE capable CPUs. */
307 /* FIXME: Are there any old versions that do not support SSE? */
308 fast_bzero_pointer = fast_bzero_detect;
314 #ifdef LISP_FEATURE_ALPHA
315 /* The Alpha is a 64 bit CPU. SBCL is a 32 bit application. Due to all
316 * the places that assume we can get a pointer into a fixnum with no
317 * information loss, we have to make sure it allocates all its ram in the
/* Alpha only: address that os_validate() substitutes for the requested
 * address (under a condition not visible in this excerpt) and then advances
 * by the page-rounded mapping length.  Starts at DYNAMIC_1_SPACE_END. */
320 static void * under_2gb_free_pointer=DYNAMIC_1_SPACE_END;
/* Map len bytes of private, anonymous, no-reserve memory with
 * OS_VM_PROT_ALL protection.
 *
 *   addr - requested address, passed to mmap() as its address argument
 *          (MAP_FIXED is not among the flags); on Alpha it may be replaced
 *          by under_2gb_free_pointer
 *   len  - number of bytes to map
 *
 * Returns 0 when mmap() fails.  When a non-null addr was requested and the
 * mapping landed elsewhere, a diagnostic is written to stderr; what follows
 * that diagnostic, and the normal return value, are not visible in this
 * excerpt. */
324 os_validate(os_vm_address_t addr, os_vm_size_t len)
326 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
327 os_vm_address_t actual;
329 #ifdef LISP_FEATURE_ALPHA
331 addr=under_2gb_free_pointer;
334 actual = mmap(addr, len, OS_VM_PROT_ALL, flags, -1, 0);
335 if (actual == MAP_FAILED) {
337 return 0; /* caller should check this */
340 if (addr && (addr!=actual)) {
341 fprintf(stderr, "mmap: wanted %lu bytes at %p, actually mapped at %p\n",
342 (unsigned long) len, addr, actual);
346 #ifdef LISP_FEATURE_ALPHA
/* Round len up to a multiple of os_vm_page_size (the mask form assumes the
 * page size is a power of two), then advance the Alpha allocation pointer. */
348 len=(len+(os_vm_page_size-1))&(~(os_vm_page_size-1));
349 under_2gb_free_pointer+=len;
/* Unmap len bytes starting at addr with munmap().  The body of the failure
 * branch (munmap() returning -1) is not visible in this excerpt. */
356 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
358 if (munmap(addr,len) == -1) {
/* Map part of an open file at a fixed address.
 *
 *   fd     - file descriptor; NOTE(review): presumably passed to mmap() on
 *            the argument line missing from this excerpt -- confirm
 *   offset - file offset; same caveat as fd
 *   addr   - address for the MAP_PRIVATE | MAP_FIXED mapping
 *   len    - number of bytes to map
 *
 * The mapping uses OS_VM_PROT_ALL protection.  If mmap() fails, or a
 * non-null addr was requested and the result differs from it, the runtime
 * aborts through lose().  The return value is not visible in this excerpt. */
364 os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
366 os_vm_address_t actual;
368 actual = mmap(addr, len, OS_VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
370 if (actual == MAP_FAILED || (addr && (addr != actual))) {
372 lose("unexpected mmap(..) failure\n");
/* Change the protection of length bytes starting at address to prot, using
 * mprotect().
 *
 * When mprotect() fails with ENOMEM the runtime aborts through lose() with
 * advice on raising vm.max_map_count or rebuilding with a larger
 * GENCGC-CARD-BYTES.  Handling of other errno values is not visible in this
 * excerpt. */
379 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
381 if (mprotect(address, length, prot) == -1) {
382 if (errno == ENOMEM) {
383 lose("An mprotect call failed with ENOMEM. This probably means that the maximum amount\n"
384 "of separate memory mappings was exceeded. To fix the problem, either increase\n"
385 "the maximum with e.g. 'echo 262144 > /proc/sys/vm/max_map_count' or recompile\n"
386 "SBCL with a larger value for GENCGC-CARD-BYTES in\n"
387 "'src/compiler/target/backend-parms.lisp'.");
/* Test whether addr falls inside one of the Lisp memory regions.
 *
 * The address is compared, as a size_t, against the half-open ranges of the
 * read-only space, the static space and the dynamic space (one range under
 * gencgc, otherwise dynamic spaces 0 and 1), and then against the control
 * stack and binding stack of every thread visited by for_each_thread().
 * The values returned on a match and on no match are not visible in this
 * excerpt. */
395 is_valid_lisp_addr(os_vm_address_t addr)
398 size_t ad = (size_t) addr;
400 if ((READ_ONLY_SPACE_START <= ad && ad < READ_ONLY_SPACE_END)
401 || (STATIC_SPACE_START <= ad && ad < STATIC_SPACE_END)
402 #if defined LISP_FEATURE_GENCGC
403 || (DYNAMIC_SPACE_START <= ad && ad < DYNAMIC_SPACE_END)
405 || (DYNAMIC_0_SPACE_START <= ad && ad < DYNAMIC_0_SPACE_END)
406 || (DYNAMIC_1_SPACE_START <= ad && ad < DYNAMIC_1_SPACE_END)
410 for_each_thread(th) {
411 if((size_t)(th->control_stack_start) <= ad
412 && ad < (size_t)(th->control_stack_end))
/* NOTE(review): the upper bound adds BINDING_STACK_SIZE to
 * binding_stack_start before the cast to size_t.  If that member is a
 * pointer to a type wider than one byte, the addition is scaled by the
 * element size and the range is larger than BINDING_STACK_SIZE bytes --
 * confirm the member type. */
414 if((size_t)(th->binding_stack_start) <= ad
415 && ad < (size_t)(th->binding_stack_start + BINDING_STACK_SIZE))
422 * any OS-dependent special low-level handling for signals
426 * The GC needs to be hooked into whatever signal is raised for
427 * page fault on this OS.
/* Memory fault signal handler.
 *
 *   signal, info, context - the usual three arguments of a signal handler;
 *                           all three are passed to arch_get_bad_addr() to
 *                           obtain the faulting address
 *
 * Checks visible in this excerpt, in order:
 *   - Alpha only: a fault with bit 63 set in reg_ALLOC marks the end of a
 *     pseudo-atomic section; the bit is cleared and the pending interrupt
 *     is handled;
 *   - safepoint builds: handle_safepoint_violation();
 *   - the write-protection handler of the collector in use (gencgc or
 *     cheneygc);
 *   - handle_guard_page_triggered();
 *   - finally lisp_memory_fault_error().
 * The if statements appear to be chained so that each handler runs only
 * when the previous one returned zero; the preprocessor lines between them
 * are only partly visible, so confirm against the full file. */
430 sigsegv_handler(int signal, siginfo_t *info, os_context_t *context)
432 os_vm_address_t addr = arch_get_bad_addr(signal, info, context);
434 #ifdef LISP_FEATURE_ALPHA
435 /* Alpha stuff: This is the end of a pseudo-atomic section during
436 which a signal was received. We must deal with the pending
437 interrupt (see also interrupt.c, ../code/interrupt.lisp)
439 (how we got here: when interrupting, we set bit 63 in reg_ALLOC.
440 At the end of the atomic section we tried to write to reg_ALLOC,
441 got a SIGSEGV (there's nothing mapped there) so ended up here. */
443 *os_context_register_addr(context, reg_ALLOC) & (1L<<63)) {
444 *os_context_register_addr(context, reg_ALLOC) -= (1L<<63);
445 interrupt_handle_pending(context);
450 #ifdef LISP_FEATURE_SB_SAFEPOINT
451 if (!handle_safepoint_violation(context, addr))
454 #ifdef LISP_FEATURE_GENCGC
455 if (!gencgc_handle_wp_violation(addr))
457 if (!cheneygc_handle_wp_violation(context, addr))
459 if (!handle_guard_page_triggered(context, addr))
460 lisp_memory_fault_error(context, addr);
/* Install the low-level signal handlers this OS layer needs, through
 * undoably_install_low_level_interrupt_handler():
 *   - a handler for SIG_MEMORY_FAULT (the handler argument is on a line
 *     missing from this excerpt);
 *   - inside the SB_THREAD / SB_SAFEPOINT / SB_THRUPTION conditionals,
 *     thruption_handler for SIGPIPE;
 *   - sig_stop_for_gc_handler for SIG_STOP_FOR_GC (the conditional guarding
 *     this call is only partly visible). */
464 os_install_interrupt_handlers(void)
466 undoably_install_low_level_interrupt_handler(SIG_MEMORY_FAULT,
469 /* OAOOM c.f. sunos-os.c.
470 * Should we have a reusable function gc_install_interrupt_handlers? */
471 #ifdef LISP_FEATURE_SB_THREAD
472 # ifdef LISP_FEATURE_SB_SAFEPOINT
473 # ifdef LISP_FEATURE_SB_THRUPTION
474 undoably_install_low_level_interrupt_handler(SIGPIPE, thruption_handler);
477 undoably_install_low_level_interrupt_handler(SIG_STOP_FOR_GC,
478 sig_stop_for_gc_handler);
/* Return the path of the running executable.
 *
 *   external - not referenced in the lines visible in this excerpt
 *
 * Resolves the /proc/self/exe symlink with readlink() into a local buffer,
 * passing sizeof(path)-1 so that one byte stays free for a terminator, and
 * returns copied_string(path).  The handling of a readlink() failure and
 * the terminating store are not visible here. */
484 os_get_runtime_executable_path(int external)
486 char path[PATH_MAX + 1];
489 size = readlink("/proc/self/exe", path, sizeof(path)-1);
495 return copied_string(path);
498 #ifdef LISP_FEATURE_SB_WTIMER
500 * Waitable timer implementation for the safepoint-based (SIGALRM-free)
501 * timer facility using timerfd_create().
/* Body of the waitable-timer constructor; the function header is missing
 * from this excerpt and the name os_create_wtimer is taken from the error
 * strings.  Creates a CLOCK_MONOTONIC timerfd and marks it close-on-exec
 * with fcntl(); failure of either step aborts through lose().  The test
 * guarding the first lose() call is not visible here. */
506 int fd = timerfd_create(CLOCK_MONOTONIC, 0);
508 lose("os_create_wtimer: timerfd_create");
510 /* Cannot count on TFD_CLOEXEC availability, so do it manually: */
511 if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
512 lose("os_create_wtimer: fcntl");
/* Read from the timer descriptor fd into an 8-byte buffer.
 * timerfd_create(2) documents that a read returns an 8-byte expiration
 * count and blocks until the timer has expired at least once.
 *
 * A result other than sizeof(buf) bytes aborts through lose(); the test
 * guarding the first lose() call is not visible in this excerpt.
 * NOTE(review): both error strings name os_wtimer_listen rather than this
 * function. */
518 os_wait_for_wtimer(int fd)
520 unsigned char buf[8];
521 int n = read(fd, buf, sizeof(buf));
525 lose("os_wtimer_listen failed");
527 if (n != sizeof(buf))
528 lose("os_wtimer_listen read too little");
/* Dispose of the waitable timer descriptor fd.  Only the failure path is
 * visible in this excerpt: it aborts through lose().  The call that
 * releases fd and the test guarding lose() are on lines missing here. */
533 os_close_wtimer(int fd)
536 lose("os_close_wtimer failed");
/* Arm the timer descriptor fd as a one-shot timer.
 *
 *   fd   - timerfd descriptor
 *   sec  - seconds part of the expiry time
 *   nsec - nanoseconds part of the expiry time
 *
 * it_interval stays zero, so the timer does not repeat, and the flags
 * argument of timerfd_settime() is 0.  A failing timerfd_settime() aborts
 * through lose(). */
540 os_set_wtimer(int fd, int sec, int nsec)
542 struct itimerspec spec = { {0,0}, {0,0} };
543 spec.it_value.tv_sec = sec;
544 spec.it_value.tv_nsec = nsec;
545 if (timerfd_settime(fd, 0, &spec, 0) == -1)
546 lose("timerfd_settime");
/* Cancel the timer on fd by calling os_set_wtimer() with a zero expiry
 * time; timerfd_settime(2) documents an all-zero it_value as disarming the
 * timer. */
550 os_cancel_wtimer(int fd)
552 os_set_wtimer(fd, 0, 0);