Optionally mark pages from cores as de-duplicatable
[sbcl.git] / src / runtime / coreparse.c
1 /*
2  * A saved SBCL system is a .core file; the code here helps us accept
3  * such a file as input.
4  */
5
6 /*
7  * This software is part of the SBCL system. See the README file for
8  * more information.
9  *
10  * This software is derived from the CMU CL system, which was
11  * written at Carnegie Mellon University and released into the
12  * public domain. The software is in the public domain and is
13  * provided with absolutely no warranty. See the COPYING and CREDITS
14  * files for more information.
15  */
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #ifdef LISP_FEATURE_LINUX
26 /* For madvise */
27 # define _BSD_SOURCE
28 #endif
29 #include <sys/mman.h>
30
31 #include "sbcl.h"
32 #include "os.h"
33 #include "runtime.h"
34 #include "globals.h"
35 #include "core.h"
36 #include "arch.h"
37 #include "interr.h"
38 #include "thread.h"
39
40 #include "validate.h"
41 #include "gc-internal.h"
42
43 /* lutex stuff */
44 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_LUTEX)
45 #include "genesis/sap.h"
46 #include "pthread-lutex.h"
47 #endif
48
49 #include <errno.h>
50
51 #ifdef LISP_FEATURE_SB_CORE_COMPRESSION
52 # include <zlib.h>
53 #endif
54
/* Build identifier of this runtime. Its initializer is pulled in at
 * compile time from ../../output/build-id.tmp. load_core_file()
 * compares it against the BUILD_ID entry of a core file and refuses
 * to load a core whose identifier differs. */
unsigned char build_id[] =
#include "../../output/build-id.tmp"
;
58
/* Open 'filename' using the open(2) flags given in 'mode'. When built
 * with LISP_FEATURE_WIN32, O_BINARY is added to the flags first.
 * Returns whatever open() returns (a descriptor, or -1 on failure). */
int
open_binary(char *filename, int mode)
{
    int flags = mode;

#ifdef LISP_FEATURE_WIN32
    flags |= O_BINARY;
#endif

    return open(filename, flags);
}
68
69
70 static struct runtime_options *
71 read_runtime_options(int fd)
72 {
73     size_t optarray[RUNTIME_OPTIONS_WORDS];
74     struct runtime_options *options = NULL;
75
76     if (read(fd, optarray, RUNTIME_OPTIONS_WORDS * sizeof(size_t)) !=
77         RUNTIME_OPTIONS_WORDS * sizeof(size_t)) {
78         return NULL;
79     }
80
81     if ((RUNTIME_OPTIONS_MAGIC != optarray[0]) || (0 == optarray[1])) {
82         return NULL;
83     }
84
85     options = successful_malloc(sizeof(struct runtime_options));
86
87     options->dynamic_space_size = optarray[2];
88     options->thread_control_stack_size = optarray[3];
89
90     return options;
91 }
92
93 void
94 maybe_initialize_runtime_options(int fd)
95 {
96     struct runtime_options *new_runtime_options;
97     off_t end_offset = sizeof(lispobj) +
98         sizeof(os_vm_offset_t) +
99         (RUNTIME_OPTIONS_WORDS * sizeof(size_t));
100
101     lseek(fd, -end_offset, SEEK_END);
102
103     if (new_runtime_options = read_runtime_options(fd)) {
104         runtime_options = new_runtime_options;
105     }
106 }
107
108 /* Search 'filename' for an embedded core.  An SBCL core has, at the
109  * end of the file, a trailer containing optional saved runtime
110  * options, the start of the core (an os_vm_offset_t), and a final
111  * signature word (the lispobj CORE_MAGIC).  If this trailer is found
112  * at the end of the file, the start of the core can be determined
113  * from the core size.
114  *
115  * If an embedded core is present, this returns the offset into the
116  * file to load the core from, or -1 if no core is present. */
117 os_vm_offset_t
118 search_for_embedded_core(char *filename)
119 {
120     lispobj header;
121     os_vm_offset_t lispobj_size = sizeof(lispobj);
122     os_vm_offset_t trailer_size = lispobj_size + sizeof(os_vm_offset_t);
123     os_vm_offset_t core_start, pos;
124     int fd = -1;
125
126     if ((fd = open_binary(filename, O_RDONLY)) < 0)
127         goto lose;
128
129     if (read(fd, &header, (size_t)lispobj_size) < lispobj_size)
130         goto lose;
131     if (header == CORE_MAGIC) {
132         /* This file is a real core, not an embedded core.  Return 0 to
133          * indicate where the core starts, and do not look for runtime
134          * options in this case. */
135         return 0;
136     }
137
138     if (lseek(fd, -lispobj_size, SEEK_END) < 0)
139         goto lose;
140     if (read(fd, &header, (size_t)lispobj_size) < lispobj_size)
141         goto lose;
142
143     if (header == CORE_MAGIC) {
144         if (lseek(fd, -trailer_size, SEEK_END) < 0)
145             goto lose;
146         if (read(fd, &core_start, sizeof(os_vm_offset_t)) < 0)
147             goto lose;
148
149         if (lseek(fd, core_start, SEEK_SET) < 0)
150             goto lose;
151         pos = lseek(fd, 0, SEEK_CUR);
152
153         if (read(fd, &header, (size_t)lispobj_size) < lispobj_size)
154             goto lose;
155
156         if (header != CORE_MAGIC)
157             goto lose;
158
159         maybe_initialize_runtime_options(fd);
160
161         close(fd);
162         return pos;
163     }
164
165 lose:
166     if (fd != -1)
167         close(fd);
168
169     return -1;
170 }
171
/* If more platforms turn out not to support overlapping mmap, rename
 * this conditional to something like "#ifdef nommapoverlap". */
/* currently hpux only */
#ifdef LISP_FEATURE_HPUX
/* Copy 'len' bytes of the core file open on 'fd', starting at file
 * position 'offset', into memory at 'addr', 4096 bytes at a time, and
 * then flush the instruction cache for that range.
 *
 * 'len' must be a multiple of 4096. Returns 'addr'. The process exits
 * if 'len' is not a multiple of 4096, if the file cannot be positioned
 * at 'offset', or if any 4096-byte chunk cannot be read in full. */
os_vm_address_t copy_core_bytes(int fd, os_vm_offset_t offset,
                                os_vm_address_t addr, int len)
{
  unsigned char buf[4096];
  int c,x;

  if(len & (4096-1)){
    fprintf(stderr, "cant copy a slice of core because slice-length is not of page size(4096)\n");
    exit(-1);
  }
  /* Probe that the descriptor is seekable at all; only reported. */
  if(lseek(fd, 0, SEEK_CUR) < 0){
    fprintf(stderr, "cant perform lseek() on corefile\n");
  }
  /* Check the result of the seek itself (not 'fd'): reading from the
   * wrong position would silently load the wrong bytes. */
  if(lseek(fd, offset, SEEK_SET) < 0){
    fprintf(stderr, "cant perform lseek(%d,%lu,SEEK_SET) on corefile\n",
            fd, (unsigned long)offset);
    exit(-1);
  }
  for(x = 0; x < len; x += 4096){
    c = read(fd, buf, 4096);
    if(c != 4096){
      fprintf(stderr, "cant read memory area from corefile at position %lu, got %d\n",
              (unsigned long)(offset + x), c);
      exit(-1);
    }
    memcpy(addr+x, buf, 4096);
  }
  os_flush_icache(addr, len);
  return addr;
}
#endif
206
#ifdef LISP_FEATURE_SB_CORE_COMPRESSION
# define ZLIB_BUFFER_SIZE (1u<<16)
/* Inflate zlib-compressed data from the core file open on 'fd',
 * starting at file position 'offset', into the 'len' bytes of memory
 * at 'addr'.
 *
 * Input is fed to zlib in chunks of ZLIB_BUFFER_SIZE bytes until zlib
 * reports the end of the stream or the file runs out. If fewer than
 * 'len' bytes were produced, the rest of the destination is
 * zero-filled, with a warning when at least a page is missing.
 *
 * Returns 'addr'. Calls lose() if the file cannot be positioned or
 * read, if zlib reports an error, if the output area fills up before
 * the stream ends, or if zlib leaves input unconsumed. */
os_vm_address_t inflate_core_bytes(int fd, os_vm_offset_t offset,
                                   os_vm_address_t addr, int len)
{
    z_stream stream;
    unsigned char buf[ZLIB_BUFFER_SIZE];
    int ret;

    if (-1 == lseek(fd, offset, SEEK_SET)) {
        lose("Unable to lseek() on corefile\n");
    }

    /* These fields are filled in before inflateInit() is called. */
    stream.zalloc = NULL;
    stream.zfree = NULL;
    stream.opaque = NULL;
    stream.avail_in = 0;
    stream.next_in = buf;

    ret = inflateInit(&stream);
    if (ret != Z_OK)
        lose("zlib error %i\n", ret);

    stream.next_out  = (void*)addr;
    stream.avail_out = len;
    do {
        ssize_t count = read(fd, buf, sizeof(buf));
        if (count < 0)
            lose("unable to read core file (errno = %i)\n", errno);
        stream.next_in = buf;
        stream.avail_in = count;
        /* End of file before the end of the stream: stop and let the
         * shortfall handling below deal with it. */
        if (count == 0) break;
        ret = inflate(&stream, Z_NO_FLUSH);
        switch (ret) {
        case Z_STREAM_END:
            break;
        case Z_OK:
            if (stream.avail_out == 0)
                lose("Runaway gzipped core directory... aborting\n");
            if (stream.avail_in > 0)
                lose("zlib inflate returned without fully "
                     "using up input buffer... aborting\n");
            break;
        default:
            lose("zlib inflate error: %i\n", ret);
            break;
        }
    } while (ret != Z_STREAM_END);

    if (stream.avail_out > 0) {
        if (stream.avail_out >= os_vm_page_size)
            fprintf(stderr, "Warning: gzipped core directory significantly "
                    "shorter than expected (%lu bytes)\n",
                    (unsigned long)stream.avail_out);
        /* Is this needed? */
        memset(stream.next_out, 0, stream.avail_out);
    }

    inflateEnd(&stream);
    return addr;
}
# undef ZLIB_BUFFER_SIZE
#endif
269
270 int merge_core_pages = -1;
271
272 static void
273 process_directory(int fd, lispobj *ptr, int count, os_vm_offset_t file_offset)
274 {
275     struct ndir_entry *entry;
276     int compressed;
277
278     FSHOW((stderr, "/process_directory(..), count=%d\n", count));
279
280     for (entry = (struct ndir_entry *) ptr; --count>= 0; ++entry) {
281         compressed = 0;
282         long id = entry->identifier;
283         if (id <= (MAX_CORE_SPACE_ID | DEFLATED_CORE_SPACE_ID_FLAG)) {
284             if (id & DEFLATED_CORE_SPACE_ID_FLAG)
285                 compressed = 1;
286             id &= ~(DEFLATED_CORE_SPACE_ID_FLAG);
287         }
288         long offset = os_vm_page_size * (1 + entry->data_page);
289         os_vm_address_t addr =
290             (os_vm_address_t) (os_vm_page_size * entry->address);
291         lispobj *free_pointer = (lispobj *) addr + entry->nwords;
292         unsigned long len = os_vm_page_size * entry->page_count;
293         if (len != 0) {
294             os_vm_address_t real_addr;
295             FSHOW((stderr, "/mapping %ld(0x%lx) bytes at 0x%lx\n",
296                    (long)len, (long)len, (unsigned long)addr));
297             if (compressed) {
298 #ifdef LISP_FEATURE_SB_CORE_COMPRESSION
299                 real_addr = inflate_core_bytes(fd, offset + file_offset, addr, len);
300 #else
301                 lose("This runtime was not built with zlib-compressed core support... aborting\n");
302 #endif
303             } else {
304 #ifdef LISP_FEATURE_HPUX
305                 real_addr = copy_core_bytes(fd, offset + file_offset, addr, len);
306 #else
307                 real_addr = os_map(fd, offset + file_offset, addr, len);
308 #endif
309             }
310             if (real_addr != addr) {
311                 lose("file mapped in wrong place! "
312                      "(0x%08x != 0x%08lx)\n",
313                      real_addr,
314                      addr);
315             }
316         }
317
318 #ifdef MADV_MERGEABLE
319         if ((merge_core_pages == 1)
320             || ((merge_core_pages == -1) && compressed)) {
321                 madvise(addr, len, MADV_MERGEABLE);
322         }
323 #endif
324
325         FSHOW((stderr, "/space id = %ld, free pointer = 0x%lx\n",
326                id, (unsigned long)free_pointer));
327
328         switch (id) {
329         case DYNAMIC_CORE_SPACE_ID:
330             if (len > dynamic_space_size) {
331                 fprintf(stderr,
332                         "dynamic space too small for core: %ldKiB required, %ldKiB available.\n",
333                         len >> 10,
334                         (long)dynamic_space_size >> 10);
335                 exit(1);
336             }
337 #ifdef LISP_FEATURE_GENCGC
338             if (addr != (os_vm_address_t)DYNAMIC_SPACE_START) {
339                 fprintf(stderr, "in core: 0x%lx; in runtime: 0x%lx \n",
340                         (long)addr, (long)DYNAMIC_SPACE_START);
341                 lose("core/runtime address mismatch: DYNAMIC_SPACE_START\n");
342             }
343 #else
344             if ((addr != (os_vm_address_t)DYNAMIC_0_SPACE_START) &&
345                 (addr != (os_vm_address_t)DYNAMIC_1_SPACE_START)) {
346                 fprintf(stderr, "in core: 0x%lx; in runtime: 0x%lx or 0x%lx\n",
347                         (long)addr,
348                         (long)DYNAMIC_0_SPACE_START,
349                         (long)DYNAMIC_1_SPACE_START);
350                 lose("warning: core/runtime address mismatch: DYNAMIC_SPACE_START\n");
351             }
352 #endif
353 #if defined(ALLOCATION_POINTER)
354             SetSymbolValue(ALLOCATION_POINTER, (lispobj)free_pointer,0);
355 #else
356             dynamic_space_free_pointer = free_pointer;
357 #endif
358             /* For stop-and-copy GC, this will be whatever the GC was
359              * using at the time. With GENCGC, this will always be
360              * space 0. (We checked above that for GENCGC,
361              * addr==DYNAMIC_SPACE_START.) */
362             current_dynamic_space = (lispobj *)addr;
363             break;
364         case STATIC_CORE_SPACE_ID:
365             if (addr != (os_vm_address_t)STATIC_SPACE_START) {
366                 fprintf(stderr, "in core: 0x%lx - in runtime: 0x%lx\n",
367                         (long)addr, (long)STATIC_SPACE_START);
368                 lose("core/runtime address mismatch: STATIC_SPACE_START\n");
369             }
370             break;
371         case READ_ONLY_CORE_SPACE_ID:
372             if (addr != (os_vm_address_t)READ_ONLY_SPACE_START) {
373                 fprintf(stderr, "in core: 0x%lx - in runtime: 0x%lx\n",
374                         (long)addr, (long)READ_ONLY_SPACE_START);
375                 lose("core/runtime address mismatch: READ_ONLY_SPACE_START\n");
376             }
377             break;
378         default:
379             lose("unknown space ID %ld addr 0x%lx\n", id, (long)addr);
380         }
381     }
382 }
383
384 lispobj
385 load_core_file(char *file, os_vm_offset_t file_offset)
386 {
387     lispobj *header, val, len, *ptr, remaining_len;
388     int fd = open_binary(file, O_RDONLY);
389     unsigned int count;
390
391     lispobj initial_function = NIL;
392     FSHOW((stderr, "/entering load_core_file(%s)\n", file));
393     if (fd < 0) {
394         fprintf(stderr, "could not open file \"%s\"\n", file);
395         perror("open");
396         exit(1);
397     }
398
399     lseek(fd, file_offset, SEEK_SET);
400     header = calloc(os_vm_page_size / sizeof(u32), sizeof(u32));
401
402     count = read(fd, header, os_vm_page_size);
403     if (count < os_vm_page_size) {
404         lose("premature end of core file\n");
405     }
406     SHOW("successfully read first page of core");
407
408     ptr = header;
409     val = *ptr++;
410
411     if (val != CORE_MAGIC) {
412         lose("invalid magic number in core: 0x%lx should have been 0x%x.\n",
413              val,
414              CORE_MAGIC);
415     }
416     SHOW("found CORE_MAGIC");
417
418     while (val != END_CORE_ENTRY_TYPE_CODE) {
419         val = *ptr++;
420         len = *ptr++;
421         remaining_len = len - 2; /* (-2 to cancel the two ++ operations) */
422         FSHOW((stderr, "/val=0x%ld, remaining_len=0x%ld\n",
423                (long)val, (long)remaining_len));
424
425         switch (val) {
426
427         case END_CORE_ENTRY_TYPE_CODE:
428             SHOW("END_CORE_ENTRY_TYPE_CODE case");
429             break;
430
431         case VERSION_CORE_ENTRY_TYPE_CODE:
432             SHOW("VERSION_CORE_ENTRY_TYPE_CODE case");
433             if (*ptr != SBCL_CORE_VERSION_INTEGER) {
434                 lose("core file version (%d) != runtime library version (%d)\n",
435                      *ptr,
436                      SBCL_CORE_VERSION_INTEGER);
437             }
438             break;
439
440         case BUILD_ID_CORE_ENTRY_TYPE_CODE:
441             SHOW("BUILD_ID_CORE_ENTRY_TYPE_CODE case");
442             {
443                 unsigned int i;
444
445                 FSHOW((stderr, "build_id[]=\"%s\"\n", build_id));
446                 FSHOW((stderr, "remaining_len = %d\n", remaining_len));
447                 if (remaining_len != strlen((const char *)build_id))
448                     goto losing_build_id;
449                 for (i = 0; i < remaining_len; ++i) {
450                     FSHOW((stderr, "ptr[%d] = char = %d, expected=%d\n",
451                            i, ptr[i], build_id[i]));
452                     if (ptr[i] != build_id[i])
453                         goto losing_build_id;
454                 }
455                 break;
456             losing_build_id:
457                 /* .core files are not binary-compatible between
458                  * builds because we can't easily detect whether the
459                  * sources were patched between the time the
460                  * dumping-the-.core runtime was built and the time
461                  * that the loading-the-.core runtime was built.
462                  *
463                  * (We could easily detect whether version.lisp-expr
464                  * was changed, but people experimenting with patches
465                  * don't necessarily update version.lisp-expr.) */
466
467                 lose("can't load .core for different runtime, sorry\n");
468             }
469
470         case NEW_DIRECTORY_CORE_ENTRY_TYPE_CODE:
471             SHOW("NEW_DIRECTORY_CORE_ENTRY_TYPE_CODE case");
472             process_directory(fd,
473                               ptr,
474 #ifndef LISP_FEATURE_ALPHA
475                               remaining_len / (sizeof(struct ndir_entry) /
476                                                sizeof(long)),
477 #else
478                               remaining_len / (sizeof(struct ndir_entry) /
479                                                sizeof(u32)),
480 #endif
481                               file_offset);
482             break;
483
484         case INITIAL_FUN_CORE_ENTRY_TYPE_CODE:
485             SHOW("INITIAL_FUN_CORE_ENTRY_TYPE_CODE case");
486             initial_function = (lispobj)*ptr;
487             break;
488
489 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_SB_LUTEX)
490         case LUTEX_TABLE_CORE_ENTRY_TYPE_CODE:
491             SHOW("LUTEX_TABLE_CORE_ENTRY_TYPE_CODE case");
492             {
493                 size_t n_lutexes = *ptr;
494                 size_t fdoffset = (*(ptr + 1) + 1) * (os_vm_page_size);
495                 size_t data_length = n_lutexes * sizeof(struct sap *);
496                 struct lutex **lutexes_to_resurrect = malloc(data_length);
497                 long bytes_read;
498
499                 lseek(fd, fdoffset + file_offset, SEEK_SET);
500
501                 FSHOW((stderr, "attempting to read %ld lutexes from core\n", n_lutexes));
502                 bytes_read = read(fd, lutexes_to_resurrect, data_length);
503
504                 /* XXX */
505                 if (bytes_read != data_length) {
506                     lose("Could not read the lutex table");
507                 }
508                 else {
509                     int i;
510
511                     for (i=0; i<n_lutexes; ++i) {
512                         struct lutex *lutex = lutexes_to_resurrect[i];
513
514                         FSHOW((stderr, "re-init'ing lutex @ %p\n", lutex));
515                         lutex_init((tagged_lutex_t) lutex);
516                     }
517
518                     free(lutexes_to_resurrect);
519                 }
520                 break;
521             }
522 #endif
523
524 #ifdef LISP_FEATURE_GENCGC
525         case PAGE_TABLE_CORE_ENTRY_TYPE_CODE:
526         {
527             size_t size = *ptr;
528             size_t fdoffset = (*(ptr+1) + 1) * (os_vm_page_size);
529             size_t offset = 0;
530             long bytes_read;
531             unsigned long data[4096];
532             unsigned long word;
533             lseek(fd, fdoffset + file_offset, SEEK_SET);
534             while ((bytes_read = read(fd, data, (size < 4096 ? size : 4096 )))
535                     > 0)
536             {
537                 int i = 0;
538                 size -= bytes_read;
539                 while (bytes_read) {
540                     bytes_read -= sizeof(long);
541                     /* Ignore all zeroes. The size of the page table
542                      * core entry was rounded up to os_vm_page_size
543                      * during the save, and might now have more
544                      * elements than the page table.
545                      *
546                      * The low bits of each word are allocation flags.
547                      */
548                     if (word=data[i]) {
549                         page_table[offset].region_start_offset = word & ~0x03;
550                         page_table[offset].allocated = word & 0x03;
551                     }
552                     i++;
553                     offset++;
554                 }
555             }
556
557             gencgc_partial_pickup = 1;
558             break;
559         }
560 #endif
561         default:
562             lose("unknown core file entry: %ld\n", (long)val);
563         }
564
565         ptr += remaining_len;
566         FSHOW((stderr, "/new ptr=%lx\n", (unsigned long)ptr));
567     }
568     SHOW("about to free(header)");
569     free(header);
570     SHOW("returning from load_core_file(..)");
571     return initial_function;
572 }
573