The implementation is based on cmucl's weak hash table code.
* scav_vector defers scavenging of weak hash tables until ...
* ... newspace scavenging at which time the deferred weak hash
tables are scavenged according to their WEAKNESS type (this
happens after each scan of newspace)
* finally just before weak pointers are scanned (i.e. with the
purpose of breaking them) the weak hash tables are scanned (i.e.
the appropriate entries are removed) too.
;;;; -*- coding: utf-8; -*-
changes in sbcl-0.9.17 (0.9.99?) relative to sbcl-0.9.16:
+ * feature: weak hash tables, see MAKE-HASH-TABLE documentation
* incompatible change: External-format support for FFI calls. The
SB-ALIEN:C-STRING no longer implies an ASCII
external-format. Instead the string is subject to external-format
;; weak pointers and finalization
"CANCEL-FINALIZATION"
"FINALIZE"
- "HASH-TABLE-WEAK-P" "MAKE-WEAK-POINTER"
+ "HASH-TABLE-WEAKNESS" "MAKE-WEAK-POINTER"
"WEAK-POINTER" "WEAK-POINTER-P" "WEAK-POINTER-VALUE"
;; If the user knows we're doing IEEE, he might reasonably
(number-entries 0 :type index)
;; The Key-Value pair vector.
(table (missing-arg) :type simple-vector)
- ;; True if this is a weak hash table, meaning that key->value
- ;; mappings will disappear if there are no other references to the
- ;; key. Note: this only matters if the hash function indicates that
- ;; the hashing is EQ based.
- (weak-p nil :type (member t nil))
+ ;; This slot is used to link weak hash tables during GC. When the GC
+ ;; isn't running it is always NIL.
+ (next-weak-hash-table nil :type null)
+ ;; Non-NIL if this is some kind of weak hash table. For details see
+ ;; the docstring of MAKE-HASH-TABLE.
+ (weakness nil :type (member nil :key :value :key-or-value :key-and-value)
+ :read-only t)
;; Index into the next-vector, chaining together buckets that need
;; to be rehashed because their hashing is EQ based and the key has
;; been moved by the garbage collector.
(defconstant +min-hash-table-rehash-threshold+ (float 1/16 1.0))
(defun make-hash-table (&key (test 'eql)
- (size +min-hash-table-size+)
- (rehash-size 1.5)
- (rehash-threshold 1)
- (weak-p nil))
+ (size +min-hash-table-size+)
+ (rehash-size 1.5)
+ (rehash-threshold 1)
+ (weakness nil))
#!+sb-doc
"Create and return a new hash table. The keywords are as follows:
:TEST -- Indicates what kind of test to use.
forcing a rehash. Can be any positive number <=1, with density
approaching zero as the threshold approaches 0. Density 1 means an
average of one entry per bucket.
- :WEAK-P -- (This is an extension from CMU CL, not currently supported
- in SBCL 0.6.6, but perhaps supported in a future version.) If T,
- don't keep entries if the key would otherwise be garbage."
+ :WEAKNESS -- IF NIL (the default) it is a normal non-weak hash table.
+ If one of :KEY, :VALUE, :KEY-AND-VALUE, :KEY-OR-VALUE it is a weak
+ hash table.
+ Depending on the type of weakness the lack of references to the
+ key and the value may allow for removal of the entry. If WEAKNESS
+ is :KEY and the key would otherwise be garbage the entry is eligible
+ for removal from the hash table. Similarly, if WEAKNESS is :VALUE
+ the life of an entry depends on its value's references. If WEAKNESS
+ is :KEY-AND-VALUE and either the key or the value would otherwise be
+ garbage the entry can be removed. If WEAKNESS is :KEY-OR-VALUE and
+ both the key and the value would otherwise be garbage the entry can
+ be removed."
(declare (type (or function symbol) test))
(declare (type unsigned-byte size))
- (when weak-p
- (error "stub: unsupported WEAK-P option"))
(multiple-value-bind (test test-fun hash-fun)
(cond ((or (eq test #'eq) (eq test 'eq))
(values 'eq #'eq #'eq-hash))
;; Failing that, I'd like to rename it to
;; *USER-HASH-TABLE-TESTS*.
(dolist (info *hash-table-tests*
- (error "unknown :TEST for MAKE-HASH-TABLE: ~S"
- test))
+ (error "unknown :TEST for MAKE-HASH-TABLE: ~S"
+ test))
(destructuring-bind (test-name test-fun hash-fun) info
(when (or (eq test test-name) (eq test test-fun))
(return (values test-name test-fun hash-fun)))))))
;; boxing.
(rehash-threshold (max +min-hash-table-rehash-threshold+
(float rehash-threshold 1.0)))
- (size+1 (1+ size)) ; The first element is not usable.
+ (size+1 (1+ size)) ; The first element is not usable.
;; KLUDGE: The most natural way of expressing the below is
;; (round (/ (float size+1) rehash-threshold)), and indeed
;; it was expressed like that until 0.7.0. However,
:element-type
'(unsigned-byte #.sb!vm:n-word-bits)
:initial-element 0))
- ;; needs to be the same length as the KV vector
- ;; (FIXME: really? why doesn't the code agree?)
+ ;; Needs to be the half the length of the KV vector to link
+ ;; KV entries - mapped to indeces at 2i and 2i+1 -
+ ;; together.
(next-vector (make-array size+1
:element-type
'(unsigned-byte #.sb!vm:n-word-bits)))
:rehash-threshold rehash-threshold
:rehash-trigger size
:table kv-vector
- :weak-p weak-p
+ :weakness weakness
:index-vector index-vector
:next-vector next-vector
:hash-vector
"Return the test HASH-TABLE was created with.")
#!+sb-doc
-(setf (fdocumentation 'hash-table-weak-p 'function)
- "Return T if HASH-TABLE will not keep entries for keys that would
- otherwise be garbage, and NIL if it will.")
+(setf (fdocumentation 'hash-table-weakness 'function)
+ "Return the WEAKNESS of HASH-TABLE which is one of NIL, :KEY,
+:VALUE, :KEY-AND-VALUE, :KEY-OR-VALUE.")
\f
;;;; accessing functions
(old-index-vector (hash-table-index-vector table))
(new-length (almost-primify
(truncate (/ (float new-size)
- (hash-table-rehash-threshold table)))))
+ (hash-table-rehash-threshold table)))))
(new-index-vector
(make-array new-length
:element-type '(unsigned-byte #.sb!vm:n-word-bits)
;; Disable GC tricks on the OLD-KV-VECTOR.
(set-header-data old-kv-vector sb!vm:vector-normal-subtype)
+ ;; Non-empty weak hash tables always need GC support.
+ (when (and (hash-table-weakness table) (plusp (hash-table-count table)))
+ (set-header-data new-kv-vector sb!vm:vector-valid-hashing-subtype))
+
;; FIXME: here and in several other places in the hash table code,
;; loops like this one are used when FILL or REPLACE would be
;; appropriate. why are standard CL functions not used?
(length (length index-vector)))
(declare (type index size length))
- ;; Disable GC tricks, they will be re-enabled during the re-hash
- ;; if necesary.
- (set-header-data kv-vector sb!vm:vector-normal-subtype)
+ ;; Non-empty weak hash tables always need GC support.
+ (unless (and (hash-table-weakness table) (plusp (hash-table-count table)))
+ ;; Disable GC tricks, they will be re-enabled during the re-hash
+ ;; if necessary.
+ (set-header-data kv-vector sb!vm:vector-normal-subtype))
;; Rehash all the entries.
(setf (hash-table-next-free-kv table) 0)
(hash-vector (hash-table-hash-vector hash-table))
(test-fun (hash-table-test-fun hash-table)))
(declare (type index index))
-
+ (when (hash-table-weakness hash-table)
+ (set-header-data kv-vector sb!vm:vector-valid-hashing-subtype))
(cond ((or eq-based (not hash-vector))
(when eq-based
(set-header-data kv-vector
;; Set up the free list, all free.
(do ((i 1 (1+ i)))
((>= i (1- size)))
- (setf (aref next-vector i) (1+ i)))
+ (setf (aref next-vector i) (1+ i)))
(setf (aref next-vector (1- size)) 0)
(setf (hash-table-next-free-kv hash-table) 1)
(setf (hash-table-needing-rehash hash-table) 0)
(let* ((kv-vector (hash-table-table hash-table))
(key (aref kv-vector (* 2 i)))
(value (aref kv-vector (1+ (* 2 i)))))
- (unless (and (eq key +empty-ht-slot+)
- (eq value +empty-ht-slot+))
+ ;; We are running without locking or WITHOUT-GCING. For a weak
+ ;; :VALUE hash table it's possible that the GC hit after KEY
+ ;; was read and now the entry is gone. So check if either the
+ ;; key or the value is empty.
+ (unless (or (eq key +empty-ht-slot+)
+ (eq value +empty-ht-slot+))
(funcall fun key value))))))
\f
;;;; methods on HASH-TABLE
;;; Return a list of keyword args and values to use for MAKE-HASH-TABLE
;;; when reconstructing HASH-TABLE.
(defun %hash-table-ctor-args (hash-table)
- (when (hash-table-weak-p hash-table)
- ;; FIXME: This might actually work with no trouble, but as of
- ;; sbcl-0.6.12.10 when this code was written, weak hash tables
- ;; weren't working yet, so I couldn't test it. When weak hash
- ;; tables are supported again, this should be fixed.
- (error "can't dump weak hash tables readably")) ; defensive programming..
`(:test ',(hash-table-test hash-table)
:size ',(hash-table-size hash-table)
:rehash-size ',(hash-table-rehash-size hash-table)
- :rehash-threshold ',(hash-table-rehash-threshold hash-table)))
+ :rehash-threshold ',(hash-table-rehash-threshold hash-table)
+ :weakness ',(hash-table-weakness hash-table)))
;;; Return an association list representing the same data as HASH-TABLE.
(defun %hash-table-alist (hash-table)
(&key (:test callable) (:size unsigned-byte)
(:rehash-size (or (integer 1) (float (1.0))))
(:rehash-threshold (real 0 1))
- (:weak-p t))
+ (:weakness (member nil :key :value :key-and-value :key-or-value)))
hash-table
(flushable unsafe))
(defknown hash-table-p (t) boolean (movable foldable flushable))
sb!unix::*interrupts-enabled*
sb!unix::*interrupt-pending*
*gc-inhibit*
- *gc-pending*))
+ *gc-pending*
+
+ ;; hash table weaknesses
+ :key
+ :value
+ :key-and-value
+ :key-or-value))
sb!alien::*enter-alien-callback*
;; see comments in ../x86-64/parms.lisp
- sb!pcl::..slot-unbound..
- )))
+ sb!pcl::..slot-unbound..)))
(defparameter *static-funs*
'(length
/* Scan the weak pointers. */
#ifdef PRINTNOISE
+ printf("Scanning weak hash tables ...\n");
+#endif
+ scan_weak_hash_tables();
+
+ /* Scan the weak pointers. */
+#ifdef PRINTNOISE
printf("Scanning weak pointers ...\n");
#endif
scan_weak_pointers();
-
/* Flip spaces. */
#ifdef PRINTNOISE
printf("Flipping spaces ...\n");
here,new_space_free_pointer); */
next = new_space_free_pointer;
scavenge(here, next - here);
+ scav_weak_hash_tables();
here = next;
}
/* printf("done with newspace\n"); */
}
\f
-/* vector-like objects */
-
-static long
-scav_vector(lispobj *where, lispobj object)
-{
- if (HeaderValue(object) == subtype_VectorValidHashing) {
- *where =
- (subtype_VectorMustRehash<<N_WIDETAG_BITS) | SIMPLE_VECTOR_WIDETAG;
- }
-
- return 1;
-}
-
-\f
/* weak pointers */
#define WEAK_POINTER_NWORDS \
gc_init(void)
{
gc_init_tables();
- scavtab[SIMPLE_VECTOR_WIDETAG] = scav_vector;
scavtab[WEAK_POINTER_WIDETAG] = scav_weak_pointer;
}
#include "genesis/primitive-objects.h"
#include "genesis/static-symbols.h"
#include "genesis/layout.h"
+#include "genesis/hash-table.h"
#include "gc-internal.h"
#ifdef LISP_FEATURE_SPARC
}
}
+\f
+/* Hash tables */
+
+#if N_WORD_BITS == 32
+#define EQ_HASH_MASK 0x1fffffff
+#elif N_WORD_BITS == 64
+#define EQ_HASH_MASK 0x1fffffffffffffff
+#endif
+
+/* Compute the EQ-hash of KEY. This must match POINTER-HASH in
+ * target-hash-table.lisp. */
+#define EQ_HASH(key) ((key) & EQ_HASH_MASK)
+
+/* List of weak hash tables chained through their NEXT-WEAK-HASH-TABLE
+ * slot. Set to NULL at the end of a collection.
+ *
+ * This is not optimal because, when a table is tenured, it won't be
+ * processed automatically; only the yougest generation is GC'd by
+ * default. On the other hand, all applications will need an
+ * occasional full GC anyway, so it's not that bad either. */
+struct hash_table *weak_hash_tables = NULL;
+
+/* Return true if OBJ has already survived the current GC. */
+static inline int
+survived_gc_yet (lispobj obj)
+{
+ return (!is_lisp_pointer(obj) || !from_space_p(obj) ||
+ forwarding_pointer_p(native_pointer(obj)));
+}
+
+static inline int
+weak_hash_entry_alivep (lispobj weakness, lispobj key, lispobj value)
+{
+ switch (weakness) {
+ case KEY:
+ return survived_gc_yet(key);
+ case VALUE:
+ return survived_gc_yet(value);
+ case KEY_OR_VALUE:
+ return (survived_gc_yet(key) || survived_gc_yet(value));
+ case KEY_AND_VALUE:
+ return (survived_gc_yet(key) && survived_gc_yet(value));
+ default:
+ gc_assert(0);
+ /* Shut compiler up. */
+ return 0;
+ }
+}
+
+/* Return the beginning of data in ARRAY (skipping the header and the
+ * length) or NULL if it isn't an array of the specified widetag after
+ * all. */
+static inline lispobj *
+get_array_data (lispobj array, int widetag, unsigned long *length)
+{
+ if (is_lisp_pointer(array) &&
+ (widetag_of(*(lispobj *)native_pointer(array)) == widetag)) {
+ if (length != NULL)
+ *length = fixnum_value(((lispobj *)native_pointer(array))[1]);
+ return ((lispobj *)native_pointer(array)) + 2;
+ } else {
+ return NULL;
+ }
+}
+
+/* Only need to worry about scavenging the _real_ entries in the
+ * table. Phantom entries such as the hash table itself at index 0 and
+ * the empty marker at index 1 were scavenged by scav_vector that
+ * either called this function directly or arranged for it to be
+ * called later by pushing the hash table onto weak_hash_tables. */
+static void
+scav_hash_table_entries (struct hash_table *hash_table)
+{
+ lispobj *kv_vector;
+ unsigned long kv_length;
+ lispobj *index_vector;
+ unsigned long length;
+ lispobj *next_vector;
+ unsigned long next_vector_length;
+ lispobj *hash_vector;
+ unsigned long hash_vector_length;
+ lispobj empty_symbol;
+ lispobj weakness = hash_table->weakness;
+ long i;
+
+ kv_vector = get_array_data(hash_table->table,
+ SIMPLE_VECTOR_WIDETAG, &kv_length);
+ if (kv_vector == NULL)
+ lose("invalid kv_vector %x\n", hash_table->table);
+
+ index_vector = get_array_data(hash_table->index_vector,
+ SIMPLE_ARRAY_WORD_WIDETAG, &length);
+ if (index_vector == NULL)
+ lose("invalid index_vector %x\n", hash_table->index_vector);
+
+ next_vector = get_array_data(hash_table->next_vector,
+ SIMPLE_ARRAY_WORD_WIDETAG,
+ &next_vector_length);
+ if (next_vector == NULL)
+ lose("invalid next_vector %x\n", hash_table->next_vector);
+
+ hash_vector = get_array_data(hash_table->hash_vector,
+ SIMPLE_ARRAY_WORD_WIDETAG,
+ &hash_vector_length);
+ if (hash_vector != NULL)
+ gc_assert(hash_vector_length == next_vector_length);
+
+ /* These lengths could be different as the index_vector can be a
+ * different length from the others, a larger index_vector could
+ * help reduce collisions. */
+ gc_assert(next_vector_length*2 == kv_length);
+
+ empty_symbol = kv_vector[1];
+ /* fprintf(stderr,"* empty_symbol = %x\n", empty_symbol);*/
+ if (widetag_of(*(lispobj *)native_pointer(empty_symbol)) !=
+ SYMBOL_HEADER_WIDETAG) {
+ lose("not a symbol where empty-hash-table-slot symbol expected: %x\n",
+ *(lispobj *)native_pointer(empty_symbol));
+ }
+
+ /* Work through the KV vector. */
+ for (i = 1; i < next_vector_length; i++) {
+ lispobj old_key = kv_vector[2*i];
+ lispobj value = kv_vector[2*i+1];
+ if ((weakness == NIL) ||
+ weak_hash_entry_alivep(weakness, old_key, value)) {
+
+ /* Scavenge the key and value. */
+ scavenge(&kv_vector[2*i],2);
+
+ /* Rehashing of EQ based keys. */
+ if ((!hash_vector) ||
+ (hash_vector[i] == MAGIC_HASH_VECTOR_VALUE)) {
+#ifndef LISP_FEATURE_GENCGC
+ /* For GENCGC scav_hash_table_entries only rehashes
+ * the entries whose keys were moved. Cheneygc always
+ * moves the objects so here we let the lisp side know
+ * that rehashing is needed for the whole table. */
+ *(kv_vector - 2) = (subtype_VectorMustRehash<<N_WIDETAG_BITS) |
+ SIMPLE_VECTOR_WIDETAG;
+#else
+ unsigned long old_index = EQ_HASH(old_key)%length;
+ lispobj new_key = kv_vector[2*i];
+ unsigned long new_index = EQ_HASH(new_key)%length;
+ /* Check whether the key has moved. */
+ if ((old_index != new_index) &&
+ (new_key != empty_symbol)) {
+ gc_assert(kv_vector[2*i+1] != empty_symbol);
+
+ /*FSHOW((stderr,
+ "* EQ key %d moved from %x to %x; index %d to %d\n",
+ i, old_key, new_key, old_index, new_index));*/
+
+ /* Unlink the key from the old_index chain. */
+ if (!index_vector[old_index]) {
+ /* It's not here, must be on the
+ * needing_rehash chain. */
+ } else if (index_vector[old_index] == i) {
+ /*FSHOW((stderr, "/P2a %d\n", next_vector[i]));*/
+ index_vector[old_index] = next_vector[i];
+ /* Link it into the needing rehash chain. */
+ next_vector[i] =
+ fixnum_value(hash_table->needing_rehash);
+ hash_table->needing_rehash = make_fixnum(i);
+ /*SHOW("P2");*/
+ } else {
+ unsigned long prior = index_vector[old_index];
+ unsigned long next = next_vector[prior];
+
+ /*FSHOW((stderr, "/P3a %d %d\n", prior, next));*/
+
+ while (next != 0) {
+ /*FSHOW((stderr, "/P3b %d %d\n", prior, next));*/
+ if (next == i) {
+ /* Unlink it. */
+ next_vector[prior] = next_vector[next];
+ /* Link it into the needing rehash
+ * chain. */
+ next_vector[next] =
+ fixnum_value(hash_table->needing_rehash);
+ hash_table->needing_rehash = make_fixnum(next);
+ /*SHOW("/P3");*/
+ break;
+ }
+ prior = next;
+ next = next_vector[next];
+ }
+ }
+ }
+#endif
+ }
+ }
+ }
+}
+
+long
+scav_vector (lispobj *where, lispobj object)
+{
+ unsigned long kv_length;
+ lispobj *kv_vector;
+ struct hash_table *hash_table;
+
+ /* SB-VM:VECTOR-VALID-HASHING-SUBTYPE is set for EQ-based and weak
+ * hash tables in the Lisp HASH-TABLE code to indicate need for
+ * special GC support. */
+ if (HeaderValue(object) == subtype_VectorNormal)
+ return 1;
+
+ kv_length = fixnum_value(where[1]);
+ kv_vector = where + 2; /* Skip the header and length. */
+ /*FSHOW((stderr,"/kv_length = %d\n", kv_length));*/
+
+ /* Scavenge element 0, which may be a hash-table structure. */
+ scavenge(where+2, 1);
+ if (!is_lisp_pointer(where[2])) {
+ lose("no pointer at %x in hash table\n", where[2]);
+ }
+ hash_table = (struct hash_table *)native_pointer(where[2]);
+ /*FSHOW((stderr,"/hash_table = %x\n", hash_table));*/
+ if (widetag_of(hash_table->header) != INSTANCE_HEADER_WIDETAG) {
+ lose("hash table not instance (%x at %x)\n",
+ hash_table->header,
+ hash_table);
+ }
+
+ /* Scavenge element 1, which should be some internal symbol that
+ * the hash table code reserves for marking empty slots. */
+ scavenge(where+3, 1);
+ if (!is_lisp_pointer(where[3])) {
+ lose("not empty-hash-table-slot symbol pointer: %x\n", where[3]);
+ }
+
+ /* Scavenge hash table, which will fix the positions of the other
+ * needed objects. */
+ scavenge((lispobj *)hash_table,
+ sizeof(struct hash_table) / sizeof(lispobj));
+
+ /* Cross-check the kv_vector. */
+ if (where != (lispobj *)native_pointer(hash_table->table)) {
+ lose("hash_table table!=this table %x\n", hash_table->table);
+ }
+
+ if (hash_table->weakness == NIL) {
+ scav_hash_table_entries(hash_table);
+ } else {
+ /* Delay scavenging of this table by pushing it onto
+ * weak_hash_tables (if it's not there already) for the weak
+ * object phase. */
+ if (hash_table->next_weak_hash_table == NIL) {
+ hash_table->next_weak_hash_table = (lispobj)weak_hash_tables;
+ weak_hash_tables = hash_table;
+ }
+ }
+
+ return (CEILING(kv_length + 2, 2));
+}
+
+void
+scav_weak_hash_tables (void)
+{
+ struct hash_table *table;
+
+ /* Scavenge entries whose triggers are known to survive. */
+ for (table = weak_hash_tables; table != NULL;
+ table = (struct hash_table *)table->next_weak_hash_table) {
+ scav_hash_table_entries(table);
+ }
+}
+
+/* Walk through the chain whose first element is *FIRST and remove
+ * dead weak entries. */
+static inline void
+scan_weak_hash_table_chain (struct hash_table *hash_table, lispobj *prev,
+ lispobj *kv_vector, lispobj *index_vector,
+ lispobj *next_vector, lispobj *hash_vector,
+ lispobj empty_symbol, lispobj weakness)
+{
+ unsigned index = *prev;
+ while (index) {
+ unsigned next = next_vector[index];
+ lispobj key = kv_vector[2 * index];
+ lispobj value = kv_vector[2 * index + 1];
+ gc_assert(key != empty_symbol);
+ gc_assert(value != empty_symbol);
+ if (!weak_hash_entry_alivep(weakness, key, value)) {
+ unsigned count = fixnum_value(hash_table->number_entries);
+ gc_assert(count > 0);
+ *prev = next;
+ hash_table->number_entries = make_fixnum(count - 1);
+ next_vector[index] = fixnum_value(hash_table->next_free_kv);
+ hash_table->next_free_kv = make_fixnum(index);
+ kv_vector[2 * index] = empty_symbol;
+ kv_vector[2 * index + 1] = empty_symbol;
+ if (hash_vector)
+ hash_vector[index] = MAGIC_HASH_VECTOR_VALUE;
+ } else {
+ prev = &next_vector[index];
+ }
+ index = next;
+ }
+}
+
+static void
+scan_weak_hash_table (struct hash_table *hash_table)
+{
+ lispobj *kv_vector;
+ lispobj *index_vector;
+ unsigned long length = 0; /* prevent warning */
+ lispobj *next_vector;
+ unsigned long next_vector_length = 0; /* prevent warning */
+ lispobj *hash_vector;
+ lispobj empty_symbol;
+ lispobj weakness = hash_table->weakness;
+ long i;
+
+ kv_vector = get_array_data(hash_table->table,
+ SIMPLE_VECTOR_WIDETAG, NULL);
+ index_vector = get_array_data(hash_table->index_vector,
+ SIMPLE_ARRAY_WORD_WIDETAG, &length);
+ next_vector = get_array_data(hash_table->next_vector,
+ SIMPLE_ARRAY_WORD_WIDETAG,
+ &next_vector_length);
+ hash_vector = get_array_data(hash_table->hash_vector,
+ SIMPLE_ARRAY_WORD_WIDETAG, NULL);
+ empty_symbol = kv_vector[1];
+
+ for (i = 0; i < length; i++) {
+ scan_weak_hash_table_chain(hash_table, &index_vector[i],
+ kv_vector, index_vector, next_vector,
+ hash_vector, empty_symbol, weakness);
+ }
+ {
+ lispobj first = fixnum_value(hash_table->needing_rehash);
+ scan_weak_hash_table_chain(hash_table, &first,
+ kv_vector, index_vector, next_vector,
+ hash_vector, empty_symbol, weakness);
+ hash_table->needing_rehash = make_fixnum(first);
+ }
+}
+
+/* Remove dead entries from weak hash tables. */
+void
+scan_weak_hash_tables (void)
+{
+ struct hash_table *table, *next;
+
+ for (table = weak_hash_tables; table != NULL; table = next) {
+ next = (struct hash_table *)table->next_weak_hash_table;
+ table->next_weak_hash_table = NIL;
+ scan_weak_hash_table(table);
+ }
+
+ weak_hash_tables = NULL;
+}
\f
/*
#else
scavtab[FDEFN_WIDETAG] = scav_fdefn;
#endif
+ scavtab[SIMPLE_VECTOR_WIDETAG] = scav_vector;
/* transport other table, initialized same way as scavtab */
for (i = 0; i < ((sizeof transother)/(sizeof transother[0])); i++)
extern long (*sizetab[256])(lispobj *where);
extern struct weak_pointer *weak_pointers; /* in gc-common.c */
+extern struct hash_table *weak_hash_tables; /* in gc-common.c */
extern void scavenge(lispobj *start, long n_words);
extern void scavenge_interrupt_contexts(void);
+extern void scav_weak_hash_tables(void);
+extern void scan_weak_hash_tables(void);
extern void scan_weak_pointers(void);
lispobj copy_large_unboxed_object(lispobj object, long nwords);
#include "cheneygc-internal.h"
#endif
+#if N_WORD_BITS == 32
+# define SIMPLE_ARRAY_WORD_WIDETAG SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG
+#elif N_WORD_BITS == 64
+# define SIMPLE_ARRAY_WORD_WIDETAG SIMPLE_ARRAY_UNSIGNED_BYTE_64_WIDETAG
+#endif
+
#endif /* _GC_INTERNAL_H_ */
* is needed. */
static void *heap_base = NULL;
-#if N_WORD_BITS == 32
- #define SIMPLE_ARRAY_WORD_WIDETAG SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG
-#elif N_WORD_BITS == 64
- #define SIMPLE_ARRAY_WORD_WIDETAG SIMPLE_ARRAY_UNSIGNED_BYTE_64_WIDETAG
-#endif
-
/* Calculate the start address for the given page number. */
inline void *
page_address(page_index_t page_num)
\f
/*
- * vector-like objects
- */
-
-#if defined(LISP_FEATURE_X86) || defined(LISP_FEATURE_X86_64)
-
-#if N_WORD_BITS == 32
-#define EQ_HASH_MASK 0x1fffffff
-#elif N_WORD_BITS == 64
-#define EQ_HASH_MASK 0x1fffffffffffffff
-#endif
-
-/* Compute the EQ-hash of KEY. This must match POINTER-HASH in
- * target-hash-table.lisp. */
-#define EQ_HASH(key) ((key) & EQ_HASH_MASK)
-
-/* Return the beginning of data in ARRAY (skipping the header and the
- * length) or NULL if it isn't an array of the specified widetag after
- * all. */
-static inline lispobj *
-get_array_data (lispobj array, int widetag, unsigned long *length)
-{
- if (is_lisp_pointer(array) &&
- (widetag_of(*(lispobj *)native_pointer(array)) == widetag)) {
- if (length != NULL)
- *length = fixnum_value(((lispobj *)native_pointer(array))[1]);
- return ((lispobj *)native_pointer(array)) + 2;
- } else {
- return NULL;
- }
-}
-
-/* Only need to worry about scavenging the _real_ entries in the
- * table. Phantom entries such as the hash table itself at index 0 and
- * the empty marker at index 1 were scavenged by scav_vector that
- * either called this function directly or arranged for it to be
- * called later by pushing the hash table onto weak_hash_tables. */
-static void
-scav_hash_table_entries (struct hash_table *hash_table)
-{
- lispobj *kv_vector;
- unsigned long kv_length;
- lispobj *index_vector;
- unsigned long length;
- lispobj *next_vector;
- unsigned long next_vector_length;
- lispobj *hash_vector;
- unsigned long hash_vector_length;
- lispobj empty_symbol;
- long i;
-
- kv_vector = get_array_data(hash_table->table,
- SIMPLE_VECTOR_WIDETAG, &kv_length);
- if (kv_vector == NULL)
- lose("invalid kv_vector %x\n", hash_table->table);
-
- index_vector = get_array_data(hash_table->index_vector,
- SIMPLE_ARRAY_WORD_WIDETAG, &length);
- if (index_vector == NULL)
- lose("invalid index_vector %x\n", hash_table->index_vector);
-
- next_vector = get_array_data(hash_table->next_vector,
- SIMPLE_ARRAY_WORD_WIDETAG,
- &next_vector_length);
- if (next_vector == NULL)
- lose("invalid next_vector %x\n", hash_table->next_vector);
-
- hash_vector = get_array_data(hash_table->hash_vector,
- SIMPLE_ARRAY_WORD_WIDETAG,
- &hash_vector_length);
- if (hash_vector != NULL)
- gc_assert(hash_vector_length == next_vector_length);
-
- /* These lengths could be different as the index_vector can be a
- * different length from the others, a larger index_vector could help
- * reduce collisions. */
- gc_assert(next_vector_length*2 == kv_length);
-
- empty_symbol = kv_vector[1];
- /* fprintf(stderr,"* empty_symbol = %x\n", empty_symbol);*/
- if (widetag_of(*(lispobj *)native_pointer(empty_symbol)) !=
- SYMBOL_HEADER_WIDETAG) {
- lose("not a symbol where empty-hash-table-slot symbol expected: %x\n",
- *(lispobj *)native_pointer(empty_symbol));
- }
-
- /* Work through the KV vector. */
- for (i = 1; i < next_vector_length; i++) {
- lispobj old_key = kv_vector[2*i];
- {
- unsigned long old_index = EQ_HASH(old_key)%length;
- lispobj new_key;
- unsigned long new_index;
-
- /* Scavenge the key and value. */
- scavenge(&kv_vector[2*i],2);
-
- /* Check whether the key has moved and is EQ based. */
- new_key = kv_vector[2*i];
- new_index = EQ_HASH(new_key)%length;
-
- if ((old_index != new_index) &&
- ((!hash_vector) ||
- (hash_vector[i] == MAGIC_HASH_VECTOR_VALUE)) &&
- ((new_key != empty_symbol) ||
- (kv_vector[2*i+1] != empty_symbol))) {
-
- /*FSHOW((stderr,
- "* EQ key %d moved from %x to %x; index %d to %d\n",
- i, old_key, new_key, old_index, new_index));*/
-
- if (index_vector[old_index] != 0) {
- /*FSHOW((stderr, "/P1 %d\n", index_vector[old_index]));*/
-
- /* Unlink the key from the old_index chain. */
- if (index_vector[old_index] == i) {
- /*FSHOW((stderr, "/P2a %d\n", next_vector[i]));*/
- index_vector[old_index] = next_vector[i];
- /* Link it into the needing rehash chain. */
- next_vector[i] =
- fixnum_value(hash_table->needing_rehash);
- hash_table->needing_rehash = make_fixnum(i);
- /*SHOW("P2");*/
- } else {
- unsigned long prior = index_vector[old_index];
- unsigned long next = next_vector[prior];
-
- /*FSHOW((stderr, "/P3a %d %d\n", prior, next));*/
-
- while (next != 0) {
- /*FSHOW((stderr, "/P3b %d %d\n", prior, next));*/
- if (next == i) {
- /* Unlink it. */
- next_vector[prior] = next_vector[next];
- /* Link it into the needing rehash
- * chain. */
- next_vector[next] =
- fixnum_value(hash_table->needing_rehash);
- hash_table->needing_rehash = make_fixnum(next);
- /*SHOW("/P3");*/
- break;
- }
- prior = next;
- next = next_vector[next];
- }
- }
- }
- }
- }
- }
-}
-
-static long
-scav_vector(lispobj *where, lispobj object)
-{
- unsigned long kv_length;
- lispobj *kv_vector;
- struct hash_table *hash_table;
-
- /* FIXME: A comment explaining this would be nice. It looks as
- * though SB-VM:VECTOR-VALID-HASHING-SUBTYPE is set for EQ-based
- * hash tables in the Lisp HASH-TABLE code, and nowhere else. */
- if (HeaderValue(object) != subtype_VectorValidHashing)
- return 1;
-
- kv_length = fixnum_value(where[1]);
- kv_vector = where + 2; /* Skip the header and length. */
- /*FSHOW((stderr,"/kv_length = %d\n", kv_length));*/
-
- /* Scavenge element 0, which may be a hash-table structure. */
- scavenge(where+2, 1);
- if (!is_lisp_pointer(where[2])) {
- lose("no pointer at %x in hash table\n", where[2]);
- }
- hash_table = (struct hash_table *)native_pointer(where[2]);
- /*FSHOW((stderr,"/hash_table = %x\n", hash_table));*/
- if (widetag_of(hash_table->header) != INSTANCE_HEADER_WIDETAG) {
- lose("hash table not instance (%x at %x)\n",
- hash_table->header,
- hash_table);
- }
-
- /* Scavenge element 1, which should be some internal symbol that
- * the hash table code reserves for marking empty slots. */
- scavenge(where+3, 1);
- if (!is_lisp_pointer(where[3])) {
- lose("not empty-hash-table-slot symbol pointer: %x\n", where[3]);
- }
-
- /* Scavenge hash table, which will fix the positions of the other
- * needed objects. */
- scavenge((lispobj *)hash_table,
- sizeof(struct hash_table) / sizeof(lispobj));
-
- /* Cross-check the kv_vector. */
- if (where != (lispobj *)native_pointer(hash_table->table)) {
- lose("hash_table table!=this table %x\n", hash_table->table);
- }
-
- scav_hash_table_entries(hash_table);
-
- return (CEILING(kv_length + 2, 2));
-}
-
-#else
-
-static long
-scav_vector(lispobj *where, lispobj object)
-{
- if (HeaderValue(object) == subtype_VectorValidHashing) {
- *where =
- (subtype_VectorMustRehash<<N_WIDETAG_BITS) | SIMPLE_VECTOR_WIDETAG;
- }
- return 1;
-}
-
-#endif
-
-\f
-/*
* Lutexes. Using the normal finalization machinery for finalizing
* lutexes is tricky, since the finalization depends on working lutexes.
* So we track the lutexes in the GC and finalize them manually.
/* Record all new areas now. */
record_new_objects = 2;
+ /* Give a chance to weak hash tables to make other objects live.
+ * FIXME: The algorithm implemented here for weak hash table gcing
+ * is O(W^2+N) as Bruno Haible warns in
+ * http://www.haible.de/bruno/papers/cs/weak/WeakDatastructures-writeup.html
+ * see "Implementation 2". */
+ scav_weak_hash_tables();
+
/* Flush the current regions updating the tables. */
gc_alloc_update_all_page_tables();
/* Record all new areas now. */
record_new_objects = 2;
+ scav_weak_hash_tables();
+
/* Flush the current regions updating the tables. */
gc_alloc_update_all_page_tables();
scavenge(page_address(page)+offset, size);
}
+ scav_weak_hash_tables();
+
/* Flush the current regions updating the tables. */
gc_alloc_update_all_page_tables();
}
/* The oldest generation can't be raised. */
gc_assert((generation != HIGHEST_NORMAL_GENERATION) || (raise == 0));
+ /* Check if weak hash tables were processed in the previous GC. */
+ gc_assert(weak_hash_tables == NULL);
+
/* Initialize the weak pointer list. */
weak_pointers = NULL;
}
#endif
+ scan_weak_hash_tables();
scan_weak_pointers();
/* Flush the current regions, updating the tables. */
page_index_t i;
gc_init_tables();
- scavtab[SIMPLE_VECTOR_WIDETAG] = scav_vector;
scavtab[WEAK_POINTER_WIDETAG] = scav_weak_pointer;
transother[SIMPLE_ARRAY_WIDETAG] = trans_boxed_large;
;;; overconfident primitive type computation leading to bogus type
;;; checking.
-(let* ((form1 '(lambda (x)
- (declare (type (and condition function) x))
+(let* ((form1 '(lambda (x)
+ (declare (type (and condition function) x))
x))
(fun1 (compile nil form1))
- (form2 '(lambda (x)
- (declare (type (and standard-object function) x))
+ (form2 '(lambda (x)
+ (declare (type (and standard-object function) x))
x))
(fun2 (compile nil form2)))
(assert (raises-error? (funcall fun1 (make-condition 'error))))
(in-package :cl-user)
+(use-package :test-util)
+(use-package :assertoid)
+
(defstruct foo)
(defstruct bar x y)
(,fun x)))
nil))))
+;;; This test works reliably on non-conservative platforms and
+;;; somewhat reliably on conservative platforms with threads.
+#+(or (not (or x86 x86-64)) sb-thread)
+(progn
+
+(defparameter *ht* nil)
+
+(defvar *cons-here*)
+
+(defmacro alloc (&body body)
+ "Execute BODY and try to reduce the chance of leaking a conservative root."
+ #-sb-thread
+ `(multiple-value-prog1
+ (progn ,@body)
+ (loop repeat 20000 do (setq *cons-here* (cons nil nil)))
+ ;; KLUDGE: Clean the argument passing regs.
+ (apply #'take (loop repeat 36 collect #'cons)))
+ #+sb-thread
+ (let ((values (gensym))
+ (sem (gensym)))
+ `(let ((,sem (sb-thread::make-semaphore))
+ ,values)
+ (sb-thread:make-thread (lambda ()
+ (setq ,values
+ (multiple-value-list (progn ,@body)))
+ (sb-thread::signal-semaphore ,sem)))
+ (sb-thread::wait-on-semaphore ,sem)
+ (values-list ,values))))
+
+(with-test (:name (:hash-table :weakness :eql :numbers))
+ (flet ((random-number ()
+ (random 1000)))
+ (loop for weakness in '(nil :key :value :key-and-value :key-or-value) do
+ (let* ((ht (make-hash-table :weakness weakness))
+ (n (alloc (loop repeat 1000
+ count (let ((key (random-number)))
+ (if (gethash key ht)
+ (setf (gethash key ht)
+ (random-number))))))))
+ (gc :full t)
+ (gc :full t)
+ (assert (= n (hash-table-count ht)))))))
+
+(defun take (&rest args)
+ (declare (ignore args)))
+
+(defun add-removable-stuff (ht &key (n 100) (size 10))
+ (flet ((unique-object ()
+ (make-array size :fill-pointer 0)))
+ (loop for i below n do
+ (multiple-value-bind (key value)
+ (ecase (hash-table-weakness ht)
+ ((:key) (values (unique-object) i))
+ ((:value) (values i (unique-object)))
+ ((:key-and-value)
+ (if (zerop (random 2))
+ (values (unique-object) i)
+ (values i (unique-object))))
+ ((:key-or-value)
+ (values (unique-object) (unique-object))))
+ (setf (gethash key ht) value)))
+ (values)))
+
+(defun print-ht (ht &optional (stream t))
+ (format stream "Weakness: ~S~%" (sb-impl::hash-table-weakness ht))
+ (format stream "Table: ~S~%" (sb-impl::hash-table-table ht))
+ (format stream "Next: ~S~%" (sb-impl::hash-table-next-vector ht))
+ (format stream "Index: ~S~%" (sb-impl::hash-table-index-vector ht))
+ (format stream "Hash: ~S~%" (sb-impl::hash-table-hash-vector ht))
+ (force-output stream))
+
+(with-test (:name (:hash-table :weakness :removal))
+ (loop for test in '(eq eql equal equalp) do
+ (format t "test: ~A~%" test)
+ (loop for weakness in '(:key :value :key-and-value :key-or-value)
+ do
+ (format t "weakness: ~A~%" weakness)
+ (let ((ht (make-hash-table :test 'equal :weakness weakness)))
+ (alloc (add-removable-stuff ht :n 117 :size 1))
+ (loop for i upfrom 0
+ do (format t "~A. count: ~A~%" i (hash-table-count ht))
+ (force-output)
+ until (zerop (hash-table-count ht))
+ do
+ (when (= i 10)
+ (print-ht ht)
+ #-(or x86 x86-64)
+ (assert nil)
+ ;; With conservative gc the test may not be
+ ;; bullet-proof so it's not an outright
+ ;; failure but a warning.
+ #+(or x86 x86-64)
+ (progn
+ (warn "Weak hash removal test failed for weakness ~A"
+ weakness)
+ (return)))
+ (gc :full t))))))
+
+(with-test (:name (:hash-table :weakness :string-interning))
+ (let ((ht (make-hash-table :test 'equal :weakness :key))
+ (s "a"))
+ (setf (gethash s ht) s)
+ (assert (eq (gethash s ht) s))
+ (assert (eq (gethash (copy-seq s) ht) s))))
+
+;;; see if hash_vector is not written when there is none ...
+(with-test (:name (:hash-table :weakness :eq))
+ (loop repeat 10 do
+ (let ((index (random 2000)))
+ (let ((first (+ most-positive-fixnum (mod (* index 31) 9)))
+ (n 50000))
+ (let ((hash-table (make-hash-table :weakness :key :test 'eq)))
+ (dotimes (i n)
+ (setf (gethash (+ first i) hash-table) i))
+ hash-table)))))
+
+;; used to crash in gc
+(with-test (:name (:hash-table :weakness :keep))
+ (loop repeat 2 do
+ (let ((h1 (make-hash-table :weakness :key :test #'equal))
+ (keep ()))
+ (loop for i from 0 to 1000
+ for key = i
+ for value = (make-array 10000 :fill-pointer 0)
+ do
+ (push value keep)
+ (setf (gethash key h1) value))
+ (sb-ext:gc :full t))))
+
+)
+
;;; success
(defvar *reprs*)
(declaim (type simple-vector *reprs*))
+(defun random-element (seq)
+ (elt seq (random (length seq))))
+
(defun repr (i)
(declare (type fixnum i))
(let ((result (svref *reprs* (mod i (length *reprs*)))))
|#
hash-table))))
+(defun repr-weak-key-hash-table (index &optional (value nil value-p))
+ (let ((first (+ most-positive-fixnum (mod (* index 31) 9)))
+ (n 5))
+ (if value-p
+ (and (hash-table-p value)
+ (<= (hash-table-count value) n)
+ (dotimes (i n t)
+ (let ((x (gethash (+ i first) value)))
+ (unless (or (null x) (= x i))
+ (return nil)))))
+ (let ((hash-table (make-hash-table
+ :weakness :key
+ :test (random-element '(eq eql equal equalp)))))
+ (dotimes (i n)
+ (setf (gethash (+ first i) hash-table) i))
+ hash-table))))
+
(defun repr-bignum (index &optional (value nil value-p))
(let ((bignum (+ index 10000300020)))
(if value-p
#'repr-function
#'repr-instance
#'repr-eql-hash-table
+ #'repr-weak-key-hash-table
#|
#'repr-equal-hash-table
#'repr-equalp-hash-table
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.16.31"
+"0.9.16.32"