X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-hash-table.lisp;h=20113045045778a5ec06952221bc0b3194c33dae;hb=e94fe1bcf814af45ca9eeb4721df17c58afa4d76;hp=51d6537cd521c01d0922ab9dfc3b8e465cc895d5;hpb=8f7210321a2abb703ceeb3f203e86d5e0d9412b0;p=sbcl.git diff --git a/src/code/target-hash-table.lisp b/src/code/target-hash-table.lisp index 51d6537..2011304 100644 --- a/src/code/target-hash-table.lisp +++ b/src/code/target-hash-table.lisp @@ -14,26 +14,22 @@ ;;;; utilities -;; This stuff is performance critical and unwind-protect is too -;; slow. And without the locking the next vector can get cyclic -;; causing looping in a WITHOUT-GCING form, SHRINK-VECTOR can corrupt -;; memory and who knows what else. +;;; Without the locking the next vector can get cyclic causing +;;; looping in a WITHOUT-GCING form, SHRINK-VECTOR can corrupt memory +;;; and who knows what else. +;;; +;;; WITHOUT-GCING implies WITHOUT-INTERRUPTS. (defmacro with-spinlock-and-without-gcing ((spinlock) &body body) #!-sb-thread (declare (ignore spinlock)) - (with-unique-names (old-gc-inhibit) - `(let ((,old-gc-inhibit *gc-inhibit*) - (*gc-inhibit* t)) + `(without-gcing (unwind-protect (progn #!+sb-thread (sb!thread::get-spinlock ,spinlock) ,@body) #!+sb-thread - (sb!thread::release-spinlock ,spinlock) - (let ((*gc-inhibit* ,old-gc-inhibit)) - ;; the test is racy, but it can err only on the overeager side - (sb!kernel::maybe-handle-pending-gc)))))) + (sb!thread::release-spinlock ,spinlock)))) (eval-when (:compile-toplevel :load-toplevel :execute) (defconstant max-hash sb!xc:most-positive-fixnum)) @@ -60,7 +56,16 @@ #!-sb-fluid (declaim (inline equal-hash)) (defun equal-hash (key) (declare (values hash (member t nil))) - (values (sxhash key) nil)) + (typecase key + ;; For some types the definition of EQUAL implies a special hash + ((or string cons number bit-vector pathname) + (values (sxhash key) nil)) + ;; Otherwise use an EQ hash, rather than SXHASH, since the values + ;; of SXHASH will be extremely badly distributed due to the + ;; requirements of the spec fitting badly with our implementation + ;; strategy. + (t + (eq-hash key)))) #!-sb-fluid (declaim (inline eql-hash)) (defun eql-hash (key) @@ -71,7 +76,13 @@ (defun equalp-hash (key) (declare (values hash (member t nil))) - (values (psxhash key) nil)) + (typecase key + ;; Types requiring special treatment. Note that PATHNAME and + ;; HASH-TABLE are caught by the STRUCTURE-OBJECT test. + ((or array cons number character structure-object) + (values (psxhash key) nil)) + (t + (eq-hash key)))) (defun almost-primify (num) (declare (type index num)) @@ -105,10 +116,10 @@ (defconstant +min-hash-table-rehash-threshold+ (float 1/16 1.0)) (defun make-hash-table (&key (test 'eql) - (size +min-hash-table-size+) - (rehash-size 1.5) - (rehash-threshold 1) - (weak-p nil)) + (size +min-hash-table-size+) + (rehash-size 1.5) + (rehash-threshold 1) + (weakness nil)) #!+sb-doc "Create and return a new hash table. The keywords are as follows: :TEST -- Indicates what kind of test to use. @@ -122,13 +133,20 @@ forcing a rehash. Can be any positive number <=1, with density approaching zero as the threshold approaches 0. Density 1 means an average of one entry per bucket. - :WEAK-P -- (This is an extension from CMU CL, not currently supported - in SBCL 0.6.6, but perhaps supported in a future version.) If T, - don't keep entries if the key would otherwise be garbage." + :WEAKNESS -- IF NIL (the default) it is a normal non-weak hash table. + If one of :KEY, :VALUE, :KEY-AND-VALUE, :KEY-OR-VALUE it is a weak + hash table. + Depending on the type of weakness the lack of references to the + key and the value may allow for removal of the entry. If WEAKNESS + is :KEY and the key would otherwise be garbage the entry is eligible + for removal from the hash table. Similarly, if WEAKNESS is :VALUE + the life of an entry depends on its value's references. If WEAKNESS + is :KEY-AND-VALUE and either the key or the value would otherwise be + garbage the entry can be removed. If WEAKNESS is :KEY-OR-VALUE and + both the key and the value would otherwise be garbage the entry can + be removed." (declare (type (or function symbol) test)) (declare (type unsigned-byte size)) - (when weak-p - (error "stub: unsupported WEAK-P option")) (multiple-value-bind (test test-fun hash-fun) (cond ((or (eq test #'eq) (eq test 'eq)) (values 'eq #'eq #'eq-hash)) @@ -143,8 +161,8 @@ ;; Failing that, I'd like to rename it to ;; *USER-HASH-TABLE-TESTS*. (dolist (info *hash-table-tests* - (error "unknown :TEST for MAKE-HASH-TABLE: ~S" - test)) + (error "unknown :TEST for MAKE-HASH-TABLE: ~S" + test)) (destructuring-bind (test-name test-fun hash-fun) info (when (or (eq test test-name) (eq test test-fun)) (return (values test-name test-fun hash-fun))))))) @@ -162,7 +180,7 @@ ;; boxing. (rehash-threshold (max +min-hash-table-rehash-threshold+ (float rehash-threshold 1.0))) - (size+1 (1+ size)) ; The first element is not usable. + (size+1 (1+ size)) ; The first element is not usable. ;; KLUDGE: The most natural way of expressing the below is ;; (round (/ (float size+1) rehash-threshold)), and indeed ;; it was expressed like that until 0.7.0. However, @@ -181,8 +199,9 @@ :element-type '(unsigned-byte #.sb!vm:n-word-bits) :initial-element 0)) - ;; needs to be the same length as the KV vector - ;; (FIXME: really? why doesn't the code agree?) + ;; Needs to be the half the length of the KV vector to link + ;; KV entries - mapped to indeces at 2i and 2i+1 - + ;; together. (next-vector (make-array size+1 :element-type '(unsigned-byte #.sb!vm:n-word-bits))) @@ -196,7 +215,7 @@ :rehash-threshold rehash-threshold :rehash-trigger size :table kv-vector - :weak-p weak-p + :weakness weakness :index-vector index-vector :next-vector next-vector :hash-vector @@ -244,9 +263,9 @@ "Return the test HASH-TABLE was created with.") #!+sb-doc -(setf (fdocumentation 'hash-table-weak-p 'function) - "Return T if HASH-TABLE will not keep entries for keys that would - otherwise be garbage, and NIL if it will.") +(setf (fdocumentation 'hash-table-weakness 'function) + "Return the WEAKNESS of HASH-TABLE which is one of NIL, :KEY, +:VALUE, :KEY-AND-VALUE, :KEY-OR-VALUE.") ;;;; accessing functions @@ -279,7 +298,7 @@ (old-index-vector (hash-table-index-vector table)) (new-length (almost-primify (truncate (/ (float new-size) - (hash-table-rehash-threshold table))))) + (hash-table-rehash-threshold table))))) (new-index-vector (make-array new-length :element-type '(unsigned-byte #.sb!vm:n-word-bits) @@ -289,6 +308,10 @@ ;; Disable GC tricks on the OLD-KV-VECTOR. (set-header-data old-kv-vector sb!vm:vector-normal-subtype) + ;; Non-empty weak hash tables always need GC support. + (when (and (hash-table-weakness table) (plusp (hash-table-count table))) + (set-header-data new-kv-vector sb!vm:vector-valid-hashing-subtype)) + ;; FIXME: here and in several other places in the hash table code, ;; loops like this one are used when FILL or REPLACE would be ;; appropriate. why are standard CL functions not used? @@ -311,6 +334,7 @@ ;; the chains are first to last. (do ((i (1- new-size) (1- i))) ((zerop i)) + (declare (type index/2 i)) (let ((key (aref new-kv-vector (* 2 i))) (value (aref new-kv-vector (1+ (* 2 i))))) (cond ((and (eq key +empty-ht-slot+) @@ -368,9 +392,11 @@ (length (length index-vector))) (declare (type index size length)) - ;; Disable GC tricks, they will be re-enabled during the re-hash - ;; if necesary. - (set-header-data kv-vector sb!vm:vector-normal-subtype) + ;; Non-empty weak hash tables always need GC support. + (unless (and (hash-table-weakness table) (plusp (hash-table-count table))) + ;; Disable GC tricks, they will be re-enabled during the re-hash + ;; if necessary. + (set-header-data kv-vector sb!vm:vector-normal-subtype)) ;; Rehash all the entries. (setf (hash-table-next-free-kv table) 0) @@ -381,6 +407,7 @@ (setf (aref index-vector i) 0)) (do ((i (1- size) (1- i))) ((zerop i)) + (declare (type index/2 i)) (let ((key (aref kv-vector (* 2 i))) (value (aref kv-vector (1+ (* 2 i))))) (cond ((and (eq key +empty-ht-slot+) @@ -419,7 +446,7 @@ (length (length index-vector))) (do ((next (hash-table-needing-rehash table))) ((zerop next)) - (declare (type index next)) + (declare (type index/2 next)) (let* ((key (aref kv-vector (* 2 next))) (hashing (pointer-hash key)) (index (rem hashing length)) @@ -481,13 +508,13 @@ (if (or eq-based (not hash-vector)) (do ((next next (aref next-vector next))) ((zerop next) (values default nil)) - (declare (type index next)) + (declare (type index/2 next)) (when (eq key (aref table (* 2 next))) (setf (hash-table-cache hash-table) (* 2 next)) (return (values (aref table (1+ (* 2 next))) t)))) (do ((next next (aref next-vector next))) ((zerop next) (values default nil)) - (declare (type index next)) + (declare (type index/2 next)) (when (and (= hashing (aref hash-vector next)) (funcall test-fun key (aref table (* 2 next)))) ;; Found. @@ -535,8 +562,9 @@ (next-vector (hash-table-next-vector hash-table)) (hash-vector (hash-table-hash-vector hash-table)) (test-fun (hash-table-test-fun hash-table))) - (declare (type index index)) - + (declare (type index index next)) + (when (hash-table-weakness hash-table) + (set-header-data kv-vector sb!vm:vector-valid-hashing-subtype)) (cond ((or eq-based (not hash-vector)) (when eq-based (set-header-data kv-vector @@ -545,7 +573,7 @@ ;; Search next-vector chain for a matching key. (do ((next next (aref next-vector next))) ((zerop next)) - (declare (type index next)) + (declare (type index/2 next)) (when (eq key (aref kv-vector (* 2 next))) ;; Found, just replace the value. (setf (hash-table-cache hash-table) (* 2 next)) @@ -555,7 +583,7 @@ ;; Search next-vector chain for a matching key. (do ((next next (aref next-vector next))) ((zerop next)) - (declare (type index next)) + (declare (type index/2 next)) (when (and (= hashing (aref hash-vector next)) (funcall test-fun key (aref kv-vector (* 2 next)))) @@ -566,6 +594,7 @@ ;; Pop a KV slot off the free list (let ((free-kv-slot (hash-table-next-free-kv hash-table))) + (declare (type index/2 free-kv-slot)) ;; Double-check for overflow. (aver (not (zerop free-kv-slot))) (setf (hash-table-next-free-kv hash-table) @@ -618,8 +647,10 @@ (next-vector (hash-table-next-vector hash-table)) (hash-vector (hash-table-hash-vector hash-table)) (test-fun (hash-table-test-fun hash-table))) - (declare (type index index next)) + (declare (type index index) + (type index/2 next)) (flet ((clear-slot (chain-vector prior-slot-location slot-location) + (declare (type index/2 slot-location)) ;; Mark slot as empty. (setf (aref table (* 2 slot-location)) +empty-ht-slot+ (aref table (1+ (* 2 slot-location))) +empty-ht-slot+) @@ -656,7 +687,7 @@ (do ((prior next next) (next (aref next-vector next) (aref next-vector next))) ((zerop next) nil) - (declare (type index next)) + (declare (type index/2 next)) (when (and (= hashing (aref hash-vector next)) (funcall test-fun key (aref table (* 2 next)))) (return-from remhash @@ -682,7 +713,7 @@ ;; Set up the free list, all free. (do ((i 1 (1+ i))) ((>= i (1- size))) - (setf (aref next-vector i) (1+ i))) + (setf (aref next-vector i) (1+ i))) (setf (aref next-vector (1- size)) 0) (setf (hash-table-next-free-kv hash-table) 1) (setf (hash-table-needing-rehash hash-table) 0) @@ -706,19 +737,25 @@ (declaim (inline maphash)) (defun maphash (function-designator hash-table) #!+sb-doc - "For each entry in HASH-TABLE, call the designated two-argument function - on the key and value of the entry. Return NIL." + "For each entry in HASH-TABLE, call the designated two-argument function on +the key and value of the entry. Return NIL." + ;; This essentially duplicates WITH-HASH-TABLE-ITERATOR, so + ;; any changes here should be reflected there as well. (let ((fun (%coerce-callable-to-fun function-designator)) (size (length (hash-table-next-vector hash-table)))) (declare (type function fun)) (do ((i 1 (1+ i))) ((>= i size)) - (declare (type index i)) + (declare (type index/2 i)) (let* ((kv-vector (hash-table-table hash-table)) (key (aref kv-vector (* 2 i))) (value (aref kv-vector (1+ (* 2 i))))) - (unless (and (eq key +empty-ht-slot+) - (eq value +empty-ht-slot+)) + ;; We are running without locking or WITHOUT-GCING. For a weak + ;; :VALUE hash table it's possible that the GC hit after KEY + ;; was read and now the entry is gone. So check if either the + ;; key or the value is empty. + (unless (or (eq key +empty-ht-slot+) + (eq value +empty-ht-slot+)) (funcall fun key value)))))) ;;;; methods on HASH-TABLE @@ -726,16 +763,11 @@ ;;; Return a list of keyword args and values to use for MAKE-HASH-TABLE ;;; when reconstructing HASH-TABLE. (defun %hash-table-ctor-args (hash-table) - (when (hash-table-weak-p hash-table) - ;; FIXME: This might actually work with no trouble, but as of - ;; sbcl-0.6.12.10 when this code was written, weak hash tables - ;; weren't working yet, so I couldn't test it. When weak hash - ;; tables are supported again, this should be fixed. - (error "can't dump weak hash tables readably")) ; defensive programming.. `(:test ',(hash-table-test hash-table) :size ',(hash-table-size hash-table) :rehash-size ',(hash-table-rehash-size hash-table) - :rehash-threshold ',(hash-table-rehash-threshold hash-table))) + :rehash-threshold ',(hash-table-rehash-threshold hash-table) + :weakness ',(hash-table-weakness hash-table))) ;;; Return an association list representing the same data as HASH-TABLE. (defun %hash-table-alist (hash-table)