X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-hash-table.lisp;h=8947388e22dba2dc75559eb43b386fb2aab1ac8b;hb=25fe91bf63fd473d9316675b0e0ca9be0079e9eb;hp=67ef6779a63410ada84983748b46a265ea7f1d6e;hpb=92c8db80e039f60623e53a0b9355cf0a9ec49f3d;p=sbcl.git diff --git a/src/code/target-hash-table.lisp b/src/code/target-hash-table.lisp index 67ef677..8947388 100644 --- a/src/code/target-hash-table.lisp +++ b/src/code/target-hash-table.lisp @@ -14,6 +14,27 @@ ;;;; utilities +;; This stuff is performance critical and unwind-protect is too +;; slow. And without the locking the next vector can get cyclic +;; causing looping in a WITHOUT-GCING form, SHRINK-VECTOR can corrupt +;; memory and who knows what else. +(defmacro with-spinlock-and-without-gcing ((spinlock) &body body) + #!-sb-thread + (declare (ignore spinlock)) + (with-unique-names (old-gc-inhibit) + `(let ((,old-gc-inhibit *gc-inhibit*) + (*gc-inhibit* t)) + (unwind-protect + (progn + #!+sb-thread + (sb!thread::get-spinlock ,spinlock) + ,@body) + #!+sb-thread + (sb!thread::release-spinlock ,spinlock) + (let ((*gc-inhibit* ,old-gc-inhibit)) + ;; the test is racy, but it can err only on the overeager side + (sb!kernel::maybe-handle-pending-gc)))))) + (eval-when (:compile-toplevel :load-toplevel :execute) (defconstant max-hash sb!xc:most-positive-fixnum)) @@ -82,16 +103,12 @@ (defconstant +min-hash-table-size+ 16) (defconstant +min-hash-table-rehash-threshold+ (float 1/16 1.0)) -;; as explained by pmai on openprojects #lisp IRC 2002-07-30: #x80000000 -;; is bigger than any possible nonEQ hash value, and thus indicates an -;; empty slot; and EQ hash tables don't use HASH-TABLE-HASH-VECTOR -(defconstant +magic-hash-vector-value+ #x80000000) (defun make-hash-table (&key (test 'eql) - (size +min-hash-table-size+) - (rehash-size 1.5) - (rehash-threshold 1) - (weak-p nil)) + (size +min-hash-table-size+) + (rehash-size 1.5) + (rehash-threshold 1) + (weakness nil)) #!+sb-doc "Create and return 
a new hash table. The keywords are as follows: :TEST -- Indicates what kind of test to use. @@ -105,13 +122,20 @@ forcing a rehash. Can be any positive number <=1, with density approaching zero as the threshold approaches 0. Density 1 means an average of one entry per bucket. - :WEAK-P -- (This is an extension from CMU CL, not currently supported - in SBCL 0.6.6, but perhaps supported in a future version.) If T, - don't keep entries if the key would otherwise be garbage." + :WEAKNESS -- IF NIL (the default) it is a normal non-weak hash table. + If one of :KEY, :VALUE, :KEY-AND-VALUE, :KEY-OR-VALUE it is a weak + hash table. + Depending on the type of weakness the lack of references to the + key and the value may allow for removal of the entry. If WEAKNESS + is :KEY and the key would otherwise be garbage the entry is eligible + for removal from the hash table. Similarly, if WEAKNESS is :VALUE + the life of an entry depends on its value's references. If WEAKNESS + is :KEY-AND-VALUE and either the key or the value would otherwise be + garbage the entry can be removed. If WEAKNESS is :KEY-OR-VALUE and + both the key and the value would otherwise be garbage the entry can + be removed." (declare (type (or function symbol) test)) (declare (type unsigned-byte size)) - (when weak-p - (error "stub: unsupported WEAK-P option")) (multiple-value-bind (test test-fun hash-fun) (cond ((or (eq test #'eq) (eq test 'eq)) (values 'eq #'eq #'eq-hash)) @@ -126,8 +150,8 @@ ;; Failing that, I'd like to rename it to ;; *USER-HASH-TABLE-TESTS*. (dolist (info *hash-table-tests* - (error "unknown :TEST for MAKE-HASH-TABLE: ~S" - test)) + (error "unknown :TEST for MAKE-HASH-TABLE: ~S" + test)) (destructuring-bind (test-name test-fun hash-fun) info (when (or (eq test test-name) (eq test test-fun)) (return (values test-name test-fun hash-fun))))))) @@ -145,7 +169,7 @@ ;; boxing. 
(rehash-threshold (max +min-hash-table-rehash-threshold+ (float rehash-threshold 1.0))) - (size+1 (1+ size)) ; The first element is not usable. + (size+1 (1+ size)) ; The first element is not usable. ;; KLUDGE: The most natural way of expressing the below is ;; (round (/ (float size+1) rehash-threshold)), and indeed ;; it was expressed like that until 0.7.0. However, @@ -164,8 +188,9 @@ :element-type '(unsigned-byte #.sb!vm:n-word-bits) :initial-element 0)) - ;; needs to be the same length as the KV vector - ;; (FIXME: really? why doesn't the code agree?) + ;; Needs to be half the length of the KV vector to link + ;; KV entries - mapped to indices at 2i and 2i+1 - + ;; together. (next-vector (make-array size+1 :element-type '(unsigned-byte #.sb!vm:n-word-bits))) @@ -179,13 +204,16 @@ :rehash-threshold rehash-threshold :rehash-trigger size :table kv-vector - :weak-p weak-p + :weakness weakness :index-vector index-vector :next-vector next-vector - :hash-vector (unless (eq test 'eq) - (make-array size+1 - :element-type '(unsigned-byte #.sb!vm:n-word-bits) - :initial-element +magic-hash-vector-value+))))) + :hash-vector + (unless (eq test 'eq) + (make-array size+1 + :element-type '(unsigned-byte + #.sb!vm:n-word-bits) + :initial-element +magic-hash-vector-value+)) + :spinlock (sb!thread::make-spinlock)))) (declare (type index size+1 scaled-size length)) ;; Set up the free list, all free. These lists are 0 terminated. 
(do ((i 1 (1+ i))) @@ -224,9 +252,9 @@ "Return the test HASH-TABLE was created with.") #!+sb-doc -(setf (fdocumentation 'hash-table-weak-p 'function) - "Return T if HASH-TABLE will not keep entries for keys that would - otherwise be garbage, and NIL if it will.") +(setf (fdocumentation 'hash-table-weakness 'function) + "Return the WEAKNESS of HASH-TABLE which is one of NIL, :KEY, +:VALUE, :KEY-AND-VALUE, :KEY-OR-VALUE.") ;;;; accessing functions @@ -247,25 +275,32 @@ (the index (truncate (* rehash-size old-size))))))) (new-kv-vector (make-array (* 2 new-size) :initial-element +empty-ht-slot+)) - (new-next-vector (make-array new-size - :element-type '(unsigned-byte #.sb!vm:n-word-bits) - :initial-element 0)) - (new-hash-vector (when old-hash-vector - (make-array new-size - :element-type '(unsigned-byte #.sb!vm:n-word-bits) - :initial-element +magic-hash-vector-value+))) + (new-next-vector + (make-array new-size + :element-type '(unsigned-byte #.sb!vm:n-word-bits) + :initial-element 0)) + (new-hash-vector + (when old-hash-vector + (make-array new-size + :element-type '(unsigned-byte #.sb!vm:n-word-bits) + :initial-element +magic-hash-vector-value+))) (old-index-vector (hash-table-index-vector table)) (new-length (almost-primify (truncate (/ (float new-size) - (hash-table-rehash-threshold table))))) - (new-index-vector (make-array new-length - :element-type '(unsigned-byte #.sb!vm:n-word-bits) - :initial-element 0))) + (hash-table-rehash-threshold table))))) + (new-index-vector + (make-array new-length + :element-type '(unsigned-byte #.sb!vm:n-word-bits) + :initial-element 0))) (declare (type index new-size new-length old-size)) ;; Disable GC tricks on the OLD-KV-VECTOR. (set-header-data old-kv-vector sb!vm:vector-normal-subtype) + ;; Non-empty weak hash tables always need GC support. 
+ (when (and (hash-table-weakness table) (plusp (hash-table-count table))) + (set-header-data new-kv-vector sb!vm:vector-valid-hashing-subtype)) + ;; FIXME: here and in several other places in the hash table code, ;; loops like this one are used when FILL or REPLACE would be ;; appropriate. why are standard CL functions not used? @@ -297,7 +332,8 @@ (hash-table-next-free-kv table)) (setf (hash-table-next-free-kv table) i)) ((and new-hash-vector - (not (= (aref new-hash-vector i) +magic-hash-vector-value+))) + (not (= (aref new-hash-vector i) + +magic-hash-vector-value+))) ;; Can use the existing hash value (not EQ based) (let* ((hashing (aref new-hash-vector i)) (index (rem hashing new-length)) @@ -325,11 +361,11 @@ (setf (hash-table-next-vector table) new-next-vector) (setf (hash-table-hash-vector table) new-hash-vector) ;; Shrink the old vectors to 0 size to help the conservative GC. - (shrink-vector old-kv-vector 0) - (shrink-vector old-index-vector 0) - (shrink-vector old-next-vector 0) + (%shrink-vector old-kv-vector 0) + (%shrink-vector old-index-vector 0) + (%shrink-vector old-next-vector 0) (when old-hash-vector - (shrink-vector old-hash-vector 0)) + (%shrink-vector old-hash-vector 0)) (setf (hash-table-rehash-trigger table) new-size)) (values)) @@ -344,9 +380,11 @@ (length (length index-vector))) (declare (type index size length)) - ;; Disable GC tricks, they will be re-enabled during the re-hash - ;; if necesary. - (set-header-data kv-vector sb!vm:vector-normal-subtype) + ;; Non-empty weak hash tables always need GC support. + (unless (and (hash-table-weakness table) (plusp (hash-table-count table))) + ;; Disable GC tricks, they will be re-enabled during the re-hash + ;; if necessary. + (set-header-data kv-vector sb!vm:vector-normal-subtype)) ;; Rehash all the entries. (setf (hash-table-next-free-kv table) 0) @@ -364,7 +402,8 @@ ;; Slot is empty, push it onto free list. 
(setf (aref next-vector i) (hash-table-next-free-kv table)) (setf (hash-table-next-free-kv table) i)) - ((and hash-vector (not (= (aref hash-vector i) +magic-hash-vector-value+))) + ((and hash-vector (not (= (aref hash-vector i) + +magic-hash-vector-value+))) ;; Can use the existing hash value (not EQ based) (let* ((hashing (aref hash-vector i)) (index (rem hashing length)) @@ -426,7 +465,7 @@ "Three argument version of GETHASH" (declare (type hash-table hash-table) (values t (member t nil))) - (without-gcing + (with-spinlock-and-without-gcing ((hash-table-spinlock hash-table)) (cond ((= (get-header-data (hash-table-table hash-table)) sb!vm:vector-must-rehash-subtype) (rehash-without-growing hash-table)) @@ -477,7 +516,7 @@ (defun %puthash (key hash-table value) (declare (type hash-table hash-table)) (aver (hash-table-index-vector hash-table)) - (without-gcing + (with-spinlock-and-without-gcing ((hash-table-spinlock hash-table)) ;; We need to rehash here so that a current key can be found if it ;; exists. Check that there is room for one more entry. May not be ;; needed if the key is already present. @@ -493,7 +532,8 @@ (kv-vector (hash-table-table hash-table))) ;; Check the cache - (if (and cache (< cache (length kv-vector)) (eq (aref kv-vector cache) key)) + (if (and cache (< cache (length kv-vector)) + (eq (aref kv-vector cache) key)) ;; If cached, just store here (setf (aref kv-vector (1+ cache)) value) @@ -510,10 +550,12 @@ (hash-vector (hash-table-hash-vector hash-table)) (test-fun (hash-table-test-fun hash-table))) (declare (type index index)) - + (when (hash-table-weakness hash-table) + (set-header-data kv-vector sb!vm:vector-valid-hashing-subtype)) (cond ((or eq-based (not hash-vector)) (when eq-based - (set-header-data kv-vector sb!vm:vector-valid-hashing-subtype)) + (set-header-data kv-vector + sb!vm:vector-valid-hashing-subtype)) ;; Search next-vector chain for a matching key. 
(do ((next next (aref next-vector next))) @@ -553,7 +595,8 @@ (when hash-vector (if (not eq-based) (setf (aref hash-vector free-kv-slot) hashing) - (aver (= (aref hash-vector free-kv-slot) +magic-hash-vector-value+)))) + (aver (= (aref hash-vector free-kv-slot) + +magic-hash-vector-value+)))) ;; Push this slot into the next chain. (setf (aref next-vector free-kv-slot) next) @@ -566,7 +609,7 @@ was such an entry, or NIL if not." (declare (type hash-table hash-table) (values (member t nil))) - (without-gcing + (with-spinlock-and-without-gcing ((hash-table-spinlock hash-table)) ;; We need to rehash here so that a current key can be found if it ;; exists. (cond ((= (get-header-data (hash-table-table hash-table)) @@ -603,7 +646,8 @@ (hash-table-next-free-kv hash-table)) (setf (hash-table-next-free-kv hash-table) slot-location) (when hash-vector - (setf (aref hash-vector slot-location) +magic-hash-vector-value+)) + (setf (aref hash-vector slot-location) + +magic-hash-vector-value+)) (decf (hash-table-number-entries hash-table)) t)) (cond ((zerop next) @@ -630,38 +674,40 @@ (declare (type index next)) (when (and (= hashing (aref hash-vector next)) (funcall test-fun key (aref table (* 2 next)))) - (return-from remhash (clear-slot next-vector prior next))))))))))) + (return-from remhash + (clear-slot next-vector prior next))))))))))) (defun clrhash (hash-table) #!+sb-doc "This removes all the entries from HASH-TABLE and returns the hash table itself." (declare (optimize speed)) - (let* ((kv-vector (hash-table-table hash-table)) - (next-vector (hash-table-next-vector hash-table)) - (hash-vector (hash-table-hash-vector hash-table)) - (size (length next-vector)) - (index-vector (hash-table-index-vector hash-table))) - ;; Disable GC tricks. - (set-header-data kv-vector sb!vm:vector-normal-subtype) - ;; Mark all slots as empty by setting all keys and values to magic - ;; tag. 
- (aver (eq (aref kv-vector 0) hash-table)) - (fill kv-vector +empty-ht-slot+ :start 2) - ;; Set up the free list, all free. - (do ((i 1 (1+ i))) - ((>= i (1- size))) - (setf (aref next-vector i) (1+ i))) - (setf (aref next-vector (1- size)) 0) - (setf (hash-table-next-free-kv hash-table) 1) - (setf (hash-table-needing-rehash hash-table) 0) - ;; Clear the index-vector. - (fill index-vector 0) - ;; Clear the hash-vector. - (when hash-vector - (fill hash-vector +magic-hash-vector-value+))) - (setf (hash-table-cache hash-table) nil) - (setf (hash-table-number-entries hash-table) 0) + (with-spinlock-and-without-gcing ((hash-table-spinlock hash-table)) + (let* ((kv-vector (hash-table-table hash-table)) + (next-vector (hash-table-next-vector hash-table)) + (hash-vector (hash-table-hash-vector hash-table)) + (size (length next-vector)) + (index-vector (hash-table-index-vector hash-table))) + ;; Disable GC tricks. + (set-header-data kv-vector sb!vm:vector-normal-subtype) + ;; Mark all slots as empty by setting all keys and values to magic + ;; tag. + (aver (eq (aref kv-vector 0) hash-table)) + (fill kv-vector +empty-ht-slot+ :start 2) + ;; Set up the free list, all free. + (do ((i 1 (1+ i))) + ((>= i (1- size))) + (setf (aref next-vector i) (1+ i))) + (setf (aref next-vector (1- size)) 0) + (setf (hash-table-next-free-kv hash-table) 1) + (setf (hash-table-needing-rehash hash-table) 0) + ;; Clear the index-vector. + (fill index-vector 0) + ;; Clear the hash-vector. + (when hash-vector + (fill hash-vector +magic-hash-vector-value+))) + (setf (hash-table-cache hash-table) nil) + (setf (hash-table-number-entries hash-table) 0)) hash-table) ;;;; MAPHASH @@ -686,8 +732,12 @@ (let* ((kv-vector (hash-table-table hash-table)) (key (aref kv-vector (* 2 i))) (value (aref kv-vector (1+ (* 2 i))))) - (unless (and (eq key +empty-ht-slot+) - (eq value +empty-ht-slot+)) + ;; We are running without locking or WITHOUT-GCING. 
For a weak + ;; :VALUE hash table it's possible that the GC hit after KEY + ;; was read and now the entry is gone. So check if either the + ;; key or the value is empty. + (unless (or (eq key +empty-ht-slot+) + (eq value +empty-ht-slot+)) (funcall fun key value)))))) ;;;; methods on HASH-TABLE @@ -695,16 +745,11 @@ ;;; Return a list of keyword args and values to use for MAKE-HASH-TABLE ;;; when reconstructing HASH-TABLE. (defun %hash-table-ctor-args (hash-table) - (when (hash-table-weak-p hash-table) - ;; FIXME: This might actually work with no trouble, but as of - ;; sbcl-0.6.12.10 when this code was written, weak hash tables - ;; weren't working yet, so I couldn't test it. When weak hash - ;; tables are supported again, this should be fixed. - (error "can't dump weak hash tables readably")) ; defensive programming.. `(:test ',(hash-table-test hash-table) :size ',(hash-table-size hash-table) :rehash-size ',(hash-table-rehash-size hash-table) - :rehash-threshold ',(hash-table-rehash-threshold hash-table))) + :rehash-threshold ',(hash-table-rehash-threshold hash-table) + :weakness ',(hash-table-weakness hash-table))) ;;; Return an association list representing the same data as HASH-TABLE. (defun %hash-table-alist (hash-table) @@ -725,14 +770,12 @@ (def!method print-object ((hash-table hash-table) stream) (declare (type stream stream)) - (cond ((not *print-readably*) + (cond ((or (not *print-readably*) (not *read-eval*)) (print-unreadable-object (hash-table stream :type t :identity t) (format stream ":TEST ~S :COUNT ~S" (hash-table-test hash-table) (hash-table-count hash-table)))) - ((not *read-eval*) - (error "can't print hash tables readably without *READ-EVAL*")) (t (with-standard-io-syntax (format stream