X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-hash-table.lisp;h=c28e640a305ed8220cf8665f39da84c1875babe1;hb=716c33a5b0ee4c745b48c2bf7635e83815b168f9;hp=844729c3a935a235a18b1ffd1e742e206dd8901d;hpb=60011b86627fa68eeacffd49c49826e474c7fd82;p=sbcl.git diff --git a/src/code/target-hash-table.lisp b/src/code/target-hash-table.lisp index 844729c..c28e640 100644 --- a/src/code/target-hash-table.lisp +++ b/src/code/target-hash-table.lisp @@ -15,7 +15,7 @@ ;;;; utilities (eval-when (:compile-toplevel :load-toplevel :execute) - (defconstant max-hash most-positive-fixnum)) + (defconstant max-hash sb!xc:most-positive-fixnum)) (deftype hash () `(integer 0 ,max-hash)) @@ -81,6 +81,11 @@ ;;;; construction and simple accessors (defconstant +min-hash-table-size+ 16) +(defconstant +min-hash-table-rehash-threshold+ (float 1/16 1.0)) +;; as explained by pmai on openprojects #lisp IRC 2002-07-30: #x80000000 +;; is bigger than any possible nonEQ hash value, and thus indicates an +;; empty slot; and EQ hash tables don't use HASH-TABLE-HASH-VECTOR +(defconstant +magic-hash-vector-value+ #x80000000) (defun make-hash-table (&key (test 'eql) (size +min-hash-table-size+) @@ -130,23 +135,36 @@ (min size ;; SIZE is just a hint, so if the user asks ;; for a SIZE which'd be too big for us to - ;; easily implement, we bump it down. - (floor array-dimension-limit 16)))) + ;; easily implement, we bump it down. + (floor array-dimension-limit 1024)))) (rehash-size (if (integerp rehash-size) rehash-size (float rehash-size 1.0))) ;; FIXME: Original REHASH-THRESHOLD default should be 1.0, ;; not 1, to make it easier for the compiler to avoid ;; boxing. - (rehash-threshold (float rehash-threshold 1.0)) + (rehash-threshold (max +min-hash-table-rehash-threshold+ + (float rehash-threshold 1.0))) (size+1 (1+ size)) ; The first element is not usable. - (scaled-size (round (/ (float size+1) rehash-threshold))) + ;; KLUDGE: The most natural way of expressing the below is + ;; (round (/ (float size+1) rehash-threshold)), and indeed + ;; it was expressed like that until 0.7.0. However, + ;; MAKE-HASH-TABLE is called very early in cold-init, and + ;; the SPARC has no primitive instructions for rounding, + ;; but only for truncating; therefore, we fudge this issue + ;; a little. The other uses of truncate, below, similarly + ;; used to be round. -- CSR, 2002-10-01 + ;; + ;; Note that this has not yet been audited for + ;; correctness. It just seems to work. -- CSR, 2002-11-02 + (scaled-size (truncate (/ (float size+1) rehash-threshold))) (length (almost-primify (max scaled-size (1+ +min-hash-table-size+)))) (index-vector (make-array length :element-type '(unsigned-byte 32) :initial-element 0)) ;; needs to be the same length as the KV vector + ;; (FIXME: really? why doesn't the code agree?) (next-vector (make-array size+1 :element-type '(unsigned-byte 32))) (kv-vector (make-array (* 2 size+1) @@ -165,7 +183,7 @@ :hash-vector (unless (eq test 'eq) (make-array size+1 :element-type '(unsigned-byte 32) - :initial-element #x80000000))))) + :initial-element +magic-hash-vector-value+))))) (declare (type index size+1 scaled-size length)) ;; Set up the free list, all free. These lists are 0 terminated. (do ((i 1 (1+ i))) @@ -179,7 +197,7 @@ (defun hash-table-count (hash-table) #!+sb-doc - "Returns the number of entries in the given HASH-TABLE." + "Return the number of entries in the given HASH-TABLE." (declare (type hash-table hash-table) (values index)) (hash-table-number-entries hash-table)) @@ -224,7 +242,7 @@ (fixnum (+ rehash-size old-size)) (float - (the index (round (* rehash-size old-size))))))) + (the index (truncate (* rehash-size old-size))))))) (new-kv-vector (make-array (* 2 new-size) :initial-element +empty-ht-slot+)) (new-next-vector (make-array new-size @@ -233,10 +251,10 @@ (new-hash-vector (when old-hash-vector (make-array new-size :element-type '(unsigned-byte 32) - :initial-element #x80000000))) + :initial-element +magic-hash-vector-value+))) (old-index-vector (hash-table-index-vector table)) (new-length (almost-primify - (round (/ (float new-size) + (truncate (/ (float new-size) (hash-table-rehash-threshold table))))) (new-index-vector (make-array new-length :element-type '(unsigned-byte 32) @@ -246,6 +264,11 @@ ;; Disable GC tricks on the OLD-KV-VECTOR. (set-header-data old-kv-vector sb!vm:vector-normal-subtype) + ;; FIXME: here and in several other places in the hash table code, + ;; loops like this one are used when FILL or REPLACE would be + ;; appropriate. why are standard CL functions not used? + ;; Performance issues? General laziness? -- NJF, 2004-03-10 + ;; Copy over the kv-vector. The element positions should not move ;; in case there are active scans. (dotimes (i (* old-size 2)) @@ -272,7 +295,7 @@ (hash-table-next-free-kv table)) (setf (hash-table-next-free-kv table) i)) ((and new-hash-vector - (not (= (aref new-hash-vector i) #x80000000))) + (not (= (aref new-hash-vector i) +magic-hash-vector-value+))) ;; Can use the existing hash value (not EQ based) (let* ((hashing (aref new-hash-vector i)) (index (rem hashing new-length)) @@ -317,8 +340,7 @@ (size (length next-vector)) (index-vector (hash-table-index-vector table)) (length (length index-vector))) - (declare (type index size length) - (type (simple-array (unsigned-byte 32) (*)))) + (declare (type index size length)) ;; Disable GC tricks, they will be re-enabled during the re-hash ;; if necesary. @@ -340,7 +362,7 @@ ;; Slot is empty, push it onto free list. (setf (aref next-vector i) (hash-table-next-free-kv table)) (setf (hash-table-next-free-kv table) i)) - ((and hash-vector (not (= (aref hash-vector i) #x80000000))) + ((and hash-vector (not (= (aref hash-vector i) +magic-hash-vector-value+))) ;; Can use the existing hash value (not EQ based) (let* ((hashing (aref hash-vector i)) (index (rem hashing length)) @@ -429,7 +451,7 @@ (defun %puthash (key hash-table value) (declare (type hash-table hash-table)) - (assert (hash-table-index-vector hash-table)) + (aver (hash-table-index-vector hash-table)) (without-gcing ;; We need to rehash here so that a current key can be found if it ;; exists. Check that there is room for one more entry. May not be @@ -483,7 +505,7 @@ ;; Pop a KV slot off the free list (let ((free-kv-slot (hash-table-next-free-kv hash-table))) ;; Double-check for overflow. - (assert (not (zerop free-kv-slot))) + (aver (not (zerop free-kv-slot))) (setf (hash-table-next-free-kv hash-table) (aref next-vector free-kv-slot)) (incf (hash-table-number-entries hash-table)) @@ -495,7 +517,7 @@ (when hash-vector (if (not eq-based) (setf (aref hash-vector free-kv-slot) hashing) - (assert (= (aref hash-vector free-kv-slot) #x80000000)))) + (aver (= (aref hash-vector free-kv-slot) +magic-hash-vector-value+)))) ;; Push this slot into the next chain. (setf (aref next-vector free-kv-slot) next) @@ -504,8 +526,8 @@ (defun remhash (key hash-table) #!+sb-doc - "Remove the entry in HASH-TABLE associated with KEY. Returns T if there - was such an entry, and NIL if not." + "Remove the entry in HASH-TABLE associated with KEY. Return T if there + was such an entry, or NIL if not." (declare (type hash-table hash-table) (values (member t nil))) (without-gcing @@ -530,74 +552,46 @@ (hash-vector (hash-table-hash-vector hash-table)) (test-fun (hash-table-test-fun hash-table))) (declare (type index index next)) - (cond ((zerop next) - nil) - ((if (or eq-based (not hash-vector)) - (eq key (aref table (* 2 next))) - (and (= hashing (aref hash-vector next)) - (funcall test-fun key (aref table (* 2 next))))) - - ;; FIXME: Substantially the same block of code seems to - ;; appear in all three cases. (In the first case, it - ;; appear bare; in the other two cases, it's wrapped in - ;; DO.) It should be defined in a separate (possibly - ;; inline) DEFUN or FLET. - - ;; Mark slot as empty. - (setf (aref table (* 2 next)) +empty-ht-slot+ - (aref table (1+ (* 2 next))) +empty-ht-slot+) - ;; Update the index-vector pointer. - (setf (aref index-vector index) (aref next-vector next)) - ;; Push KV slot onto free chain. - (setf (aref next-vector next) - (hash-table-next-free-kv hash-table)) - (setf (hash-table-next-free-kv hash-table) next) - (when hash-vector - (setf (aref hash-vector next) #x80000000)) - (decf (hash-table-number-entries hash-table)) - t) - ;; Search next-vector chain for a matching key. - ((or eq-based (not hash-vector)) - ;; EQ based - (do ((prior next next) - (next (aref next-vector next) (aref next-vector next))) - ((zerop next) nil) - (declare (type index next)) - (when (eq key (aref table (* 2 next))) - ;; Mark slot as empty. - (setf (aref table (* 2 next)) +empty-ht-slot+ - (aref table (1+ (* 2 next))) +empty-ht-slot+) - ;; Update the prior pointer in the chain to skip this. - (setf (aref next-vector prior) (aref next-vector next)) - ;; Push KV slot onto free chain. - (setf (aref next-vector next) - (hash-table-next-free-kv hash-table)) - (setf (hash-table-next-free-kv hash-table) next) - (when hash-vector - (setf (aref hash-vector next) #x80000000)) - (decf (hash-table-number-entries hash-table)) - (return t)))) - (t - ;; not EQ based - (do ((prior next next) - (next (aref next-vector next) (aref next-vector next))) - ((zerop next) nil) - (declare (type index next)) - (when (and (= hashing (aref hash-vector next)) - (funcall test-fun key (aref table (* 2 next)))) - ;; Mark slot as empty. - (setf (aref table (* 2 next)) +empty-ht-slot+) - (setf (aref table (1+ (* 2 next))) +empty-ht-slot+) - ;; Update the prior pointer in the chain to skip this. - (setf (aref next-vector prior) (aref next-vector next)) - ;; Push KV slot onto free chain. - (setf (aref next-vector next) - (hash-table-next-free-kv hash-table)) - (setf (hash-table-next-free-kv hash-table) next) - (when hash-vector - (setf (aref hash-vector next) #x80000000)) - (decf (hash-table-number-entries hash-table)) - (return t))))))))) + (flet ((clear-slot (chain-vector prior-slot-location slot-location) + ;; Mark slot as empty. + (setf (aref table (* 2 slot-location)) +empty-ht-slot+ + (aref table (1+ (* 2 slot-location))) +empty-ht-slot+) + ;; Update the prior pointer in the chain to skip this. + (setf (aref chain-vector prior-slot-location) + (aref next-vector slot-location)) + ;; Push KV slot onto free chain. + (setf (aref next-vector slot-location) + (hash-table-next-free-kv hash-table)) + (setf (hash-table-next-free-kv hash-table) slot-location) + (when hash-vector + (setf (aref hash-vector slot-location) +magic-hash-vector-value+)) + (decf (hash-table-number-entries hash-table)) + t)) + (cond ((zerop next) + nil) + ((if (or eq-based (not hash-vector)) + (eq key (aref table (* 2 next))) + (and (= hashing (aref hash-vector next)) + (funcall test-fun key (aref table (* 2 next))))) + (clear-slot index-vector index next)) + ;; Search next-vector chain for a matching key. + ((or eq-based (not hash-vector)) + ;; EQ based + (do ((prior next next) + (next (aref next-vector next) (aref next-vector next))) + ((zerop next) nil) + (declare (type index next)) + (when (eq key (aref table (* 2 next))) + (return-from remhash (clear-slot next-vector prior next))))) + (t + ;; not EQ based + (do ((prior next next) + (next (aref next-vector next) (aref next-vector next))) + ((zerop next) nil) + (declare (type index next)) + (when (and (= hashing (aref hash-vector next)) + (funcall test-fun key (aref table (* 2 next)))) + (return-from remhash (clear-slot next-vector prior next))))))))))) (defun clrhash (hash-table) #!+sb-doc @@ -617,7 +611,7 @@ (do ((i 2 (1+ i))) ((>= i kv-length)) (setf (aref kv-vector i) +empty-ht-slot+)) - (assert (eq (aref kv-vector 0) hash-table)) + (aver (eq (aref kv-vector 0) hash-table)) ;; Set up the free list, all free. (do ((i 1 (1+ i))) ((>= i (1- size))) @@ -631,7 +625,7 @@ ;; Clear the hash-vector. (when hash-vector (dotimes (i size) - (setf (aref hash-vector i) #x80000000)))) + (setf (aref hash-vector i) +magic-hash-vector-value+)))) (setf (hash-table-number-entries hash-table) 0) hash-table) @@ -642,15 +636,13 @@ ;;; not only parts of the system which are defined after DEFUN MAPHASH. ;;; 2. It could be conditional on compilation policy, so that ;;; it could be compiled as a full call instead of an inline -;;; expansion when SPACE>SPEED. (Not only would this save space, -;;; it might actually be faster when a call is made from byte-compiled -;;; code.) +;;; expansion when SPACE>SPEED. (declaim (inline maphash)) (defun maphash (function-designator hash-table) #!+sb-doc - "For each entry in HASH-TABLE, call the designated function on the key - and value of the entry. Return NIL." - (let ((fun (%coerce-callable-to-function function-designator)) + "For each entry in HASH-TABLE, call the designated two-argument function + on the key and value of the entry. Return NIL." + (let ((fun (%coerce-callable-to-fun function-designator)) (size (length (hash-table-next-vector hash-table)))) (declare (type function fun)) (do ((i 1 (1+ i))) @@ -665,33 +657,56 @@ ;;;; methods on HASH-TABLE -(def!method print-object ((ht hash-table) stream) +;;; Return a list of keyword args and values to use for MAKE-HASH-TABLE +;;; when reconstructing HASH-TABLE. +(defun %hash-table-ctor-args (hash-table) + (when (hash-table-weak-p hash-table) + ;; FIXME: This might actually work with no trouble, but as of + ;; sbcl-0.6.12.10 when this code was written, weak hash tables + ;; weren't working yet, so I couldn't test it. When weak hash + ;; tables are supported again, this should be fixed. + (error "can't dump weak hash tables readably")) ; defensive programming.. + `(:test ',(hash-table-test hash-table) + :size ',(hash-table-size hash-table) + :rehash-size ',(hash-table-rehash-size hash-table) + :rehash-threshold ',(hash-table-rehash-threshold hash-table))) + +;;; Return an association list representing the same data as HASH-TABLE. +(defun %hash-table-alist (hash-table) + (let ((result nil)) + (maphash (lambda (key value) + (push (cons key value) result)) + hash-table) + result)) + +;;; Stuff an association list into HASH-TABLE. Return the hash table, +;;; so that we can use this for the *PRINT-READABLY* case in +;;; PRINT-OBJECT (HASH-TABLE T) without having to worry about LET +;;; forms and readable gensyms and stuff. +(defun %stuff-hash-table (hash-table alist) + (dolist (x alist) + (setf (gethash (car x) hash-table) (cdr x))) + hash-table) + +(def!method print-object ((hash-table hash-table) stream) (declare (type stream stream)) - (print-unreadable-object (ht stream :type t :identity t) - (format stream - ":TEST ~S :COUNT ~D" - (hash-table-test ht) - (hash-table-number-entries ht)))) + (cond ((not *print-readably*) + (print-unreadable-object (hash-table stream :type t :identity t) + (format stream + ":TEST ~S :COUNT ~S" + (hash-table-test hash-table) + (hash-table-count hash-table)))) + ((not *read-eval*) + (error "can't print hash tables readably without *READ-EVAL*")) + (t + (with-standard-io-syntax + (format stream + "#.~W" + `(%stuff-hash-table (make-hash-table ,@(%hash-table-ctor-args + hash-table)) + ',(%hash-table-alist hash-table))))))) (def!method make-load-form ((hash-table hash-table) &optional environment) - (declare (ignorable environment)) - (values - `(make-hash-table - :test ',(hash-table-test hash-table) - :size ',(hash-table-size hash-table) - :rehash-size ',(hash-table-rehash-size hash-table) - :rehash-threshold ',(hash-table-rehash-threshold hash-table)) - (let ((alist nil)) - (maphash (lambda (key value) - (push (cons key value) alist)) - hash-table) - (if alist - ;; FIXME: It'd probably be more efficient here to write the - ;; hash table values as a SIMPLE-VECTOR rather than an alist. - ;; (Someone dumping a huge hash table might well thank us..) - `(stuff-hash-table ,hash-table ',alist) - nil)))) - -(defun stuff-hash-table (table alist) - (dolist (x alist) - (setf (gethash (car x) table) (cdr x)))) + (declare (ignore environment)) + (values `(make-hash-table ,@(%hash-table-ctor-args hash-table)) + `(%stuff-hash-table ,hash-table ',(%hash-table-alist hash-table))))