X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-sxhash.lisp;h=0c4f16ff2bda1b855e40c2a98ecd7e231aa76390;hb=2d3cb6dba6461e98744eca2a1df4f770cea468ca;hp=318e21b1ae8b7f1fab6b33f754c25c9a833524f2;hpb=95a6db7329b91dd90d165dd4057b9b5098d34aa2;p=sbcl.git diff --git a/src/code/target-sxhash.lisp b/src/code/target-sxhash.lisp index 318e21b..0c4f16f 100644 --- a/src/code/target-sxhash.lisp +++ b/src/code/target-sxhash.lisp @@ -26,9 +26,9 @@ ;;; desiderata: ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the ;;; same value as #(5 1), and ending up in real trouble in some -;;; special cases like bit vectors the way that CMUCL SXHASH 18b +;;; special cases like bit vectors the way that CMUCL 18b SXHASH ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..) -;;; * We'd like to scatter our hash values the entire possible range +;;; * We'd like to scatter our hash values over the entire possible range ;;; of values instead of hashing small or common key values (like ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b ;;; SXHASH function does, again helping to avoid pathologies like @@ -99,7 +99,7 @@ ;;; (unless (string= (gethash hash ht) string) ;;; (format t "collision: ~S ~S~%" string (gethash hash ht))) ;;; (setf (gethash hash ht) string)))) -;;; (format t "final count=~D~%" (hash-table-count ht))) +;;; (format t "final count=~W~%" (hash-table-count ht))) (defun %sxhash-simple-string (x) (declare (optimize speed)) @@ -115,9 +115,12 @@ ;;;; the SXHASH function (defun sxhash (x) + ;; profiling SXHASH is hard, but we might as well try to make it go + ;; fast, in case it is the bottleneck somwhere. -- CSR, 2003-03-14 + (declare (optimize speed)) (labels ((sxhash-number (x) (etypecase x - (fixnum (sxhash x)) ; through DEFTRANSFORM + (fixnum (sxhash x)) ; through DEFTRANSFORM (integer (sb!bignum:sxhash-bignum x)) (single-float (sxhash x)) ; through DEFTRANSFORM (double-float (sxhash x)) ; through DEFTRANSFORM @@ -135,7 +138,7 @@ (sxhash-recurse (x &optional (depthoid +max-hash-depthoid+)) (declare (type index depthoid)) (typecase x - (list + (cons (if (plusp depthoid) (mix (sxhash-recurse (car x) (1- depthoid)) (sxhash-recurse (cdr x) (1- depthoid))) @@ -144,23 +147,30 @@ (if (typep x 'structure-object) (logxor 422371266 (sxhash ; through DEFTRANSFORM - (class-name (layout-class (%instance-layout x))))) - 309518995)) + (classoid-name + (layout-classoid (%instance-layout x))))) + (sxhash-instance x))) (symbol (sxhash x)) ; through DEFTRANSFORM - (number (sxhash-number x)) (array (typecase x (simple-string (sxhash x)) ; through DEFTRANSFORM (string (%sxhash-substring x)) - (bit-vector (let ((result 410823708)) - (declare (type fixnum result)) - (dotimes (i (min depthoid (length x))) - (mixf result (aref x i))) - result)) + (simple-bit-vector (sxhash x)) ; through DEFTRANSFORM + (bit-vector + ;; FIXME: It must surely be possible to do better + ;; than this. The problem is that a non-SIMPLE + ;; BIT-VECTOR could be displaced to another, with a + ;; non-zero offset -- so that significantly more + ;; work needs to be done using the %RAW-BITS + ;; approach. This will probably do for now. + (sxhash-recurse (copy-seq x) depthoid)) (t (logxor 191020317 (sxhash (array-rank x)))))) (character (logxor 72185131 (sxhash (char-code x)))) ; through DEFTRANSFORM + ;; general, inefficient case of NUMBER + (number (sxhash-number x)) + (generic-function (sxhash-instance x)) (t 42)))) (sxhash-recurse x))) @@ -185,7 +195,7 @@ (array (array-psxhash key depthoid)) (hash-table (hash-table-psxhash key)) (structure-object (structure-object-psxhash key depthoid)) - (list (list-psxhash key depthoid)) + (cons (list-psxhash key depthoid)) (number (number-psxhash key)) (character (sxhash (char-upcase key))) (t (sxhash key)))) @@ -238,8 +248,8 @@ (declare (type (integer 0 #.+max-hash-depthoid+) depthoid)) (let* ((layout (%instance-layout key)) ; i.e. slot #0 (length (layout-length layout)) - (class (layout-class layout)) - (name (class-name class)) + (classoid (layout-classoid layout)) + (name (classoid-name classoid)) (result (mix (sxhash name) (the fixnum 79867)))) (declare (type fixnum result)) (dotimes (i (min depthoid (1- length))) @@ -319,88 +329,3 @@ (mixf result (number-psxhash (realpart key))) (mixf result (number-psxhash (imagpart key))) result)))))) - -;;; SXHASH and PSXHASH should distribute hash values well over the -;;; space of possible values, so that collisions between the hash values -;;; of unequal objects should be very uncommon. -;;; -;;; FIXME: These tests should be enabled once the rest of the system is -;;; stable. (For now, I don't want to mess with things like making sure -;;; that bignums are hashed uniquely.) -;;;#!+sb-test -#+nil -(let* ((test-cases `((0 . 1) - (0 . 1) - (1 . 0) - ((1 . 0) (0 . 0)) - ((0 . 1) (0 . 0)) - ((0 . 0) (1 . 0)) - ((0 . 0) (0 . 1)) - #((1 . 0) (0 . 0)) - #((0 . 1) (0 . 0)) - #((0 . 0) (1 . 0)) - #((0 . 0) (0 . 1)) - #((1 . 0) (0 . 0)) - #((0 1) (0 0)) - #((0 0) (1 0)) - #((0 0) (0 1)) - #(#(1 0) (0 0)) - #(#(0 1) (0 0)) - #(#(0 0) (1 0)) - #(#(0 0) (0 1)) - #(#*00 #*10) - #(#(0 0) (0 1.0d0)) - #(#(-0.0d0 0) (1.0 0)) - ;; KLUDGE: Some multi-dimensional array test cases would - ;; be good here too, but currently SBCL isn't smart enough - ;; to dump them as literals, and I'm too lazy to make - ;; code to create them at run time. -- WHN 20000111 - 44 44.0 44.0d0 - 44 44.0 44.0d0 - -44 -44.0 -44.0d0 - 0 0.0 0.0d0 - -0 -0.0 -0.0d0 - -121 -121.0 -121.0d0 - 3/4 0.75 0.75d0 - -3/4 -0.75 -0.75d0 - 44.1 44.1d0 - 45 45.0 45.0d0 - ,(expt 2 33) ,(expt 2.0 33) ,(expt 2.0d0 33) - ,(- (expt 1/2 50)) ,(- (expt 0.5 50)) ,(- (expt 0.5d0 50)) - ,(- (expt 1/2 50)) ,(- (expt 0.5 50)) ,(- (expt 0.5d0 50)) - #c(1.0 2.0) #c(1 2.0) #c(1.5 -3/2) #c(3/2 -3/2) #c(0 1) - #c(1.0 2.0) #c(1 2.0) #c(1.5 -3/2) #c(3/2 -3/2) #c(0 1) - ,(make-hash-table) - ,(make-hash-table :test 'equal) - "abc" "ABC" "aBc" 'abc #(#\a #\b #\c) #(a b c) #("A" b c) - "abcc" - "" #* #() () (()) #(()) (#()) - "" #* #() () (()) #(()) (#()) - #\x #\X #\* - #\x #\X #\*))) - (dolist (i test-cases) - (unless (typep (sxhash i) '(and fixnum unsigned-byte)) - (error "bad SXHASH behavior for ~S" i)) - (unless (typep (psxhash i) '(and fixnum unsigned-byte)) - (error "bad PSXHASH behavior for ~S" i)) - (dolist (j test-cases) - (flet ((t->boolean (x) (if x t nil))) - ;; Note: It's possible that a change to the hashing algorithm could - ;; leave it correct but still cause this test to bomb by causing an - ;; unlucky random collision. That's not very likely (since there are - ;; (EXPT 2 29) possible hash values and only on the order of 100 test - ;; cases, but it's probably worth checking if you are getting a - ;; mystifying error from this test. - (unless (eq (t->boolean (equal i j)) - (t->boolean (= (sxhash i) (sxhash j)))) - (error "bad SXHASH behavior for ~S ~S" i j)) - (unless (eq (t->boolean (equalp i j)) - (t->boolean (= (psxhash i) (psxhash j)))) - (error "bad PSXHASH behavior for ~S ~S" i j)))))) - -;;; FIXME: Test that the the hash functions can deal with common cases without -;;; consing. -;(defun consless-test () -; (dotimes (j 100000) -; (dolist (i '("yo" #(1 2 3) #2A((1 2) (1 2)) (1 2 (3)) 1 1.0 1.0d0)) -; (psxhash i))))