X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-sxhash.lisp;h=38422a105c00dfdd61d212faa698565b51faf52c;hb=59ac7389b0bead82dfe2c94a5edab79dc9569c61;hp=7f6f7b309547c05cbb0942423bd0e8737bafe2db;hpb=de201aeb12169d0bd377eca4da6116c2797a66ad;p=sbcl.git diff --git a/src/code/target-sxhash.lisp b/src/code/target-sxhash.lisp index 7f6f7b3..38422a1 100644 --- a/src/code/target-sxhash.lisp +++ b/src/code/target-sxhash.lisp @@ -26,9 +26,9 @@ ;;; desiderata: ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the ;;; same value as #(5 1), and ending up in real trouble in some -;;; special cases like bit vectors the way that CMUCL SXHASH 18b +;;; special cases like bit vectors the way that CMUCL 18b SXHASH ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..) -;;; * We'd like to scatter our hash values the entire possible range +;;; * We'd like to scatter our hash values over the entire possible range ;;; of values instead of hashing small or common key values (like ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b ;;; SXHASH function does, again helping to avoid pathologies like @@ -36,9 +36,10 @@ ;;; * We'd like this to be simple and fast, too. ;;; ;;; FIXME: Should this be INLINE? -(declaim (ftype (function ((and fixnum unsigned-byte) - (and fixnum unsigned-byte)) - (and fixnum unsigned-byte)) mix)) +(declaim (ftype (sfunction ((and fixnum unsigned-byte) + (and fixnum unsigned-byte)) + (and fixnum unsigned-byte)) + mix)) (defun mix (x y) ;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler ;; were smarter about optimizing ASH. (Without the THE FIXNUM below, @@ -63,33 +64,53 @@ ;; algorithms, but we're not pushing them hard enough here for them ;; to be cryptographically strong.) (let* ((xy (+ (* x 3) y))) - (declare (type (unsigned-byte 32) xy)) - (the (and fixnum unsigned-byte) - (logand most-positive-fixnum - (logxor 441516657 - xy - (the fixnum (ash xy -5))))))) + (logand most-positive-fixnum + (logxor 441516657 + xy + (ash xy -5))))) ;;;; hashing strings ;;;; -;;;; Note that this operation is used in compiler symbol table lookups, so we'd -;;;; like it to be fast. +;;;; Note that this operation is used in compiler symbol table +;;;; lookups, so we'd like it to be fast. +;;;; +;;;; As of 2004-03-10, we implement the one-at-a-time algorithm +;;;; designed by Bob Jenkins (see +;;;; for some more +;;;; information). #!-sb-fluid (declaim (inline %sxhash-substring)) (defun %sxhash-substring (string &optional (count (length string))) ;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the - ;; cross-compiler were smarter about ASH, but we need it for sbcl-0.5.0m. + ;; cross-compiler were smarter about ASH, but we need it for + ;; sbcl-0.5.0m. (probably no longer true? We might need SAFETY 0 + ;; to elide some type checks, but then again if this is inlined in + ;; all the critical places, we might not -- CSR, 2004-03-10) (declare (optimize (speed 3) (safety 0))) (declare (type string string)) (declare (type index count)) - (let ((result 408967240)) - (declare (type fixnum result)) - (dotimes (i count) - (declare (type index i)) - (mixf result - (the fixnum - (ash (char-code (aref string i)) 5)))) - result)) + (let ((result 0)) + (declare (type (unsigned-byte 32) result)) + (unless (typep string '(vector nil)) + (dotimes (i count) + (declare (type index i)) + (setf result + (ldb (byte 32 0) + (+ result (char-code (aref string i))))) + (setf result + (ldb (byte 32 0) + (+ result (ash result 10)))) + (setf result + (logxor result (ash result -6))))) + (setf result + (ldb (byte 32 0) + (+ result (ash result 3)))) + (setf result + (logxor result (ash result -11))) + (setf result + (ldb (byte 32 0) + (logxor result (ash result 15)))) + (logand result most-positive-fixnum))) ;;; test: ;;; (let ((ht (make-hash-table :test 'equal))) ;;; (do-all-symbols (symbol) @@ -99,25 +120,44 @@ ;;; (unless (string= (gethash hash ht) string) ;;; (format t "collision: ~S ~S~%" string (gethash hash ht))) ;;; (setf (gethash hash ht) string)))) -;;; (format t "final count=~D~%" (hash-table-count ht))) +;;; (format t "final count=~W~%" (hash-table-count ht))) (defun %sxhash-simple-string (x) (declare (optimize speed)) (declare (type simple-string x)) - (%sxhash-substring x)) + ;; KLUDGE: this FLET is a workaround (suggested by APD) for presence + ;; of let conversion in the cross compiler, which otherwise causes + ;; strongly suboptimal register allocation. + (flet ((trick (x) + (%sxhash-substring x))) + (declare (notinline trick)) + (trick x))) (defun %sxhash-simple-substring (x count) (declare (optimize speed)) (declare (type simple-string x)) (declare (type index count)) - (%sxhash-substring x count)) + ;; see comment in %SXHASH-SIMPLE-STRING + (flet ((trick (x count) + (%sxhash-substring x count))) + (declare (notinline trick)) + (trick x count))) ;;;; the SXHASH function +;; simple cases +(declaim (ftype (sfunction (integer) (integer 0 #.sb!xc:most-positive-fixnum)) + sxhash-bignum)) +(declaim (ftype (sfunction (t) (integer 0 #.sb!xc:most-positive-fixnum)) + sxhash-instance)) + (defun sxhash (x) + ;; profiling SXHASH is hard, but we might as well try to make it go + ;; fast, in case it is the bottleneck somwhere. -- CSR, 2003-03-14 + (declare (optimize speed)) (labels ((sxhash-number (x) (etypecase x - (fixnum (sxhash x)) ; through DEFTRANSFORM + (fixnum (sxhash x)) ; through DEFTRANSFORM (integer (sb!bignum:sxhash-bignum x)) (single-float (sxhash x)) ; through DEFTRANSFORM (double-float (sxhash x)) ; through DEFTRANSFORM @@ -135,32 +175,47 @@ (sxhash-recurse (x &optional (depthoid +max-hash-depthoid+)) (declare (type index depthoid)) (typecase x + ;; we test for LIST here, rather than CONS, because the + ;; type test for CONS is in fact the test for + ;; LIST-POINTER-LOWTAG followed by a negated test for + ;; NIL. If we're going to have to test for NIL anyway, + ;; we might as well do it explicitly and pick off the + ;; answer. -- CSR, 2004-07-14 (list - (if (plusp depthoid) - (mix (sxhash-recurse (car x) (1- depthoid)) - (sxhash-recurse (cdr x) (1- depthoid))) - 261835505)) + (if (null x) + (sxhash x) ; through DEFTRANSFORM + (if (plusp depthoid) + (mix (sxhash-recurse (car x) (1- depthoid)) + (sxhash-recurse (cdr x) (1- depthoid))) + 261835505))) (instance - (if (typep x 'structure-object) + (if (or (typep x 'structure-object) (typep x 'condition)) (logxor 422371266 (sxhash ; through DEFTRANSFORM - (class-name (layout-class (%instance-layout x))))) - 309518995)) + (classoid-name + (layout-classoid (%instance-layout x))))) + (sxhash-instance x))) (symbol (sxhash x)) ; through DEFTRANSFORM - (number (sxhash-number x)) (array (typecase x (simple-string (sxhash x)) ; through DEFTRANSFORM (string (%sxhash-substring x)) - (bit-vector (let ((result 410823708)) - (declare (type fixnum result)) - (dotimes (i (min depthoid (length x))) - (mixf result (aref x i))) - result)) + (simple-bit-vector (sxhash x)) ; through DEFTRANSFORM + (bit-vector + ;; FIXME: It must surely be possible to do better + ;; than this. The problem is that a non-SIMPLE + ;; BIT-VECTOR could be displaced to another, with a + ;; non-zero offset -- so that significantly more + ;; work needs to be done using the %RAW-BITS + ;; approach. This will probably do for now. + (sxhash-recurse (copy-seq x) depthoid)) (t (logxor 191020317 (sxhash (array-rank x)))))) (character (logxor 72185131 (sxhash (char-code x)))) ; through DEFTRANSFORM + ;; general, inefficient case of NUMBER + (number (sxhash-number x)) + (generic-function (sxhash-instance x)) (t 42)))) (sxhash-recurse x))) @@ -185,7 +240,7 @@ (array (array-psxhash key depthoid)) (hash-table (hash-table-psxhash key)) (structure-object (structure-object-psxhash key depthoid)) - (list (list-psxhash key depthoid)) + (cons (list-psxhash key depthoid)) (number (number-psxhash key)) (character (sxhash (char-upcase key))) (t (sxhash key)))) @@ -238,8 +293,8 @@ (declare (type (integer 0 #.+max-hash-depthoid+) depthoid)) (let* ((layout (%instance-layout key)) ; i.e. slot #0 (length (layout-length layout)) - (class (layout-class layout)) - (name (class-name class)) + (classoid (layout-classoid layout)) + (name (classoid-name classoid)) (result (mix (sxhash name) (the fixnum 79867)))) (declare (type fixnum result)) (dotimes (i (min depthoid (1- length))) @@ -304,7 +359,7 @@ (etypecase key (single-float (frob single-float)) (double-float (frob double-float)) - (short-float (frob short-float)) + #!+long-float (long-float (error "LONG-FLOAT not currently supported"))))) (rational (if (and (<= most-negative-double-float key