X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcode%2Ftarget-sxhash.lisp;h=9938b72a67ae7536f98f549743283978dd35b18f;hb=7f1e94ae961a198e00daf281eb1dc858e5b2dcc7;hp=7dfcd3eff33e99cb6ebcd9d83797c00d892b5e08;hpb=1bfc464c657a8f4ad24ef612f76a38d8f6f1bbad;p=sbcl.git diff --git a/src/code/target-sxhash.lisp b/src/code/target-sxhash.lisp index 7dfcd3e..9938b72 100644 --- a/src/code/target-sxhash.lisp +++ b/src/code/target-sxhash.lisp @@ -11,6 +11,9 @@ (in-package "SB!IMPL") +(defun pointer-hash (key) + (pointer-hash key)) + ;;; the depthoid explored when calculating hash values ;;; ;;; "Depthoid" here is a sort of mixture of what Common Lisp ordinarily calls @@ -26,144 +29,202 @@ ;;; desiderata: ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the ;;; same value as #(5 1), and ending up in real trouble in some -;;; special cases like bit vectors the way that CMUCL SXHASH 18b +;;; special cases like bit vectors the way that CMUCL 18b SXHASH ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..) -;;; * We'd like to scatter our hash values the entire possible range +;;; * We'd like to scatter our hash values over the entire possible range ;;; of values instead of hashing small or common key values (like ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b ;;; SXHASH function does, again helping to avoid pathologies like ;;; hashing all bit vectors to 1. ;;; * We'd like this to be simple and fast, too. -;;; -;;; FIXME: Should this be INLINE? -(declaim (ftype (function ((and fixnum unsigned-byte) - (and fixnum unsigned-byte)) - (and fixnum unsigned-byte)) mix)) +(declaim (ftype (sfunction ((and fixnum unsigned-byte) + (and fixnum unsigned-byte)) + (and fixnum unsigned-byte)) + mix)) +(declaim (inline mix)) (defun mix (x y) - ;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler - ;; were smarter about optimizing ASH. (Without the THE FIXNUM below, - ;; and the (SAFETY 0) declaration here to get the compiler to trust - ;; it, the sbcl-0.5.0m cross-compiler running under Debian - ;; cmucl-2.4.17 turns the ASH into a full call, requiring the - ;; UNSIGNED-BYTE 32 argument to be coerced to a bignum, requiring - ;; consing, and thus generally obliterating performance.) - (declare (optimize (speed 3) (safety 0))) + (declare (optimize (speed 3))) (declare (type (and fixnum unsigned-byte) x y)) ;; the ideas here: - ;; * Bits diffuse in both directions (shifted left by up to 2 places - ;; in the calculation of XY, and shifted right by up to 5 places - ;; by the ASH). + ;; * Bits diffuse in both directions (shifted arbitrarily left by + ;; the multiplication in the calculation of XY, and shifted + ;; right by up to 5 places by the ASH). ;; * The #'+ and #'LOGXOR operations don't commute with each other, ;; so different bit patterns are mixed together as they shift ;; past each other. - ;; * The arbitrary constant in the #'LOGXOR expression is intended - ;; to help break up any weird anomalies we might otherwise get - ;; when hashing highly regular patterns. + ;; * The arbitrary constant XOR used in the LOGXOR expression is + ;; intended to help break up any weird anomalies we might + ;; otherwise get when hashing highly regular patterns. ;; (These are vaguely like the ideas used in many cryptographic ;; algorithms, but we're not pushing them hard enough here for them ;; to be cryptographically strong.) - (let* ((xy (+ (* x 3) y))) - (declare (type (unsigned-byte 32) xy)) - (the (and fixnum unsigned-byte) - (logand most-positive-fixnum - (logxor 441516657 - xy - (the fixnum (ash xy -5))))))) + ;; + ;; note: 3622009729038463111 is a 62-bit prime such that its low 61 + ;; bits, low 60 bits and low 29 bits are all also primes, thus + ;; giving decent distributions no matter which of the possible + ;; values of most-positive-fixnum we have. It is derived by simple + ;; search starting from 2^60*pi. The multiplication should be + ;; efficient no matter what the platform thanks to modular + ;; arithmetic. + (let* ((mul (logand 3622009729038463111 sb!xc:most-positive-fixnum)) + (xor (logand 608948948376289905 sb!xc:most-positive-fixnum)) + (xy (logand (+ (* x mul) y) sb!xc:most-positive-fixnum))) + (logand (logxor xor xy (ash xy -5)) sb!xc:most-positive-fixnum))) ;;;; hashing strings ;;;; -;;;; Note that this operation is used in compiler symbol table lookups, so we'd -;;;; like it to be fast. +;;;; Note that this operation is used in compiler symbol table +;;;; lookups, so we'd like it to be fast. +;;;; +;;;; As of 2004-03-10, we implement the one-at-a-time algorithm +;;;; designed by Bob Jenkins (see +;;;; for some more +;;;; information). -#!-sb-fluid (declaim (inline %sxhash-substring)) +(declaim (inline %sxhash-substring)) (defun %sxhash-substring (string &optional (count (length string))) ;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the - ;; cross-compiler were smarter about ASH, but we need it for sbcl-0.5.0m. + ;; cross-compiler were smarter about ASH, but we need it for + ;; sbcl-0.5.0m. (probably no longer true? We might need SAFETY 0 + ;; to elide some type checks, but then again if this is inlined in + ;; all the critical places, we might not -- CSR, 2004-03-10) (declare (optimize (speed 3) (safety 0))) (declare (type string string)) (declare (type index count)) - (let ((result 408967240)) - (declare (type fixnum result)) - (dotimes (i count) - (declare (type index i)) - (mixf result - (the fixnum - (ash (char-code (aref string i)) 5)))) - result)) + (macrolet ((set-result (form) + `(setf result (ldb (byte #.sb!vm:n-word-bits 0) ,form)))) + (let ((result 0)) + (declare (type (unsigned-byte #.sb!vm:n-word-bits) result)) + (unless (typep string '(vector nil)) + (dotimes (i count) + (declare (type index i)) + (set-result (+ result (char-code (aref string i)))) + (set-result (+ result (ash result 10))) + (set-result (logxor result (ash result -6))))) + (set-result (+ result (ash result 3))) + (set-result (logxor result (ash result -11))) + (set-result (logxor result (ash result 15))) + (logand result most-positive-fixnum)))) ;;; test: ;;; (let ((ht (make-hash-table :test 'equal))) ;;; (do-all-symbols (symbol) ;;; (let* ((string (symbol-name symbol)) -;;; (hash (%sxhash-substring string))) -;;; (if (gethash hash ht) -;;; (unless (string= (gethash hash ht) string) -;;; (format t "collision: ~S ~S~%" string (gethash hash ht))) -;;; (setf (gethash hash ht) string)))) +;;; (hash (%sxhash-substring string))) +;;; (if (gethash hash ht) +;;; (unless (string= (gethash hash ht) string) +;;; (format t "collision: ~S ~S~%" string (gethash hash ht))) +;;; (setf (gethash hash ht) string)))) ;;; (format t "final count=~W~%" (hash-table-count ht))) (defun %sxhash-simple-string (x) (declare (optimize speed)) (declare (type simple-string x)) - (%sxhash-substring x)) + ;; KLUDGE: this FLET is a workaround (suggested by APD) for presence + ;; of let conversion in the cross compiler, which otherwise causes + ;; strongly suboptimal register allocation. + (flet ((trick (x) + (%sxhash-substring x))) + (declare (notinline trick)) + (trick x))) (defun %sxhash-simple-substring (x count) (declare (optimize speed)) (declare (type simple-string x)) (declare (type index count)) - (%sxhash-substring x count)) + ;; see comment in %SXHASH-SIMPLE-STRING + (flet ((trick (x count) + (%sxhash-substring x count))) + (declare (notinline trick)) + (trick x count))) ;;;; the SXHASH function +;; simple cases +(declaim (ftype (sfunction (integer) hash) sxhash-bignum)) +(declaim (ftype (sfunction (t) hash) sxhash-instance)) + (defun sxhash (x) + ;; profiling SXHASH is hard, but we might as well try to make it go + ;; fast, in case it is the bottleneck somewhere. -- CSR, 2003-03-14 + (declare (optimize speed)) (labels ((sxhash-number (x) - (etypecase x - (fixnum (sxhash x)) ; through DEFTRANSFORM - (integer (sb!bignum:sxhash-bignum x)) - (single-float (sxhash x)) ; through DEFTRANSFORM - (double-float (sxhash x)) ; through DEFTRANSFORM - #!+long-float (long-float (error "stub: no LONG-FLOAT")) - (ratio (let ((result 127810327)) - (declare (type fixnum result)) - (mixf result (sxhash-number (numerator x))) - (mixf result (sxhash-number (denominator x))) - result)) - (complex (let ((result 535698211)) - (declare (type fixnum result)) - (mixf result (sxhash-number (realpart x))) - (mixf result (sxhash-number (imagpart x))) - result)))) - (sxhash-recurse (x &optional (depthoid +max-hash-depthoid+)) - (declare (type index depthoid)) - (typecase x - (list - (if (plusp depthoid) - (mix (sxhash-recurse (car x) (1- depthoid)) - (sxhash-recurse (cdr x) (1- depthoid))) - 261835505)) - (instance - (if (typep x 'structure-object) - (logxor 422371266 - (sxhash ; through DEFTRANSFORM - (class-name (layout-class (%instance-layout x))))) - 309518995)) - (symbol (sxhash x)) ; through DEFTRANSFORM - (array - (typecase x - (simple-string (sxhash x)) ; through DEFTRANSFORM - (string (%sxhash-substring x)) - (bit-vector (let ((result 410823708)) - (declare (type fixnum result)) - (dotimes (i (min depthoid (length x))) - (mixf result (aref x i))) - result)) - (t (logxor 191020317 (sxhash (array-rank x)))))) - (character - (logxor 72185131 - (sxhash (char-code x)))) ; through DEFTRANSFORM - ;; general, inefficient case of NUMBER - (number (sxhash-number x)) - (t 42)))) - (sxhash-recurse x))) + (etypecase x + (fixnum (sxhash x)) ; through DEFTRANSFORM + (integer (sb!bignum:sxhash-bignum x)) + (single-float (sxhash x)) ; through DEFTRANSFORM + (double-float (sxhash x)) ; through DEFTRANSFORM + #!+long-float (long-float (error "stub: no LONG-FLOAT")) + (ratio (let ((result 127810327)) + (declare (type fixnum result)) + (mixf result (sxhash-number (numerator x))) + (mixf result (sxhash-number (denominator x))) + result)) + (complex (let ((result 535698211)) + (declare (type fixnum result)) + (mixf result (sxhash-number (realpart x))) + (mixf result (sxhash-number (imagpart x))) + result)))) + (sxhash-recurse (x depthoid) + (declare (type index depthoid)) + (typecase x + ;; we test for LIST here, rather than CONS, because the + ;; type test for CONS is in fact the test for + ;; LIST-POINTER-LOWTAG followed by a negated test for + ;; NIL. If we're going to have to test for NIL anyway, + ;; we might as well do it explicitly and pick off the + ;; answer. -- CSR, 2004-07-14 + (list + (if (null x) + (sxhash x) ; through DEFTRANSFORM + (if (plusp depthoid) + (mix (sxhash-recurse (car x) (1- depthoid)) + (sxhash-recurse (cdr x) (1- depthoid))) + 261835505))) + (instance + (if (pathnamep x) + ;; Pathnames are EQUAL if all the components are EQUAL, so + ;; we hash all of the components of a pathname together. + (let ((hash (sxhash-recurse (pathname-host x) depthoid))) + (mixf hash (sxhash-recurse (pathname-device x) depthoid)) + (mixf hash (sxhash-recurse (pathname-directory x) depthoid)) + (mixf hash (sxhash-recurse (pathname-name x) depthoid)) + (mixf hash (sxhash-recurse (pathname-type x) depthoid)) + ;; Hash :NEWEST the same as NIL because EQUAL for + ;; pathnames assumes that :newest and nil are equal. + (let ((version (%pathname-version x))) + (mixf hash (sxhash-recurse (if (eq version :newest) + nil + version) + depthoid)))) + (if (or (typep x 'structure-object) (typep x 'condition)) + (logxor 422371266 + (sxhash ; through DEFTRANSFORM + (classoid-name + (layout-classoid (%instance-layout x))))) + (sxhash-instance x)))) + (symbol (sxhash x)) ; through DEFTRANSFORM + (array + (typecase x + (simple-string (sxhash x)) ; through DEFTRANSFORM + (string (%sxhash-substring x)) + (simple-bit-vector (sxhash x)) ; through DEFTRANSFORM + (bit-vector + ;; FIXME: It must surely be possible to do better + ;; than this. The problem is that a non-SIMPLE + ;; BIT-VECTOR could be displaced to another, with a + ;; non-zero offset -- so that significantly more + ;; work needs to be done using the %VECTOR-RAW-BITS + ;; approach. This will probably do for now. + (sxhash-recurse (copy-seq x) depthoid)) + (t (logxor 191020317 (sxhash (array-rank x)))))) + (character + (logxor 72185131 + (sxhash (char-code x)))) ; through DEFTRANSFORM + ;; general, inefficient case of NUMBER + (number (sxhash-number x)) + (generic-function (sxhash-instance x)) + (t 42)))) + (sxhash-recurse x +max-hash-depthoid+))) ;;;; the PSXHASH function @@ -186,9 +247,9 @@ (array (array-psxhash key depthoid)) (hash-table (hash-table-psxhash key)) (structure-object (structure-object-psxhash key depthoid)) - (list (list-psxhash key depthoid)) + (cons (list-psxhash key depthoid)) (number (number-psxhash key)) - (character (sxhash (char-upcase key))) + (character (char-code (char-upcase key))) (t (sxhash key)))) (defun array-psxhash (key depthoid) @@ -200,37 +261,39 @@ ;; that we must respect fill pointers. (vector (macrolet ((frob () - '(let ((result 572539)) - (declare (type fixnum result)) - (mixf result (length key)) - (dotimes (i (min depthoid (length key))) - (declare (type fixnum i)) - (mixf result - (psxhash (aref key i) - (- depthoid 1 i)))) - result))) - ;; CMU can compile SIMPLE-ARRAY operations so much more efficiently - ;; than the general case that it's probably worth picking off the - ;; common special cases. - (typecase key - (simple-string - ;;(format t "~&SIMPLE-STRING special case~%") - (frob)) - (simple-vector - ;;(format t "~&SIMPLE-VECTOR special case~%") - (frob)) - (t (frob))))) + '(let ((result 572539)) + (declare (type fixnum result)) + (mixf result (length key)) + (when (plusp depthoid) + (decf depthoid) + (dotimes (i (length key)) + (declare (type fixnum i)) + (mixf result + (psxhash (aref key i) depthoid)))) + result)) + (make-dispatch (types) + `(typecase key + ,@(loop for type in types + collect `(,type + (frob)))))) + (make-dispatch (simple-base-string + (simple-array character (*)) + simple-vector + (simple-array (unsigned-byte 8) (*)) + (simple-array fixnum (*)) + t)))) ;; Any other array can be hashed by working with its underlying ;; one-dimensional physical representation. (t (let ((result 60828)) (declare (type fixnum result)) - (dotimes (i (min depthoid (array-rank key))) - (mixf result (array-dimension key i))) - (dotimes (i (min depthoid (array-total-size key))) - (mixf result - (psxhash (row-major-aref key i) - (- depthoid 1 i)))) + (dotimes (i (array-rank key)) + (mixf result (array-dimension key i))) + (when (plusp depthoid) + (decf depthoid) + (dotimes (i (array-total-size key)) + (mixf result + (psxhash (row-major-aref key i) depthoid)))) result)))) (defun structure-object-psxhash (key depthoid) @@ -238,18 +301,21 @@ (declare (type structure-object key)) (declare (type (integer 0 #.+max-hash-depthoid+) depthoid)) (let* ((layout (%instance-layout key)) ; i.e. slot #0 - (length (layout-length layout)) - (class (layout-class layout)) - (name (class-name class)) - (result (mix (sxhash name) (the fixnum 79867)))) + (length (layout-length layout)) + (classoid (layout-classoid layout)) + (name (classoid-name classoid)) + (result (mix (sxhash name) (the fixnum 79867)))) (declare (type fixnum result)) - (dotimes (i (min depthoid (1- length))) + (dotimes (i (min depthoid (- length 1 (layout-n-untagged-slots layout)))) (declare (type fixnum i)) (let ((j (1+ i))) ; skipping slot #0, which is for LAYOUT - (declare (type fixnum j)) - (mixf result - (psxhash (%instance-ref key j) - (1- depthoid))))) + (declare (type fixnum j)) + (mixf result + (psxhash (%instance-ref key j) + (1- depthoid))))) + ;; KLUDGE: Should hash untagged slots, too. (Although +max-hash-depthoid+ + ;; is pretty low currently, so they might not make it into the hash + ;; value anyway.) result)) (defun list-psxhash (key depthoid) @@ -257,12 +323,12 @@ (declare (type list key)) (declare (type (integer 0 #.+max-hash-depthoid+) depthoid)) (cond ((null key) - (the fixnum 480929)) - ((zerop depthoid) - (the fixnum 779578)) - (t - (mix (psxhash (car key) (1- depthoid)) - (psxhash (cdr key) (1- depthoid)))))) + (the fixnum 480929)) + ((zerop depthoid) + (the fixnum 779578)) + (t + (mix (psxhash (car key) (1- depthoid)) + (psxhash (cdr key) (1- depthoid)))))) (defun hash-table-psxhash (key) (declare (optimize speed)) @@ -277,46 +343,46 @@ (declare (optimize speed)) (declare (type number key)) (flet ((sxhash-double-float (val) - (declare (type double-float val)) - ;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the - ;; resulting code works without consing. (In Debian cmucl 2.4.17, - ;; it didn't.) - (sxhash val))) + (declare (type double-float val)) + ;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the + ;; resulting code works without consing. (In Debian cmucl 2.4.17, + ;; it didn't.) + (sxhash val))) (etypecase key (integer (sxhash key)) (float (macrolet ((frob (type) - (let ((lo (coerce most-negative-fixnum type)) - (hi (coerce most-positive-fixnum type))) - `(cond (;; This clause allows FIXNUM-sized integer - ;; values to be handled without consing. - (<= ,lo key ,hi) - (multiple-value-bind (q r) - (floor (the (,type ,lo ,hi) key)) - (if (zerop (the ,type r)) - (sxhash q) - (sxhash-double-float - (coerce key 'double-float))))) - (t - (multiple-value-bind (q r) (floor key) - (if (zerop (the ,type r)) - (sxhash q) - (sxhash-double-float - (coerce key 'double-float))))))))) - (etypecase key - (single-float (frob single-float)) - (double-float (frob double-float)) - (short-float (frob short-float)) - (long-float (error "LONG-FLOAT not currently supported"))))) + (let ((lo (coerce sb!xc:most-negative-fixnum type)) + (hi (coerce sb!xc:most-positive-fixnum type))) + `(cond (;; This clause allows FIXNUM-sized integer + ;; values to be handled without consing. + (<= ,lo key ,hi) + (multiple-value-bind (q r) + (floor (the (,type ,lo ,hi) key)) + (if (zerop (the ,type r)) + (sxhash q) + (sxhash-double-float + (coerce key 'double-float))))) + (t + (multiple-value-bind (q r) (floor key) + (if (zerop (the ,type r)) + (sxhash q) + (sxhash-double-float + (coerce key 'double-float))))))))) + (etypecase key + (single-float (frob single-float)) + (double-float (frob double-float)) + #!+long-float + (long-float (error "LONG-FLOAT not currently supported"))))) (rational (if (and (<= most-negative-double-float - key - most-positive-double-float) - (= (coerce key 'double-float) key)) - (sxhash-double-float (coerce key 'double-float)) - (sxhash key))) + key + most-positive-double-float) + (= (coerce key 'double-float) key)) + (sxhash-double-float (coerce key 'double-float)) + (sxhash key))) (complex (if (zerop (imagpart key)) - (number-psxhash (realpart key)) - (let ((result 330231)) - (declare (type fixnum result)) - (mixf result (number-psxhash (realpart key))) - (mixf result (number-psxhash (imagpart key))) - result)))))) + (number-psxhash (realpart key)) + (let ((result 330231)) + (declare (type fixnum result)) + (mixf result (number-psxhash (realpart key))) + (mixf result (number-psxhash (imagpart key))) + result))))))