;;; desiderata:
;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the
;;; same value as #(5 1), and ending up in real trouble in some
-;;; special cases like bit vectors the way that CMUCL SXHASH 18b
+;;; special cases like bit vectors the way that CMUCL 18b SXHASH
;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..)
-;;; * We'd like to scatter our hash values the entire possible range
+;;; * We'd like to scatter our hash values over the entire possible range
;;; of values instead of hashing small or common key values (like
;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b
;;; SXHASH function does, again helping to avoid pathologies like
;;; * We'd like this to be simple and fast, too.
;;;
;;; FIXME: Should this be INLINE?
-(declaim (ftype (function ((and fixnum unsigned-byte)
- (and fixnum unsigned-byte))
- (and fixnum unsigned-byte)) mix))
+(declaim (ftype (sfunction ((and fixnum unsigned-byte)
+ (and fixnum unsigned-byte))
+ (and fixnum unsigned-byte))
+ mix))
(defun mix (x y)
;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler
;; were smarter about optimizing ASH. (Without the THE FIXNUM below,
;; algorithms, but we're not pushing them hard enough here for them
;; to be cryptographically strong.)
(let* ((xy (+ (* x 3) y)))
- (declare (type (unsigned-byte 32) xy))
- (the (and fixnum unsigned-byte)
- (logand most-positive-fixnum
- (logxor 441516657
- xy
- (the fixnum (ash xy -5)))))))
+ (logand most-positive-fixnum
+ (logxor 441516657
+ xy
+ (ash xy -5)))))
\f
;;;; hashing strings
;;;;
-;;;; Note that this operation is used in compiler symbol table lookups, so we'd
-;;;; like it to be fast.
+;;;; Note that this operation is used in compiler symbol table
+;;;; lookups, so we'd like it to be fast.
+;;;;
+;;;; As of 2004-03-10, we implement the one-at-a-time algorithm
+;;;; designed by Bob Jenkins (see
+;;;; <http://burtleburtle.net/bob/hash/doobs.html> for some more
+;;;; information).
#!-sb-fluid (declaim (inline %sxhash-substring))
(defun %sxhash-substring (string &optional (count (length string)))
;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the
- ;; cross-compiler were smarter about ASH, but we need it for sbcl-0.5.0m.
+ ;; cross-compiler were smarter about ASH, but we need it for
+ ;; sbcl-0.5.0m. (probably no longer true? We might need SAFETY 0
+ ;; to elide some type checks, but then again if this is inlined in
+ ;; all the critical places, we might not -- CSR, 2004-03-10)
(declare (optimize (speed 3) (safety 0)))
(declare (type string string))
(declare (type index count))
- (let ((result 408967240))
- (declare (type fixnum result))
- (dotimes (i count)
- (declare (type index i))
- (mixf result
- (the fixnum
- (ash (char-code (aref string i)) 5))))
- result))
+ (let ((result 0))
+ (declare (type (unsigned-byte 32) result))
+ (unless (typep string '(vector nil))
+ (dotimes (i count)
+ (declare (type index i))
+ (setf result
+ (ldb (byte 32 0)
+ (+ result (char-code (aref string i)))))
+ (setf result
+ (ldb (byte 32 0)
+ (+ result (ash result 10))))
+ (setf result
+ (logxor result (ash result -6)))))
+ (setf result
+ (ldb (byte 32 0)
+ (+ result (ash result 3))))
+ (setf result
+ (logxor result (ash result -11)))
+ (setf result
+ (ldb (byte 32 0)
+ (logxor result (ash result 15))))
+ (logand result most-positive-fixnum)))
;;; test:
;;; (let ((ht (make-hash-table :test 'equal)))
;;; (do-all-symbols (symbol)
(defun %sxhash-simple-string (x)
(declare (optimize speed))
(declare (type simple-string x))
- (%sxhash-substring x))
+ ;; KLUDGE: this FLET is a workaround (suggested by APD) for presence
+ ;; of let conversion in the cross compiler, which otherwise causes
+ ;; strongly suboptimal register allocation.
+ (flet ((trick (x)
+ (%sxhash-substring x)))
+ (declare (notinline trick))
+ (trick x)))
(defun %sxhash-simple-substring (x count)
(declare (optimize speed))
(declare (type simple-string x))
(declare (type index count))
- (%sxhash-substring x count))
+ ;; see comment in %SXHASH-SIMPLE-STRING
+ (flet ((trick (x count)
+ (%sxhash-substring x count)))
+ (declare (notinline trick))
+ (trick x count)))
\f
;;;; the SXHASH function
+;; simple cases
+(declaim (ftype (sfunction (integer) (integer 0 #.sb!xc:most-positive-fixnum))
+ sxhash-bignum))
+(declaim (ftype (sfunction (t) (integer 0 #.sb!xc:most-positive-fixnum))
+ sxhash-instance))
+
(defun sxhash (x)
+ ;; profiling SXHASH is hard, but we might as well try to make it go
+ ;; fast, in case it is the bottleneck somwhere. -- CSR, 2003-03-14
+ (declare (optimize speed))
(labels ((sxhash-number (x)
(etypecase x
(fixnum (sxhash x)) ; through DEFTRANSFORM
(sxhash-recurse (x &optional (depthoid +max-hash-depthoid+))
(declare (type index depthoid))
(typecase x
+ ;; we test for LIST here, rather than CONS, because the
+ ;; type test for CONS is in fact the test for
+ ;; LIST-POINTER-LOWTAG followed by a negated test for
+ ;; NIL. If we're going to have to test for NIL anyway,
+ ;; we might as well do it explicitly and pick off the
+ ;; answer. -- CSR, 2004-07-14
(list
- (if (plusp depthoid)
- (mix (sxhash-recurse (car x) (1- depthoid))
- (sxhash-recurse (cdr x) (1- depthoid)))
- 261835505))
+ (if (null x)
+ (sxhash x) ; through DEFTRANSFORM
+ (if (plusp depthoid)
+ (mix (sxhash-recurse (car x) (1- depthoid))
+ (sxhash-recurse (cdr x) (1- depthoid)))
+ 261835505)))
(instance
- (if (typep x 'structure-object)
+ (if (or (typep x 'structure-object) (typep x 'condition))
(logxor 422371266
(sxhash ; through DEFTRANSFORM
- (class-name (layout-class (%instance-layout x)))))
- ;; Nice though it might be to return a nontrivial
- ;; hash value for other instances (especially
- ;; STANDARD-OBJECTs) there seems to be no good way
- ;; to do so. We can't even do the CLASS-NAME trick
- ;; (as used above for STRUCTURE-OBJECT) because
- ;; then CHANGE-CLASS would cause SXHASH values to
- ;; change, ouch! -- WHN recording wisdom of CSR
- 309518995))
+ (classoid-name
+ (layout-classoid (%instance-layout x)))))
+ (sxhash-instance x)))
(symbol (sxhash x)) ; through DEFTRANSFORM
(array
(typecase x
(simple-string (sxhash x)) ; through DEFTRANSFORM
(string (%sxhash-substring x))
- (bit-vector (let ((result 410823708))
- (declare (type fixnum result))
- (dotimes (i (min depthoid (length x)))
- (mixf result (aref x i)))
- result))
+ (simple-bit-vector (sxhash x)) ; through DEFTRANSFORM
+ (bit-vector
+ ;; FIXME: It must surely be possible to do better
+ ;; than this. The problem is that a non-SIMPLE
+ ;; BIT-VECTOR could be displaced to another, with a
+ ;; non-zero offset -- so that significantly more
+ ;; work needs to be done using the %RAW-BITS
+ ;; approach. This will probably do for now.
+ (sxhash-recurse (copy-seq x) depthoid))
(t (logxor 191020317 (sxhash (array-rank x))))))
(character
(logxor 72185131
(sxhash (char-code x)))) ; through DEFTRANSFORM
;; general, inefficient case of NUMBER
(number (sxhash-number x))
+ (generic-function (sxhash-instance x))
(t 42))))
(sxhash-recurse x)))
\f
(array (array-psxhash key depthoid))
(hash-table (hash-table-psxhash key))
(structure-object (structure-object-psxhash key depthoid))
- (list (list-psxhash key depthoid))
+ (cons (list-psxhash key depthoid))
(number (number-psxhash key))
(character (sxhash (char-upcase key)))
(t (sxhash key))))
(declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
(let* ((layout (%instance-layout key)) ; i.e. slot #0
(length (layout-length layout))
- (class (layout-class layout))
- (name (class-name class))
+ (classoid (layout-classoid layout))
+ (name (classoid-name classoid))
(result (mix (sxhash name) (the fixnum 79867))))
(declare (type fixnum result))
(dotimes (i (min depthoid (1- length)))
(etypecase key
(single-float (frob single-float))
(double-float (frob double-float))
- (short-float (frob short-float))
+ #!+long-float
(long-float (error "LONG-FLOAT not currently supported")))))
(rational (if (and (<= most-negative-double-float
key