3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 ;;; the depthoid explored when calculating hash values
16 ;;; "Depthoid" here is a sort of mixture of what Common Lisp ordinarily calls
17 ;;; depth and what Common Lisp ordinarily calls length; it's incremented either
18 ;;; when we descend into a compound object or when we step through elements of
19 ;;; a compound object.
20 (defconstant +max-hash-depthoid+ 4)
22 ;;;; mixing hash values
24 ;;; a function for mixing hash values
27 ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the
28 ;;; same value as #(5 1), and ending up in real trouble in some
29 ;;; special cases like bit vectors the way that CMUCL SXHASH 18b
30 ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..)
31 ;;; * We'd like to scatter our hash values the entire possible range
32 ;;; of values instead of hashing small or common key values (like
33 ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b
34 ;;; SXHASH function does, again helping to avoid pathologies like
35 ;;; hashing all bit vectors to 1.
36 ;;; * We'd like this to be simple and fast, too.
38 ;;; FIXME: Should this be INLINE?
39 (declaim (ftype (function ((and fixnum unsigned-byte)
40 (and fixnum unsigned-byte))
41 (and fixnum unsigned-byte)) mix))
43 ;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler
44 ;; were smarter about optimizing ASH. (Without the THE FIXNUM below,
45 ;; and the (SAFETY 0) declaration here to get the compiler to trust
46 ;; it, the sbcl-0.5.0m cross-compiler running under Debian
47 ;; cmucl-2.4.17 turns the ASH into a full call, requiring the
48 ;; UNSIGNED-BYTE 32 argument to be coerced to a bignum, requiring
49 ;; consing, and thus generally obliterating performance.)
50 (declare (optimize (speed 3) (safety 0)))
51 (declare (type (and fixnum unsigned-byte) x y))
53 ;; * Bits diffuse in both directions (shifted left by up to 2 places
54 ;; in the calculation of XY, and shifted right by up to 5 places
56 ;; * The #'+ and #'LOGXOR operations don't commute with each other,
57 ;; so different bit patterns are mixed together as they shift
59 ;; * The arbitrary constant in the #'LOGXOR expression is intended
60 ;; to help break up any weird anomalies we might otherwise get
61 ;; when hashing highly regular patterns.
62 ;; (These are vaguely like the ideas used in many cryptographic
63 ;; algorithms, but we're not pushing them hard enough here for them
64 ;; to be cryptographically strong.)
65 (let* ((xy (+ (* x 3) y)))
66 (declare (type (unsigned-byte 32) xy))
67 (the (and fixnum unsigned-byte)
68 (logand most-positive-fixnum
71 (the fixnum (ash xy -5)))))))
75 ;;;; Note that this operation is used in compiler symbol table lookups, so we'd
76 ;;;; like it to be fast.
78 #!-sb-fluid (declaim (inline %sxhash-substring))
79 (defun %sxhash-substring (string &optional (count (length string)))
80 ;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the
81 ;; cross-compiler were smarter about ASH, but we need it for sbcl-0.5.0m.
82 (declare (optimize (speed 3) (safety 0)))
83 (declare (type string string))
84 (declare (type index count))
85 (let ((result 408967240))
86 (declare (type fixnum result))
88 (declare (type index i))
91 (ash (char-code (aref string i)) 5))))
94 ;;; (let ((ht (make-hash-table :test 'equal)))
95 ;;; (do-all-symbols (symbol)
96 ;;; (let* ((string (symbol-name symbol))
97 ;;; (hash (%sxhash-substring string)))
98 ;;; (if (gethash hash ht)
99 ;;; (unless (string= (gethash hash ht) string)
100 ;;; (format t "collision: ~S ~S~%" string (gethash hash ht)))
101 ;;; (setf (gethash hash ht) string))))
102 ;;; (format t "final count=~D~%" (hash-table-count ht)))
104 (defun %sxhash-simple-string (x)
105 (declare (optimize speed))
106 (declare (type simple-string x))
107 (%sxhash-substring x))
109 (defun %sxhash-simple-substring (x count)
110 (declare (optimize speed))
111 (declare (type simple-string x))
112 (declare (type index count))
113 (%sxhash-substring x count))
115 ;;;; the SXHASH function
118 (labels ((sxhash-number (x)
120 (fixnum (sxhash x)) ; through DEFTRANSFORM
121 (integer (sb!bignum:sxhash-bignum x))
122 (single-float (sxhash x)) ; through DEFTRANSFORM
123 (double-float (sxhash x)) ; through DEFTRANSFORM
124 #!+long-float (long-float (error "stub: no LONG-FLOAT"))
125 (ratio (let ((result 127810327))
126 (declare (type fixnum result))
127 (mixf result (sxhash-number (numerator x)))
128 (mixf result (sxhash-number (denominator x)))
130 (complex (let ((result 535698211))
131 (declare (type fixnum result))
132 (mixf result (sxhash-number (realpart x)))
133 (mixf result (sxhash-number (imagpart x)))
135 (sxhash-recurse (x &optional (depthoid +max-hash-depthoid+))
136 (declare (type index depthoid))
140 (mix (sxhash-recurse (car x) (1- depthoid))
141 (sxhash-recurse (cdr x) (1- depthoid)))
144 (if (typep x 'structure-object)
146 (sxhash ; through DEFTRANSFORM
147 (class-name (layout-class (%instance-layout x)))))
149 (symbol (sxhash x)) ; through DEFTRANSFORM
150 (number (sxhash-number x))
153 (simple-string (sxhash x)) ; through DEFTRANSFORM
154 (string (%sxhash-substring x))
155 (bit-vector (let ((result 410823708))
156 (declare (type fixnum result))
157 (dotimes (i (min depthoid (length x)))
158 (mixf result (aref x i)))
160 (t (logxor 191020317 (sxhash (array-rank x))))))
163 (sxhash (char-code x)))) ; through DEFTRANSFORM
167 ;;;; the PSXHASH function
169 ;;;; FIXME: This code does a lot of unnecessary full calls. It could be made
170 ;;;; more efficient (in both time and space) by rewriting it along the lines
171 ;;;; of the SXHASH code above.
173 ;;; like SXHASH, but for EQUALP hashing instead of EQUAL hashing
174 (defun psxhash (key &optional (depthoid +max-hash-depthoid+))
175 (declare (optimize speed))
176 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
177 ;; Note: You might think it would be cleaner to use the ordering given in the
178 ;; table from Figure 5-13 in the EQUALP section of the ANSI specification
179 ;; here. So did I, but that is a snare for the unwary! Nothing in the ANSI
180 ;; spec says that HASH-TABLE can't be a STRUCTURE-OBJECT, and in fact our
181 ;; HASH-TABLEs *are* STRUCTURE-OBJECTs, so we need to pick off the special
182 ;; HASH-TABLE behavior before we fall through to the generic STRUCTURE-OBJECT
183 ;; comparison behavior.
185 (array (array-psxhash key depthoid))
186 (hash-table (hash-table-psxhash key))
187 (structure-object (structure-object-psxhash key depthoid))
188 (list (list-psxhash key depthoid))
189 (number (number-psxhash key))
190 (character (sxhash (char-upcase key)))
193 (defun array-psxhash (key depthoid)
194 (declare (optimize speed))
195 (declare (type array key))
196 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
198 ;; VECTORs have to be treated specially because ANSI specifies
199 ;; that we must respect fill pointers.
202 '(let ((result 572539))
203 (declare (type fixnum result))
204 (mixf result (length key))
205 (dotimes (i (min depthoid (length key)))
206 (declare (type fixnum i))
208 (psxhash (aref key i)
211 ;; CMU can compile SIMPLE-ARRAY operations so much more efficiently
212 ;; than the general case that it's probably worth picking off the
213 ;; common special cases.
216 ;;(format t "~&SIMPLE-STRING special case~%")
219 ;;(format t "~&SIMPLE-VECTOR special case~%")
222 ;; Any other array can be hashed by working with its underlying
223 ;; one-dimensional physical representation.
225 (let ((result 60828))
226 (declare (type fixnum result))
227 (dotimes (i (min depthoid (array-rank key)))
228 (mixf result (array-dimension key i)))
229 (dotimes (i (min depthoid (array-total-size key)))
231 (psxhash (row-major-aref key i)
235 (defun structure-object-psxhash (key depthoid)
236 (declare (optimize speed))
237 (declare (type structure-object key))
238 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
239 (let* ((layout (%instance-layout key)) ; i.e. slot #0
240 (length (layout-length layout))
241 (class (layout-class layout))
242 (name (class-name class))
243 (result (mix (sxhash name) (the fixnum 79867))))
244 (declare (type fixnum result))
245 (dotimes (i (min depthoid (1- length)))
246 (declare (type fixnum i))
247 (let ((j (1+ i))) ; skipping slot #0, which is for LAYOUT
248 (declare (type fixnum j))
250 (psxhash (%instance-ref key j)
254 (defun list-psxhash (key depthoid)
255 (declare (optimize speed))
256 (declare (type list key))
257 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
263 (mix (psxhash (car key) (1- depthoid))
264 (psxhash (cdr key) (1- depthoid))))))
266 (defun hash-table-psxhash (key)
267 (declare (optimize speed))
268 (declare (type hash-table key))
269 (let ((result 103924836))
270 (declare (type fixnum result))
271 (mixf result (hash-table-count key))
272 (mixf result (sxhash (hash-table-test key)))
275 (defun number-psxhash (key)
276 (declare (optimize speed))
277 (declare (type number key))
278 (flet ((sxhash-double-float (val)
279 (declare (type double-float val))
280 ;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the
281 ;; resulting code works without consing. (In Debian cmucl 2.4.17,
285 (integer (sxhash key))
286 (float (macrolet ((frob (type)
287 (let ((lo (coerce most-negative-fixnum type))
288 (hi (coerce most-positive-fixnum type)))
289 `(cond (;; This clause allows FIXNUM-sized integer
290 ;; values to be handled without consing.
292 (multiple-value-bind (q r)
293 (floor (the (,type ,lo ,hi) key))
294 (if (zerop (the ,type r))
297 (coerce key 'double-float)))))
299 (multiple-value-bind (q r) (floor key)
300 (if (zerop (the ,type r))
303 (coerce key 'double-float)))))))))
305 (single-float (frob single-float))
306 (double-float (frob double-float))
307 (short-float (frob short-float))
308 (long-float (error "LONG-FLOAT not currently supported")))))
309 (rational (if (and (<= most-negative-double-float
311 most-positive-double-float)
312 (= (coerce key 'double-float) key))
313 (sxhash-double-float (coerce key 'double-float))
315 (complex (if (zerop (imagpart key))
316 (number-psxhash (realpart key))
317 (let ((result 330231))
318 (declare (type fixnum result))
319 (mixf result (number-psxhash (realpart key)))
320 (mixf result (number-psxhash (imagpart key)))
323 ;;; SXHASH and PSXHASH should distribute hash values well over the
324 ;;; space of possible values, so that collisions between the hash values
325 ;;; of unequal objects should be very uncommon.
327 ;;; FIXME: These tests should be enabled once the rest of the system is
328 ;;; stable. (For now, I don't want to mess with things like making sure
329 ;;; that bignums are hashed uniquely.)
332 (let* ((test-cases `((0 . 1)
353 #(#(-0.0d0 0) (1.0 0))
354 ;; KLUDGE: Some multi-dimensional array test cases would
355 ;; be good here too, but currently SBCL isn't smart enough
356 ;; to dump them as literals, and I'm too lazy to make
357 ;; code to create them at run time. -- WHN 20000111
368 ,(expt 2 33) ,(expt 2.0 33) ,(expt 2.0d0 33)
369 ,(- (expt 1/2 50)) ,(- (expt 0.5 50)) ,(- (expt 0.5d0 50))
370 ,(- (expt 1/2 50)) ,(- (expt 0.5 50)) ,(- (expt 0.5d0 50))
371 #c(1.0 2.0) #c(1 2.0) #c(1.5 -3/2) #c(3/2 -3/2) #c(0 1)
372 #c(1.0 2.0) #c(1 2.0) #c(1.5 -3/2) #c(3/2 -3/2) #c(0 1)
374 ,(make-hash-table :test 'equal)
375 "abc" "ABC" "aBc" 'abc #(#\a #\b #\c) #(a b c) #("A" b c)
377 "" #* #() () (()) #(()) (#())
378 "" #* #() () (()) #(()) (#())
381 (dolist (i test-cases)
382 (unless (typep (sxhash i) '(and fixnum unsigned-byte))
383 (error "bad SXHASH behavior for ~S" i))
384 (unless (typep (psxhash i) '(and fixnum unsigned-byte))
385 (error "bad PSXHASH behavior for ~S" i))
386 (dolist (j test-cases)
387 (flet ((t->boolean (x) (if x t nil)))
388 ;; Note: It's possible that a change to the hashing algorithm could
389 ;; leave it correct but still cause this test to bomb by causing an
390 ;; unlucky random collision. That's not very likely (since there are
391 ;; (EXPT 2 29) possible hash values and only on the order of 100 test
392 ;; cases, but it's probably worth checking if you are getting a
393 ;; mystifying error from this test.
394 (unless (eq (t->boolean (equal i j))
395 (t->boolean (= (sxhash i) (sxhash j))))
396 (error "bad SXHASH behavior for ~S ~S" i j))
397 (unless (eq (t->boolean (equalp i j))
398 (t->boolean (= (psxhash i) (psxhash j))))
399 (error "bad PSXHASH behavior for ~S ~S" i j))))))
401 ;;; FIXME: Test that the the hash functions can deal with common cases without
403 ;(defun consless-test ()
404 ; (dotimes (j 100000)
405 ; (dolist (i '("yo" #(1 2 3) #2A((1 2) (1 2)) (1 2 (3)) 1 1.0 1.0d0))