3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 (defun pointer-hash (key)
17 ;;; the depthoid explored when calculating hash values
19 ;;; "Depthoid" here is a sort of mixture of what Common Lisp ordinarily calls
20 ;;; depth and what Common Lisp ordinarily calls length; it's incremented either
21 ;;; when we descend into a compound object or when we step through elements of
22 ;;; a compound object.
23 (defconstant +max-hash-depthoid+ 4)
25 ;;;; mixing hash values
27 ;;; a function for mixing hash values
30 ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the
31 ;;; same value as #(5 1), and ending up in real trouble in some
32 ;;; special cases like bit vectors the way that CMUCL 18b SXHASH
33 ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..)
34 ;;; * We'd like to scatter our hash values over the entire possible range
35 ;;; of values instead of hashing small or common key values (like
36 ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b
37 ;;; SXHASH function does, again helping to avoid pathologies like
38 ;;; hashing all bit vectors to 1.
39 ;;; * We'd like this to be simple and fast, too.
41 ;;; FIXME: Should this be INLINE?
42 (declaim (ftype (sfunction ((and fixnum unsigned-byte)
43 (and fixnum unsigned-byte))
44 (and fixnum unsigned-byte))
46 (declaim (inline mix))
48 ;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler
49 ;; were smarter about optimizing ASH. (Without the THE FIXNUM below,
50 ;; and the (SAFETY 0) declaration here to get the compiler to trust
51 ;; it, the sbcl-0.5.0m cross-compiler running under Debian
52 ;; cmucl-2.4.17 turns the ASH into a full call, requiring the
53 ;; UNSIGNED-BYTE 32 argument to be coerced to a bignum, requiring
54 ;; consing, and thus generally obliterating performance.)
55 (declare (optimize (speed 3) (safety 0)))
56 (declare (type (and fixnum unsigned-byte) x y))
58 ;; * Bits diffuse in both directions (shifted left by up to 2 places
59 ;; in the calculation of XY, and shifted right by up to 5 places
61 ;; * The #'+ and #'LOGXOR operations don't commute with each other,
62 ;; so different bit patterns are mixed together as they shift
64 ;; * The arbitrary constant in the #'LOGXOR expression is intended
65 ;; to help break up any weird anomalies we might otherwise get
66 ;; when hashing highly regular patterns.
67 ;; (These are vaguely like the ideas used in many cryptographic
68 ;; algorithms, but we're not pushing them hard enough here for them
69 ;; to be cryptographically strong.)
70 (let* ((xy (+ (* x 3) y)))
71 (logand most-positive-fixnum
78 ;;;; Note that this operation is used in compiler symbol table
79 ;;;; lookups, so we'd like it to be fast.
81 ;;;; As of 2004-03-10, we implement the one-at-a-time algorithm
82 ;;;; designed by Bob Jenkins (see
83 ;;;; <http://burtleburtle.net/bob/hash/doobs.html> for some more
86 (declaim (inline %sxhash-substring))
87 (defun %sxhash-substring (string &optional (count (length string)))
88 ;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the
89 ;; cross-compiler were smarter about ASH, but we need it for
90 ;; sbcl-0.5.0m. (probably no longer true? We might need SAFETY 0
91 ;; to elide some type checks, but then again if this is inlined in
92 ;; all the critical places, we might not -- CSR, 2004-03-10)
93 (declare (optimize (speed 3) (safety 0)))
94 (declare (type string string))
95 (declare (type index count))
96 (macrolet ((set-result (form)
97 `(setf result (ldb (byte #.sb!vm:n-word-bits 0) ,form))))
99 (declare (type (unsigned-byte #.sb!vm:n-word-bits) result))
100 (unless (typep string '(vector nil))
102 (declare (type index i))
103 (set-result (+ result (char-code (aref string i))))
104 (set-result (+ result (ash result 10)))
105 (set-result (logxor result (ash result -6)))))
106 (set-result (+ result (ash result 3)))
107 (set-result (logxor result (ash result -11)))
108 (set-result (logxor result (ash result 15)))
109 (logand result most-positive-fixnum))))
111 ;;; (let ((ht (make-hash-table :test 'equal)))
112 ;;; (do-all-symbols (symbol)
113 ;;; (let* ((string (symbol-name symbol))
114 ;;; (hash (%sxhash-substring string)))
115 ;;; (if (gethash hash ht)
116 ;;; (unless (string= (gethash hash ht) string)
117 ;;; (format t "collision: ~S ~S~%" string (gethash hash ht)))
118 ;;; (setf (gethash hash ht) string))))
119 ;;; (format t "final count=~W~%" (hash-table-count ht)))
121 (defun %sxhash-simple-string (x)
122 (declare (optimize speed))
123 (declare (type simple-string x))
124 ;; KLUDGE: this FLET is a workaround (suggested by APD) for presence
125 ;; of let conversion in the cross compiler, which otherwise causes
126 ;; strongly suboptimal register allocation.
128 (%sxhash-substring x)))
129 (declare (notinline trick))
132 (defun %sxhash-simple-substring (x count)
133 (declare (optimize speed))
134 (declare (type simple-string x))
135 (declare (type index count))
136 ;; see comment in %SXHASH-SIMPLE-STRING
137 (flet ((trick (x count)
138 (%sxhash-substring x count)))
139 (declare (notinline trick))
142 ;;;; the SXHASH function
145 (declaim (ftype (sfunction (integer) hash) sxhash-bignum))
146 (declaim (ftype (sfunction (t) hash) sxhash-instance))
149 ;; profiling SXHASH is hard, but we might as well try to make it go
150 ;; fast, in case it is the bottleneck somewhere. -- CSR, 2003-03-14
151 (declare (optimize speed))
152 (labels ((sxhash-number (x)
154 (fixnum (sxhash x)) ; through DEFTRANSFORM
155 (integer (sb!bignum:sxhash-bignum x))
156 (single-float (sxhash x)) ; through DEFTRANSFORM
157 (double-float (sxhash x)) ; through DEFTRANSFORM
158 #!+long-float (long-float (error "stub: no LONG-FLOAT"))
159 (ratio (let ((result 127810327))
160 (declare (type fixnum result))
161 (mixf result (sxhash-number (numerator x)))
162 (mixf result (sxhash-number (denominator x)))
164 (complex (let ((result 535698211))
165 (declare (type fixnum result))
166 (mixf result (sxhash-number (realpart x)))
167 (mixf result (sxhash-number (imagpart x)))
169 (sxhash-recurse (x depthoid)
170 (declare (type index depthoid))
172 ;; we test for LIST here, rather than CONS, because the
173 ;; type test for CONS is in fact the test for
174 ;; LIST-POINTER-LOWTAG followed by a negated test for
175 ;; NIL. If we're going to have to test for NIL anyway,
176 ;; we might as well do it explicitly and pick off the
177 ;; answer. -- CSR, 2004-07-14
180 (sxhash x) ; through DEFTRANSFORM
182 (mix (sxhash-recurse (car x) (1- depthoid))
183 (sxhash-recurse (cdr x) (1- depthoid)))
187 ;; Pathnames are EQUAL if all the components are EQUAL, so
188 ;; we hash all of the components of a pathname together.
189 (let ((hash (sxhash-recurse (pathname-host x) depthoid)))
190 (mixf hash (sxhash-recurse (pathname-device x) depthoid))
191 (mixf hash (sxhash-recurse (pathname-directory x) depthoid))
192 (mixf hash (sxhash-recurse (pathname-name x) depthoid))
193 (mixf hash (sxhash-recurse (pathname-type x) depthoid))
194 ;; Hash :NEWEST the same as NIL because EQUAL for
195 ;; pathnames assumes that :newest and nil are equal.
196 (let ((version (%pathname-version x)))
197 (mixf hash (sxhash-recurse (if (eq version :newest)
201 (if (or (typep x 'structure-object) (typep x 'condition))
203 (sxhash ; through DEFTRANSFORM
205 (layout-classoid (%instance-layout x)))))
206 (sxhash-instance x))))
207 (symbol (sxhash x)) ; through DEFTRANSFORM
210 (simple-string (sxhash x)) ; through DEFTRANSFORM
211 (string (%sxhash-substring x))
212 (simple-bit-vector (sxhash x)) ; through DEFTRANSFORM
214 ;; FIXME: It must surely be possible to do better
215 ;; than this. The problem is that a non-SIMPLE
216 ;; BIT-VECTOR could be displaced to another, with a
217 ;; non-zero offset -- so that significantly more
218 ;; work needs to be done using the %VECTOR-RAW-BITS
219 ;; approach. This will probably do for now.
220 (sxhash-recurse (copy-seq x) depthoid))
221 (t (logxor 191020317 (sxhash (array-rank x))))))
224 (sxhash (char-code x)))) ; through DEFTRANSFORM
225 ;; general, inefficient case of NUMBER
226 (number (sxhash-number x))
227 (generic-function (sxhash-instance x))
229 (sxhash-recurse x +max-hash-depthoid+)))
231 ;;;; the PSXHASH function
233 ;;;; FIXME: This code does a lot of unnecessary full calls. It could be made
234 ;;;; more efficient (in both time and space) by rewriting it along the lines
235 ;;;; of the SXHASH code above.
237 ;;; like SXHASH, but for EQUALP hashing instead of EQUAL hashing
238 (defun psxhash (key &optional (depthoid +max-hash-depthoid+))
239 (declare (optimize speed))
240 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
241 ;; Note: You might think it would be cleaner to use the ordering given in the
242 ;; table from Figure 5-13 in the EQUALP section of the ANSI specification
243 ;; here. So did I, but that is a snare for the unwary! Nothing in the ANSI
244 ;; spec says that HASH-TABLE can't be a STRUCTURE-OBJECT, and in fact our
245 ;; HASH-TABLEs *are* STRUCTURE-OBJECTs, so we need to pick off the special
246 ;; HASH-TABLE behavior before we fall through to the generic STRUCTURE-OBJECT
247 ;; comparison behavior.
249 (array (array-psxhash key depthoid))
250 (hash-table (hash-table-psxhash key))
251 (structure-object (structure-object-psxhash key depthoid))
252 (cons (list-psxhash key depthoid))
253 (number (number-psxhash key))
254 (character (char-code (char-upcase key)))
257 (defun array-psxhash (key depthoid)
258 (declare (optimize speed))
259 (declare (type array key))
260 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
262 ;; VECTORs have to be treated specially because ANSI specifies
263 ;; that we must respect fill pointers.
266 '(let ((result 572539))
267 (declare (type fixnum result))
268 (mixf result (length key))
269 (when (plusp depthoid)
271 (dotimes (i (length key))
272 (declare (type fixnum i))
274 (psxhash (aref key i) depthoid))))
276 (make-dispatch (types)
278 ,@(loop for type in types
281 (make-dispatch (simple-base-string
282 (simple-array character (*))
284 (simple-array (unsigned-byte 8) (*))
285 (simple-array fixnum (*))
287 ;; Any other array can be hashed by working with its underlying
288 ;; one-dimensional physical representation.
290 (let ((result 60828))
291 (declare (type fixnum result))
292 (dotimes (i (array-rank key))
293 (mixf result (array-dimension key i)))
294 (when (plusp depthoid)
296 (dotimes (i (array-total-size key))
298 (psxhash (row-major-aref key i) depthoid))))
301 (defun structure-object-psxhash (key depthoid)
302 (declare (optimize speed))
303 (declare (type structure-object key))
304 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
305 (let* ((layout (%instance-layout key)) ; i.e. slot #0
306 (length (layout-length layout))
307 (classoid (layout-classoid layout))
308 (name (classoid-name classoid))
309 (result (mix (sxhash name) (the fixnum 79867))))
310 (declare (type fixnum result))
311 (dotimes (i (min depthoid (- length 1 (layout-n-untagged-slots layout))))
312 (declare (type fixnum i))
313 (let ((j (1+ i))) ; skipping slot #0, which is for LAYOUT
314 (declare (type fixnum j))
316 (psxhash (%instance-ref key j)
318 ;; KLUDGE: Should hash untagged slots, too. (Although +max-hash-depthoid+
319 ;; is pretty low currently, so they might not make it into the hash
323 (defun list-psxhash (key depthoid)
324 (declare (optimize speed))
325 (declare (type list key))
326 (declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
332 (mix (psxhash (car key) (1- depthoid))
333 (psxhash (cdr key) (1- depthoid))))))
335 (defun hash-table-psxhash (key)
336 (declare (optimize speed))
337 (declare (type hash-table key))
338 (let ((result 103924836))
339 (declare (type fixnum result))
340 (mixf result (hash-table-count key))
341 (mixf result (sxhash (hash-table-test key)))
344 (defun number-psxhash (key)
345 (declare (optimize speed))
346 (declare (type number key))
347 (flet ((sxhash-double-float (val)
348 (declare (type double-float val))
349 ;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the
350 ;; resulting code works without consing. (In Debian cmucl 2.4.17,
354 (integer (sxhash key))
355 (float (macrolet ((frob (type)
356 (let ((lo (coerce sb!xc:most-negative-fixnum type))
357 (hi (coerce sb!xc:most-positive-fixnum type)))
358 `(cond (;; This clause allows FIXNUM-sized integer
359 ;; values to be handled without consing.
361 (multiple-value-bind (q r)
362 (floor (the (,type ,lo ,hi) key))
363 (if (zerop (the ,type r))
366 (coerce key 'double-float)))))
368 (multiple-value-bind (q r) (floor key)
369 (if (zerop (the ,type r))
372 (coerce key 'double-float)))))))))
374 (single-float (frob single-float))
375 (double-float (frob double-float))
377 (long-float (error "LONG-FLOAT not currently supported")))))
378 (rational (if (and (<= most-negative-double-float
380 most-positive-double-float)
381 (= (coerce key 'double-float) key))
382 (sxhash-double-float (coerce key 'double-float))
384 (complex (if (zerop (imagpart key))
385 (number-psxhash (realpart key))
386 (let ((result 330231))
387 (declare (type fixnum result))
388 (mixf result (number-psxhash (realpart key)))
389 (mixf result (number-psxhash (imagpart key)))