1 ;;;; This file implements a sparse set abstraction, represented as a
2 ;;;; custom lightweight hash-table. We don't use bit-vectors to
3 ;;;; represent sets in flow analysis, since the universe may be quite
4 ;;;; large but the average number of elements is small. We also don't
5 ;;;; use sorted lists like in the original CMUCL code, since it had
6 ;;;; bad worst-case performance (on some real-life programs the
7 ;;;; hash-based sset gives a 20% compilation speedup). A custom
8 ;;;; hash-table is used since the standard one is too heavy (locking,
9 ;;;; memory use) for this use.
11 ;;;; This software is part of the SBCL system. See the README file for
12 ;;;; more information.
14 ;;;; This software is derived from the CMU CL system, which was
15 ;;;; written at Carnegie Mellon University and released into the
16 ;;;; public domain. The software is in the public domain and is
17 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
18 ;;;; files for more information. (This file no)
22 ;;; Each structure that may be placed in a SSET must include the
23 ;;; SSET-ELEMENT structure. We allow an initial value of NIL to mean
24 ;;; that no ordering has been assigned yet (although an ordering must
25 ;;; be assigned before doing set operations.)
26 (def!struct (sset-element (:constructor nil)
28 (number nil :type (or index null)))
30 (defstruct (sset (:copier nil))
31 ;; Vector containing the set values. 0 is used for empty (since
32 ;; initializing a vector with 0 is cheaper than with NIL), +DELETED+
33 ;; is used to mark buckets that used to contain an element, but no
35 (vector #() :type simple-vector)
36 ;; How many buckets currently contain or used to contain an element.
38 ;; How many elements are currently members of the set.
39 (count 0 :type index))
40 (defprinter (sset) vector)
42 ;;; Iterate over the elements in SSET, binding VAR to each element in
44 (defmacro do-sset-elements ((var sset &optional result) &body body)
45 `(loop for ,var across (sset-vector ,sset)
46 do (unless (member ,var '(0 +deleted+))
48 finally (return ,result)))
51 (declaim (inline sset-hash1))
52 (defun sset-hash1 (element)
54 (let ((result (sset-element-number element)))
55 ;; This is performance critical, and it's not certain that the host
56 ;; compiler does modular arithmetic optimization. Instad use
57 ;; something that most CL implementations will do efficiently.
58 (the fixnum (logxor (the fixnum result)
59 (the fixnum (ash result -9))
60 (the fixnum (ash result -5)))))
62 (let ((result (sset-element-number element)))
63 (declare (type sb!vm:word result))
64 ;; We only use the low-order bits.
65 (macrolet ((set-result (form)
66 `(setf result (ldb (byte #.sb!vm:n-word-bits 0) ,form))))
67 (set-result (+ result (ash result -19)))
68 (set-result (logxor result (ash result -13)))
69 (set-result (+ result (ash result -9)))
70 (set-result (logxor result (ash result -5)))
71 (set-result (+ result (ash result -2)))
72 (logand sb!xc:most-positive-fixnum result))))
74 ;;; Secondary hash (for double hash probing). Needs to return an odd
76 (declaim (inline sset-hash2))
77 (defun sset-hash2 (element)
78 (let ((number (sset-element-number element)))
79 (declare (fixnum number))
82 ;;; Double the size of the hash vector of SET.
83 (defun sset-grow (set)
84 (let* ((vector (sset-vector set))
85 (new-vector (make-array (if (zerop (length vector))
87 (* (length vector) 2))
89 (setf (sset-vector set) new-vector
90 (sset-free set) (length new-vector)
92 (loop for element across vector
93 do (unless (member element '(0 +deleted+))
94 (sset-adjoin element set)))))
96 ;;; Rehash the sset when the proportion of free cells in the set is
98 (defconstant +sset-rehash-threshold+ 1/4)
100 ;;; Destructively add ELEMENT to SET. If ELEMENT was not in the set,
101 ;;; then we return true, otherwise we return false.
102 (declaim (ftype (sfunction (sset-element sset) boolean) sset-adjoin))
103 (defun sset-adjoin (element set)
104 (declare (optimize (speed 2)))
105 (when (<= (sset-free set)
106 (max 1 (truncate (length (sset-vector set))
107 #.(round (/ +sset-rehash-threshold+)))))
109 (loop with vector = (sset-vector set)
110 with mask of-type fixnum = (1- (length vector))
111 with secondary-hash = (sset-hash2 element)
112 for hash of-type index = (logand mask (sset-hash1 element)) then
113 (logand mask (+ hash secondary-hash))
114 for current = (aref vector hash)
115 for deleted-index = nil
116 do (cond ((eql current 0)
117 (incf (sset-count set))
119 (setf (aref vector deleted-index) element))
121 (decf (sset-free set))
122 (setf (aref vector hash) element)))
124 ((and (eql current '+deleted+)
126 (setf deleted-index hash))
127 ((eq current element)
130 ;;; Destructively remove ELEMENT from SET. If element was in the set,
131 ;;; then return true, otherwise return false.
132 (declaim (ftype (sfunction (sset-element sset) boolean) sset-delete))
133 (defun sset-delete (element set)
134 (when (zerop (length (sset-vector set)))
135 (return-from sset-delete nil))
136 (loop with vector = (sset-vector set)
137 with mask fixnum = (1- (length vector))
138 with secondary-hash = (sset-hash2 element)
139 for hash of-type index = (logand mask (sset-hash1 element)) then
140 (logand mask (+ hash secondary-hash))
141 for current = (aref vector hash)
142 do (cond ((eql current 0)
144 ((eq current element)
145 (decf (sset-count set))
146 (setf (aref vector hash) '+deleted+)
149 ;;; Return true if ELEMENT is in SET, false otherwise.
150 (declaim (ftype (sfunction (sset-element sset) boolean) sset-member))
151 (defun sset-member (element set)
152 (when (zerop (length (sset-vector set)))
153 (return-from sset-member nil))
154 (loop with vector = (sset-vector set)
155 with mask fixnum = (1- (length vector))
156 with secondary-hash = (sset-hash2 element)
157 for hash of-type index = (logand mask (sset-hash1 element)) then
158 (logand mask (+ hash secondary-hash))
159 for current = (aref vector hash)
160 do (cond ((eql current 0)
162 ((eq current element)
165 (declaim (ftype (sfunction (sset sset) boolean) sset=))
166 (defun sset= (set1 set2)
167 (unless (eql (sset-count set1)
169 (return-from sset= nil))
170 (do-sset-elements (element set1)
171 (unless (sset-member element set2)
172 (return-from sset= nil)))
175 ;;; Return true if SET contains no elements, false otherwise.
176 (declaim (ftype (sfunction (sset) boolean) sset-empty))
177 (defun sset-empty (set)
178 (zerop (sset-count set)))
180 ;;; Return a new copy of SET.
181 (declaim (ftype (sfunction (sset) sset) copy-sset))
182 (defun copy-sset (set)
183 (make-sset :vector (let* ((vector (sset-vector set))
184 (new-vector (make-array (length vector))))
185 (declare (type simple-vector vector new-vector)
186 (optimize speed (safety 0)))
187 ;; There's no REPLACE deftransform for simple-vectors.
188 (dotimes (i (length vector))
189 (setf (aref new-vector i)
192 :count (sset-count set)
193 :free (sset-free set)))
195 ;;; Perform the appropriate set operation on SET1 and SET2 by
196 ;;; destructively modifying SET1. We return true if SET1 was modified,
198 (declaim (ftype (sfunction (sset sset) boolean) sset-union sset-intersection
200 (defun sset-union (set1 set2)
201 (loop with modified = nil
202 for element across (sset-vector set2)
203 do (unless (member element '(0 +deleted+))
204 (when (sset-adjoin element set1)
206 finally (return modified)))
207 (defun sset-intersection (set1 set2)
208 (loop with modified = nil
209 for element across (sset-vector set1)
210 for index of-type index from 0
211 do (unless (member element '(0 +deleted+))
212 (unless (sset-member element set2)
213 (decf (sset-count set1))
214 (setf (aref (sset-vector set1) index) '+deleted+
216 finally (return modified)))
217 (defun sset-difference (set1 set2)
218 (loop with modified = nil
219 for element across (sset-vector set1)
220 for index of-type index from 0
221 do (unless (member element '(0 +deleted+))
222 (when (sset-member element set2)
223 (decf (sset-count set1))
224 (setf (aref (sset-vector set1) index) '+deleted+
226 finally (return modified)))
228 ;;; Destructively modify SET1 to include its union with the difference
229 ;;; of SET2 and SET3. We return true if SET1 was modified, false
231 (declaim (ftype (sfunction (sset sset sset) boolean) sset-union-of-difference))
232 (defun sset-union-of-difference (set1 set2 set3)
233 (loop with modified = nil
234 for element across (sset-vector set2)
235 do (unless (member element '(0 +deleted+))
236 (unless (sset-member element set3)
237 (when (sset-adjoin element set1)
239 finally (return modified)))