src/code/sort.lisp

   1 ;;;; SORT and friends
   2
   3 ;;;; This software is part of the SBCL system. See the README file for
   4 ;;;; more information.
   5 ;;;;
   6 ;;;; This software is derived from the CMU CL system, which was
   7 ;;;; written at Carnegie Mellon University and released into the
   8 ;;;; public domain. The software is in the public domain and is
   9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
  10 ;;;; files for more information.
  11
  12 (in-package "SB!IMPL")
  13
  14 ;;; Like CMU CL, we use HEAPSORT. However, other than that, this code
  15 ;;; isn't really related to the CMU CL code, since instead of trying
  16 ;;; to generalize the CMU CL code to allow START and END values, this
  17 ;;; code has been written from scratch following Chapter 7 of
  18 ;;; _Introduction to Algorithms_ by Corman, Rivest, and Shamir.
  19 (macrolet ((%index (x) `(truly-the index ,x))
  20            (%parent (i) `(ash ,i -1))
  21            (%left (i) `(%index (ash ,i 1)))
  22            (%right (i) `(%index (1+ (ash ,i 1))))
  23            (%heapify (i)
  24              `(do* ((i ,i)
  25                     (left (%left i) (%left i)))
  26                   ((> left current-heap-size))
  27                 (declare (type index i left))
  28                 (let* ((i-elt (%elt i))
  29                        (i-key (funcall keyfun i-elt))
  30                        (left-elt (%elt left))
  31                        (left-key (funcall keyfun left-elt)))
  32                   (multiple-value-bind (large large-elt large-key)
  33                       (if (funcall predicate i-key left-key)
  34                           (values left left-elt left-key)
  35                           (values i i-elt i-key))
  36                     (let ((right (%right i)))
  37                       (multiple-value-bind (largest largest-elt)
  38                           (if (> right current-heap-size)
  39                               (values large large-elt)
  40                               (let* ((right-elt (%elt right))
  41                                      (right-key (funcall keyfun right-elt)))
  42                                 (if (funcall predicate large-key right-key)
  43                                     (values right right-elt)
  44                                     (values large large-elt))))
  45                         (cond ((= largest i)
  46                                (return))
  47                               (t
  48                                (setf (%elt i) largest-elt
  49                                      (%elt largest) i-elt
  50                                      i largest)))))))))
  51            (%sort-vector (keyfun &optional (vtype 'vector))
  52              `(macrolet (;; KLUDGE: In SBCL ca. 0.6.10, I had trouble getting
  53                          ;; type inference to propagate all the way
  54                          ;; through this tangled mess of inlining. The
  55                          ;; TRULY-THE here works around that. -- WHN
  56                          (%elt (i)
  57                            `(aref (truly-the ,',vtype vector)
  58                                   (%index (+ (%index ,i) start-1)))))
  59                 (let ((start-1 (1- start)) ; Heaps prefer 1-based addressing.
  60                       (current-heap-size (- end start))
  61                       (keyfun ,keyfun))
  62                   (declare (type (integer -1 #.(1- most-positive-fixnum))
  63                                  start-1))
  64                   (declare (type index current-heap-size))
  65                   (declare (type function keyfun))
  66                   (loop for i of-type index
  67                         from (ash current-heap-size -1) downto 1 do
  68                         (%heapify i))
  69                   (loop
  70                    (when (< current-heap-size 2)
  71                      (return))
  72                    (rotatef (%elt 1) (%elt current-heap-size))
  73                    (decf current-heap-size)
  74                    (%heapify 1))))))
  75
  76   (declaim (inline sort-vector))
  77   (defun sort-vector (vector start end predicate key)
  78     (declare (type vector vector))
  79     (declare (type index start end))
  80     (declare (type function predicate))
  81     (declare (type (or function null) key))
  82     (declare (optimize (speed 3) (safety 3) (debug 1) (space 1)))
  83     (if (typep vector 'simple-vector)
  84         ;; (VECTOR T) is worth optimizing for, and SIMPLE-VECTOR is
  85         ;; what we get from (VECTOR T) inside WITH-ARRAY-DATA.
  86         (if (null key)
  87             ;; Special-casing the KEY=NIL case lets us avoid some
  88             ;; function calls.
  89             (%sort-vector #'identity simple-vector)
  90             (%sort-vector key simple-vector))
  91         ;; It's hard to anticipate many speed-critical applications for
  92         ;; sorting vector types other than (VECTOR T), so we just lump
  93         ;; them all together in one slow dynamically typed mess.
  94         (locally
  95           (declare (optimize (speed 2) (space 2) (inhibit-warnings 3)))
  96           (%sort-vector (or key #'identity))))))
  97
  98 ;;; This is MAYBE-INLINE because it's not too hard to have an
  99 ;;; application where sorting is a major bottleneck, and inlining it
 100 ;;; allows the compiler to make enough optimizations that it might be
 101 ;;; worth the (large) cost in space.
 102 (declaim (maybe-inline sort))
 103 (defun sort (sequence predicate &key key)
 104   #!+sb-doc
 105   "Destructively sort SEQUENCE. PREDICATE should return non-NIL if
 106    ARG1 is to precede ARG2."
 107   (let ((predicate-function (%coerce-callable-to-fun predicate))
 108         (key-function (and key (%coerce-callable-to-fun key))))
 109     (typecase sequence
 110       (list (sort-list sequence predicate-function key-function))
 111       (vector
 112        (with-array-data ((vector (the vector sequence))
 113                          (start 0)
 114                          (end (length sequence)))
 115          (sort-vector vector start end predicate-function key-function))
 116        sequence)
 117       (t
 118        (error 'simple-type-error
 119               :datum sequence
 120               :expected-type 'sequence
 121               :format-control "~S is not a sequence."
 122               :format-arguments (list sequence))))))
 123 \f
 124 ;;;; stable sorting
 125
 126 (defun stable-sort (sequence predicate &key key)
 127   #!+sb-doc
 128   "Destructively sorts sequence. Predicate should return non-Nil if
 129    Arg1 is to precede Arg2."
 130   (typecase sequence
 131     (simple-vector
 132      (stable-sort-simple-vector sequence predicate key))
 133     (list
 134      (sort-list sequence predicate key))
 135     (vector
 136      (stable-sort-vector sequence predicate key))
 137     (t
 138      (error 'simple-type-error
 139             :datum sequence
 140             :expected-type 'sequence
 141             :format-control "~S is not a sequence."
 142             :format-arguments (list sequence)))))
 143
 144 ;;; stable sort of lists
 145
 146 ;;; SORT-LIST uses a bottom up merge sort. First a pass is made over
 147 ;;; the list grabbing one element at a time and merging it with the
 148 ;;; next one form pairs of sorted elements. Then n is doubled, and
 149 ;;; elements are taken in runs of two, merging one run with the next
 150 ;;; to form quadruples of sorted elements. This continues until n is
 151 ;;; large enough that the inner loop only runs for one iteration; that
 152 ;;; is, there are only two runs that can be merged, the first run
 153 ;;; starting at the beginning of the list, and the second being the
 154 ;;; remaining elements.
 155
 156 (defun sort-list (list pred key)
 157   (let ((head (cons :header list))  ; head holds on to everything
 158         (n 1)                       ; bottom-up size of lists to be merged
 159         unsorted                    ; unsorted is the remaining list to be
 160                                     ;   broken into n size lists and merged
 161         list-1                      ; list-1 is one length n list to be merged
 162         last)                       ; last points to the last visited cell
 163     (declare (fixnum n))
 164     (loop
 165      ;; start collecting runs of n at the first element
 166      (setf unsorted (cdr head))
 167      ;; tack on the first merge of two n-runs to the head holder
 168      (setf last head)
 169      (let ((n-1 (1- n)))
 170        (declare (fixnum n-1))
 171        (loop
 172         (setf list-1 unsorted)
 173         (let ((temp (nthcdr n-1 list-1))
 174               list-2)
 175           (cond (temp
 176                  ;; there are enough elements for a second run
 177                  (setf list-2 (cdr temp))
 178                  (setf (cdr temp) nil)
 179                  (setf temp (nthcdr n-1 list-2))
 180                  (cond (temp
 181                         (setf unsorted (cdr temp))
 182                         (setf (cdr temp) nil))
 183                        ;; the second run goes off the end of the list
 184                        (t (setf unsorted nil)))
 185                  (multiple-value-bind (merged-head merged-last)
 186                      (merge-lists* list-1 list-2 pred key)
 187                    (setf (cdr last) merged-head)
 188                    (setf last merged-last))
 189                  (if (null unsorted) (return)))
 190                 ;; if there is only one run, then tack it on to the end
 191                 (t (setf (cdr last) list-1)
 192                    (return)))))
 193        (setf n (ash n 1)) ; (+ n n)
 194        ;; If the inner loop only executed once, then there were only
 195        ;; enough elements for two runs given n, so all the elements
 196        ;; have been merged into one list. This may waste one outer
 197        ;; iteration to realize.
 198        (if (eq list-1 (cdr head))
 199            (return list-1))))))
 200
 201 ;;; APPLY-PRED saves us a function call sometimes.
 202 (eval-when (:compile-toplevel :execute)
 203   (sb!xc:defmacro apply-pred (one two pred key)
 204     `(if ,key
 205          (funcall ,pred (funcall ,key ,one)
 206                   (funcall ,key  ,two))
 207          (funcall ,pred ,one ,two)))
 208 ) ; EVAL-WHEN
 209
 210 (defvar *merge-lists-header* (list :header))
 211
 212 ;;; MERGE-LISTS*   originally written by Jim Large.
 213 ;;;                modified to return a pointer to the end of the result
 214 ;;;                   and to not cons header each time its called.
 215 ;;; It destructively merges list-1 with list-2. In the resulting
 216 ;;; list, elements of list-2 are guaranteed to come after equal elements
 217 ;;; of list-1.
 218 (defun merge-lists* (list-1 list-2 pred key)
 219   (do* ((result *merge-lists-header*)
 220         (P result))                  ; points to last cell of result
 221        ((or (null list-1) (null list-2)) ; done when either list used up
 222         (if (null list-1)              ; in which case, append the
 223             (rplacd p list-2)      ;   other list
 224             (rplacd p list-1))
 225         (do ((drag p lead)
 226              (lead (cdr p) (cdr lead)))
 227             ((null lead)
 228              (values (prog1 (cdr result) ; Return the result sans header
 229                             (rplacd result nil)) ; (free memory, be careful)
 230                      drag))))      ;   and return pointer to last element.
 231     (cond ((apply-pred (car list-2) (car list-1) pred key)
 232            (rplacd p list-2)       ; Append the lesser list to last cell of
 233            (setq p (cdr p))         ;   result. Note: test must bo done for
 234            (pop list-2))               ;   LIST-2 < LIST-1 so merge will be
 235           (T (rplacd p list-1)   ;   stable for LIST-1.
 236              (setq p (cdr p))
 237              (pop list-1)))))
 238
 239 ;;; stable sort of vectors
 240
 241 ;;; Stable sorting vectors is done with the same algorithm used for
 242 ;;; lists, using a temporary vector to merge back and forth between it
 243 ;;; and the given vector to sort.
 244
 245 (eval-when (:compile-toplevel :execute)
 246
 247 ;;; STABLE-SORT-MERGE-VECTORS* takes a source vector with subsequences,
 248 ;;;    start-1 (inclusive) ... end-1 (exclusive) and
 249 ;;;    end-1 (inclusive) ... end-2 (exclusive),
 250 ;;; and merges them into a target vector starting at index start-1.
 251
 252 (sb!xc:defmacro stable-sort-merge-vectors* (source target start-1 end-1 end-2
 253                                                      pred key source-ref
 254                                                      target-ref)
 255   (let ((i (gensym))
 256         (j (gensym))
 257         (target-i (gensym)))
 258     `(let ((,i ,start-1)
 259            (,j ,end-1) ; start-2
 260            (,target-i ,start-1))
 261        (declare (fixnum ,i ,j ,target-i))
 262        (loop
 263         (cond ((= ,i ,end-1)
 264                (loop (if (= ,j ,end-2) (return))
 265                      (setf (,target-ref ,target ,target-i)
 266                            (,source-ref ,source ,j))
 267                      (incf ,target-i)
 268                      (incf ,j))
 269                (return))
 270               ((= ,j ,end-2)
 271                (loop (if (= ,i ,end-1) (return))
 272                      (setf (,target-ref ,target ,target-i)
 273                            (,source-ref ,source ,i))
 274                      (incf ,target-i)
 275                      (incf ,i))
 276                (return))
 277               ((apply-pred (,source-ref ,source ,j)
 278                            (,source-ref ,source ,i)
 279                            ,pred ,key)
 280                (setf (,target-ref ,target ,target-i)
 281                      (,source-ref ,source ,j))
 282                (incf ,j))
 283               (t (setf (,target-ref ,target ,target-i)
 284                        (,source-ref ,source ,i))
 285                  (incf ,i)))
 286         (incf ,target-i)))))
 287
 288 ;;; VECTOR-MERGE-SORT is the same algorithm used to stable sort lists,
 289 ;;; but it uses a temporary vector. DIRECTION determines whether we
 290 ;;; are merging into the temporary (T) or back into the given vector
 291 ;;; (NIL).
 292 (sb!xc:defmacro vector-merge-sort (vector pred key vector-ref)
 293   (let ((vector-len (gensym)) (n (gensym))
 294         (direction (gensym))  (unsorted (gensym))
 295         (start-1 (gensym))    (end-1 (gensym))
 296         (end-2 (gensym))      (temp-len (gensym))
 297         (i (gensym)))
 298     `(let ((,vector-len (length (the vector ,vector)))
 299            (,n 1)        ; bottom-up size of contiguous runs to be merged
 300            (,direction t) ; t vector --> temp    nil temp --> vector
 301            (,temp-len (length (the simple-vector *merge-sort-temp-vector*)))
 302            (,unsorted 0)  ; unsorted..vector-len are the elements that need
 303                           ; to be merged for a given n
 304            (,start-1 0))  ; one n-len subsequence to be merged with the next
 305        (declare (fixnum ,vector-len ,n ,temp-len ,unsorted ,start-1))
 306        (if (> ,vector-len ,temp-len)
 307            (setf *merge-sort-temp-vector*
 308                  (make-array (max ,vector-len (+ ,temp-len ,temp-len)))))
 309        (loop
 310         ;; for each n, we start taking n-runs from the start of the vector
 311         (setf ,unsorted 0)
 312         (loop
 313          (setf ,start-1 ,unsorted)
 314          (let ((,end-1 (+ ,start-1 ,n)))
 315            (declare (fixnum ,end-1))
 316            (cond ((< ,end-1 ,vector-len)
 317                   ;; there are enough elements for a second run
 318                   (let ((,end-2 (+ ,end-1 ,n)))
 319                     (declare (fixnum ,end-2))
 320                     (if (> ,end-2 ,vector-len) (setf ,end-2 ,vector-len))
 321                     (setf ,unsorted ,end-2)
 322                     (if ,direction
 323                         (stable-sort-merge-vectors*
 324                          ,vector *merge-sort-temp-vector*
 325                          ,start-1 ,end-1 ,end-2 ,pred ,key ,vector-ref svref)
 326                         (stable-sort-merge-vectors*
 327                          *merge-sort-temp-vector* ,vector
 328                          ,start-1 ,end-1 ,end-2 ,pred ,key svref ,vector-ref))
 329                     (if (= ,unsorted ,vector-len) (return))))
 330                  ;; if there is only one run, copy those elements to the end
 331                  (t (if ,direction
 332                         (do ((,i ,start-1 (1+ ,i)))
 333                             ((= ,i ,vector-len))
 334                           (declare (fixnum ,i))
 335                           (setf (svref *merge-sort-temp-vector* ,i)
 336                                 (,vector-ref ,vector ,i)))
 337                         (do ((,i ,start-1 (1+ ,i)))
 338                             ((= ,i ,vector-len))
 339                           (declare (fixnum ,i))
 340                           (setf (,vector-ref ,vector ,i)
 341                                 (svref *merge-sort-temp-vector* ,i))))
 342                     (return)))))
 343         ;; If the inner loop only executed once, then there were only enough
 344         ;; elements for two subsequences given n, so all the elements have
 345         ;; been merged into one list. Start-1 will have remained 0 upon exit.
 346         (when (zerop ,start-1)
 347           (if ,direction
 348               ;; if we just merged into the temporary, copy it all back
 349               ;; to the given vector.
 350               (dotimes (,i ,vector-len)
 351                 (setf (,vector-ref ,vector ,i)
 352                       (svref *merge-sort-temp-vector* ,i))))
 353           (return ,vector))
 354         (setf ,n (ash ,n 1)) ; (* 2 n)
 355         (setf ,direction (not ,direction))))))
 356
 357 ) ; EVAL-when
 358
 359 ;;; temporary vector for stable sorting vectors
 360 (defvar *merge-sort-temp-vector*
 361   (make-array 50))
 362
 363 (declaim (simple-vector *merge-sort-temp-vector*))
 364
 365 (defun stable-sort-simple-vector (vector pred key)
 366   (declare (simple-vector vector))
 367   (vector-merge-sort vector pred key svref))
 368
 369 (defun stable-sort-vector (vector pred key)
 370   (vector-merge-sort vector pred key aref))
 371
 372 ;;;; merging
 373
 374 (eval-when (:compile-toplevel :execute)
 375
 376 ;;; MERGE-VECTORS returns a new vector which contains an interleaving
 377 ;;; of the elements of VECTOR-1 and VECTOR-2. Elements from VECTOR-2
 378 ;;; are chosen only if they are strictly less than elements of
 379 ;;; VECTOR-1, (PRED ELT-2 ELT-1), as specified in the manual.
 380 (sb!xc:defmacro merge-vectors (vector-1 length-1 vector-2 length-2
 381                                result-vector pred key access)
 382   (let ((result-i (gensym))
 383         (i (gensym))
 384         (j (gensym)))
 385     `(let* ((,result-i 0)
 386             (,i 0)
 387             (,j 0))
 388        (declare (fixnum ,result-i ,i ,j))
 389        (loop
 390         (cond ((= ,i ,length-1)
 391                (loop (if (= ,j ,length-2) (return))
 392                      (setf (,access ,result-vector ,result-i)
 393                            (,access ,vector-2 ,j))
 394                      (incf ,result-i)
 395                      (incf ,j))
 396                (return ,result-vector))
 397               ((= ,j ,length-2)
 398                (loop (if (= ,i ,length-1) (return))
 399                      (setf (,access ,result-vector ,result-i)
 400                            (,access ,vector-1 ,i))
 401                      (incf ,result-i)
 402                      (incf ,i))
 403                (return ,result-vector))
 404               ((apply-pred (,access ,vector-2 ,j) (,access ,vector-1 ,i)
 405                            ,pred ,key)
 406                (setf (,access ,result-vector ,result-i)
 407                      (,access ,vector-2 ,j))
 408                (incf ,j))
 409               (t (setf (,access ,result-vector ,result-i)
 410                        (,access ,vector-1 ,i))
 411                  (incf ,i)))
 412         (incf ,result-i)))))
 413
 414 ) ; EVAL-WHEN
 415
 416 (defun merge (result-type sequence1 sequence2 predicate &key key)
 417   #!+sb-doc
 418   "Merge the sequences SEQUENCE1 and SEQUENCE2 destructively into a
 419    sequence of type RESULT-TYPE using PREDICATE to order the elements."
 420   (if (eq result-type 'list)
 421       (let ((result (merge-lists* (coerce sequence1 'list)
 422                                   (coerce sequence2 'list)
 423                                   predicate key)))
 424         result)
 425       (let* ((vector-1 (coerce sequence1 'vector))
 426              (vector-2 (coerce sequence2 'vector))
 427              (length-1 (length vector-1))
 428              (length-2 (length vector-2))
 429              (result (make-sequence-of-type result-type
 430                                             (+ length-1 length-2))))
 431         (declare (vector vector-1 vector-2)
 432                  (fixnum length-1 length-2))
 433
 434         #!+high-security (aver (typep result result-type))
 435         (if (and (simple-vector-p result)
 436                  (simple-vector-p vector-1)
 437                  (simple-vector-p vector-2))
 438             (merge-vectors vector-1 length-1 vector-2 length-2
 439                            result predicate key svref)
 440             (merge-vectors vector-1 length-1 vector-2 length-2
 441                            result predicate key aref)))))