More efficient integer division by multiplication

author Paul Khuong <pvk@pvk.ca>

Sun, 14 Aug 2011 20:46:01 +0000 (16:46 -0400)

committer Paul Khuong <pvk@pvk.ca>

Sun, 14 Aug 2011 20:46:01 +0000 (16:46 -0400)
author Paul Khuong <pvk@pvk.ca>
Sun, 14 Aug 2011 20:46:01 +0000 (16:46 -0400)
committer Paul Khuong <pvk@pvk.ca>
Sun, 14 Aug 2011 20:46:01 +0000 (16:46 -0400)
diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp

index 03bb32a..44806ec 100644 (file)
--- a/src/compiler/srctran.lisp
+++ b/src/compiler/srctran.lisp
@@ -3283,14 +3283,22 @@
  ;;; Integers using Multiplication", 1994 by Torbj\"{o}rn Granlund and
  ;;; Peter L. Montgomery, Figures 4.2 and 6.2, modified to exclude the
  ;;; case of division by powers of two.
+;;; The algorithm includes an adaptive precision argument.  Use it, since
+;;; we often have sub-word value ranges.  Careful, in this case, we need
+;;; p s.t 2^p > n, not the ceiling of the binary log.
+;;; Also, for some reason, the paper prefers shifting to masking.  Mask
+;;; instead.  Masking is equivalent to shifting right, then left again;
+;;; all the intermediate values are still words, so we just have to shift
+;;; right a bit more to compensate, at the end.
+;;;
  ;;; The following two examples show an average case and the worst case
  ;;; with respect to the complexity of the generated expression, under
  ;;; a word size of 64 bits:
  ;;;
-;;; (UNSIGNED-DIV-TRANSFORMER 10) ->
-;;; (ASH (%MULTIPLY (ASH X 0) 14757395258967641293) -3)
+;;; (UNSIGNED-DIV-TRANSFORMER 10 MOST-POSITIVE-WORD) ->
+;;; (ASH (%MULTIPLY (LOGANDC2 X 0) 14757395258967641293) -3)
  ;;;
-;;; (UNSIGNED-DIV-TRANSFORMER 7) ->
+;;; (UNSIGNED-DIV-TRANSFORMER 7 MOST-POSITIVE-WORD) ->
  ;;; (LET* ((NUM X)
  ;;;        (T1 (%MULTIPLY NUM 2635249153387078803)))
  ;;;   (ASH (LDB (BYTE 64 0)
@@ -3299,8 +3307,9 @@
  ;;;                        -1)))
  ;;;        -2))
  ;;;
-(defun gen-unsigned-div-by-constant-expr (y)
-  (declare (type (integer 3 #.most-positive-word) y))
+(defun gen-unsigned-div-by-constant-expr (y max-x)
+  (declare (type (integer 3 #.most-positive-word) y)
+           (type word max-x))
    (aver (not (zerop (logand y (1- y)))))
    (labels ((ld (x)
               ;; the floor of the binary logarithm of (positive) X
@@ -3318,24 +3327,25 @@
                               (> shift 0)))
                     (values m-high shift)))))
      (let ((n (expt 2 sb!vm:n-word-bits))
+          (precision (integer-length max-x))
            (shift1 0))
        (multiple-value-bind (m shift2)
-          (choose-multiplier y sb!vm:n-word-bits)
+          (choose-multiplier y precision)
          (when (and (>= m n) (evenp y))
            (setq shift1 (ld (logand y (- y))))
            (multiple-value-setq (m shift2)
              (choose-multiplier (/ y (ash 1 shift1))
-                               (- sb!vm:n-word-bits shift1))))
+                               (- precision shift1))))
          (if (>= m n)
-            (flet ((word-mod (x)
-                     `(ldb (byte #.sb!vm:n-word-bits 0) ,x)))
+            (flet ((word (x)
+                     `(truly-the word ,x)))
                `(let* ((num x)
                        (t1 (%multiply num ,(- m n))))
-                 (ash ,(word-mod `(+ t1 (ash ,(word-mod `(- num t1))
-                                             -1)))
+                 (ash ,(word `(+ t1 (ash ,(word `(- num t1))
+                                         -1)))
                        ,(- 1 shift2))))
-            `(ash (%multiply (ash x ,(- shift1)) ,m)
-                  ,(- shift2)))))))
+            `(ash (%multiply (logandc2 x ,(1- (ash 1 shift1))) ,m)
+                  ,(- (+ shift1 shift2))))))))
  
  ;;; If the divisor is constant and both args are positive and fit in a
  ;;; machine word, replace the division by a multiplication and possibly
@@ -3346,18 +3356,20 @@
  ;;; the same value, emit much simpler code to handle that. (This case
  ;;; may be rare but it's easy to detect and the compiler doesn't find
  ;;; this optimization on its own.)
-(deftransform truncate ((x y) ((unsigned-byte #.sb!vm:n-word-bits)
-                               (constant-arg
-                                (unsigned-byte #.sb!vm:n-word-bits)))
+(deftransform truncate ((x y) (word (constant-arg word))
                          *
                          :policy (and (> speed compilation-speed)
                                       (> speed space)))
    "convert integer division to multiplication"
-  (let ((y (lvar-value y)))
+  (let* ((y      (lvar-value y))
+         (x-type (lvar-type x))
+         (max-x  (or (and (numeric-type-p x-type)
+                          (numeric-type-high x-type))
+                     most-positive-word)))
      ;; Division by zero, one or powers of two is handled elsewhere.
      (when (zerop (logand y (1- y)))
        (give-up-ir1-transform))
-    `(let* ((quot ,(gen-unsigned-div-by-constant-expr y))
+    `(let* ((quot ,(gen-unsigned-div-by-constant-expr y max-x))
              (rem (ldb (byte #.sb!vm:n-word-bits 0)
                        (- x (* quot ,y)))))
         (values quot rem))))
diff --git a/tests/arith.pure.lisp b/tests/arith.pure.lisp

index b1ebdb2..d58da25 100644 (file)
--- a/tests/arith.pure.lisp
+++ b/tests/arith.pure.lisp
@@ -456,7 +456,8 @@
                                  ,@(loop repeat 4
                                          collect (+ 10000 (random 101)))
                                  ,@(loop for i from 4 to sb-vm:n-word-bits
-                                        for r = (random (expt 2 i))
+                                        for pow = (expt 2 (1- i))
+                                        for r = (+ pow (random pow))
                                          collect r)))
                (when (typep dividend dividend-type)
                  (multiple-value-bind (q1 r1)
author	Paul Khuong <pvk@pvk.ca>
	Sun, 14 Aug 2011 20:46:01 +0000 (16:46 -0400)
committer	Paul Khuong <pvk@pvk.ca>
	Sun, 14 Aug 2011 20:46:01 +0000 (16:46 -0400)
src/compiler/srctran.lisp		patch \| blob \| history
tests/arith.pure.lisp		patch \| blob \| history