From f17e3d27d7ff599f9443d011d17017a2a858c81a Mon Sep 17 00:00:00 2001 From: Paul Khuong Date: Sun, 14 Aug 2011 16:46:01 -0400 Subject: [PATCH] More efficient integer division by multiplication * Exploit restricted range for inputs (e.g. fixnums). * When the divisor is even, simplify with a mask instead of a shift. * Clean up the code a bit: we don't want modular arithmetic, it's actually all guaranteed not to wrap around. * Change the test so that larger divisors are a bit more likely to get tested too. * Lots more things can be done, including generalizing the transform to pretty much arbitrary rational divisor, or avoiding the costly general code sequence in nearly all cases. Unfortunately, it's a lot of (somewhat original, at that) code, and can be fairly slow; if it matters to someone, I can try and find a compromise (contrib?). --- src/compiler/srctran.lisp | 48 ++++++++++++++++++++++++++++----------------- tests/arith.pure.lisp | 3 ++- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp index 03bb32a..44806ec 100644 --- a/src/compiler/srctran.lisp +++ b/src/compiler/srctran.lisp @@ -3283,14 +3283,22 @@ ;;; Integers using Multiplication", 1994 by Torbj\"{o}rn Granlund and ;;; Peter L. Montgomery, Figures 4.2 and 6.2, modified to exclude the ;;; case of division by powers of two. +;;; The algorithm includes an adaptive precision argument. Use it, since +;;; we often have sub-word value ranges. Careful, in this case, we need +;;; p s.t 2^p > n, not the ceiling of the binary log. +;;; Also, for some reason, the paper prefers shifting to masking. Mask +;;; instead. Masking is equivalent to shifting right, then left again; +;;; all the intermediate values are still words, so we just have to shift +;;; right a bit more to compensate, at the end. +;;; ;;; The following two examples show an average case and the worst case ;;; with respect to the complexity of the generated expression, under ;;; a word size of 64 bits: ;;; -;;; (UNSIGNED-DIV-TRANSFORMER 10) -> -;;; (ASH (%MULTIPLY (ASH X 0) 14757395258967641293) -3) +;;; (UNSIGNED-DIV-TRANSFORMER 10 MOST-POSITIVE-WORD) -> +;;; (ASH (%MULTIPLY (LOGANDC2 X 0) 14757395258967641293) -3) ;;; -;;; (UNSIGNED-DIV-TRANSFORMER 7) -> +;;; (UNSIGNED-DIV-TRANSFORMER 7 MOST-POSITIVE-WORD) -> ;;; (LET* ((NUM X) ;;; (T1 (%MULTIPLY NUM 2635249153387078803))) ;;; (ASH (LDB (BYTE 64 0) @@ -3299,8 +3307,9 @@ ;;; -1))) ;;; -2)) ;;; -(defun gen-unsigned-div-by-constant-expr (y) - (declare (type (integer 3 #.most-positive-word) y)) +(defun gen-unsigned-div-by-constant-expr (y max-x) + (declare (type (integer 3 #.most-positive-word) y) + (type word max-x)) (aver (not (zerop (logand y (1- y))))) (labels ((ld (x) ;; the floor of the binary logarithm of (positive) X @@ -3318,24 +3327,25 @@ (> shift 0))) (values m-high shift))))) (let ((n (expt 2 sb!vm:n-word-bits)) + (precision (integer-length max-x)) (shift1 0)) (multiple-value-bind (m shift2) - (choose-multiplier y sb!vm:n-word-bits) + (choose-multiplier y precision) (when (and (>= m n) (evenp y)) (setq shift1 (ld (logand y (- y)))) (multiple-value-setq (m shift2) (choose-multiplier (/ y (ash 1 shift1)) - (- sb!vm:n-word-bits shift1)))) + (- precision shift1)))) (if (>= m n) - (flet ((word-mod (x) - `(ldb (byte #.sb!vm:n-word-bits 0) ,x))) + (flet ((word (x) + `(truly-the word ,x))) `(let* ((num x) (t1 (%multiply num ,(- m n)))) - (ash ,(word-mod `(+ t1 (ash ,(word-mod `(- num t1)) - -1))) + (ash ,(word `(+ t1 (ash ,(word `(- num t1)) + -1))) ,(- 1 shift2)))) - `(ash (%multiply (ash x ,(- shift1)) ,m) - ,(- shift2))))))) + `(ash (%multiply (logandc2 x ,(1- (ash 1 shift1))) ,m) + ,(- (+ shift1 shift2)))))))) ;;; If the divisor is constant and both args are positive and fit in a ;;; machine word, replace the division by a multiplication and possibly @@ -3346,18 +3356,20 @@ ;;; the same value, emit much simpler code to handle that. (This case ;;; may be rare but it's easy to detect and the compiler doesn't find ;;; this optimization on its own.) -(deftransform truncate ((x y) ((unsigned-byte #.sb!vm:n-word-bits) - (constant-arg - (unsigned-byte #.sb!vm:n-word-bits))) +(deftransform truncate ((x y) (word (constant-arg word)) * :policy (and (> speed compilation-speed) (> speed space))) "convert integer division to multiplication" - (let ((y (lvar-value y))) + (let* ((y (lvar-value y)) + (x-type (lvar-type x)) + (max-x (or (and (numeric-type-p x-type) + (numeric-type-high x-type)) + most-positive-word))) ;; Division by zero, one or powers of two is handled elsewhere. (when (zerop (logand y (1- y))) (give-up-ir1-transform)) - `(let* ((quot ,(gen-unsigned-div-by-constant-expr y)) + `(let* ((quot ,(gen-unsigned-div-by-constant-expr y max-x)) (rem (ldb (byte #.sb!vm:n-word-bits 0) (- x (* quot ,y))))) (values quot rem)))) diff --git a/tests/arith.pure.lisp b/tests/arith.pure.lisp index b1ebdb2..d58da25 100644 --- a/tests/arith.pure.lisp +++ b/tests/arith.pure.lisp @@ -456,7 +456,8 @@ ,@(loop repeat 4 collect (+ 10000 (random 101))) ,@(loop for i from 4 to sb-vm:n-word-bits - for r = (random (expt 2 i)) + for pow = (expt 2 (1- i)) + for r = (+ pow (random pow)) collect r))) (when (typep dividend dividend-type) (multiple-value-bind (q1 r1) -- 1.7.10.4