;;; Integers using Multiplication", 1994 by Torbj\"{o}rn Granlund and
;;; Peter L. Montgomery, Figures 4.2 and 6.2, modified to exclude the
;;; case of division by powers of two.
+;;; The algorithm includes an adaptive precision argument. Use it, since
+;;; we often have sub-word value ranges. Careful, in this case, we need
+;;; p s.t 2^p > n, not the ceiling of the binary log.
+;;; Also, for some reason, the paper prefers shifting to masking. Mask
+;;; instead. Masking is equivalent to shifting right, then left again;
+;;; all the intermediate values are still words, so we just have to shift
+;;; right a bit more to compensate, at the end.
+;;;
;;; The following two examples show an average case and the worst case
;;; with respect to the complexity of the generated expression, under
;;; a word size of 64 bits:
;;;
-;;; (UNSIGNED-DIV-TRANSFORMER 10) ->
-;;; (ASH (%MULTIPLY (ASH X 0) 14757395258967641293) -3)
+;;; (UNSIGNED-DIV-TRANSFORMER 10 MOST-POSITIVE-WORD) ->
+;;; (ASH (%MULTIPLY (LOGANDC2 X 0) 14757395258967641293) -3)
;;;
-;;; (UNSIGNED-DIV-TRANSFORMER 7) ->
+;;; (UNSIGNED-DIV-TRANSFORMER 7 MOST-POSITIVE-WORD) ->
;;; (LET* ((NUM X)
;;; (T1 (%MULTIPLY NUM 2635249153387078803)))
;;; (ASH (LDB (BYTE 64 0)
;;; -1)))
;;; -2))
;;;
-(defun gen-unsigned-div-by-constant-expr (y)
- (declare (type (integer 3 #.most-positive-word) y))
+(defun gen-unsigned-div-by-constant-expr (y max-x)
+ (declare (type (integer 3 #.most-positive-word) y)
+ (type word max-x))
(aver (not (zerop (logand y (1- y)))))
(labels ((ld (x)
;; the floor of the binary logarithm of (positive) X
(> shift 0)))
(values m-high shift)))))
(let ((n (expt 2 sb!vm:n-word-bits))
+ (precision (integer-length max-x))
(shift1 0))
(multiple-value-bind (m shift2)
- (choose-multiplier y sb!vm:n-word-bits)
+ (choose-multiplier y precision)
(when (and (>= m n) (evenp y))
(setq shift1 (ld (logand y (- y))))
(multiple-value-setq (m shift2)
(choose-multiplier (/ y (ash 1 shift1))
- (- sb!vm:n-word-bits shift1))))
+ (- precision shift1))))
(if (>= m n)
- (flet ((word-mod (x)
- `(ldb (byte #.sb!vm:n-word-bits 0) ,x)))
+ (flet ((word (x)
+ `(truly-the word ,x)))
`(let* ((num x)
(t1 (%multiply num ,(- m n))))
- (ash ,(word-mod `(+ t1 (ash ,(word-mod `(- num t1))
- -1)))
+ (ash ,(word `(+ t1 (ash ,(word `(- num t1))
+ -1)))
,(- 1 shift2))))
- `(ash (%multiply (ash x ,(- shift1)) ,m)
- ,(- shift2)))))))
+ `(ash (%multiply (logandc2 x ,(1- (ash 1 shift1))) ,m)
+ ,(- (+ shift1 shift2))))))))
;;; If the divisor is constant and both args are positive and fit in a
;;; machine word, replace the division by a multiplication and possibly
;;; the same value, emit much simpler code to handle that. (This case
;;; may be rare but it's easy to detect and the compiler doesn't find
;;; this optimization on its own.)
-(deftransform truncate ((x y) ((unsigned-byte #.sb!vm:n-word-bits)
- (constant-arg
- (unsigned-byte #.sb!vm:n-word-bits)))
+(deftransform truncate ((x y) (word (constant-arg word))
*
:policy (and (> speed compilation-speed)
(> speed space)))
"convert integer division to multiplication"
- (let ((y (lvar-value y)))
+ (let* ((y (lvar-value y))
+ (x-type (lvar-type x))
+ (max-x (or (and (numeric-type-p x-type)
+ (numeric-type-high x-type))
+ most-positive-word)))
;; Division by zero, one or powers of two is handled elsewhere.
(when (zerop (logand y (1- y)))
(give-up-ir1-transform))
- `(let* ((quot ,(gen-unsigned-div-by-constant-expr y))
+ `(let* ((quot ,(gen-unsigned-div-by-constant-expr y max-x))
(rem (ldb (byte #.sb!vm:n-word-bits 0)
(- x (* quot ,y)))))
(values quot rem))))