X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fsrctran.lisp;h=44806ec023e21ebd86372c547d0b08c7a43b784c;hb=f17e3d27d7ff599f9443d011d17017a2a858c81a;hp=0426eef672f0be7030d7f4fa4ca0d1d2ad2bb9b3;hpb=6d67d71e21d95c26119b8c7cea1bc64811892767;p=sbcl.git diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp index 0426eef..44806ec 100644 --- a/src/compiler/srctran.lisp +++ b/src/compiler/srctran.lisp @@ -3283,14 +3283,22 @@ ;;; Integers using Multiplication", 1994 by Torbj\"{o}rn Granlund and ;;; Peter L. Montgomery, Figures 4.2 and 6.2, modified to exclude the ;;; case of division by powers of two. +;;; The algorithm includes an adaptive precision argument. Use it, since +;;; we often have sub-word value ranges. Careful, in this case, we need +;;; p s.t 2^p > n, not the ceiling of the binary log. +;;; Also, for some reason, the paper prefers shifting to masking. Mask +;;; instead. Masking is equivalent to shifting right, then left again; +;;; all the intermediate values are still words, so we just have to shift +;;; right a bit more to compensate, at the end. +;;; ;;; The following two examples show an average case and the worst case ;;; with respect to the complexity of the generated expression, under ;;; a word size of 64 bits: ;;; -;;; (UNSIGNED-DIV-TRANSFORMER 10) -> -;;; (ASH (%MULTIPLY (ASH X 0) 14757395258967641293) -3) +;;; (UNSIGNED-DIV-TRANSFORMER 10 MOST-POSITIVE-WORD) -> +;;; (ASH (%MULTIPLY (LOGANDC2 X 0) 14757395258967641293) -3) ;;; -;;; (UNSIGNED-DIV-TRANSFORMER 7) -> +;;; (UNSIGNED-DIV-TRANSFORMER 7 MOST-POSITIVE-WORD) -> ;;; (LET* ((NUM X) ;;; (T1 (%MULTIPLY NUM 2635249153387078803))) ;;; (ASH (LDB (BYTE 64 0) @@ -3299,8 +3307,9 @@ ;;; -1))) ;;; -2)) ;;; -(defun gen-unsigned-div-by-constant-expr (y) - (declare (type (integer 3 #.most-positive-word) y)) +(defun gen-unsigned-div-by-constant-expr (y max-x) + (declare (type (integer 3 #.most-positive-word) y) + (type word max-x)) (aver (not (zerop (logand y (1- y))))) (labels ((ld (x) ;; the floor of the binary logarithm of (positive) X @@ -3318,24 +3327,25 @@ (> shift 0))) (values m-high shift))))) (let ((n (expt 2 sb!vm:n-word-bits)) + (precision (integer-length max-x)) (shift1 0)) (multiple-value-bind (m shift2) - (choose-multiplier y sb!vm:n-word-bits) + (choose-multiplier y precision) (when (and (>= m n) (evenp y)) (setq shift1 (ld (logand y (- y)))) (multiple-value-setq (m shift2) (choose-multiplier (/ y (ash 1 shift1)) - (- sb!vm:n-word-bits shift1)))) + (- precision shift1)))) (if (>= m n) - (flet ((word-mod (x) - `(ldb (byte #.sb!vm:n-word-bits 0) ,x))) + (flet ((word (x) + `(truly-the word ,x))) `(let* ((num x) (t1 (%multiply num ,(- m n)))) - (ash ,(word-mod `(+ t1 (ash ,(word-mod `(- num t1)) - -1))) + (ash ,(word `(+ t1 (ash ,(word `(- num t1)) + -1))) ,(- 1 shift2)))) - `(ash (%multiply (ash x ,(- shift1)) ,m) - ,(- shift2))))))) + `(ash (%multiply (logandc2 x ,(1- (ash 1 shift1))) ,m) + ,(- (+ shift1 shift2)))))))) ;;; If the divisor is constant and both args are positive and fit in a ;;; machine word, replace the division by a multiplication and possibly @@ -3346,39 +3356,23 @@ ;;; the same value, emit much simpler code to handle that. (This case ;;; may be rare but it's easy to detect and the compiler doesn't find ;;; this optimization on its own.) -(deftransform truncate ((x y) ((unsigned-byte #.sb!vm:n-word-bits) - (constant-arg - (unsigned-byte #.sb!vm:n-word-bits))) +(deftransform truncate ((x y) (word (constant-arg word)) * :policy (and (> speed compilation-speed) (> speed space))) "convert integer division to multiplication" - (let ((y (lvar-value y))) + (let* ((y (lvar-value y)) + (x-type (lvar-type x)) + (max-x (or (and (numeric-type-p x-type) + (numeric-type-high x-type)) + most-positive-word))) ;; Division by zero, one or powers of two is handled elsewhere. (when (zerop (logand y (1- y))) (give-up-ir1-transform)) - ;; The compiler can't derive the result types to maximal tightness - ;; from the transformed expression, so we calculate them here and - ;; add the corresponding specifiers explicitly through TRULY-THE. - ;; This duplicates parts of the TRUNCATE DERIVE-TYPE optimizer but - ;; using that here would be too cumbersome. - (let* ((x-type (lvar-type x)) - (x-low (or (and (numeric-type-p x-type) - (numeric-type-low x-type)) - 0)) - (x-high (or (and (numeric-type-p x-type) - (numeric-type-high x-type)) - (1- (expt 2 #.sb!vm:n-word-bits)))) - (quot-low (truncate x-low y)) - (quot-high (truncate x-high y))) - (if (= quot-low quot-high) - `(values ,quot-low - (- x ,(* quot-low y))) - `(let* ((quot ,(gen-unsigned-div-by-constant-expr y)) - (rem (ldb (byte #.sb!vm:n-word-bits 0) - (- x (* quot ,y))))) - (values (truly-the (integer ,quot-low ,quot-high) quot) - (truly-the (integer 0 ,(1- y)) rem))))))) + `(let* ((quot ,(gen-unsigned-div-by-constant-expr y max-x)) + (rem (ldb (byte #.sb!vm:n-word-bits 0) + (- x (* quot ,y))))) + (values quot rem)))) ;;;; arithmetic and logical identity operation elimination