From dabb26b93be9bbf2718951ea2200fca874d3d587 Mon Sep 17 00:00:00 2001 From: Lutz Euler Date: Mon, 29 Apr 2013 22:57:41 +0200 Subject: [PATCH] Faster ISQRT on small (about fixnum sized) numbers. ISQRT is implemented using a recursive algorithm for arguments above 24 which is compiled using generic arithmetic only (as it must support both fixnums and bignums). Improve this by compiling this recursive part twice, once using generic and once fixnum-only arithmetic, and dispatching on function entry into the applicable part. For maximum speed, the fixnum part recurs directly into itself, thereby avoiding further type dispatching. This makes ISQRT run about three times as fast on fixnum inputs while the generated code is about 40 percent larger (both measured on x86-64). For bignums a speedup can be seen, too, as ISQRT always recurs into fixnum territory eventually, but the relative gain obviously becomes smaller very fast with increasing size of the argument. I have changed the variable names in the recursive part; they no longer have an "n-" prefix as this in SBCL by convention means "number of" and as the argument of the recursive part is no longer visibly "n". Slightly augment the test case. --- NEWS | 1 + src/code/numbers.lisp | 77 +++++++++++++++++++++++++++++++++++-------------- tests/arith.pure.lisp | 2 ++ 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index 053e3a2..afc31de 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,7 @@ changes relative to sbcl-1.1.7: * bug fix: handle errors when initializing *default-pathname-defaults*, sb-ext:*runtime-pathname*, sb-ext:*posix-argv* on startup, like character decoding errors, or directories being deleted. + * optimization: faster ISQRT on fixnums and small bignums changes in sbcl-1.1.7 relative to sbcl-1.1.6: * enhancement: TRACE :PRINT-ALL handles multiple-valued forms. diff --git a/src/code/numbers.lisp b/src/code/numbers.lisp index 89d35de..bf3a03a 100644 --- a/src/code/numbers.lisp +++ b/src/code/numbers.lisp @@ -1406,31 +1406,66 @@ the first." ((fixnum bignum) (bignum-gcd (make-small-bignum u) v)))))) -;;;; from Robert Smith; slightly changed not to cons unnecessarily. +;;; from Robert Smith; changed not to cons unnecessarily, and tuned for +;;; faster operation on fixnum inputs by compiling the central recursive +;;; algorithm twice, once using generic and once fixnum arithmetic, and +;;; dispatching on function entry into the applicable part. For maximum +;;; speed, the fixnum part recurs into itself, thereby avoiding further +;;; type dispatching. This pattern is not supported by NUMBER-DISPATCH +;;; thus some special-purpose macrology is needed. (defun isqrt (n) #!+sb-doc "Return the greatest integer less than or equal to the square root of N." (declare (type unsigned-byte n)) - (cond - ((> n 24) - (let* ((n-fourth-size (ash (1- (integer-length n)) -2)) - (n-significant-half (ash n (- (ash n-fourth-size 1)))) - (n-significant-half-isqrt (isqrt n-significant-half)) - (zeroth-iteration (ash n-significant-half-isqrt n-fourth-size))) - (multiple-value-bind (quot rem) - (floor n zeroth-iteration) - (let ((first-iteration (ash (+ zeroth-iteration quot) -1))) - (cond ((oddp quot) - first-iteration) - ((> (expt (- first-iteration zeroth-iteration) 2) rem) - (1- first-iteration)) - (t - first-iteration)))))) - ((> n 15) 4) - ((> n 8) 3) - ((> n 3) 2) - ((> n 0) 1) - ((= n 0) 0))) + (macrolet + ((isqrt-recursion (arg recurse fixnum-p) + ;; Expands into code for the recursive step of the ISQRT + ;; calculation. ARG is the input variable and RECURSE the name + ;; of the function to recur into. If FIXNUM-P is true, some + ;; type declarations are added that, together with ARG being + ;; declared as a fixnum outside of here, make the resulting code + ;; compile into fixnum-specialized code without any calls to + ;; generic arithmetic. Else, the code works for bignums, too. + ;; The input must be at least 16 to ensure that RECURSE is called + ;; with a strictly smaller number and that the result is correct + ;; (provided that RECURSE correctly implements ISQRT, itself). + `(macrolet ((if-fixnum-p-truly-the (type expr) + ,@(if fixnum-p + '(`(truly-the ,type ,expr)) + '((declare (ignore type)) + expr)))) + (let* ((fourth-size (ash (1- (integer-length ,arg)) -2)) + (significant-half (ash ,arg (- (ash fourth-size 1)))) + (significant-half-isqrt + (if-fixnum-p-truly-the + (integer 1 #.(isqrt sb!xc:most-positive-fixnum)) + (,recurse significant-half))) + (zeroth-iteration (ash significant-half-isqrt + fourth-size))) + (multiple-value-bind (quot rem) + (floor ,arg zeroth-iteration) + (let ((first-iteration (ash (+ zeroth-iteration quot) -1))) + (cond ((oddp quot) + first-iteration) + ((> (if-fixnum-p-truly-the + fixnum + (expt (- first-iteration zeroth-iteration) 2)) + rem) + (1- first-iteration)) + (t + first-iteration)))))))) + (typecase n + (fixnum (labels ((fixnum-isqrt (n) + (declare (type fixnum n)) + (cond ((> n 24) + (isqrt-recursion n fixnum-isqrt t)) + ((> n 15) 4) + ((> n 8) 3) + ((> n 3) 2) + ((> n 0) 1) + ((= n 0) 0)))) + (fixnum-isqrt n))) + (bignum (isqrt-recursion n isqrt nil))))) ;;;; miscellaneous number predicates diff --git a/tests/arith.pure.lisp b/tests/arith.pure.lisp index 777b370..c78b328 100644 --- a/tests/arith.pure.lisp +++ b/tests/arith.pure.lisp @@ -576,6 +576,8 @@ (test x2) (test (1+ x2)) (test (1- x2))))) + (test most-positive-fixnum) + (test (1+ most-positive-fixnum)) (loop for i from 1 to 200 for pow = (expt 2 (1- i)) for j = (+ pow (random pow)) -- 1.7.10.4