From 7485a0eb6bd09d24fd22c93c7ae713e3e5a245a2 Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Wed, 29 Jul 2009 13:35:33 +0000 Subject: [PATCH] 1.0.30.8: redo the recent FP optimizations in a better way * Multiplication and division should respect signed zeros. * Optimize division to multiplication by reciprocal when an exact reciprocal exits -- and always for FLOAT-ACCURACY=0. (Thanks to Paul Khuong!) --- NEWS | 4 +- src/compiler/float-tran.lisp | 52 +++++++++++++++++++----- tests/compiler.pure.lisp | 90 ++++++++++++++++++++++-------------------- version.lisp-expr | 2 +- 4 files changed, 92 insertions(+), 56 deletions(-) diff --git a/NEWS b/NEWS index bc57154..96b7c1f 100644 --- a/NEWS +++ b/NEWS @@ -6,8 +6,8 @@ changes relative to sbcl-1.0.30: * new feature: experimental :EMIT-CFASL parameter to COMPILE-FILE can be used to output toplevel compile-time effects into a separate .CFASL file. - * optimization: multiplication and division of single- and double-floats - with constant +/-one has been optimized. + * optimization: division of floating point numbers by constants uses + multiplication by reciprocal when an exact reciprocal exists. * optimization: multiplication of single- and double-floats floats by constant two has been optimized. * bug fix: moderately complex combinations of inline expansions could diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp index 3016282..6c73bf3 100644 --- a/src/compiler/float-tran.lisp +++ b/src/compiler/float-tran.lisp @@ -332,16 +332,48 @@ (%deftransform x '(function (double-float single-float) *) #'float-contagion-arg2)) -;;; Optimize division and multiplication by one and minus one. -(macrolet ((def (op type &rest args) - `(deftransform ,op ((x y) (,type (constant-arg (member ,@args)))) - (if (minusp (lvar-value y)) - '(+ (%negate x) ,(coerce 0 type)) - '(+ x ,(coerce 0 type)))))) - (def / single-float 1 1.0 -1 -1.0) - (def * single-float 1 1.0 -1 -1.0) - (def / double-float 1 1.0 1.0d0 -1 -1.0 -1.0d0) - (def * double-float 1 1.0 1.0d0 -1 -1.0 -1.0d0)) +(macrolet ((def (type &rest args) + `(deftransform * ((x y) (,type (constant-arg (member ,@args))) * + ;; Beware the SNaN! + :policy (zerop float-accuracy)) + "optimize multiplication by one" + (let ((y (lvar-value y))) + (if (minusp y) + '(%negate x) + 'x))))) + (def * single-float 1.0 -1.0) + (def * double-float 1.0d0 -1.0d0)) + +;;; Return the reciprocal of X if it can be represented exactly, NIL otherwise. +(defun maybe-exact-reciprocal (x) + (unless (zerop x) + (multiple-value-bind (significand exponent sign) + ;; Signals an error for NaNs and infinities. + (handler-case (integer-decode-float x) + (error () (return-from maybe-exact-reciprocal nil))) + (let ((expected (/ sign significand (expt 2 exponent)))) + (let ((reciprocal (/ 1 x))) + (multiple-value-bind (significand exponent sign) (integer-decode-float reciprocal) + (when (eql expected (* sign significand (expt 2 exponent))) + reciprocal))))))) + +;;; Replace constant division by multiplication with exact reciprocal, +;;; if one exists. +(macrolet ((def (type) + `(deftransform / ((x y) (,type (constant-arg ,type)) * + :node node) + "convert to multiplication by reciprocal" + (let ((n (lvar-value y))) + (if (policy node (zerop float-accuracy)) + `(* x ,(/ n)) + (let ((r (maybe-exact-reciprocal n))) + (if r + `(* x ,r) + (give-up-ir1-transform + "~S does not have an exact reciprocal" + n)))))))) + (def single-float) + (def double-float)) ;;; Optimize addition and subsctraction of zero (macrolet ((def (op type &rest args) diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp index 0d27fe2..089871f 100644 --- a/tests/compiler.pure.lisp +++ b/tests/compiler.pure.lisp @@ -2973,37 +2973,44 @@ (let ((* "fooo")) (test '(integer 4 4) '(length *) t)))) -(with-test (:name :float-division-by-one) - (flet ((test (lambda-form arg &optional (result arg)) - (let* ((fun1 (compile nil lambda-form)) - (fun2 (funcall (compile nil `(lambda () - (declare (optimize (sb-c::float-accuracy 0))) - ,lambda-form)))) - (disassembly1 (with-output-to-string (s) - (disassemble fun1 :stream s))) - (disassembly2 (with-output-to-string (s) - (disassemble fun2 :stream s)))) +(with-test (:name :float-division-using-exact-reciprocal) + (flet ((test (lambda-form arg res &key (check-insts t)) + (let* ((fun (compile nil lambda-form)) + (disassembly (with-output-to-string (s) + (disassemble fun :stream s)))) ;; Let's make sure there is no division at runtime: for x86 and ;; x86-64 that implies an FDIV, DIVSS, or DIVSD instruction, so ;; look for DIV in the disassembly. It's a terrible KLUDGE, but ;; it works. #+(or x86 x86-64) - (assert (and (not (search "DIV" disassembly1)) - (not (search "DIV" disassembly2)))) - (assert (eql result (funcall fun1 arg))) - (assert (eql result (funcall fun2 arg)))))) - (test `(lambda (x) (declare (single-float x)) (/ x 1)) 123.45) - (test `(lambda (x) (declare (single-float x)) (/ x -1)) 123.45 -123.45) - (test `(lambda (x) (declare (single-float x)) (/ x 1.0)) 543.21) - (test `(lambda (x) (declare (single-float x)) (/ x -1.0)) 543.21 -543.21) - (test `(lambda (x) (declare (single-float x)) (/ x 1.0d0)) 42.00 42.d0) - (test `(lambda (x) (declare (single-float x)) (/ x -1.0d0)) 42.00 -42.d0) - (test `(lambda (x) (declare (double-float x)) (/ x 1)) 123.45d0) - (test `(lambda (x) (declare (double-float x)) (/ x -1)) 123.45d0 -123.45d0) - (test `(lambda (x) (declare (double-float x)) (/ x 1.0)) 543.21d0) - (test `(lambda (x) (declare (double-float x)) (/ x -1.0)) 543.21d0 -543.21d0) - (test `(lambda (x) (declare (double-float x)) (/ x 1.0d0)) 42.d0) - (test `(lambda (x) (declare (double-float x)) (/ x -1.0d0)) 42.d0 -42.0d0))) + (when check-insts + (assert (not (search "DIV" disassembly)))) + ;; No generic arithmetic! + (assert (not (search "GENERIC" disassembly))) + (assert (eql res (funcall fun arg)))))) + (dolist (c '(128 64 32 16 8 4 2 1 1/2 1/4 1/8 1/16 1/32 1/64)) + (dolist (type '(single-float double-float)) + (let* ((cf (coerce c type)) + (arg (- (random (* 2 cf)) cf)) + (r1 (eval `(/ ,arg ,cf))) + (r2 (eval `(/ ,arg ,(- cf))))) + (test `(lambda (x) (declare (,type x)) (/ x ,cf)) arg r1) + (test `(lambda (x) (declare (,type x)) (/ x ,(- cf))) arg r2) + ;; rational args should get optimized as well + (test `(lambda (x) (declare (,type x)) (/ x ,c)) arg r1) + (test `(lambda (x) (declare (,type x)) (/ x ,(- c))) arg r2)))) + ;; Also check that inexact reciprocals (1) are not used by default (2) are + ;; used with FLOAT-ACCURACY=0. + (dolist (type '(single-float double-float)) + (let ((trey (coerce 3 type)) + (one (coerce 1 type))) + (test `(lambda (x) (declare (,type x)) (/ x 3)) trey one + :check-insts nil) + (test `(lambda (x) + (declare (,type x) + (optimize (sb-c::float-accuracy 0))) + (/ x 3)) + trey (eval `(* ,trey (/ ,trey)))))))) (with-test (:name :float-multiplication-by-one) (flet ((test (lambda-form arg &optional (result arg)) @@ -3015,27 +3022,24 @@ (disassemble fun1 :stream s))) (disassembly2 (with-output-to-string (s) (disassemble fun2 :stream s)))) - ;; Let's make sure there is no multiplication at runtime: for x86 - ;; and x86-64 that implies an FMUL, MULSS, or MULSD instruction, - ;; so look for MUL in the disassembly. It's a terrible KLUDGE, - ;; but it works. + ;; Multiplication at runtime should be eliminated only with + ;; FLOAT-ACCURACY=0. (To catch SNaNs.) #+(or x86 x86-64) - (assert (and (not (search "MUL" disassembly1)) + (assert (and (search "MUL" disassembly1) (not (search "MUL" disassembly2)))) + ;; Not generic arithmetic, please! + (assert (and (not (search "GENERIC" disassembly1)) + (not (search "GENERIC" disassembly2)))) (assert (eql result (funcall fun1 arg))) (assert (eql result (funcall fun2 arg)))))) - (test `(lambda (x) (declare (single-float x)) (* x 1)) 123.45) - (test `(lambda (x) (declare (single-float x)) (* x -1)) 123.45 -123.45) - (test `(lambda (x) (declare (single-float x)) (* x 1.0)) 543.21) - (test `(lambda (x) (declare (single-float x)) (* x -1.0)) 543.21 -543.21) - (test `(lambda (x) (declare (single-float x)) (* x 1.0d0)) 42.00 42.d0) - (test `(lambda (x) (declare (single-float x)) (* x -1.0d0)) 42.00 -42.d0) - (test `(lambda (x) (declare (double-float x)) (* x 1)) 123.45d0) - (test `(lambda (x) (declare (double-float x)) (* x -1)) 123.45d0 -123.45d0) - (test `(lambda (x) (declare (double-float x)) (* x 1.0)) 543.21d0) - (test `(lambda (x) (declare (double-float x)) (* x -1.0)) 543.21d0 -543.21d0) - (test `(lambda (x) (declare (double-float x)) (* x 1.0d0)) 42.d0) - (test `(lambda (x) (declare (double-float x)) (* x -1.0d0)) 42.d0 -42.0d0))) + (dolist (type '(single-float double-float)) + (let* ((one (coerce 1 type)) + (arg (random (* 2 one))) + (-r (- arg))) + (test `(lambda (x) (declare (,type x)) (* x 1)) arg) + (test `(lambda (x) (declare (,type x)) (* x -1)) arg -r) + (test `(lambda (x) (declare (,type x)) (* x ,one)) arg) + (test `(lambda (x) (declare (,type x)) (* x ,(- one))) arg -r))))) (with-test (:name :float-addition-of-zero) (flet ((test (lambda-form arg &optional (result arg)) diff --git a/version.lisp-expr b/version.lisp-expr index e5d5a8b..2ee8655 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.30.7" +"1.0.30.8" -- 1.7.10.4