1.0.30.5: optimize some floating point operations

author Nikodemus Siivola <nikodemus@random-state.net>

Tue, 28 Jul 2009 17:12:25 +0000 (17:12 +0000)

committer Nikodemus Siivola <nikodemus@random-state.net>

Tue, 28 Jul 2009 17:12:25 +0000 (17:12 +0000)
author Nikodemus Siivola <nikodemus@random-state.net>
Tue, 28 Jul 2009 17:12:25 +0000 (17:12 +0000)
committer Nikodemus Siivola <nikodemus@random-state.net>
Tue, 28 Jul 2009 17:12:25 +0000 (17:12 +0000)
diff --git a/NEWS b/NEWS

index f7630ec..b111249 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,10 @@ changes relative to sbcl-1.0.30:
    * new feature: experimental :EMIT-CFASL parameter to COMPILE-FILE can
      be used to output toplevel compile-time effects into a separate .CFASL
      file.
+  * optimization: multiplication and division of single- and double-floats
+    with constant +/-one has been optimized.
+  * optimization: multiplication of single- and double-floats floats by
+    constant two has been optimized.
    * bug fix: moderately complex combinations of inline expansions could
      be miscompiled if the result was declared to be dynamic extent.
    * bug fix: in some cases no compiler note about failure to stack allocate
diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp

index c44d149..3016282 100644 (file)
--- a/src/compiler/float-tran.lisp
+++ b/src/compiler/float-tran.lisp
@@ -332,6 +332,36 @@
    (%deftransform x '(function (double-float single-float) *)
                   #'float-contagion-arg2))
  
+;;; Optimize division and multiplication by one and minus one.
+(macrolet ((def (op type &rest args)
+             `(deftransform ,op ((x y) (,type (constant-arg (member ,@args))))
+                (if (minusp (lvar-value y))
+                    '(+ (%negate x) ,(coerce 0 type))
+                    '(+ x ,(coerce 0 type))))))
+  (def / single-float 1 1.0 -1 -1.0)
+  (def * single-float 1 1.0 -1 -1.0)
+  (def / double-float 1 1.0 1.0d0 -1 -1.0 -1.0d0)
+  (def * double-float 1 1.0 1.0d0 -1 -1.0 -1.0d0))
+
+;;; Optimize addition and subsctraction of zero
+(macrolet ((def (op type &rest args)
+             `(deftransform ,op ((x y) (,type (constant-arg (member ,@args))) *
+                                 ;; Beware the SNaN!
+                                 :policy (zerop float-accuracy))
+                'x)))
+  ;; No signed zeros, thanks.
+  (def + single-float 0 0.0)
+  (def - single-float 0 0.0)
+  (def + double-float 0 0.0 0.0d0)
+  (def - double-float 0 0.0 0.0d0))
+
+;;; On most platforms (+ x x) is faster than (* x 2)
+(macrolet ((def (type &rest args)
+             `(deftransform * ((x y) (,type (constant-arg (member ,@args))))
+                '(+ x x))))
+  (def single-float 2 2.0)
+  (def double-float 2 2.0 2.0d0))
+
  ;;; Prevent ZEROP, PLUSP, and MINUSP from losing horribly. We can't in
  ;;; general float rational args to comparison, since Common Lisp
  ;;; semantics says we are supposed to compare as rationals, but we can
diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp

index afe9767..0d27fe2 100644 (file)
--- a/tests/compiler.pure.lisp
+++ b/tests/compiler.pure.lisp
@@ -2972,3 +2972,157 @@
        (test '(integer 11 11) '(+ * 1) nil))
      (let ((* "fooo"))
        (test '(integer 4 4) '(length *) t))))
+
+(with-test (:name :float-division-by-one)
+  (flet ((test (lambda-form arg &optional (result arg))
+           (let* ((fun1 (compile nil lambda-form))
+                  (fun2 (funcall (compile nil `(lambda ()
+                                                 (declare (optimize (sb-c::float-accuracy 0)))
+                                                 ,lambda-form))))
+                  (disassembly1 (with-output-to-string (s)
+                                  (disassemble fun1 :stream s)))
+                  (disassembly2 (with-output-to-string (s)
+                                  (disassemble fun2 :stream s))))
+             ;; Let's make sure there is no division at runtime: for x86 and
+             ;; x86-64 that implies an FDIV, DIVSS, or DIVSD instruction, so
+             ;; look for DIV in the disassembly. It's a terrible KLUDGE, but
+             ;; it works.
+             #+(or x86 x86-64)
+             (assert (and (not (search "DIV" disassembly1))
+                          (not (search "DIV" disassembly2))))
+             (assert (eql result (funcall fun1 arg)))
+             (assert (eql result (funcall fun2 arg))))))
+    (test `(lambda (x) (declare (single-float x)) (/ x 1)) 123.45)
+    (test `(lambda (x) (declare (single-float x)) (/ x -1)) 123.45 -123.45)
+    (test `(lambda (x) (declare (single-float x)) (/ x 1.0)) 543.21)
+    (test `(lambda (x) (declare (single-float x)) (/ x -1.0)) 543.21 -543.21)
+    (test `(lambda (x) (declare (single-float x)) (/ x 1.0d0)) 42.00 42.d0)
+    (test `(lambda (x) (declare (single-float x)) (/ x -1.0d0)) 42.00 -42.d0)
+    (test `(lambda (x) (declare (double-float x)) (/ x 1)) 123.45d0)
+    (test `(lambda (x) (declare (double-float x)) (/ x -1)) 123.45d0 -123.45d0)
+    (test `(lambda (x) (declare (double-float x)) (/ x 1.0)) 543.21d0)
+    (test `(lambda (x) (declare (double-float x)) (/ x -1.0)) 543.21d0 -543.21d0)
+    (test `(lambda (x) (declare (double-float x)) (/ x 1.0d0)) 42.d0)
+    (test `(lambda (x) (declare (double-float x)) (/ x -1.0d0)) 42.d0 -42.0d0)))
+
+(with-test (:name :float-multiplication-by-one)
+  (flet ((test (lambda-form arg &optional (result arg))
+           (let* ((fun1 (compile nil lambda-form))
+                  (fun2 (funcall (compile nil `(lambda ()
+                                                 (declare (optimize (sb-c::float-accuracy 0)))
+                                                 ,lambda-form))))
+                  (disassembly1 (with-output-to-string (s)
+                                  (disassemble fun1 :stream s)))
+                  (disassembly2 (with-output-to-string (s)
+                                  (disassemble fun2 :stream s))))
+             ;; Let's make sure there is no multiplication at runtime: for x86
+             ;; and x86-64 that implies an FMUL, MULSS, or MULSD instruction,
+             ;; so look for MUL in the disassembly. It's a terrible KLUDGE,
+             ;; but it works.
+             #+(or x86 x86-64)
+             (assert (and (not (search "MUL" disassembly1))
+                          (not (search "MUL" disassembly2))))
+             (assert (eql result (funcall fun1 arg)))
+             (assert (eql result (funcall fun2 arg))))))
+    (test `(lambda (x) (declare (single-float x)) (* x 1)) 123.45)
+    (test `(lambda (x) (declare (single-float x)) (* x -1)) 123.45 -123.45)
+    (test `(lambda (x) (declare (single-float x)) (* x 1.0)) 543.21)
+    (test `(lambda (x) (declare (single-float x)) (* x -1.0)) 543.21 -543.21)
+    (test `(lambda (x) (declare (single-float x)) (* x 1.0d0)) 42.00 42.d0)
+    (test `(lambda (x) (declare (single-float x)) (* x -1.0d0)) 42.00 -42.d0)
+    (test `(lambda (x) (declare (double-float x)) (* x 1)) 123.45d0)
+    (test `(lambda (x) (declare (double-float x)) (* x -1)) 123.45d0 -123.45d0)
+    (test `(lambda (x) (declare (double-float x)) (* x 1.0)) 543.21d0)
+    (test `(lambda (x) (declare (double-float x)) (* x -1.0)) 543.21d0 -543.21d0)
+    (test `(lambda (x) (declare (double-float x)) (* x 1.0d0)) 42.d0)
+    (test `(lambda (x) (declare (double-float x)) (* x -1.0d0)) 42.d0 -42.0d0)))
+
+(with-test (:name :float-addition-of-zero)
+  (flet ((test (lambda-form arg &optional (result arg))
+           (let* ((fun1 (compile nil lambda-form))
+                  (fun2 (funcall (compile nil `(lambda ()
+                                                 (declare (optimize (sb-c::float-accuracy 0)))
+                                                 ,lambda-form))))
+                  (disassembly1 (with-output-to-string (s)
+                                  (disassemble fun1 :stream s)))
+                  (disassembly2 (with-output-to-string (s)
+                                  (disassemble fun2 :stream s))))
+             ;; Let's make sure there is no addition at runtime: for x86 and
+             ;; x86-64 that implies an FADD, ADDSS, or ADDSD instruction, so
+             ;; look for the ADDs in the disassembly. It's a terrible KLUDGE,
+             ;; but it works. Unless FLOAT-ACCURACY is zero, we leave the
+             ;; addition in to catch SNaNs.
+             #+x86
+             (assert (and (search "FADD" disassembly1)
+                          (not (search "FADD" disassembly2))))
+             #+x86-64
+             (let ((inst (if (typep result 'double-float)
+                             "ADDSD" "ADDSS")))
+               (assert (and (search inst disassembly1)
+                            (not (search inst disassembly2)))))
+             (assert (eql result (funcall fun1 arg)))
+             (assert (eql result (funcall fun2 arg))))))
+    (test `(lambda (x) (declare (single-float x)) (+ x 0)) 123.45)
+    (test `(lambda (x) (declare (single-float x)) (+ x 0.0)) 543.21)
+    (test `(lambda (x) (declare (single-float x)) (+ x 0.0d0)) 42.00 42.d0)
+    (test `(lambda (x) (declare (double-float x)) (+ x 0)) 123.45d0)
+    (test `(lambda (x) (declare (double-float x)) (+ x 0.0)) 543.21d0)
+    (test `(lambda (x) (declare (double-float x)) (+ x 0.0d0)) 42.d0)))
+
+(with-test (:name :float-substraction-of-zero)
+  (flet ((test (lambda-form arg &optional (result arg))
+           (let* ((fun1 (compile nil lambda-form))
+                  (fun2 (funcall (compile nil `(lambda ()
+                                                 (declare (optimize (sb-c::float-accuracy 0)))
+                                                 ,lambda-form))))
+                  (disassembly1 (with-output-to-string (s)
+                                  (disassemble fun1 :stream s)))
+                  (disassembly2 (with-output-to-string (s)
+                                  (disassemble fun2 :stream s))))
+             ;; Let's make sure there is no substraction at runtime: for x86
+             ;; and x86-64 that implies an FSUB, SUBSS, or SUBSD instruction,
+             ;; so look for SUB in the disassembly. It's a terrible KLUDGE,
+             ;; but it works. Unless FLOAT-ACCURACY is zero, we leave the
+             ;; substraction in in to catch SNaNs.
+             #+x86
+             (assert (and (search "FSUB" disassembly1)
+                          (not (search "FSUB" disassembly2))))
+             #+x86-64
+             (let ((inst (if (typep result 'double-float)
+                             "SUBSD" "SUBSS")))
+               (assert (and (search inst disassembly1)
+                            (not (search inst disassembly2)))))
+             (assert (eql result (funcall fun1 arg)))
+             (assert (eql result (funcall fun2 arg))))))
+    (test `(lambda (x) (declare (single-float x)) (- x 0)) 123.45)
+    (test `(lambda (x) (declare (single-float x)) (- x 0.0)) 543.21)
+    (test `(lambda (x) (declare (single-float x)) (- x 0.0d0)) 42.00 42.d0)
+    (test `(lambda (x) (declare (double-float x)) (- x 0)) 123.45d0)
+    (test `(lambda (x) (declare (double-float x)) (- x 0.0)) 543.21d0)
+    (test `(lambda (x) (declare (double-float x)) (- x 0.0d0)) 42.d0)))
+
+(with-test (:name :float-multiplication-by-two)
+  (flet ((test (lambda-form arg &optional (result arg))
+           (let* ((fun1 (compile nil lambda-form))
+                  (fun2 (funcall (compile nil `(lambda ()
+                                                 (declare (optimize (sb-c::float-accuracy 0)))
+                                                 ,lambda-form))))
+                  (disassembly1 (with-output-to-string (s)
+                                  (disassemble fun1 :stream s)))
+                  (disassembly2 (with-output-to-string (s)
+                                  (disassemble fun2 :stream s))))
+             ;; Let's make sure there is no multiplication at runtime: for x86
+             ;; and x86-64 that implies an FMUL, MULSS, or MULSD instruction,
+             ;; so look for MUL in the disassembly. It's a terrible KLUDGE,
+             ;; but it works.
+             #+(or x86 x86-64)
+             (assert (and (not (search "MUL" disassembly1))
+                          (not (search "MUL" disassembly2))))
+             (assert (eql result (funcall fun1 arg)))
+             (assert (eql result (funcall fun2 arg))))))
+    (test `(lambda (x) (declare (single-float x)) (* x 2)) 123.45 246.9)
+    (test `(lambda (x) (declare (single-float x)) (* x 2.0)) 543.21 1086.42)
+    (test `(lambda (x) (declare (single-float x)) (* x 2.0d0)) 42.00 84.d0)
+    (test `(lambda (x) (declare (double-float x)) (* x 2)) 123.45d0 246.9d0)
+    (test `(lambda (x) (declare (double-float x)) (* x 2.0)) 543.21d0 1086.42d0)
+    (test `(lambda (x) (declare (double-float x)) (* x 2.0d0)) 42.0d0 84.0d0)))
diff --git a/version.lisp-expr b/version.lisp-expr

index 6e8e17b..a7bca07 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
  ;;; checkins which aren't released. (And occasionally for internal
  ;;; versions, especially for internal versions off the main CVS
  ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.30.4"
+"1.0.30.5"
author	Nikodemus Siivola <nikodemus@random-state.net>
	Tue, 28 Jul 2009 17:12:25 +0000 (17:12 +0000)
committer	Nikodemus Siivola <nikodemus@random-state.net>
	Tue, 28 Jul 2009 17:12:25 +0000 (17:12 +0000)
NEWS		patch \| blob \| history
src/compiler/float-tran.lisp		patch \| blob \| history
tests/compiler.pure.lisp		patch \| blob \| history
version.lisp-expr		patch \| blob \| history