1.0.30.38: faster TRUNCATE on floats

author Nikodemus Siivola <nikodemus@random-state.net>

Thu, 6 Aug 2009 12:52:58 +0000 (12:52 +0000)

committer Nikodemus Siivola <nikodemus@random-state.net>

Thu, 6 Aug 2009 12:52:58 +0000 (12:52 +0000)
author Nikodemus Siivola <nikodemus@random-state.net>
Thu, 6 Aug 2009 12:52:58 +0000 (12:52 +0000)
committer Nikodemus Siivola <nikodemus@random-state.net>
Thu, 6 Aug 2009 12:52:58 +0000 (12:52 +0000)
diff --git a/NEWS b/NEWS

index b9d2bb4..c1e06eb 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,8 @@ changes relative to sbcl-1.0.30:
    * optimization: COERCE to VECTOR, STRING, SIMPLE-STRING and recognizable
      one-dimenstional subtypes of ARRAY is upto 70% faster when the coercion is
      actually needed.
+  * optimization: TRUNCATE on known single- and double-floats is upto 25%
+    faster.
    * optimization: division of floating point numbers by constants uses
      multiplication by reciprocal when an exact reciprocal exists.
    * optimization: multiplication of single- and double-floats floats by
diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr

index 64f90e8..b0caa93 100644 (file)
--- a/package-data-list.lisp-expr
+++ b/package-data-list.lisp-expr
@@ -1309,7 +1309,11 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                 "%SIN" "%SIN-QUICK" "%SINGLE-FLOAT"
                 "%SINH" "%SQRT" "%SXHASH-SIMPLE-STRING"
                 "%SXHASH-SIMPLE-SUBSTRING" "%TAN" "%TAN-QUICK" "%TANH"
-               "%UNARY-ROUND" "%UNARY-TRUNCATE" "%UNARY-FTRUNCATE"
+               "%UNARY-ROUND"
+               "%UNARY-TRUNCATE"
+               "%UNARY-TRUNCATE/SINGLE-FLOAT"
+               "%UNARY-TRUNCATE/DOUBLE-FLOAT"
+               "%UNARY-FTRUNCATE"
                 "%WITH-ARRAY-DATA"
                 "%WITH-ARRAY-DATA/FP"
                 "%WITH-ARRAY-DATA-MACRO"
diff --git a/src/code/float.lisp b/src/code/float.lisp

index a8dd5f0..c8aee59 100644 (file)
--- a/src/code/float.lisp
+++ b/src/code/float.lisp
@@ -695,60 +695,58 @@
              (setq shifted-num (ash shifted-num -1))
              (incf scale)))))))
  
-#|
-These might be useful if we ever have a machine without float/integer
-conversion hardware. For now, we'll use special ops that
-uninterruptibly frob the rounding modes & do ieee round-to-integer.
-
-;;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE
-;;; and the result is known to be a fixnum. We can avoid some generic
-;;; arithmetic in this case.
-(defun %unary-truncate-single-float/fixnum (x)
-  (declare (single-float x) (values fixnum))
-  (locally (declare (optimize (speed 3) (safety 0)))
-    (let* ((bits (single-float-bits x))
-           (exp (ldb sb!vm:single-float-exponent-byte bits))
-           (frac (logior (ldb sb!vm:single-float-significand-byte bits)
-                         sb!vm:single-float-hidden-bit))
-           (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias)))
-      (when (> exp sb!vm:single-float-normal-exponent-max)
-        (error 'floating-point-invalid-operation :operator 'truncate
-               :operands (list x)))
-      (if (<= shift (- sb!vm:single-float-digits))
-          0
-          (let ((res (ash frac shift)))
-            (declare (type (unsigned-byte 31) res))
-            (if (minusp bits)
-                (- res)
-                res))))))
-
-;;; Double-float version of this operation (see above single op).
-(defun %unary-truncate-double-float/fixnum (x)
-  (declare (double-float x) (values fixnum))
-  (locally (declare (optimize (speed 3) (safety 0)))
-    (let* ((hi-bits (double-float-high-bits x))
-           (exp (ldb sb!vm:double-float-exponent-byte hi-bits))
-           (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits)
-                         sb!vm:double-float-hidden-bit))
-           (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits)
-                     sb!vm:double-float-bias)))
-      (when (> exp sb!vm:double-float-normal-exponent-max)
-        (error 'floating-point-invalid-operation :operator 'truncate
-               :operands (list x)))
-      (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits))
-          0
-          (let* ((res-hi (ash frac shift))
-                 (res (if (plusp shift)
-                          (logior res-hi
-                                  (the fixnum
-                                       (ash (double-float-low-bits x)
-                                            (- shift sb!vm:n-word-bits))))
-                          res-hi)))
-            (declare (type (unsigned-byte 31) res-hi res))
-            (if (minusp hi-bits)
-                (- res)
-                res))))))
-|#
+;;; These might be useful if we ever have a machine without float/integer
+;;; conversion hardware. For now, we'll use special ops that
+;;; uninterruptibly frob the rounding modes & do ieee round-to-integer.
+#+nil
+(progn
+  ;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE
+  ;; and the result is known to be a fixnum. We can avoid some generic
+  ;; arithmetic in this case.
+  (defun %unary-truncate-single-float/fixnum (x)
+    (declare (single-float x) (values fixnum))
+    (locally (declare (optimize (speed 3) (safety 0)))
+      (let* ((bits (single-float-bits x))
+             (exp (ldb sb!vm:single-float-exponent-byte bits))
+             (frac (logior (ldb sb!vm:single-float-significand-byte bits)
+                           sb!vm:single-float-hidden-bit))
+             (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias)))
+        (when (> exp sb!vm:single-float-normal-exponent-max)
+          (error 'floating-point-invalid-operation :operator 'truncate
+                 :operands (list x)))
+        (if (<= shift (- sb!vm:single-float-digits))
+            0
+            (let ((res (ash frac shift)))
+              (declare (type (unsigned-byte 31) res))
+              (if (minusp bits)
+                  (- res)
+                  res))))))
+  ;; Double-float version of this operation (see above single op).
+  (defun %unary-truncate-double-float/fixnum (x)
+    (declare (double-float x) (values fixnum))
+    (locally (declare (optimize (speed 3) (safety 0)))
+      (let* ((hi-bits (double-float-high-bits x))
+             (exp (ldb sb!vm:double-float-exponent-byte hi-bits))
+             (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits)
+                           sb!vm:double-float-hidden-bit))
+             (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits)
+                       sb!vm:double-float-bias)))
+        (when (> exp sb!vm:double-float-normal-exponent-max)
+          (error 'floating-point-invalid-operation :operator 'truncate
+                 :operands (list x)))
+        (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits))
+            0
+            (let* ((res-hi (ash frac shift))
+                   (res (if (plusp shift)
+                            (logior res-hi
+                                    (the fixnum
+                                      (ash (double-float-low-bits x)
+                                           (- shift sb!vm:n-word-bits))))
+                            res-hi)))
+              (declare (type (unsigned-byte 31) res-hi res))
+              (if (minusp hi-bits)
+                  (- res)
+                  res)))))))
  
  ;;; This function is called when we are doing a truncate without any funky
  ;;; divisor, i.e. converting a float or ratio to an integer. Note that we do
@@ -774,6 +772,24 @@ uninterruptibly frob the rounding modes & do ieee round-to-integer.
                   (- res)
                   res)))))))
  
+;;; Specialized versions for floats.
+(macrolet ((def (type name)
+             `(defun ,name (number)
+                (if (< ,(coerce most-negative-fixnum type)
+                       number
+                       ,(coerce most-positive-fixnum type))
+                    (truly-the fixnum (,name number))
+                    ;; General -- slow -- case.
+                    (multiple-value-bind (bits exp) (integer-decode-float number)
+                      (let ((res (ash bits exp)))
+                        (if (minusp number)
+                            (- res)
+                            res)))))))
+  (def single-float %unary-truncate/single-float)
+  (def double-float %unary-truncate/double-float)
+  #!+long-float
+  (def double-float %unary-truncate/long-float))
+
  ;;; Similar to %UNARY-TRUNCATE, but rounds to the nearest integer. If we
  ;;; can't use the round primitive, then we do our own round-to-nearest on the
  ;;; result of i-d-f. [Note that this rounding will really only happen with
diff --git a/src/compiler/alpha/float.lisp b/src/compiler/alpha/float.lisp

index 6ed495c..4134382 100644 (file)
--- a/src/compiler/alpha/float.lisp
+++ b/src/compiler/alpha/float.lisp
@@ -573,8 +573,8 @@
                    (current-nfp-tn vop))))
            (inst excb)
            ))))
-  (frob %unary-truncate single-reg single-float cvttq/c_sv t)
-  (frob %unary-truncate double-reg double-float cvttq/c_sv)
+  (frob %unary-truncate/single-float single-reg single-float cvttq/c_sv t)
+  (frob %unary-truncate/double-float double-reg double-float cvttq/c_sv)
    (frob %unary-round single-reg single-float cvttq_sv t)
    (frob %unary-round double-reg double-float cvttq_sv))
  
diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp

index 6c73bf3..6a51a47 100644 (file)
--- a/src/compiler/float-tran.lisp
+++ b/src/compiler/float-tran.lisp
@@ -15,8 +15,10 @@
  \f
  ;;;; coercions
  
-(defknown %single-float (real) single-float (movable foldable))
-(defknown %double-float (real) double-float (movable foldable))
+(defknown %single-float (real) single-float
+    (movable foldable))
+(defknown %double-float (real) double-float
+    (movable foldable))
  
  (deftransform float ((n f) (* single-float) *)
    '(%single-float n))
@@ -1492,15 +1494,48 @@
    (define-frobs truncate %unary-truncate)
    (define-frobs round %unary-round))
  
-;;; Convert (TRUNCATE x y) to the obvious implementation.  We only want
-;;; this when under certain conditions and let the generic TRUNCATE
-;;; handle the rest.  (Note: if Y = 1, the divide and multiply by Y
-;;; should be removed by other DEFTRANSFORMs.)
-(deftransform truncate ((x &optional y)
-                        (float &optional (or float integer)))
-  (let ((defaulted-y (if y 'y 1)))
-    `(let ((res (%unary-truncate (/ x ,defaulted-y))))
-       (values res (- x (* ,defaulted-y res))))))
+(deftransform %unary-truncate ((x) (single-float))
+  `(%unary-truncate/single-float x))
+(deftransform %unary-truncate ((x) (double-float))
+  `(%unary-truncate/double-float x))
+
+;;; Convert (TRUNCATE x y) to the obvious implementation.
+;;;
+;;; ...plus hair: Insert explicit coercions to appropriate float types: Python
+;;; is reluctant it generate explicit integer->float coercions due to
+;;; precision issues (see SAFE-SINGLE-COERCION-P &co), but this is not an
+;;; issue here as there is no DERIVE-TYPE optimizer on specialized versions of
+;;; %UNARY-TRUNCATE, so the derived type of TRUNCATE remains the best we can
+;;; do here -- which is fine. Also take care not to add unnecassary division
+;;; or multiplication by 1, since we are not able to always eliminate them,
+;;; depending on FLOAT-ACCURACY. Finally, leave out the secondary value when
+;;; we know it is unused: COERCE is not flushable.
+(macrolet ((def (type other-float-arg-types)
+             (let ((unary (symbolicate "%UNARY-TRUNCATE/" type))
+                   (coerce (symbolicate "%" type)))
+               `(deftransform truncate ((x &optional y)
+                                        (,type
+                                         &optional (or ,type ,@other-float-arg-types integer))
+                                        * :result result)
+                  (let ((result-type (lvar-type result)))
+                    (if (or (not y)
+                            (and (constant-lvar-p y) (= 1 (lvar-value y))))
+                        (if (values-type-p result-type)
+                            `(let ((res (,',unary x)))
+                               (values res (- x (,',coerce res))))
+                            `(let ((res (,',unary x)))
+                               ;; Dummy secondary value!
+                               (values res x)))
+                        (if (values-type-p result-type)
+                            `(let* ((f (,',coerce y))
+                                    (res (,',unary (/ x f))))
+                               (values res (- x (* f (,',coerce res)))))
+                            `(let* ((f (,',coerce y))
+                                    (res (,',unary (/ x f))))
+                               ;; Dummy secondary value!
+                               (values res x)))))))))
+  (def single-float ())
+  (def double-float (single-float)))
  
  (deftransform floor ((number &optional divisor)
                       (float &optional (or integer float)))
diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp

index 25a45dc..66d8f46 100644 (file)
--- a/src/compiler/fndb.lisp
+++ b/src/compiler/fndb.lisp
@@ -1387,6 +1387,9 @@
  \f
  ;;;; magical compiler frobs
  
+(defknown %unary-truncate/single-float (single-float) integer (movable foldable flushable))
+(defknown %unary-truncate/double-float (double-float) integer (movable foldable flushable))
+
  ;;; We can't fold this in general because of SATISFIES. There is a
  ;;; special optimizer anyway.
  (defknown %typep (t (or type-specifier ctype)) boolean
diff --git a/src/compiler/hppa/float.lisp b/src/compiler/hppa/float.lisp

index 649c922..67445d5 100644 (file)
--- a/src/compiler/hppa/float.lisp
+++ b/src/compiler/hppa/float.lisp
@@ -613,9 +613,9 @@
                        (loadw y nfp (tn-offset stack-tn))))))))
    (frob %unary-round single-reg single-float fcnvfx "inline float round")
    (frob %unary-round double-reg double-float fcnvfx "inline float round")
-  (frob %unary-truncate single-reg single-float fcnvfxt
+  (frob %unary-truncate/single-float single-reg single-float fcnvfxt
      "inline float truncate")
-  (frob %unary-truncate double-reg double-float fcnvfxt
+  (frob %unary-truncate/double-float double-reg double-float fcnvfxt
      "inline float truncate"))
  
  (define-vop (make-single-float)
diff --git a/src/compiler/ppc/float.lisp b/src/compiler/ppc/float.lisp

index de01833..5b008cd 100644 (file)
--- a/src/compiler/ppc/float.lisp
+++ b/src/compiler/ppc/float.lisp
@@ -547,8 +547,8 @@
                          (* (tn-offset stack-temp) n-word-bytes))
                    (inst lwz y (current-nfp-tn vop)
                          (+ 4 (* (tn-offset stack-temp) n-word-bytes)))))))
-  (frob %unary-truncate single-reg single-float fctiwz)
-  (frob %unary-truncate double-reg double-float fctiwz)
+  (frob %unary-truncate/single-float single-reg single-float fctiwz)
+  (frob %unary-truncate/double-float double-reg double-float fctiwz)
    (frob %unary-round single-reg single-float fctiw)
    (frob %unary-round double-reg double-float fctiw))
  
diff --git a/src/compiler/sparc/float.lisp b/src/compiler/sparc/float.lisp

index 181a6a2..08e431b 100644 (file)
--- a/src/compiler/sparc/float.lisp
+++ b/src/compiler/sparc/float.lisp
@@ -935,10 +935,10 @@
                             (* (tn-offset stack-temp) n-word-bytes))
                       (inst ld y (current-nfp-tn vop)
                             (* (tn-offset stack-temp) n-word-bytes))))))))
-  (frob %unary-truncate single-reg single-float fstoi)
-  (frob %unary-truncate double-reg double-float fdtoi)
+  (frob %unary-truncate/single-float single-reg single-float fstoi)
+  (frob %unary-truncate/double-float double-reg double-float fdtoi)
    #!+long-float
-  (frob %unary-truncate long-reg long-float fqtoi)
+  (frob %unary-truncate/long-float long-reg long-float fqtoi)
    ;; KLUDGE -- these two forms were protected by #-sun4.
    ;; (frob %unary-round single-reg single-float fstoir)
    ;; (frob %unary-round double-reg double-float fdtoir)
diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp

index b0d1b77..ab615bb 100644 (file)
--- a/src/compiler/x86-64/float.lisp
+++ b/src/compiler/x86-64/float.lisp
@@ -1102,8 +1102,8 @@
                            (signed-reg
                             (inst ,inst y x)
                             ))))))
-  (frob %unary-truncate cvttss2si single-reg single-float nil)
-  (frob %unary-truncate cvttsd2si double-reg double-float nil)
+  (frob %unary-truncate/single-float cvttss2si single-reg single-float nil)
+  (frob %unary-truncate/double-float cvttsd2si double-reg double-float nil)
  
    (frob %unary-round cvtss2si single-reg single-float t)
    (frob %unary-round cvtsd2si double-reg double-float t))
diff --git a/src/compiler/x86/float.lisp b/src/compiler/x86/float.lisp

index bde111a..f0c0921 100644 (file)
--- a/src/compiler/x86/float.lisp
+++ b/src/compiler/x86/float.lisp
@@ -1732,10 +1732,10 @@
                        (inst mov y stack-temp)))
                     ,@(unless round-p
                        '((inst fldcw scw)))))))))
-  (frob %unary-truncate single-reg single-float nil)
-  (frob %unary-truncate double-reg double-float nil)
+  (frob %unary-truncate/single-float single-reg single-float nil)
+  (frob %unary-truncate/double-float double-reg double-float nil)
    #!+long-float
-  (frob %unary-truncate long-reg long-float nil)
+  (frob %unary-truncate/long-float long-reg long-float nil)
    (frob %unary-round single-reg single-float t)
    (frob %unary-round double-reg double-float t)
    #!+long-float
@@ -1779,10 +1779,10 @@
                  (inst add esp-tn 4)
                  ,@(unless round-p
                     '((inst fldcw scw)))))))
-  (frob %unary-truncate single-reg single-float nil)
-  (frob %unary-truncate double-reg double-float nil)
+  (frob %unary-truncate/single-float single-reg single-float nil)
+  (frob %unary-truncate/double-float double-reg double-float nil)
    #!+long-float
-  (frob %unary-truncate long-reg long-float nil)
+  (frob %unary-truncate/long-float long-reg long-float nil)
    (frob %unary-round single-reg single-float t)
    (frob %unary-round double-reg double-float t)
    #!+long-float
diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp

index b3ef02c..e8309c7 100644 (file)
--- a/tests/compiler.pure.lisp
+++ b/tests/compiler.pure.lisp
@@ -3232,3 +3232,18 @@
                               (vector i i i))
                             t))))
      (ctu:assert-no-consing (funcall f))))
+
+(with-test (:name :truncate-float)
+  (let ((s (compile nil `(lambda (x)
+                           (declare (single-float x))
+                           (truncate x))))
+        (d (compile nil `(lambda (x)
+                           (declare (double-float x))
+                           (truncate x)))))
+    ;; Check that there is no generic arithmetic
+    (assert (not (search "GENERIC"
+                         (with-output-to-string (out)
+                           (disassemble s :stream out)))))
+    (assert (not (search "GENERIC"
+                         (with-output-to-string (out)
+                           (disassemble d :stream out)))))))
diff --git a/version.lisp-expr b/version.lisp-expr

index caf56fd..874d18d 100644 (file)
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
  ;;; checkins which aren't released. (And occasionally for internal
  ;;; versions, especially for internal versions off the main CVS
  ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.30.37"
+"1.0.30.38"
author	Nikodemus Siivola <nikodemus@random-state.net>
	Thu, 6 Aug 2009 12:52:58 +0000 (12:52 +0000)
committer	Nikodemus Siivola <nikodemus@random-state.net>
	Thu, 6 Aug 2009 12:52:58 +0000 (12:52 +0000)
NEWS		patch \| blob \| history
package-data-list.lisp-expr		patch \| blob \| history
src/code/float.lisp		patch \| blob \| history
src/compiler/alpha/float.lisp		patch \| blob \| history
src/compiler/float-tran.lisp		patch \| blob \| history
src/compiler/fndb.lisp		patch \| blob \| history
src/compiler/hppa/float.lisp		patch \| blob \| history
src/compiler/ppc/float.lisp		patch \| blob \| history
src/compiler/sparc/float.lisp		patch \| blob \| history
src/compiler/x86-64/float.lisp		patch \| blob \| history
src/compiler/x86/float.lisp		patch \| blob \| history
tests/compiler.pure.lisp		patch \| blob \| history
version.lisp-expr		patch \| blob \| history