From 89b82a03269446741ab4b7bba8656d6e37502fe9 Mon Sep 17 00:00:00 2001
From: Nikodemus Siivola <nikodemus@random-state.net>
Date: Thu, 6 Aug 2009 12:52:58 +0000
Subject: [PATCH 1/1] 1.0.30.38: faster TRUNCATE on floats

 * Specialized %UNARY-TRUNCATE/SINGLE-FLOAT and
   %UNARY-TRUNCATE/DOUBLE-FLOAT.

 * Explicit coercions to appropriate float types in the TRUNCATE
   transforms. This gets rid of generic arithmetic in the general case
   (Python is reluctant to insert explicit integer-tofloat coercions
   for integers of unknown range due to precision issues.)

 * Since COERCE (and %SINGLE-FLOAT and %DOUBLE-FLOAT) are not
   flushable, take core not to generate leftover code in the TRUNCATE
   transform when the result lvar has a single-value type.

 * Rename %UNARY-TRUNCATE float VOPs, so that transforming to a
   specialized floating point version doesn't make use unable to
   implement it directly as a VOP when the range of the float is
   sufficiently constrained.
---
 NEWS                           |    2 +
 package-data-list.lisp-expr    |    6 +-
 src/code/float.lisp            |  124 +++++++++++++++++++++++-----------------
 src/compiler/alpha/float.lisp  |    4 +-
 src/compiler/float-tran.lisp   |   57 ++++++++++++++----
 src/compiler/fndb.lisp         |    3 +
 src/compiler/hppa/float.lisp   |    4 +-
 src/compiler/ppc/float.lisp    |    4 +-
 src/compiler/sparc/float.lisp  |    6 +-
 src/compiler/x86-64/float.lisp |    4 +-
 src/compiler/x86/float.lisp    |   12 ++--
 tests/compiler.pure.lisp       |   15 +++++
 version.lisp-expr              |    2 +-
 13 files changed, 159 insertions(+), 84 deletions(-)

diff --git a/NEWS b/NEWS
index b9d2bb4..c1e06eb 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,8 @@ changes relative to sbcl-1.0.30:
   * optimization: COERCE to VECTOR, STRING, SIMPLE-STRING and recognizable
     one-dimenstional subtypes of ARRAY is upto 70% faster when the coercion is
     actually needed.
+  * optimization: TRUNCATE on known single- and double-floats is upto 25%
+    faster.
   * optimization: division of floating point numbers by constants uses
     multiplication by reciprocal when an exact reciprocal exists.
   * optimization: multiplication of single- and double-floats floats by
diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr
index 64f90e8..b0caa93 100644
--- a/package-data-list.lisp-expr
+++ b/package-data-list.lisp-expr
@@ -1309,7 +1309,11 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%SIN" "%SIN-QUICK" "%SINGLE-FLOAT"
                "%SINH" "%SQRT" "%SXHASH-SIMPLE-STRING"
                "%SXHASH-SIMPLE-SUBSTRING" "%TAN" "%TAN-QUICK" "%TANH"
-               "%UNARY-ROUND" "%UNARY-TRUNCATE" "%UNARY-FTRUNCATE"
+               "%UNARY-ROUND"
+               "%UNARY-TRUNCATE"
+               "%UNARY-TRUNCATE/SINGLE-FLOAT"
+               "%UNARY-TRUNCATE/DOUBLE-FLOAT"
+               "%UNARY-FTRUNCATE"
                "%WITH-ARRAY-DATA"
                "%WITH-ARRAY-DATA/FP"
                "%WITH-ARRAY-DATA-MACRO"
diff --git a/src/code/float.lisp b/src/code/float.lisp
index a8dd5f0..c8aee59 100644
--- a/src/code/float.lisp
+++ b/src/code/float.lisp
@@ -695,60 +695,58 @@
             (setq shifted-num (ash shifted-num -1))
             (incf scale)))))))
 
-#|
-These might be useful if we ever have a machine without float/integer
-conversion hardware. For now, we'll use special ops that
-uninterruptibly frob the rounding modes & do ieee round-to-integer.
-
-;;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE
-;;; and the result is known to be a fixnum. We can avoid some generic
-;;; arithmetic in this case.
-(defun %unary-truncate-single-float/fixnum (x)
-  (declare (single-float x) (values fixnum))
-  (locally (declare (optimize (speed 3) (safety 0)))
-    (let* ((bits (single-float-bits x))
-           (exp (ldb sb!vm:single-float-exponent-byte bits))
-           (frac (logior (ldb sb!vm:single-float-significand-byte bits)
-                         sb!vm:single-float-hidden-bit))
-           (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias)))
-      (when (> exp sb!vm:single-float-normal-exponent-max)
-        (error 'floating-point-invalid-operation :operator 'truncate
-               :operands (list x)))
-      (if (<= shift (- sb!vm:single-float-digits))
-          0
-          (let ((res (ash frac shift)))
-            (declare (type (unsigned-byte 31) res))
-            (if (minusp bits)
-                (- res)
-                res))))))
-
-;;; Double-float version of this operation (see above single op).
-(defun %unary-truncate-double-float/fixnum (x)
-  (declare (double-float x) (values fixnum))
-  (locally (declare (optimize (speed 3) (safety 0)))
-    (let* ((hi-bits (double-float-high-bits x))
-           (exp (ldb sb!vm:double-float-exponent-byte hi-bits))
-           (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits)
-                         sb!vm:double-float-hidden-bit))
-           (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits)
-                     sb!vm:double-float-bias)))
-      (when (> exp sb!vm:double-float-normal-exponent-max)
-        (error 'floating-point-invalid-operation :operator 'truncate
-               :operands (list x)))
-      (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits))
-          0
-          (let* ((res-hi (ash frac shift))
-                 (res (if (plusp shift)
-                          (logior res-hi
-                                  (the fixnum
-                                       (ash (double-float-low-bits x)
-                                            (- shift sb!vm:n-word-bits))))
-                          res-hi)))
-            (declare (type (unsigned-byte 31) res-hi res))
-            (if (minusp hi-bits)
-                (- res)
-                res))))))
-|#
+;;; These might be useful if we ever have a machine without float/integer
+;;; conversion hardware. For now, we'll use special ops that
+;;; uninterruptibly frob the rounding modes & do ieee round-to-integer.
+#+nil
+(progn
+  ;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE
+  ;; and the result is known to be a fixnum. We can avoid some generic
+  ;; arithmetic in this case.
+  (defun %unary-truncate-single-float/fixnum (x)
+    (declare (single-float x) (values fixnum))
+    (locally (declare (optimize (speed 3) (safety 0)))
+      (let* ((bits (single-float-bits x))
+             (exp (ldb sb!vm:single-float-exponent-byte bits))
+             (frac (logior (ldb sb!vm:single-float-significand-byte bits)
+                           sb!vm:single-float-hidden-bit))
+             (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias)))
+        (when (> exp sb!vm:single-float-normal-exponent-max)
+          (error 'floating-point-invalid-operation :operator 'truncate
+                 :operands (list x)))
+        (if (<= shift (- sb!vm:single-float-digits))
+            0
+            (let ((res (ash frac shift)))
+              (declare (type (unsigned-byte 31) res))
+              (if (minusp bits)
+                  (- res)
+                  res))))))
+  ;; Double-float version of this operation (see above single op).
+  (defun %unary-truncate-double-float/fixnum (x)
+    (declare (double-float x) (values fixnum))
+    (locally (declare (optimize (speed 3) (safety 0)))
+      (let* ((hi-bits (double-float-high-bits x))
+             (exp (ldb sb!vm:double-float-exponent-byte hi-bits))
+             (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits)
+                           sb!vm:double-float-hidden-bit))
+             (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits)
+                       sb!vm:double-float-bias)))
+        (when (> exp sb!vm:double-float-normal-exponent-max)
+          (error 'floating-point-invalid-operation :operator 'truncate
+                 :operands (list x)))
+        (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits))
+            0
+            (let* ((res-hi (ash frac shift))
+                   (res (if (plusp shift)
+                            (logior res-hi
+                                    (the fixnum
+                                      (ash (double-float-low-bits x)
+                                           (- shift sb!vm:n-word-bits))))
+                            res-hi)))
+              (declare (type (unsigned-byte 31) res-hi res))
+              (if (minusp hi-bits)
+                  (- res)
+                  res)))))))
 
 ;;; This function is called when we are doing a truncate without any funky
 ;;; divisor, i.e. converting a float or ratio to an integer. Note that we do
@@ -774,6 +772,24 @@ uninterruptibly frob the rounding modes & do ieee round-to-integer.
                  (- res)
                  res)))))))
 
+;;; Specialized versions for floats.
+(macrolet ((def (type name)
+             `(defun ,name (number)
+                (if (< ,(coerce most-negative-fixnum type)
+                       number
+                       ,(coerce most-positive-fixnum type))
+                    (truly-the fixnum (,name number))
+                    ;; General -- slow -- case.
+                    (multiple-value-bind (bits exp) (integer-decode-float number)
+                      (let ((res (ash bits exp)))
+                        (if (minusp number)
+                            (- res)
+                            res)))))))
+  (def single-float %unary-truncate/single-float)
+  (def double-float %unary-truncate/double-float)
+  #!+long-float
+  (def double-float %unary-truncate/long-float))
+
 ;;; Similar to %UNARY-TRUNCATE, but rounds to the nearest integer. If we
 ;;; can't use the round primitive, then we do our own round-to-nearest on the
 ;;; result of i-d-f. [Note that this rounding will really only happen with
diff --git a/src/compiler/alpha/float.lisp b/src/compiler/alpha/float.lisp
index 6ed495c..4134382 100644
--- a/src/compiler/alpha/float.lisp
+++ b/src/compiler/alpha/float.lisp
@@ -573,8 +573,8 @@
                   (current-nfp-tn vop))))
           (inst excb)
           ))))
-  (frob %unary-truncate single-reg single-float cvttq/c_sv t)
-  (frob %unary-truncate double-reg double-float cvttq/c_sv)
+  (frob %unary-truncate/single-float single-reg single-float cvttq/c_sv t)
+  (frob %unary-truncate/double-float double-reg double-float cvttq/c_sv)
   (frob %unary-round single-reg single-float cvttq_sv t)
   (frob %unary-round double-reg double-float cvttq_sv))
 
diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp
index 6c73bf3..6a51a47 100644
--- a/src/compiler/float-tran.lisp
+++ b/src/compiler/float-tran.lisp
@@ -15,8 +15,10 @@
 
 ;;;; coercions
 
-(defknown %single-float (real) single-float (movable foldable))
-(defknown %double-float (real) double-float (movable foldable))
+(defknown %single-float (real) single-float
+    (movable foldable))
+(defknown %double-float (real) double-float
+    (movable foldable))
 
 (deftransform float ((n f) (* single-float) *)
   '(%single-float n))
@@ -1492,15 +1494,48 @@
   (define-frobs truncate %unary-truncate)
   (define-frobs round %unary-round))
 
-;;; Convert (TRUNCATE x y) to the obvious implementation.  We only want
-;;; this when under certain conditions and let the generic TRUNCATE
-;;; handle the rest.  (Note: if Y = 1, the divide and multiply by Y
-;;; should be removed by other DEFTRANSFORMs.)
-(deftransform truncate ((x &optional y)
-                        (float &optional (or float integer)))
-  (let ((defaulted-y (if y 'y 1)))
-    `(let ((res (%unary-truncate (/ x ,defaulted-y))))
-       (values res (- x (* ,defaulted-y res))))))
+(deftransform %unary-truncate ((x) (single-float))
+  `(%unary-truncate/single-float x))
+(deftransform %unary-truncate ((x) (double-float))
+  `(%unary-truncate/double-float x))
+
+;;; Convert (TRUNCATE x y) to the obvious implementation.
+;;;
+;;; ...plus hair: Insert explicit coercions to appropriate float types: Python
+;;; is reluctant it generate explicit integer->float coercions due to
+;;; precision issues (see SAFE-SINGLE-COERCION-P &co), but this is not an
+;;; issue here as there is no DERIVE-TYPE optimizer on specialized versions of
+;;; %UNARY-TRUNCATE, so the derived type of TRUNCATE remains the best we can
+;;; do here -- which is fine. Also take care not to add unnecassary division
+;;; or multiplication by 1, since we are not able to always eliminate them,
+;;; depending on FLOAT-ACCURACY. Finally, leave out the secondary value when
+;;; we know it is unused: COERCE is not flushable.
+(macrolet ((def (type other-float-arg-types)
+             (let ((unary (symbolicate "%UNARY-TRUNCATE/" type))
+                   (coerce (symbolicate "%" type)))
+               `(deftransform truncate ((x &optional y)
+                                        (,type
+                                         &optional (or ,type ,@other-float-arg-types integer))
+                                        * :result result)
+                  (let ((result-type (lvar-type result)))
+                    (if (or (not y)
+                            (and (constant-lvar-p y) (= 1 (lvar-value y))))
+                        (if (values-type-p result-type)
+                            `(let ((res (,',unary x)))
+                               (values res (- x (,',coerce res))))
+                            `(let ((res (,',unary x)))
+                               ;; Dummy secondary value!
+                               (values res x)))
+                        (if (values-type-p result-type)
+                            `(let* ((f (,',coerce y))
+                                    (res (,',unary (/ x f))))
+                               (values res (- x (* f (,',coerce res)))))
+                            `(let* ((f (,',coerce y))
+                                    (res (,',unary (/ x f))))
+                               ;; Dummy secondary value!
+                               (values res x)))))))))
+  (def single-float ())
+  (def double-float (single-float)))
 
 (deftransform floor ((number &optional divisor)
                      (float &optional (or integer float)))
diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp
index 25a45dc..66d8f46 100644
--- a/src/compiler/fndb.lisp
+++ b/src/compiler/fndb.lisp
@@ -1387,6 +1387,9 @@
 
 ;;;; magical compiler frobs
 
+(defknown %unary-truncate/single-float (single-float) integer (movable foldable flushable))
+(defknown %unary-truncate/double-float (double-float) integer (movable foldable flushable))
+
 ;;; We can't fold this in general because of SATISFIES. There is a
 ;;; special optimizer anyway.
 (defknown %typep (t (or type-specifier ctype)) boolean
diff --git a/src/compiler/hppa/float.lisp b/src/compiler/hppa/float.lisp
index 649c922..67445d5 100644
--- a/src/compiler/hppa/float.lisp
+++ b/src/compiler/hppa/float.lisp
@@ -613,9 +613,9 @@
                       (loadw y nfp (tn-offset stack-tn))))))))
   (frob %unary-round single-reg single-float fcnvfx "inline float round")
   (frob %unary-round double-reg double-float fcnvfx "inline float round")
-  (frob %unary-truncate single-reg single-float fcnvfxt
+  (frob %unary-truncate/single-float single-reg single-float fcnvfxt
     "inline float truncate")
-  (frob %unary-truncate double-reg double-float fcnvfxt
+  (frob %unary-truncate/double-float double-reg double-float fcnvfxt
     "inline float truncate"))
 
 (define-vop (make-single-float)
diff --git a/src/compiler/ppc/float.lisp b/src/compiler/ppc/float.lisp
index de01833..5b008cd 100644
--- a/src/compiler/ppc/float.lisp
+++ b/src/compiler/ppc/float.lisp
@@ -547,8 +547,8 @@
                         (* (tn-offset stack-temp) n-word-bytes))
                   (inst lwz y (current-nfp-tn vop)
                         (+ 4 (* (tn-offset stack-temp) n-word-bytes)))))))
-  (frob %unary-truncate single-reg single-float fctiwz)
-  (frob %unary-truncate double-reg double-float fctiwz)
+  (frob %unary-truncate/single-float single-reg single-float fctiwz)
+  (frob %unary-truncate/double-float double-reg double-float fctiwz)
   (frob %unary-round single-reg single-float fctiw)
   (frob %unary-round double-reg double-float fctiw))
 
diff --git a/src/compiler/sparc/float.lisp b/src/compiler/sparc/float.lisp
index 181a6a2..08e431b 100644
--- a/src/compiler/sparc/float.lisp
+++ b/src/compiler/sparc/float.lisp
@@ -935,10 +935,10 @@
                            (* (tn-offset stack-temp) n-word-bytes))
                      (inst ld y (current-nfp-tn vop)
                            (* (tn-offset stack-temp) n-word-bytes))))))))
-  (frob %unary-truncate single-reg single-float fstoi)
-  (frob %unary-truncate double-reg double-float fdtoi)
+  (frob %unary-truncate/single-float single-reg single-float fstoi)
+  (frob %unary-truncate/double-float double-reg double-float fdtoi)
   #!+long-float
-  (frob %unary-truncate long-reg long-float fqtoi)
+  (frob %unary-truncate/long-float long-reg long-float fqtoi)
   ;; KLUDGE -- these two forms were protected by #-sun4.
   ;; (frob %unary-round single-reg single-float fstoir)
   ;; (frob %unary-round double-reg double-float fdtoir)
diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp
index b0d1b77..ab615bb 100644
--- a/src/compiler/x86-64/float.lisp
+++ b/src/compiler/x86-64/float.lisp
@@ -1102,8 +1102,8 @@
                           (signed-reg
                            (inst ,inst y x)
                            ))))))
-  (frob %unary-truncate cvttss2si single-reg single-float nil)
-  (frob %unary-truncate cvttsd2si double-reg double-float nil)
+  (frob %unary-truncate/single-float cvttss2si single-reg single-float nil)
+  (frob %unary-truncate/double-float cvttsd2si double-reg double-float nil)
 
   (frob %unary-round cvtss2si single-reg single-float t)
   (frob %unary-round cvtsd2si double-reg double-float t))
diff --git a/src/compiler/x86/float.lisp b/src/compiler/x86/float.lisp
index bde111a..f0c0921 100644
--- a/src/compiler/x86/float.lisp
+++ b/src/compiler/x86/float.lisp
@@ -1732,10 +1732,10 @@
                       (inst mov y stack-temp)))
                    ,@(unless round-p
                       '((inst fldcw scw)))))))))
-  (frob %unary-truncate single-reg single-float nil)
-  (frob %unary-truncate double-reg double-float nil)
+  (frob %unary-truncate/single-float single-reg single-float nil)
+  (frob %unary-truncate/double-float double-reg double-float nil)
   #!+long-float
-  (frob %unary-truncate long-reg long-float nil)
+  (frob %unary-truncate/long-float long-reg long-float nil)
   (frob %unary-round single-reg single-float t)
   (frob %unary-round double-reg double-float t)
   #!+long-float
@@ -1779,10 +1779,10 @@
                 (inst add esp-tn 4)
                 ,@(unless round-p
                    '((inst fldcw scw)))))))
-  (frob %unary-truncate single-reg single-float nil)
-  (frob %unary-truncate double-reg double-float nil)
+  (frob %unary-truncate/single-float single-reg single-float nil)
+  (frob %unary-truncate/double-float double-reg double-float nil)
   #!+long-float
-  (frob %unary-truncate long-reg long-float nil)
+  (frob %unary-truncate/long-float long-reg long-float nil)
   (frob %unary-round single-reg single-float t)
   (frob %unary-round double-reg double-float t)
   #!+long-float
diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp
index b3ef02c..e8309c7 100644
--- a/tests/compiler.pure.lisp
+++ b/tests/compiler.pure.lisp
@@ -3232,3 +3232,18 @@
                              (vector i i i))
                            t))))
     (ctu:assert-no-consing (funcall f))))
+
+(with-test (:name :truncate-float)
+  (let ((s (compile nil `(lambda (x)
+                           (declare (single-float x))
+                           (truncate x))))
+        (d (compile nil `(lambda (x)
+                           (declare (double-float x))
+                           (truncate x)))))
+    ;; Check that there is no generic arithmetic
+    (assert (not (search "GENERIC"
+                         (with-output-to-string (out)
+                           (disassemble s :stream out)))))
+    (assert (not (search "GENERIC"
+                         (with-output-to-string (out)
+                           (disassemble d :stream out)))))))
diff --git a/version.lisp-expr b/version.lisp-expr
index caf56fd..874d18d 100644
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.30.37"
+"1.0.30.38"
-- 
1.7.10.4