From 89b82a03269446741ab4b7bba8656d6e37502fe9 Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Thu, 6 Aug 2009 12:52:58 +0000 Subject: [PATCH 1/1] 1.0.30.38: faster TRUNCATE on floats * Specialized %UNARY-TRUNCATE/SINGLE-FLOAT and %UNARY-TRUNCATE/DOUBLE-FLOAT. * Explicit coercions to appropriate float types in the TRUNCATE transforms. This gets rid of generic arithmetic in the general case (Python is reluctant to insert explicit integer-to-float coercions for integers of unknown range due to precision issues.) * Since COERCE (and %SINGLE-FLOAT and %DOUBLE-FLOAT) are not flushable, take care not to generate leftover code in the TRUNCATE transform when the result lvar has a single-value type. * Rename %UNARY-TRUNCATE float VOPs, so that transforming to a specialized floating point version doesn't make us unable to implement it directly as a VOP when the range of the float is sufficiently constrained. --- NEWS | 2 + package-data-list.lisp-expr | 6 +- src/code/float.lisp | 124 +++++++++++++++++++++++----------------- src/compiler/alpha/float.lisp | 4 +- src/compiler/float-tran.lisp | 57 ++++++++++++++---- src/compiler/fndb.lisp | 3 + src/compiler/hppa/float.lisp | 4 +- src/compiler/ppc/float.lisp | 4 +- src/compiler/sparc/float.lisp | 6 +- src/compiler/x86-64/float.lisp | 4 +- src/compiler/x86/float.lisp | 12 ++-- tests/compiler.pure.lisp | 15 +++++ version.lisp-expr | 2 +- 13 files changed, 159 insertions(+), 84 deletions(-) diff --git a/NEWS b/NEWS index b9d2bb4..c1e06eb 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ changes relative to sbcl-1.0.30: * optimization: COERCE to VECTOR, STRING, SIMPLE-STRING and recognizable one-dimenstional subtypes of ARRAY is upto 70% faster when the coercion is actually needed. + * optimization: TRUNCATE on known single- and double-floats is upto 25% + faster. * optimization: division of floating point numbers by constants uses multiplication by reciprocal when an exact reciprocal exists.
* optimization: multiplication of single- and double-floats floats by diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 64f90e8..b0caa93 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1309,7 +1309,11 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%SIN" "%SIN-QUICK" "%SINGLE-FLOAT" "%SINH" "%SQRT" "%SXHASH-SIMPLE-STRING" "%SXHASH-SIMPLE-SUBSTRING" "%TAN" "%TAN-QUICK" "%TANH" - "%UNARY-ROUND" "%UNARY-TRUNCATE" "%UNARY-FTRUNCATE" + "%UNARY-ROUND" + "%UNARY-TRUNCATE" + "%UNARY-TRUNCATE/SINGLE-FLOAT" + "%UNARY-TRUNCATE/DOUBLE-FLOAT" + "%UNARY-FTRUNCATE" "%WITH-ARRAY-DATA" "%WITH-ARRAY-DATA/FP" "%WITH-ARRAY-DATA-MACRO" diff --git a/src/code/float.lisp b/src/code/float.lisp index a8dd5f0..c8aee59 100644 --- a/src/code/float.lisp +++ b/src/code/float.lisp @@ -695,60 +695,58 @@ (setq shifted-num (ash shifted-num -1)) (incf scale))))))) -#| -These might be useful if we ever have a machine without float/integer -conversion hardware. For now, we'll use special ops that -uninterruptibly frob the rounding modes & do ieee round-to-integer. - -;;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE -;;; and the result is known to be a fixnum. We can avoid some generic -;;; arithmetic in this case. 
-(defun %unary-truncate-single-float/fixnum (x) - (declare (single-float x) (values fixnum)) - (locally (declare (optimize (speed 3) (safety 0))) - (let* ((bits (single-float-bits x)) - (exp (ldb sb!vm:single-float-exponent-byte bits)) - (frac (logior (ldb sb!vm:single-float-significand-byte bits) - sb!vm:single-float-hidden-bit)) - (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias))) - (when (> exp sb!vm:single-float-normal-exponent-max) - (error 'floating-point-invalid-operation :operator 'truncate - :operands (list x))) - (if (<= shift (- sb!vm:single-float-digits)) - 0 - (let ((res (ash frac shift))) - (declare (type (unsigned-byte 31) res)) - (if (minusp bits) - (- res) - res)))))) - -;;; Double-float version of this operation (see above single op). -(defun %unary-truncate-double-float/fixnum (x) - (declare (double-float x) (values fixnum)) - (locally (declare (optimize (speed 3) (safety 0))) - (let* ((hi-bits (double-float-high-bits x)) - (exp (ldb sb!vm:double-float-exponent-byte hi-bits)) - (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits) - sb!vm:double-float-hidden-bit)) - (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits) - sb!vm:double-float-bias))) - (when (> exp sb!vm:double-float-normal-exponent-max) - (error 'floating-point-invalid-operation :operator 'truncate - :operands (list x))) - (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits)) - 0 - (let* ((res-hi (ash frac shift)) - (res (if (plusp shift) - (logior res-hi - (the fixnum - (ash (double-float-low-bits x) - (- shift sb!vm:n-word-bits)))) - res-hi))) - (declare (type (unsigned-byte 31) res-hi res)) - (if (minusp hi-bits) - (- res) - res)))))) -|# +;;; These might be useful if we ever have a machine without float/integer +;;; conversion hardware. For now, we'll use special ops that +;;; uninterruptibly frob the rounding modes & do ieee round-to-integer. 
+#+nil +(progn + ;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE + ;; and the result is known to be a fixnum. We can avoid some generic + ;; arithmetic in this case. + (defun %unary-truncate-single-float/fixnum (x) + (declare (single-float x) (values fixnum)) + (locally (declare (optimize (speed 3) (safety 0))) + (let* ((bits (single-float-bits x)) + (exp (ldb sb!vm:single-float-exponent-byte bits)) + (frac (logior (ldb sb!vm:single-float-significand-byte bits) + sb!vm:single-float-hidden-bit)) + (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias))) + (when (> exp sb!vm:single-float-normal-exponent-max) + (error 'floating-point-invalid-operation :operator 'truncate + :operands (list x))) + (if (<= shift (- sb!vm:single-float-digits)) + 0 + (let ((res (ash frac shift))) + (declare (type (unsigned-byte 31) res)) + (if (minusp bits) + (- res) + res)))))) + ;; Double-float version of this operation (see above single op). + (defun %unary-truncate-double-float/fixnum (x) + (declare (double-float x) (values fixnum)) + (locally (declare (optimize (speed 3) (safety 0))) + (let* ((hi-bits (double-float-high-bits x)) + (exp (ldb sb!vm:double-float-exponent-byte hi-bits)) + (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits) + sb!vm:double-float-hidden-bit)) + (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits) + sb!vm:double-float-bias))) + (when (> exp sb!vm:double-float-normal-exponent-max) + (error 'floating-point-invalid-operation :operator 'truncate + :operands (list x))) + (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits)) + 0 + (let* ((res-hi (ash frac shift)) + (res (if (plusp shift) + (logior res-hi + (the fixnum + (ash (double-float-low-bits x) + (- shift sb!vm:n-word-bits)))) + res-hi))) + (declare (type (unsigned-byte 31) res-hi res)) + (if (minusp hi-bits) + (- res) + res))))))) ;;; This function is called when we are doing a truncate without any funky ;;; divisor, i.e. 
converting a float or ratio to an integer. Note that we do @@ -774,6 +772,24 @@ uninterruptibly frob the rounding modes & do ieee round-to-integer. (- res) res))))))) +;;; Specialized versions for floats, with a fast path for fixnum-range values. +(macrolet ((def (type name) + `(defun ,name (number) + (if (< ,(coerce most-negative-fixnum type) + number + ,(coerce most-positive-fixnum type)) + (truly-the fixnum (,name number)) + ;; General -- slow -- case. + (multiple-value-bind (bits exp) (integer-decode-float number) + (let ((res (ash bits exp))) + (if (minusp number) + (- res) + res))))))) + (def single-float %unary-truncate/single-float) + (def double-float %unary-truncate/double-float) + #!+long-float + (def long-float %unary-truncate/long-float)) + ;;; Similar to %UNARY-TRUNCATE, but rounds to the nearest integer. If we ;;; can't use the round primitive, then we do our own round-to-nearest on the ;;; result of i-d-f. [Note that this rounding will really only happen with diff --git a/src/compiler/alpha/float.lisp b/src/compiler/alpha/float.lisp index 6ed495c..4134382 100644 --- a/src/compiler/alpha/float.lisp +++ b/src/compiler/alpha/float.lisp @@ -573,8 +573,8 @@ (current-nfp-tn vop)))) (inst excb) )))) - (frob %unary-truncate single-reg single-float cvttq/c_sv t) - (frob %unary-truncate double-reg double-float cvttq/c_sv) + (frob %unary-truncate/single-float single-reg single-float cvttq/c_sv t) + (frob %unary-truncate/double-float double-reg double-float cvttq/c_sv) (frob %unary-round single-reg single-float cvttq_sv t) (frob %unary-round double-reg double-float cvttq_sv)) diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp index 6c73bf3..6a51a47 100644 --- a/src/compiler/float-tran.lisp +++ b/src/compiler/float-tran.lisp @@ -15,8 +15,10 @@ ;;;; coercions -(defknown %single-float (real) single-float (movable foldable)) -(defknown %double-float (real) double-float (movable foldable)) +(defknown %single-float (real) single-float + (movable foldable)) +(defknown %double-float
(real) double-float + (movable foldable)) (deftransform float ((n f) (* single-float) *) '(%single-float n)) @@ -1492,15 +1494,48 @@ (define-frobs truncate %unary-truncate) (define-frobs round %unary-round)) -;;; Convert (TRUNCATE x y) to the obvious implementation. We only want -;;; this when under certain conditions and let the generic TRUNCATE -;;; handle the rest. (Note: if Y = 1, the divide and multiply by Y -;;; should be removed by other DEFTRANSFORMs.) -(deftransform truncate ((x &optional y) - (float &optional (or float integer))) - (let ((defaulted-y (if y 'y 1))) - `(let ((res (%unary-truncate (/ x ,defaulted-y)))) - (values res (- x (* ,defaulted-y res)))))) +(deftransform %unary-truncate ((x) (single-float)) + `(%unary-truncate/single-float x)) +(deftransform %unary-truncate ((x) (double-float)) + `(%unary-truncate/double-float x)) + +;;; Convert (TRUNCATE x y) to the obvious implementation. +;;; +;;; ...plus hair: Insert explicit coercions to appropriate float types: Python +;;; is reluctant to generate explicit integer->float coercions due to +;;; precision issues (see SAFE-SINGLE-COERCION-P &co), but this is not an +;;; issue here as there is no DERIVE-TYPE optimizer on specialized versions of +;;; %UNARY-TRUNCATE, so the derived type of TRUNCATE remains the best we can +;;; do here -- which is fine. Also take care not to add unnecessary division +;;; or multiplication by 1, since we are not able to always eliminate them, +;;; depending on FLOAT-ACCURACY. Finally, leave out the secondary value when +;;; we know it is unused: COERCE is not flushable.
+(macrolet ((def (type other-float-arg-types) + (let ((unary (symbolicate "%UNARY-TRUNCATE/" type)) + (coerce (symbolicate "%" type))) + `(deftransform truncate ((x &optional y) + (,type + &optional (or ,type ,@other-float-arg-types integer)) + * :result result) + (let ((result-type (lvar-type result))) + (if (or (not y) + (and (constant-lvar-p y) (= 1 (lvar-value y)))) + (if (values-type-p result-type) + `(let ((res (,',unary x))) + (values res (- x (,',coerce res)))) + `(let ((res (,',unary x))) + ;; Dummy secondary value: result lvar type is not a VALUES type. + (values res x))) + (if (values-type-p result-type) + `(let* ((f (,',coerce y)) + (res (,',unary (/ x f)))) + (values res (- x (* f (,',coerce res))))) + `(let* ((f (,',coerce y)) + (res (,',unary (/ x f)))) + ;; Dummy secondary value: result lvar type is not a VALUES type. + (values res x))))))))) + (def single-float ()) + (def double-float (single-float))) (deftransform floor ((number &optional divisor) (float &optional (or integer float))) diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index 25a45dc..66d8f46 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -1387,6 +1387,9 @@ ;;;; magical compiler frobs +(defknown %unary-truncate/single-float (single-float) integer (movable foldable flushable)) +(defknown %unary-truncate/double-float (double-float) integer (movable foldable flushable)) + ;;; We can't fold this in general because of SATISFIES. There is a ;;; special optimizer anyway.
(defknown %typep (t (or type-specifier ctype)) boolean diff --git a/src/compiler/hppa/float.lisp b/src/compiler/hppa/float.lisp index 649c922..67445d5 100644 --- a/src/compiler/hppa/float.lisp +++ b/src/compiler/hppa/float.lisp @@ -613,9 +613,9 @@ (loadw y nfp (tn-offset stack-tn)))))))) (frob %unary-round single-reg single-float fcnvfx "inline float round") (frob %unary-round double-reg double-float fcnvfx "inline float round") - (frob %unary-truncate single-reg single-float fcnvfxt + (frob %unary-truncate/single-float single-reg single-float fcnvfxt "inline float truncate") - (frob %unary-truncate double-reg double-float fcnvfxt + (frob %unary-truncate/double-float double-reg double-float fcnvfxt "inline float truncate")) (define-vop (make-single-float) diff --git a/src/compiler/ppc/float.lisp b/src/compiler/ppc/float.lisp index de01833..5b008cd 100644 --- a/src/compiler/ppc/float.lisp +++ b/src/compiler/ppc/float.lisp @@ -547,8 +547,8 @@ (* (tn-offset stack-temp) n-word-bytes)) (inst lwz y (current-nfp-tn vop) (+ 4 (* (tn-offset stack-temp) n-word-bytes))))))) - (frob %unary-truncate single-reg single-float fctiwz) - (frob %unary-truncate double-reg double-float fctiwz) + (frob %unary-truncate/single-float single-reg single-float fctiwz) + (frob %unary-truncate/double-float double-reg double-float fctiwz) (frob %unary-round single-reg single-float fctiw) (frob %unary-round double-reg double-float fctiw)) diff --git a/src/compiler/sparc/float.lisp b/src/compiler/sparc/float.lisp index 181a6a2..08e431b 100644 --- a/src/compiler/sparc/float.lisp +++ b/src/compiler/sparc/float.lisp @@ -935,10 +935,10 @@ (* (tn-offset stack-temp) n-word-bytes)) (inst ld y (current-nfp-tn vop) (* (tn-offset stack-temp) n-word-bytes)))))))) - (frob %unary-truncate single-reg single-float fstoi) - (frob %unary-truncate double-reg double-float fdtoi) + (frob %unary-truncate/single-float single-reg single-float fstoi) + (frob %unary-truncate/double-float double-reg double-float fdtoi) 
#!+long-float - (frob %unary-truncate long-reg long-float fqtoi) + (frob %unary-truncate/long-float long-reg long-float fqtoi) ;; KLUDGE -- these two forms were protected by #-sun4. ;; (frob %unary-round single-reg single-float fstoir) ;; (frob %unary-round double-reg double-float fdtoir) diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp index b0d1b77..ab615bb 100644 --- a/src/compiler/x86-64/float.lisp +++ b/src/compiler/x86-64/float.lisp @@ -1102,8 +1102,8 @@ (signed-reg (inst ,inst y x) )))))) - (frob %unary-truncate cvttss2si single-reg single-float nil) - (frob %unary-truncate cvttsd2si double-reg double-float nil) + (frob %unary-truncate/single-float cvttss2si single-reg single-float nil) + (frob %unary-truncate/double-float cvttsd2si double-reg double-float nil) (frob %unary-round cvtss2si single-reg single-float t) (frob %unary-round cvtsd2si double-reg double-float t)) diff --git a/src/compiler/x86/float.lisp b/src/compiler/x86/float.lisp index bde111a..f0c0921 100644 --- a/src/compiler/x86/float.lisp +++ b/src/compiler/x86/float.lisp @@ -1732,10 +1732,10 @@ (inst mov y stack-temp))) ,@(unless round-p '((inst fldcw scw))))))))) - (frob %unary-truncate single-reg single-float nil) - (frob %unary-truncate double-reg double-float nil) + (frob %unary-truncate/single-float single-reg single-float nil) + (frob %unary-truncate/double-float double-reg double-float nil) #!+long-float - (frob %unary-truncate long-reg long-float nil) + (frob %unary-truncate/long-float long-reg long-float nil) (frob %unary-round single-reg single-float t) (frob %unary-round double-reg double-float t) #!+long-float @@ -1779,10 +1779,10 @@ (inst add esp-tn 4) ,@(unless round-p '((inst fldcw scw))))))) - (frob %unary-truncate single-reg single-float nil) - (frob %unary-truncate double-reg double-float nil) + (frob %unary-truncate/single-float single-reg single-float nil) + (frob %unary-truncate/double-float double-reg double-float nil) #!+long-float - (frob 
%unary-truncate long-reg long-float nil) + (frob %unary-truncate/long-float long-reg long-float nil) (frob %unary-round single-reg single-float t) (frob %unary-round double-reg double-float t) #!+long-float diff --git a/tests/compiler.pure.lisp b/tests/compiler.pure.lisp index b3ef02c..e8309c7 100644 --- a/tests/compiler.pure.lisp +++ b/tests/compiler.pure.lisp @@ -3232,3 +3232,18 @@ (vector i i i)) t)))) (ctu:assert-no-consing (funcall f)))) + +(with-test (:name :truncate-float) + (let ((s (compile nil `(lambda (x) + (declare (single-float x)) + (truncate x)))) + (d (compile nil `(lambda (x) + (declare (double-float x)) + (truncate x))))) + ;; Check that there is no generic arithmetic + (assert (not (search "GENERIC" + (with-output-to-string (out) + (disassemble s :stream out))))) + (assert (not (search "GENERIC" + (with-output-to-string (out) + (disassemble d :stream out))))))) diff --git a/version.lisp-expr b/version.lisp-expr index caf56fd..874d18d 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.30.37" +"1.0.30.38" -- 1.7.10.4