From: Paul Khuong Date: Thu, 6 May 2010 13:49:24 +0000 (+0000) Subject: 1.0.38.6: Clear higher order bits for SSE operations that don't X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=f16e93459cd73b1884e3d576c95e422f8e8a000e;p=sbcl.git 1.0.38.6: Clear higher order bits for SSE operations that don't * SQRTSD, CVT{SS,SD}2{SS,SD} and CVTSI2{SS,SD} leave the high-order bits of the result as-is. These are the only (hopefully) operations we use that have a single input, so we must explicitly clear out the destination register. * Probably a performance fix too, as it breaks dependency chains on the destination register. * The bug was relatively hard to observe. Raymond Toy reported seeing and fixing something similar on maxima/CMUCL. Brittle test case added. --- diff --git a/NEWS b/NEWS index fdc624f..96385f0 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,9 @@ changes relative to sbcl-1.9.38: * bug fix: 32-bit unicode external formats now work on big-endian systems. * bug fix: Literal characters with code points greater than about 32767 now work on PPC UNICODE builds. + * bug fix: Any noise left by SSE operations (sqrt and conversions) in the + high order bits are explicitly cleared out. In some contrived situations, + this could lead to wrong results in mixed real/complex float arithmetic. changes in sbcl-1.0.38 relative to sbcl-1.0.37: * incompatible change: Thread names are now restricted to SIMPLE-STRINGs diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp index 227e1f9..583a7a4 100644 --- a/src/compiler/x86-64/float.lisp +++ b/src/compiler/x86-64/float.lisp @@ -794,6 +794,8 @@ (:vop-var vop) (:save-p :compute-only) (:generator 1 + (unless (location= x y) + (inst xorpd y y)) (note-this-location vop :internal-error) (inst sqrtsd y x))) @@ -1071,6 +1073,9 @@ (:vop-var vop) (:save-p :compute-only) (:generator 5 + (sc-case y + (single-reg (inst xorps y y)) + (double-reg (inst xorpd y y))) (note-this-location vop :internal-error) (inst ,inst y x))))) (frob %single-float/signed %single-float cvtsi2ss single-reg single-float) @@ -1088,10 +1093,18 @@ (:vop-var vop) (:save-p :compute-only) (:generator 2 + (unless (location= x y) + (sc-case y + (single-reg (inst xorps y y)) + (double-reg (inst xorpd y y)))) (note-this-location vop :internal-error) (inst ,inst y (sc-case x (,(first from-scs) x) - (,(second from-scs) (,ea-func x)))))))) + (,(second from-scs) (,ea-func x)))) + ,(when (and (eq from-type 'double-float) ; if the input is wider + (eq to-type 'single-float)) ; than the output, clear + `(when (location= x y) ; noise in the high part + (inst shufps y y #4r3330))))))) (frob %single-float/double-float %single-float cvtsd2ss (double-reg double-stack) double-float ea-for-df-stack single-reg single-float) diff --git a/tests/float.pure.lisp b/tests/float.pure.lisp index 01d7ae1..eb1d5bc 100644 --- a/tests/float.pure.lisp +++ b/tests/float.pure.lisp @@ -278,3 +278,94 @@ ;; That'd be one strange host, but just in case (assert (plusp (sb-kernel:%unary-truncate/single-float (expt 2f0 65)))) (assert (plusp (sb-kernel:%unary-truncate/double-float (expt 2d0 65))))) + +;; On x86-64, we sometimes forgot to clear the higher order bits of the +;; destination register before using it with an instruction that doesn't +;; clear the (unused) high order bits. Suspect instructions are operations +;; with only one operand: for everything else, the destination has already +;; been loaded with a value, making it safe (by induction). +;; +;; The tests are extremely brittle and could be broken by any number of +;; back- or front-end optimisations. We should just keep the issue above +;; in mind at all times when working with SSE or similar instruction sets. +(macrolet ((with-pinned-floats ((count type &rest names) &body body) + "Force COUNT float values to be kept live (and hopefully in registers), + fill a temporary register with noise, and execute BODY." + (let ((dummy (loop repeat count + collect (or (pop names) + (gensym "TEMP"))))) + `(let ,(loop for i downfrom -1 + for var in dummy + for j = (coerce i type) + collect + `(,var ,(complex j j))) ; we don't actually need that, but + (declare (type (complex ,type) ,@dummy)) ; future-proofing can't hurt + ,@(loop for var in dummy + for i upfrom 0 + collect `(setf ,var ,(complex i (coerce i type)))) + (multiple-value-prog1 + (progn + (let ((x ,(complex 1d0 1d0))) + (declare (type (complex double-float) x)) + (setf x ,(complex most-positive-fixnum (float most-positive-fixnum 1d0))) + (sb-vm::touch-object x)) + (locally ,@body)) + ,@(loop for var in dummy + collect `(sb-vm::touch-object ,var))))))) + (with-test (:name :clear-sqrtsd) + (flet ((test-sqrtsd (float) + (declare (optimize speed (safety 1)) + (type (double-float (0d0)) float)) + (with-pinned-floats (14 double-float x0) + (let ((x (sqrt float))) + (values (+ x x0) float))))) + (declare (notinline test-sqrtsd)) + (assert (zerop (imagpart (test-sqrtsd 4d0)))))) + + (with-test (:name :clear-sqrtsd-single) + (flet ((test-sqrtsd-float (float) + (declare (optimize speed (safety 1)) + (type (single-float (0f0)) float)) + (with-pinned-floats (14 single-float x0) + (let ((x (sqrt float))) + (values (+ x x0) float))))) + (declare (notinline test-sqrtsd-float)) + (assert (zerop (imagpart (test-sqrtsd-float 4f0)))))) + + (with-test (:name :clear-cvtss2sd) + (flet ((test-cvtss2sd (float) + (declare (optimize speed (safety 1)) + (type single-float float)) + (with-pinned-floats (14 double-float x0) + (let ((x (float float 0d0))) + (values (+ x x0) (+ 1e0 float)))))) + (declare (notinline test-cvtss2sd)) + (assert (zerop (imagpart (test-cvtss2sd 1f0)))))) + + (with-test (:name :clear-cvtsd2ss) + (flet ((test-cvtsd2ss (float) + (declare (optimize speed (safety 1)) + (type double-float float)) + (with-pinned-floats (14 single-float x0) + (let ((x (float float 1e0))) + (values (+ x x0) (+ 1d0 float)))))) + (declare (notinline test-cvtsd2ss)) + (assert (zerop (imagpart (test-cvtsd2ss 4d0)))))) + + (with-test (:name :clear-cvtsi2sd) + (flet ((test-cvtsi2sd (int) + (declare (optimize speed (safety 0)) + (type (unsigned-byte 10) int)) + (with-pinned-floats (15 double-float x0) + (+ (float int 0d0) x0)))) + (declare (notinline test-cvtsi2sd)) + (assert (zerop (imagpart (test-cvtsi2sd 4)))))) + + (with-test (:name :clear-cvtsi2ss) + (flet ((test-cvtsi2ss (int) + (declare (optimize speed (safety 0)) + (type (unsigned-byte 10) int)) + (with-pinned-floats (15 single-float x0) + (+ (float int 0e0) x0)))) + (declare (notinline test-cvtsi2ss)) + (assert (zerop (imagpart (test-cvtsi2ss 4))))))) diff --git a/version.lisp-expr b/version.lisp-expr index edbbe5c..5041cdd 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.38.5" +"1.0.38.6"