X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Ffloat.lisp;h=0d69b015e28fa53036e50c3d0a427ee80a666d33;hb=079ef9dad558ca07cb8178ef428bf738112174fa;hp=89e5d715637b96a82dbc144ea7b3c6f47e7bb22d;hpb=a530bbe337109d898d5b4a001fc8f1afa3b5dc39;p=sbcl.git diff --git a/src/compiler/x86/float.lisp b/src/compiler/x86/float.lisp index 89e5d71..0d69b01 100644 --- a/src/compiler/x86/float.lisp +++ b/src/compiler/x86/float.lisp @@ -10,43 +10,41 @@ ;;;; files for more information. (in-package "SB!VM") - -(file-comment - "$Header$") (macrolet ((ea-for-xf-desc (tn slot) `(make-ea :dword :base ,tn - :disp (- (* ,slot sb!vm:word-bytes) sb!vm:other-pointer-type)))) + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) (defun ea-for-sf-desc (tn) - (ea-for-xf-desc tn sb!vm:single-float-value-slot)) + (ea-for-xf-desc tn single-float-value-slot)) (defun ea-for-df-desc (tn) - (ea-for-xf-desc tn sb!vm:double-float-value-slot)) + (ea-for-xf-desc tn double-float-value-slot)) #!+long-float (defun ea-for-lf-desc (tn) - (ea-for-xf-desc tn sb!vm:long-float-value-slot)) + (ea-for-xf-desc tn long-float-value-slot)) ;; complex floats (defun ea-for-csf-real-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-single-float-real-slot)) + (ea-for-xf-desc tn complex-single-float-real-slot)) (defun ea-for-csf-imag-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-single-float-imag-slot)) + (ea-for-xf-desc tn complex-single-float-imag-slot)) (defun ea-for-cdf-real-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-double-float-real-slot)) + (ea-for-xf-desc tn complex-double-float-real-slot)) (defun ea-for-cdf-imag-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-double-float-imag-slot)) + (ea-for-xf-desc tn complex-double-float-imag-slot)) #!+long-float (defun ea-for-clf-real-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-long-float-real-slot)) + (ea-for-xf-desc tn complex-long-float-real-slot)) #!+long-float (defun ea-for-clf-imag-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-long-float-imag-slot))) + (ea-for-xf-desc tn complex-long-float-imag-slot))) (macrolet ((ea-for-xf-stack (tn kind) `(make-ea :dword :base ebp-tn :disp (- (* (+ (tn-offset ,tn) (ecase ,kind (:single 1) (:double 2) (:long 3))) - sb!vm:word-bytes))))) + n-word-bytes))))) (defun ea-for-sf-stack (tn) (ea-for-xf-stack tn :single)) (defun ea-for-df-stack (tn) @@ -55,7 +53,29 @@ (defun ea-for-lf-stack (tn) (ea-for-xf-stack tn :long))) -;;; Complex float stack EAs +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +;;; +;;; Until 2004-03-15, the implementation of this was buggy; it +;;; unconditionally emitted the WAIT instruction. It turns out that +;;; this is the right thing to do anyway; omitting them can lead to +;;; system corruption on conforming code. -- CSR +(defun maybe-fp-wait (node &optional note-next-instruction) + (declare (ignore node)) + #+nil + (when (policy node (or (= debug 3) (> safety speed)))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait)) + +;;; complex float stack EAs (macrolet ((ea-for-cxf-stack (tn kind slot &optional base) `(make-ea :dword :base ,base @@ -65,7 +85,7 @@ (:double 2) (:long 3)) (ecase ,slot (:real 1) (:imag 2)))) - sb!vm:word-bytes))))) + n-word-bytes))))) (defun ea-for-csf-real-stack (tn &optional (base ebp-tn)) (ea-for-cxf-stack tn :single :real base)) (defun ea-for-csf-imag-stack (tn &optional (base ebp-tn)) @@ -88,7 +108,7 @@ ;;; ;;; Using a Pop then load. (defun copy-fp-reg-to-fr0 (reg) - (assert (not (zerop (tn-offset reg)))) + (aver (not (zerop (tn-offset reg)))) (inst fstp fr0-tn) (inst fld (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) @@ -96,7 +116,7 @@ ;;; Using Fxch then Fst to restore the original reg contents. #+nil (defun copy-fp-reg-to-fr0 (reg) - (assert (not (zerop (tn-offset reg)))) + (aver (not (zerop (tn-offset reg)))) (inst fxch reg) (inst fst reg)) @@ -110,13 +130,13 @@ ;;;; move functions -;;; x is source, y is destination -(define-move-function (load-single 2) (vop x y) +;;; X is source, Y is destination. +(define-move-fun (load-single 2) (vop x y) ((single-stack) (single-reg)) (with-empty-tn@fp-top(y) (inst fld (ea-for-sf-stack x)))) -(define-move-function (store-single 2) (vop x y) +(define-move-fun (store-single 2) (vop x y) ((single-reg) (single-stack)) (cond ((zerop (tn-offset x)) (inst fst (ea-for-sf-stack y))) @@ -126,12 +146,12 @@ ;; This may not be necessary as ST0 is likely invalid now. (inst fxch x)))) -(define-move-function (load-double 2) (vop x y) +(define-move-fun (load-double 2) (vop x y) ((double-stack) (double-reg)) (with-empty-tn@fp-top(y) (inst fldd (ea-for-df-stack x)))) -(define-move-function (store-double 2) (vop x y) +(define-move-fun (store-double 2) (vop x y) ((double-reg) (double-stack)) (cond ((zerop (tn-offset x)) (inst fstd (ea-for-df-stack y))) @@ -142,13 +162,13 @@ (inst fxch x)))) #!+long-float -(define-move-function (load-long 2) (vop x y) +(define-move-fun (load-long 2) (vop x y) ((long-stack) (long-reg)) (with-empty-tn@fp-top(y) (inst fldl (ea-for-lf-stack x)))) #!+long-float -(define-move-function (store-long 2) (vop x y) +(define-move-fun (store-long 2) (vop x y) ((long-reg) (long-stack)) (cond ((zerop (tn-offset x)) (store-long-float (ea-for-lf-stack y))) @@ -158,32 +178,36 @@ ;; This may not be necessary as ST0 is likely invalid now. (inst fxch x)))) -;;; The i387 has instructions to load some useful constants. -;;; This doesn't save much time but might cut down on memory -;;; access and reduce the size of the constant vector (CV). -;;; Intel claims they are stored in a more precise form on chip. -;;; Anyhow, might as well use the feature. It can be turned -;;; off by hacking the "immediate-constant-sc" in vm.lisp. -(define-move-function (load-fp-constant 2) (vop x y) +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* + #!+long-float 'long-float #!-long-float 'double-float)) +(define-move-fun (load-fp-constant 2) (vop x y) ((fp-constant) (single-reg double-reg #!+long-float long-reg)) (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) (with-empty-tn@fp-top(y) (cond ((zerop value) (inst fldz)) - ((= value 1l0) + ((= value 1e0) (inst fld1)) - ((= value pi) + ((= value (coerce pi *read-default-float-format*)) (inst fldpi)) - ((= value (log 10l0 2l0)) + ((= value (log 10e0 2e0)) (inst fldl2t)) - ((= value (log 2.718281828459045235360287471352662L0 2l0)) + ((= value (log 2.718281828459045235360287471352662e0 2e0)) (inst fldl2e)) - ((= value (log 2l0 10l0)) + ((= value (log 2e0 10e0)) (inst fldlg2)) - ((= value (log 2l0 2.718281828459045235360287471352662L0)) + ((= value (log 2e0 2.718281828459045235360287471352662e0)) (inst fldln2)) - (t (warn "Ignoring bogus i387 Constant ~A" value)))))) - + (t (warn "ignoring bogus i387 constant ~A" value)))))) +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'single-float)) ;;;; complex float move functions @@ -210,8 +234,8 @@ (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) :offset (1+ (tn-offset x)))) -;;; x is source, y is destination -(define-move-function (load-complex-single 2) (vop x y) +;;; X is source, Y is destination. +(define-move-fun (load-complex-single 2) (vop x y) ((complex-single-stack) (complex-single-reg)) (let ((real-tn (complex-single-reg-real-tn y))) (with-empty-tn@fp-top (real-tn) @@ -220,7 +244,7 @@ (with-empty-tn@fp-top (imag-tn) (inst fld (ea-for-csf-imag-stack x))))) -(define-move-function (store-complex-single 2) (vop x y) +(define-move-fun (store-complex-single 2) (vop x y) ((complex-single-reg) (complex-single-stack)) (let ((real-tn (complex-single-reg-real-tn x))) (cond ((zerop (tn-offset real-tn)) @@ -234,7 +258,7 @@ (inst fst (ea-for-csf-imag-stack y)) (inst fxch imag-tn))) -(define-move-function (load-complex-double 2) (vop x y) +(define-move-fun (load-complex-double 2) (vop x y) ((complex-double-stack) (complex-double-reg)) (let ((real-tn (complex-double-reg-real-tn y))) (with-empty-tn@fp-top(real-tn) @@ -243,7 +267,7 @@ (with-empty-tn@fp-top(imag-tn) (inst fldd (ea-for-cdf-imag-stack x))))) -(define-move-function (store-complex-double 2) (vop x y) +(define-move-fun (store-complex-double 2) (vop x y) ((complex-double-reg) (complex-double-stack)) (let ((real-tn (complex-double-reg-real-tn x))) (cond ((zerop (tn-offset real-tn)) @@ -258,7 +282,7 @@ (inst fxch imag-tn))) #!+long-float -(define-move-function (load-complex-long 2) (vop x y) +(define-move-fun (load-complex-long 2) (vop x y) ((complex-long-stack) (complex-long-reg)) (let ((real-tn (complex-long-reg-real-tn y))) (with-empty-tn@fp-top(real-tn) @@ -268,7 +292,7 @@ (inst fldl (ea-for-clf-imag-stack x))))) #!+long-float -(define-move-function (store-complex-long 2) (vop x y) +(define-move-fun (store-complex-long 2) (vop x y) ((complex-long-reg) (complex-long-stack)) (let ((real-tn (complex-long-reg-real-tn x))) (cond ((zerop (tn-offset real-tn)) @@ -285,7 +309,7 @@ ;;;; move VOPs -;;; Float register to register moves. +;;; float register to register moves (define-vop (float-move) (:args (x)) (:results (y)) @@ -375,8 +399,8 @@ (:note "float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:single-float-type - sb!vm:single-float-size node) + single-float-widetag + single-float-size node) (with-tn@fp-top(x) (inst fst (ea-for-sf-desc y)))))) (define-move-vop move-from-single :move @@ -389,8 +413,8 @@ (:note "float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:double-float-type - sb!vm:double-float-size + double-float-widetag + double-float-size node) (with-tn@fp-top(x) (inst fstd (ea-for-df-desc y)))))) @@ -405,8 +429,8 @@ (:note "float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:long-float-type - sb!vm:long-float-size + long-float-widetag + long-float-size node) (with-tn@fp-top(x) (store-long-float (ea-for-lf-desc y)))))) @@ -419,8 +443,8 @@ (:results (y :scs (descriptor-reg))) (:generator 2 (ecase (sb!c::constant-value (sb!c::tn-leaf x)) - (0f0 (load-symbol-value y *fp-constant-0s0*)) - (1f0 (load-symbol-value y *fp-constant-1s0*)) + (0f0 (load-symbol-value y *fp-constant-0f0*)) + (1f0 (load-symbol-value y *fp-constant-1f0*)) (0d0 (load-symbol-value y *fp-constant-0d0*)) (1d0 (load-symbol-value y *fp-constant-1d0*)) #!+long-float @@ -442,7 +466,7 @@ (define-move-vop move-from-fp-constant :move (fp-constant) (descriptor-reg)) -;;; Move from a descriptor to a float register +;;; Move from a descriptor to a float register. (define-vop (move-to-single) (:args (x :scs (descriptor-reg))) (:results (y :scs (single-reg))) @@ -471,7 +495,6 @@ (inst fldl (ea-for-lf-desc x))))) #!+long-float (define-move-vop move-to-long :move (descriptor-reg) (long-reg)) - ;;; Move from complex float to a descriptor reg. allocating a new ;;; complex float object in the process. @@ -482,8 +505,9 @@ (:note "complex float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:complex-single-float-type - sb!vm:complex-single-float-size node) + complex-single-float-widetag + complex-single-float-size + node) (let ((real-tn (complex-single-reg-real-tn x))) (with-tn@fp-top(real-tn) (inst fst (ea-for-csf-real-desc y)))) @@ -500,8 +524,8 @@ (:note "complex float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:complex-double-float-type - sb!vm:complex-double-float-size + complex-double-float-widetag + complex-double-float-size node) (let ((real-tn (complex-double-reg-real-tn x))) (with-tn@fp-top(real-tn) @@ -520,8 +544,8 @@ (:note "complex float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:complex-long-float-type - sb!vm:complex-long-float-size + complex-long-float-widetag + complex-long-float-size node) (let ((real-tn (complex-long-reg-real-tn x))) (with-tn@fp-top(real-tn) @@ -533,7 +557,7 @@ (define-move-vop move-from-complex-long :move (complex-long-reg) (descriptor-reg)) -;;; Move from a descriptor to a complex float register +;;; Move from a descriptor to a complex float register. (macrolet ((frob (name sc format) `(progn (define-vop (,name) @@ -560,14 +584,13 @@ (frob move-to-complex-double complex-double-reg :double) #!+long-float (frob move-to-complex-double complex-long-reg :long)) - -;;;; The move argument vops. +;;;; the move argument vops ;;;; -;;;; Note these are also used to stuff fp numbers onto the c-call stack -;;;; so the order is different than the lisp-stack. +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. -;;; The general move-argument vop +;;; the general MOVE-ARG VOP (macrolet ((frob (name sc stack-sc format) `(progn (define-vop (,name) @@ -590,7 +613,7 @@ (inst fxch x))))) (,stack-sc (if (= (tn-offset fp) esp-offset) - (let* ((offset (* (tn-offset y) word-bytes)) + (let* ((offset (* (tn-offset y) n-word-bytes)) (ea (make-ea :dword :base fp :disp offset))) (with-tn@fp-top(x) ,@(ecase format @@ -605,21 +628,21 @@ (:single 1) (:double 2) (:long 3))) - sb!vm:word-bytes))))) + n-word-bytes))))) (with-tn@fp-top(x) ,@(ecase format (:single '((inst fst ea))) (:double '((inst fstd ea))) #!+long-float (:long '((store-long-float ea))))))))))) - (define-move-vop ,name :move-argument + (define-move-vop ,name :move-arg (,sc descriptor-reg) (,sc))))) - (frob move-single-float-argument single-reg single-stack :single) - (frob move-double-float-argument double-reg double-stack :double) + (frob move-single-float-arg single-reg single-stack :single) + (frob move-double-float-arg double-reg double-stack :double) #!+long-float - (frob move-long-float-argument long-reg long-stack :long)) + (frob move-long-float-arg long-reg long-stack :long)) -;;;; Complex float move-argument vop +;;;; complex float MOVE-ARG VOP (macrolet ((frob (name sc stack-sc format) `(progn (define-vop (,name) @@ -687,17 +710,17 @@ '((store-long-float (ea-for-clf-imag-stack y fp))))) (inst fxch imag-tn)))))) - (define-move-vop ,name :move-argument + (define-move-vop ,name :move-arg (,sc descriptor-reg) (,sc))))) - (frob move-complex-single-float-argument + (frob move-complex-single-float-arg complex-single-reg complex-single-stack :single) - (frob move-complex-double-float-argument + (frob move-complex-double-float-arg complex-double-reg complex-double-stack :double) #!+long-float - (frob move-complex-long-float-argument + (frob move-complex-long-float-arg complex-long-reg complex-long-stack :long)) -(define-move-vop move-argument :move-argument +(define-move-vop move-arg :move-arg (single-reg double-reg #!+long-float long-reg complex-single-reg complex-double-reg #!+long-float complex-long-reg) (descriptor-reg)) @@ -705,7 +728,7 @@ ;;;; arithmetic VOPs -;;; dtc: The floating point arithmetic vops. +;;; dtc: the floating point arithmetic vops ;;; ;;; Note: Although these can accept x and y on the stack or pointed to ;;; from a descriptor register, they will work with register loading @@ -722,16 +745,16 @@ ;;; 22-Jul-97: descriptor args lose in some simple cases when ;;; a function result computed in a loop. Then Python insists ;;; on consing the intermediate values! For example -#| -(defun test(a n) - (declare (type (simple-array double-float (*)) a) - (fixnum n)) - (let ((sum 0d0)) - (declare (type double-float sum)) - (dotimes (i n) - (incf sum (* (aref a i)(aref a i)))) - sum)) -|# +;;; +;;; (defun test(a n) +;;; (declare (type (simple-array double-float (*)) a) +;;; (fixnum n)) +;;; (let ((sum 0d0)) +;;; (declare (type double-float sum)) +;;; (dotimes (i n) +;;; (incf sum (* (aref a i)(aref a i)))) +;;; sum)) +;;; ;;; So, disabling descriptor args until this can be fixed elsewhere. (macrolet ((frob (op fop-sti fopr-sti @@ -795,9 +818,7 @@ (inst fld (ea-for-sf-desc y))))) ;; ST(i) = ST(i) op ST0 (inst ,fop-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; y and r are the same register. ((and (sc-is y single-reg) (location= y r)) (cond ((zerop (tn-offset r)) @@ -823,10 +844,8 @@ (inst fld (ea-for-sf-desc x))))) ;; ST(i) = ST(0) op ST(i) (inst ,fopr-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) - ;; The default case + (maybe-fp-wait node vop)) + ;; the default case (t ;; Get the result to ST0. @@ -875,12 +894,11 @@ (note-next-instruction vop :internal-error) - ;; Finally save the result + ;; Finally save the result. (sc-case r (single-reg (cond ((zerop (tn-offset r)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst r)))) (single-stack @@ -903,7 +921,7 @@ (:save-p :compute-only) (:node-var node) (:generator ,dcost - ;; Handle a few special cases + ;; Handle a few special cases. (cond ;; x, y, and r are the same register. ((and (sc-is x double-reg) (location= x r) (location= y r)) @@ -941,9 +959,7 @@ (inst fldd (ea-for-df-desc y))))) ;; ST(i) = ST(i) op ST0 (inst ,fop-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; y and r are the same register. ((and (sc-is y double-reg) (location= y r)) (cond ((zerop (tn-offset r)) @@ -969,10 +985,8 @@ (inst fldd (ea-for-df-desc x))))) ;; ST(i) = ST(0) op ST(i) (inst ,fopr-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) - ;; The default case + (maybe-fp-wait node vop)) + ;; the default case (t ;; Get the result to ST0. @@ -1021,12 +1035,11 @@ (note-next-instruction vop :internal-error) - ;; Finally save the result + ;; Finally save the result. (sc-case r (double-reg (cond ((zerop (tn-offset r)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst r)))) (double-stack @@ -1048,7 +1061,7 @@ (:save-p :compute-only) (:node-var node) (:generator ,lcost - ;; Handle a few special cases + ;; Handle a few special cases. (cond ;; x, y, and r are the same register. ((and (location= x r) (location= y r)) @@ -1072,9 +1085,7 @@ (copy-fp-reg-to-fr0 y)) ;; ST(i) = ST(i) op ST0 (inst ,fop-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; y and r are the same register. ((location= y r) (cond ((zerop (tn-offset r)) @@ -1086,9 +1097,7 @@ (copy-fp-reg-to-fr0 x)) ;; ST(i) = ST(0) op ST(i) (inst ,fopr-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; the default case (t ;; Get the result to ST0. @@ -1114,8 +1123,7 @@ ;; Finally save the result. (cond ((zerop (tn-offset r)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst r)))))))))) @@ -1155,8 +1163,8 @@ (unless (zerop (tn-offset x)) (inst fxch x) ; x to top of stack (unless (location= x y) - (inst fst x))) ; maybe save it - (inst ,inst) ; clobber st0 + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. (unless (zerop (tn-offset y)) (inst fst y)))))) @@ -1224,7 +1232,6 @@ (y :scs (long-reg))) (:arg-types long-float long-float)) - (define-vop (0/single-float float-test) (:translate >) (:args (x :scs (single-reg))) - #!-negative-zero-is-not-zero (:arg-types single-float (:constant (single-float 0f0 0f0))) - #!+negative-zero-is-not-zero - (:arg-types single-float (:constant (single-float -0f0 0f0))) (:variant #x00)) (define-vop (>0/double-float float-test) (:translate >) (:args (x :scs (double-reg))) - #!-negative-zero-is-not-zero (:arg-types double-float (:constant (double-float 0d0 0d0))) - #!+negative-zero-is-not-zero - (:arg-types double-float (:constant (double-float -0d0 0d0))) (:variant #x00)) #!+long-float (define-vop (>0/long-float float-test) (:translate >) (:args (x :scs (long-reg))) - #!-negative-zero-is-not-zero (:arg-types long-float (:constant (long-float 0l0 0l0))) - #!+negative-zero-is-not-zero - (:arg-types long-float (:constant (long-float -0l0 0l0))) (:variant #x00)) #!+long-float @@ -1666,8 +1646,8 @@ #!+long-float (frob %long-float/unsigned %long-float long-reg long-float)) -;;; These should be no-ops but the compiler might want to move -;;; some things around +;;; These should be no-ops but the compiler might want to move some +;;; things around. (macrolet ((frob (name translate from-sc from-type to-sc to-type) `(define-vop (,name) (:args (x :scs (,from-sc) :target y)) @@ -1734,10 +1714,10 @@ ;; Catch any pending FPE exceptions. (inst wait))) (,(if round-p 'progn 'pseudo-atomic) - ;; normal mode (for now) is "round to best" + ;; Normal mode (for now) is "round to best". (with-tn@fp-top (x) ,@(unless round-p - '((inst fnstcw scw) ; save current control word + '((inst fnstcw scw) ; save current control word (move rcw scw) ; into 16-bit register (inst or rcw (ash #b11 10)) ; CHOP (move stack-temp rcw) @@ -1781,7 +1761,7 @@ '((note-this-location vop :internal-error) ;; Catch any pending FPE exceptions. (inst wait))) - ;; normal mode (for now) is "round to best" + ;; Normal mode (for now) is "round to best". (unless (zerop (tn-offset x)) (copy-fp-reg-to-fr0 x)) ,@(unless round-p @@ -1827,7 +1807,7 @@ (signed-reg (inst mov res bits)) (signed-stack - (assert (location= bits res))))) + (aver (location= bits res))))) (single-reg (sc-case bits (signed-reg @@ -1855,7 +1835,7 @@ (storew lo-bits ebp-tn (- (1+ offset))) (with-empty-tn@fp-top(res) (inst fldd (make-ea :dword :base ebp-tn - :disp (- (* (1+ offset) word-bytes)))))))) + :disp (- (* (1+ offset) n-word-bytes)))))))) #!+long-float (define-vop (make-long-float) @@ -1876,7 +1856,7 @@ (storew lo-bits ebp-tn (- (+ offset 2))) (with-empty-tn@fp-top(res) (inst fldl (make-ea :dword :base ebp-tn - :disp (- (* (+ offset 2) word-bytes)))))))) + :disp (- (* (+ offset 2) n-word-bytes)))))))) (define-vop (single-float-bits) (:args (float :scs (single-reg descriptor-reg) @@ -1900,8 +1880,8 @@ (inst mov bits float)) (descriptor-reg (loadw - bits float sb!vm:single-float-value-slot - sb!vm:other-pointer-type)))) + bits float single-float-value-slot + other-pointer-lowtag)))) (signed-stack (sc-case float (single-reg @@ -1924,14 +1904,14 @@ (with-tn@fp-top(float) (let ((where (make-ea :dword :base ebp-tn :disp (- (* (+ 2 (tn-offset temp)) - word-bytes))))) + n-word-bytes))))) (inst fstd where))) (loadw hi-bits ebp-tn (- (1+ (tn-offset temp))))) (double-stack (loadw hi-bits ebp-tn (- (1+ (tn-offset float))))) (descriptor-reg - (loadw hi-bits float (1+ sb!vm:double-float-value-slot) - sb!vm:other-pointer-type))))) + (loadw hi-bits float (1+ double-float-value-slot) + other-pointer-lowtag))))) (define-vop (double-float-low-bits) (:args (float :scs (double-reg descriptor-reg) @@ -1949,14 +1929,14 @@ (with-tn@fp-top(float) (let ((where (make-ea :dword :base ebp-tn :disp (- (* (+ 2 (tn-offset temp)) - word-bytes))))) + n-word-bytes))))) (inst fstd where))) (loadw lo-bits ebp-tn (- (+ 2 (tn-offset temp))))) (double-stack (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) (descriptor-reg - (loadw lo-bits float sb!vm:double-float-value-slot - sb!vm:other-pointer-type))))) + (loadw lo-bits float double-float-value-slot + other-pointer-lowtag))))) #!+long-float (define-vop (long-float-exp-bits) @@ -1975,21 +1955,21 @@ (with-tn@fp-top(float) (let ((where (make-ea :dword :base ebp-tn :disp (- (* (+ 3 (tn-offset temp)) - word-bytes))))) + n-word-bytes))))) (store-long-float where))) (inst movsx exp-bits (make-ea :word :base ebp-tn - :disp (* (- (1+ (tn-offset temp))) word-bytes)))) + :disp (* (- (1+ (tn-offset temp))) n-word-bytes)))) (long-stack (inst movsx exp-bits (make-ea :word :base ebp-tn - :disp (* (- (1+ (tn-offset float))) word-bytes)))) + :disp (* (- (1+ (tn-offset float))) n-word-bytes)))) (descriptor-reg (inst movsx exp-bits (make-ea :word :base float - :disp (- (* (+ 2 sb!vm:long-float-value-slot) - word-bytes) - sb!vm:other-pointer-type))))))) + :disp (- (* (+ 2 long-float-value-slot) + n-word-bytes) + other-pointer-lowtag))))))) #!+long-float (define-vop (long-float-high-bits) @@ -2008,14 +1988,14 @@ (with-tn@fp-top(float) (let ((where (make-ea :dword :base ebp-tn :disp (- (* (+ 3 (tn-offset temp)) - word-bytes))))) + n-word-bytes))))) (store-long-float where))) (loadw hi-bits ebp-tn (- (+ (tn-offset temp) 2)))) (long-stack (loadw hi-bits ebp-tn (- (+ (tn-offset float) 2)))) (descriptor-reg - (loadw hi-bits float (1+ sb!vm:long-float-value-slot) - sb!vm:other-pointer-type))))) + (loadw hi-bits float (1+ long-float-value-slot) + other-pointer-lowtag))))) #!+long-float (define-vop (long-float-low-bits) @@ -2034,14 +2014,14 @@ (with-tn@fp-top(float) (let ((where (make-ea :dword :base ebp-tn :disp (- (* (+ 3 (tn-offset temp)) - word-bytes))))) + n-word-bytes))))) (store-long-float where))) (loadw lo-bits ebp-tn (- (+ (tn-offset temp) 3)))) (long-stack (loadw lo-bits ebp-tn (- (+ (tn-offset float) 3)))) (descriptor-reg - (loadw lo-bits float sb!vm:long-float-value-slot - sb!vm:other-pointer-type))))) + (loadw lo-bits float long-float-value-slot + other-pointer-lowtag))))) ;;;; float mode hackery @@ -2050,9 +2030,9 @@ (defknown ((setf floating-point-modes)) (float-modes) float-modes) -(defconstant npx-env-size (* 7 sb!vm:word-bytes)) -(defconstant npx-cw-offset 0) -(defconstant npx-sw-offset 4) +(def!constant npx-env-size (* 7 n-word-bytes)) +(def!constant npx-cw-offset 0) +(def!constant npx-sw-offset 4) (define-vop (floating-point-modes) (:results (res :scs (unsigned-reg))) @@ -2062,16 +2042,16 @@ (:temporary (:sc unsigned-reg :offset eax-offset :target res :to :result) eax) (:generator 8 - (inst sub esp-tn npx-env-size) ; make space on stack - (inst wait) ; Catch any pending FPE exceptions + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions (inst fstenv (make-ea :dword :base esp-tn)) ; masks all exceptions - (inst fldenv (make-ea :dword :base esp-tn)) ; restore previous state - ;; Current status to high word + (inst fldenv (make-ea :dword :base esp-tn)) ; Restore previous state. + ;; Move current status to high word. (inst mov eax (make-ea :dword :base esp-tn :disp (- npx-sw-offset 2))) - ;; Exception mask to low word + ;; Move exception mask to low word. (inst mov ax-tn (make-ea :word :base esp-tn :disp npx-cw-offset)) - (inst add esp-tn npx-env-size) ; Pop stack - (inst xor eax #x3f) ; Flip exception mask to trap enable bits + (inst add esp-tn npx-env-size) ; Pop stack. + (inst xor eax #x3f) ; Flip exception mask to trap enable bits. (move res eax))) (define-vop (set-floating-point-modes) @@ -2084,16 +2064,16 @@ (:temporary (:sc unsigned-reg :offset eax-offset :from :eval :to :result) eax) (:generator 3 - (inst sub esp-tn npx-env-size) ; make space on stack - (inst wait) ; Catch any pending FPE exceptions + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions. (inst fstenv (make-ea :dword :base esp-tn)) (inst mov eax new) - (inst xor eax #x3f) ; turn trap enable bits into exception mask + (inst xor eax #x3f) ; Turn trap enable bits into exception mask. (inst mov (make-ea :word :base esp-tn :disp npx-cw-offset) ax-tn) (inst shr eax 16) ; position status word (inst mov (make-ea :word :base esp-tn :disp npx-sw-offset) ax-tn) (inst fldenv (make-ea :dword :base esp-tn)) - (inst add esp-tn npx-env-size) ; Pop stack + (inst add esp-tn npx-env-size) ; Pop stack. (move res new))) #!-long-float @@ -2127,8 +2107,7 @@ (inst fst x))) ; maybe save it (inst ,op) ; clobber st0 (cond ((zerop (tn-offset y)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst y))))))) @@ -2177,114 +2156,9 @@ (inst fxch fr1) (inst fstd y))))) -;;; These versions of fsin, fcos, and ftan try to use argument -;;; reduction but to do this accurately requires greater precision and -;;; it is hopelessly inaccurate. -#+nil -(macrolet ((frob (func trans op) - `(define-vop (,func) - (:translate ,trans) - (:args (x :scs (double-reg) :target fr0)) - (:temporary (:sc unsigned-reg :offset eax-offset - :from :eval :to :result) eax) - (:temporary (:sc unsigned-reg :offset fr0-offset - :from :argument :to :result) fr0) - (:temporary (:sc unsigned-reg :offset fr1-offset - :from :argument :to :result) fr1) - (:results (y :scs (double-reg))) - (:arg-types double-float) - (:result-types double-float) - (:policy :fast-safe) - (:note "inline sin/cos function") - (:vop-var vop) - (:save-p :compute-only) - (:ignore eax) - (:generator 5 - (note-this-location vop :internal-error) - (unless (zerop (tn-offset x)) - (inst fxch x) ; x to top of stack - (unless (location= x y) - (inst fst x))) ; maybe save it - (inst ,op) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :z DONE) - ;; Else x was out of range so reduce it; ST0 is unchanged. - (inst fstp fr1) ; Load 2*PI - (inst fldpi) - (inst fadd fr0) - (inst fxch fr1) - LOOP - (inst fprem1) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :nz LOOP) - (inst ,op) - DONE - (unless (zerop (tn-offset y)) - (inst fstd y)))))) - (frob fsin %sin fsin) - (frob fcos %cos fcos)) - -#+nil -(define-vop (ftan) - (:translate %tan) - (:args (x :scs (double-reg) :target fr0)) - (:temporary (:sc unsigned-reg :offset eax-offset - :from :argument :to :result) eax) - (:temporary (:sc double-reg :offset fr0-offset - :from :argument :to :result) fr0) - (:temporary (:sc double-reg :offset fr1-offset - :from :argument :to :result) fr1) - (:results (y :scs (double-reg))) - (:arg-types double-float) - (:result-types double-float) - (:policy :fast-safe) - (:note "inline tan function") - (:vop-var vop) - (:save-p :compute-only) - (:ignore eax) - (:generator 5 - (note-this-location vop :internal-error) - (case (tn-offset x) - (0 - (inst fstp fr1)) - (1 - (inst fstp fr0)) - (t - (inst fstp fr0) - (inst fstp fr0) - (inst fldd (make-random-tn :kind :normal - :sc (sc-or-lose 'double-reg) - :offset (- (tn-offset x) 2))))) - (inst fptan) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :z DONE) - ;; Else x was out of range so reduce it; ST0 is unchanged. - (inst fldpi) ; Load 2*PI - (inst fadd fr0) - (inst fxch fr1) - LOOP - (inst fprem1) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :nz LOOP) - (inst fstp fr1) - (inst fptan) - DONE - ;; Result is in fr1 - (case (tn-offset y) - (0 - (inst fxch fr1)) - (1) - (t - (inst fxch fr1) - (inst fstd y))))) - -;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if -;;; the argument is out of range 2^63 and would thus be hopelessly -;;; inaccurate. +;;; KLUDGE: these versions of fsin, fcos, and ftan simply load a 0.0 +;;; result if the argument is out of range 2^63 and would thus be +;;; hopelessly inaccurate. (macrolet ((frob (func trans op) `(define-vop (,func) (:translate ,trans) @@ -2355,8 +2229,7 @@ (inst fnstsw) ; status word to ax (inst and ah-tn #x04) ; C2 (inst jmp :z DONE) - ;; Else x was out of range so reduce it; ST0 is unchanged. - (inst fldz) ; Load 0.0 + ;; Else x was out of range so load 0.0 (inst fxch fr1) DONE ;; Result is in fr1 @@ -2368,59 +2241,8 @@ (inst fxch fr1) (inst fstd y))))) -#+nil -(define-vop (fexp) - (:translate %exp) - (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) - (:temporary (:sc double-reg :offset fr0-offset - :from :argument :to :result) fr0) - (:temporary (:sc double-reg :offset fr1-offset - :from :argument :to :result) fr1) - (:temporary (:sc double-reg :offset fr2-offset - :from :argument :to :result) fr2) - (:results (y :scs (double-reg))) - (:arg-types double-float) - (:result-types double-float) - (:policy :fast-safe) - (:note "inline exp function") - (:vop-var vop) - (:save-p :compute-only) - (:generator 5 - (note-this-location vop :internal-error) - (sc-case x - (double-reg - (cond ((zerop (tn-offset x)) - ;; x is in fr0 - (inst fstp fr1) - (inst fldl2e) - (inst fmul fr1)) - (t - ;; x is in a FP reg, not fr0 - (inst fstp fr0) - (inst fldl2e) - (inst fmul x)))) - ((double-stack descriptor-reg) - (inst fstp fr0) - (inst fldl2e) - (if (sc-is x double-stack) - (inst fmuld (ea-for-df-stack x)) - (inst fmuld (ea-for-df-desc x))))) - ;; Now fr0=x log2(e) - (inst fst fr1) - (inst frndint) - (inst fst fr2) - (inst fsubp-sti fr1) - (inst f2xm1) - (inst fld1) - (inst faddp-sti fr1) - (inst fscale) - (inst fld fr0) - (case (tn-offset y) - ((0 1)) - (t (inst fstd y))))) - -;;; Modified exp that handles the following special cases: -;;; exp(+Inf) is +Inf; exp(-Inf) is 0; exp(NaN) is NaN. +;;; %exp that handles the following special cases: exp(+Inf) is +Inf; +;;; exp(-Inf) is 0; exp(NaN) is NaN. (define-vop (fexp) (:translate %exp) (:args (x :scs (double-reg) :target fr0)) @@ -2935,12 +2757,6 @@ (:arg-types double-float) (:result-types double-float) (:policy :fast-safe) - ;; FIXME: PENTIUM isn't used on the *FEATURES* list of the CMU CL I based - ;; SBCL on, even when it is running on a Pentium. Find out what's going - ;; on here and see what the proper value should be. (Perhaps just use the - ;; apparently-conservative value of T always?) For more confusion, see also - ;; apparently-reversed-sense test for the FLOG1P-PENTIUM vop below. - (:guard #!+pentium nil #!-pentium t) (:note "inline log1p function") (:ignore temp) (:generator 5 @@ -2994,12 +2810,11 @@ (:arg-types double-float) (:result-types double-float) (:policy :fast-safe) - ;; FIXME: See comments on DEFINE-VOP FLOG1P :GUARD above. - (:guard #!+pentium t #!-pentium nil) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) (:note "inline log1p with limited x range function") (:vop-var vop) (:save-p :compute-only) - (:generator 5 + (:generator 4 (note-this-location vop :internal-error) (sc-case x (double-reg @@ -3161,6 +2976,10 @@ (descriptor-reg (inst fstp fr0) (inst fldd (ea-for-df-desc y))))) + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (zerop (tn-offset x))) + ;; copy x to fr1 + (inst fst fr1)) ;; y in fr0; x not in fr1 ((and (sc-is y double-reg) (zerop (tn-offset y))) (inst fxch fr1) @@ -3232,11 +3051,8 @@ (case (tn-offset r) ((0 1)) (t (inst fstd r))))) - -) ; progn #!-long-float - +) ; PROGN #!-LONG-FLOAT - #!+long-float (progn @@ -3268,12 +3084,11 @@ (inst fst x))) ; maybe save it (inst ,op) ; clobber st0 (cond ((zerop (tn-offset y)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst y))))))) - ;; Quick versions of fsin and fcos that require the argument to be + ;; Quick versions of FSIN and FCOS that require the argument to be ;; within range 2^63. (frob fsin-quick %sin-quick fsin) (frob fcos-quick %cos-quick fcos) @@ -3318,111 +3133,6 @@ (inst fxch fr1) (inst fstd y))))) -;;; These versions of fsin, fcos, and ftan try to use argument -;;; reduction but to do this accurately requires greater precision and -;;; it is hopelessly inaccurate. -#+nil -(macrolet ((frob (func trans op) - `(define-vop (,func) - (:translate ,trans) - (:args (x :scs (long-reg) :target fr0)) - (:temporary (:sc unsigned-reg :offset eax-offset - :from :eval :to :result) eax) - (:temporary (:sc long-reg :offset fr0-offset - :from :argument :to :result) fr0) - (:temporary (:sc long-reg :offset fr1-offset - :from :argument :to :result) fr1) - (:results (y :scs (long-reg))) - (:arg-types long-float) - (:result-types long-float) - (:policy :fast-safe) - (:note "inline sin/cos function") - (:vop-var vop) - (:save-p :compute-only) - (:ignore eax) - (:generator 5 - (note-this-location vop :internal-error) - (unless (zerop (tn-offset x)) - (inst fxch x) ; x to top of stack - (unless (location= x y) - (inst fst x))) ; maybe save it - (inst ,op) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :z DONE) - ;; Else x was out of range so reduce it; ST0 is unchanged. - (inst fstp fr1) ; Load 2*PI - (inst fldpi) - (inst fadd fr0) - (inst fxch fr1) - LOOP - (inst fprem1) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :nz LOOP) - (inst ,op) - DONE - (unless (zerop (tn-offset y)) - (inst fstd y)))))) - (frob fsin %sin fsin) - (frob fcos %cos fcos)) - -#+nil -(define-vop (ftan) - (:translate %tan) - (:args (x :scs (long-reg) :target fr0)) - (:temporary (:sc unsigned-reg :offset eax-offset - :from :argument :to :result) eax) - (:temporary (:sc long-reg :offset fr0-offset - :from :argument :to :result) fr0) - (:temporary (:sc long-reg :offset fr1-offset - :from :argument :to :result) fr1) - (:results (y :scs (long-reg))) - (:arg-types long-float) - (:result-types long-float) - (:policy :fast-safe) - (:note "inline tan function") - (:vop-var vop) - (:save-p :compute-only) - (:ignore eax) - (:generator 5 - (note-this-location vop :internal-error) - (case (tn-offset x) - (0 - (inst fstp fr1)) - (1 - (inst fstp fr0)) - (t - (inst fstp fr0) - (inst fstp fr0) - (inst fldd (make-random-tn :kind :normal - :sc (sc-or-lose 'double-reg) - :offset (- (tn-offset x) 2))))) - (inst fptan) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :z DONE) - ;; Else x was out of range so reduce it; ST0 is unchanged. - (inst fldpi) ; Load 2*PI - (inst fadd fr0) - (inst fxch fr1) - LOOP - (inst fprem1) - (inst fnstsw) ; status word to ax - (inst and ah-tn #x04) ; C2 - (inst jmp :nz LOOP) - (inst fstp fr1) - (inst fptan) - DONE - ;; Result is in fr1 - (case (tn-offset y) - (0 - (inst fxch fr1)) - (1) - (t - (inst fxch fr1) - (inst fstd y))))) - ;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if ;;; the argument is out of range 2^63 and would thus be hopelessly ;;; inaccurate. @@ -4029,8 +3739,6 @@ ;; Perhaps this is OK because of the #!+LONG-FLOAT wrapped around ;; an enormous PROGN above. Still, it would be probably be good to ;; add some code to warn about redefining VOPs. - ;; FIXME 2: See comments on DEFINE-VOP FLOG1P :GUARD above. - (:guard #!+pentium nil #!-pentium t) (:note "inline log1p function") (:ignore temp) (:generator 5 @@ -4084,8 +3792,7 @@ (:arg-types long-float) (:result-types long-float) (:policy :fast-safe) - ;; FIXME: See comments on DEFINE-VOP FLOG1P :GUARD above. - (:guard #!+pentium t #!-pentium) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) (:note "inline log1p function") (:generator 5 (sc-case x @@ -4320,10 +4027,9 @@ ((0 1)) (t (inst fstd r))))) -) ; progn #!+long-float - +) ; PROGN #!+LONG-FLOAT -;;;; Complex float VOPs +;;;; complex float VOPs (define-vop (make-complex-single-float) (:translate complex) @@ -4517,7 +4223,7 @@ (1 (ea-for-clf-imag-desc x))))))) (with-empty-tn@fp-top(r) (inst fldl ea)))) - (t (error "Complex-float-value VOP failure"))))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) (define-vop (realpart/complex-single-float complex-float-value) (:translate realpart) @@ -4580,12 +4286,10 @@ (:result-types long-float) (:note "complex float imagpart") (:variant 1)) - -;;; A hack dummy VOP to bias the representation selection of its -;;; argument towards a FP register which can help avoid consing at -;;; inappropriate locations. - +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations (defknown double-float-reg-bias (double-float) (values)) (define-vop (double-float-reg-bias) (:translate double-float-reg-bias) @@ -4595,7 +4299,6 @@ (:note "inline dummy FP register bias") (:ignore x) (:generator 0)) - (defknown single-float-reg-bias (single-float) (values)) (define-vop (single-float-reg-bias) (:translate single-float-reg-bias)