X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Ffloat.lisp;h=62d186ec7ab865091a63a28b1892912068ab92fa;hb=986ce2596822cc0871b609346aaf592348aca596;hp=89e5d715637b96a82dbc144ea7b3c6f47e7bb22d;hpb=a530bbe337109d898d5b4a001fc8f1afa3b5dc39;p=sbcl.git diff --git a/src/compiler/x86/float.lisp b/src/compiler/x86/float.lisp index 89e5d71..62d186e 100644 --- a/src/compiler/x86/float.lisp +++ b/src/compiler/x86/float.lisp @@ -10,43 +10,41 @@ ;;;; files for more information. (in-package "SB!VM") - -(file-comment - "$Header$") (macrolet ((ea-for-xf-desc (tn slot) `(make-ea :dword :base ,tn - :disp (- (* ,slot sb!vm:word-bytes) sb!vm:other-pointer-type)))) + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) (defun ea-for-sf-desc (tn) - (ea-for-xf-desc tn sb!vm:single-float-value-slot)) + (ea-for-xf-desc tn single-float-value-slot)) (defun ea-for-df-desc (tn) - (ea-for-xf-desc tn sb!vm:double-float-value-slot)) + (ea-for-xf-desc tn double-float-value-slot)) #!+long-float (defun ea-for-lf-desc (tn) - (ea-for-xf-desc tn sb!vm:long-float-value-slot)) + (ea-for-xf-desc tn long-float-value-slot)) ;; complex floats (defun ea-for-csf-real-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-single-float-real-slot)) + (ea-for-xf-desc tn complex-single-float-real-slot)) (defun ea-for-csf-imag-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-single-float-imag-slot)) + (ea-for-xf-desc tn complex-single-float-imag-slot)) (defun ea-for-cdf-real-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-double-float-real-slot)) + (ea-for-xf-desc tn complex-double-float-real-slot)) (defun ea-for-cdf-imag-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-double-float-imag-slot)) + (ea-for-xf-desc tn complex-double-float-imag-slot)) #!+long-float (defun ea-for-clf-real-desc (tn) - (ea-for-xf-desc tn sb!vm:complex-long-float-real-slot)) + (ea-for-xf-desc tn complex-long-float-real-slot)) #!+long-float (defun ea-for-clf-imag-desc (tn) - (ea-for-xf-desc tn 
sb!vm:complex-long-float-imag-slot))) + (ea-for-xf-desc tn complex-long-float-imag-slot))) (macrolet ((ea-for-xf-stack (tn kind) `(make-ea :dword :base ebp-tn :disp (- (* (+ (tn-offset ,tn) (ecase ,kind (:single 1) (:double 2) (:long 3))) - sb!vm:word-bytes))))) + n-word-bytes))))) (defun ea-for-sf-stack (tn) (ea-for-xf-stack tn :single)) (defun ea-for-df-stack (tn) @@ -55,7 +53,22 @@ (defun ea-for-lf-stack (tn) (ea-for-xf-stack tn :long))) -;;; Complex float stack EAs +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +(defun maybe-fp-wait (node &optional note-next-instruction) + (when (policy node (or (= debug 3) (> safety speed))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait))) + +;;; complex float stack EAs (macrolet ((ea-for-cxf-stack (tn kind slot &optional base) `(make-ea :dword :base ,base @@ -65,7 +78,7 @@ (:double 2) (:long 3)) (ecase ,slot (:real 1) (:imag 2)))) - sb!vm:word-bytes))))) + n-word-bytes))))) (defun ea-for-csf-real-stack (tn &optional (base ebp-tn)) (ea-for-cxf-stack tn :single :real base)) (defun ea-for-csf-imag-stack (tn &optional (base ebp-tn)) @@ -88,7 +101,7 @@ ;;; ;;; Using a Pop then load. (defun copy-fp-reg-to-fr0 (reg) - (assert (not (zerop (tn-offset reg)))) + (aver (not (zerop (tn-offset reg)))) (inst fstp fr0-tn) (inst fld (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) @@ -96,7 +109,7 @@ ;;; Using Fxch then Fst to restore the original reg contents. 
#+nil (defun copy-fp-reg-to-fr0 (reg) - (assert (not (zerop (tn-offset reg)))) + (aver (not (zerop (tn-offset reg)))) (inst fxch reg) (inst fst reg)) @@ -158,12 +171,12 @@ ;; This may not be necessary as ST0 is likely invalid now. (inst fxch x)))) -;;; The i387 has instructions to load some useful constants. -;;; This doesn't save much time but might cut down on memory -;;; access and reduce the size of the constant vector (CV). -;;; Intel claims they are stored in a more precise form on chip. -;;; Anyhow, might as well use the feature. It can be turned -;;; off by hacking the "immediate-constant-sc" in vm.lisp. +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. (define-move-function (load-fp-constant 2) (vop x y) ((fp-constant) (single-reg double-reg #!+long-float long-reg)) (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) @@ -182,7 +195,7 @@ (inst fldlg2)) ((= value (log 2l0 2.718281828459045235360287471352662L0)) (inst fldln2)) - (t (warn "Ignoring bogus i387 Constant ~A" value)))))) + (t (warn "ignoring bogus i387 constant ~A" value)))))) ;;;; complex float move functions @@ -210,7 +223,7 @@ (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) :offset (1+ (tn-offset x)))) -;;; x is source, y is destination +;;; x is source, y is destination. (define-move-function (load-complex-single 2) (vop x y) ((complex-single-stack) (complex-single-reg)) (let ((real-tn (complex-single-reg-real-tn y))) @@ -285,7 +298,7 @@ ;;;; move VOPs -;;; Float register to register moves. 
+;;; float register to register moves (define-vop (float-move) (:args (x)) (:results (y)) @@ -375,8 +388,8 @@ (:note "float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:single-float-type - sb!vm:single-float-size node) + single-float-widetag + single-float-size node) (with-tn@fp-top(x) (inst fst (ea-for-sf-desc y)))))) (define-move-vop move-from-single :move @@ -389,8 +402,8 @@ (:note "float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:double-float-type - sb!vm:double-float-size + double-float-widetag + double-float-size node) (with-tn@fp-top(x) (inst fstd (ea-for-df-desc y)))))) @@ -405,8 +418,8 @@ (:note "float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:long-float-type - sb!vm:long-float-size + long-float-widetag + long-float-size node) (with-tn@fp-top(x) (store-long-float (ea-for-lf-desc y)))))) @@ -442,7 +455,7 @@ (define-move-vop move-from-fp-constant :move (fp-constant) (descriptor-reg)) -;;; Move from a descriptor to a float register +;;; Move from a descriptor to a float register. (define-vop (move-to-single) (:args (x :scs (descriptor-reg))) (:results (y :scs (single-reg))) @@ -471,7 +484,6 @@ (inst fldl (ea-for-lf-desc x))))) #!+long-float (define-move-vop move-to-long :move (descriptor-reg) (long-reg)) - ;;; Move from complex float to a descriptor reg. allocating a new ;;; complex float object in the process. 
@@ -482,8 +494,9 @@ (:note "complex float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:complex-single-float-type - sb!vm:complex-single-float-size node) + complex-single-float-widetag + complex-single-float-size + node) (let ((real-tn (complex-single-reg-real-tn x))) (with-tn@fp-top(real-tn) (inst fst (ea-for-csf-real-desc y)))) @@ -500,8 +513,8 @@ (:note "complex float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:complex-double-float-type - sb!vm:complex-double-float-size + complex-double-float-widetag + complex-double-float-size node) (let ((real-tn (complex-double-reg-real-tn x))) (with-tn@fp-top(real-tn) @@ -520,8 +533,8 @@ (:note "complex float to pointer coercion") (:generator 13 (with-fixed-allocation (y - sb!vm:complex-long-float-type - sb!vm:complex-long-float-size + complex-long-float-widetag + complex-long-float-size node) (let ((real-tn (complex-long-reg-real-tn x))) (with-tn@fp-top(real-tn) @@ -533,7 +546,7 @@ (define-move-vop move-from-complex-long :move (complex-long-reg) (descriptor-reg)) -;;; Move from a descriptor to a complex float register +;;; Move from a descriptor to a complex float register. (macrolet ((frob (name sc format) `(progn (define-vop (,name) @@ -560,14 +573,13 @@ (frob move-to-complex-double complex-double-reg :double) #!+long-float (frob move-to-complex-double complex-long-reg :long)) - -;;;; The move argument vops. +;;;; the move argument vops ;;;; -;;;; Note these are also used to stuff fp numbers onto the c-call stack -;;;; so the order is different than the lisp-stack. +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. 
-;;; The general move-argument vop +;;; the general move-argument vop (macrolet ((frob (name sc stack-sc format) `(progn (define-vop (,name) @@ -590,7 +602,7 @@ (inst fxch x))))) (,stack-sc (if (= (tn-offset fp) esp-offset) - (let* ((offset (* (tn-offset y) word-bytes)) + (let* ((offset (* (tn-offset y) n-word-bytes)) (ea (make-ea :dword :base fp :disp offset))) (with-tn@fp-top(x) ,@(ecase format @@ -605,7 +617,7 @@ (:single 1) (:double 2) (:long 3))) - sb!vm:word-bytes))))) + n-word-bytes))))) (with-tn@fp-top(x) ,@(ecase format (:single '((inst fst ea))) @@ -619,7 +631,7 @@ #!+long-float (frob move-long-float-argument long-reg long-stack :long)) -;;;; Complex float move-argument vop +;;;; complex float move-argument vop (macrolet ((frob (name sc stack-sc format) `(progn (define-vop (,name) @@ -705,7 +717,7 @@ ;;;; arithmetic VOPs -;;; dtc: The floating point arithmetic vops. +;;; dtc: the floating point arithmetic vops ;;; ;;; Note: Although these can accept x and y on the stack or pointed to ;;; from a descriptor register, they will work with register loading @@ -795,9 +807,7 @@ (inst fld (ea-for-sf-desc y))))) ;; ST(i) = ST(i) op ST0 (inst ,fop-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; y and r are the same register. ((and (sc-is y single-reg) (location= y r)) (cond ((zerop (tn-offset r)) @@ -823,10 +833,8 @@ (inst fld (ea-for-sf-desc x))))) ;; ST(i) = ST(0) op ST(i) (inst ,fopr-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) - ;; The default case + (maybe-fp-wait node vop)) + ;; the default case (t ;; Get the result to ST0. @@ -875,12 +883,11 @@ (note-next-instruction vop :internal-error) - ;; Finally save the result + ;; Finally save the result. 
(sc-case r (single-reg (cond ((zerop (tn-offset r)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst r)))) (single-stack @@ -903,7 +910,7 @@ (:save-p :compute-only) (:node-var node) (:generator ,dcost - ;; Handle a few special cases + ;; Handle a few special cases. (cond ;; x, y, and r are the same register. ((and (sc-is x double-reg) (location= x r) (location= y r)) @@ -941,9 +948,7 @@ (inst fldd (ea-for-df-desc y))))) ;; ST(i) = ST(i) op ST0 (inst ,fop-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; y and r are the same register. ((and (sc-is y double-reg) (location= y r)) (cond ((zerop (tn-offset r)) @@ -969,10 +974,8 @@ (inst fldd (ea-for-df-desc x))))) ;; ST(i) = ST(0) op ST(i) (inst ,fopr-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) - ;; The default case + (maybe-fp-wait node vop)) + ;; the default case (t ;; Get the result to ST0. @@ -1021,12 +1024,11 @@ (note-next-instruction vop :internal-error) - ;; Finally save the result + ;; Finally save the result. (sc-case r (double-reg (cond ((zerop (tn-offset r)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst r)))) (double-stack @@ -1048,7 +1050,7 @@ (:save-p :compute-only) (:node-var node) (:generator ,lcost - ;; Handle a few special cases + ;; Handle a few special cases. (cond ;; x, y, and r are the same register. ((and (location= x r) (location= y r)) @@ -1072,9 +1074,7 @@ (copy-fp-reg-to-fr0 y)) ;; ST(i) = ST(i) op ST0 (inst ,fop-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; y and r are the same register. 
((location= y r) (cond ((zerop (tn-offset r)) @@ -1086,9 +1086,7 @@ (copy-fp-reg-to-fr0 x)) ;; ST(i) = ST(0) op ST(i) (inst ,fopr-sti r))) - (when (policy node (or (= debug 3) (> safety speed))) - (note-next-instruction vop :internal-error) - (inst wait))) + (maybe-fp-wait node vop)) ;; the default case (t ;; Get the result to ST0. @@ -1114,8 +1112,7 @@ ;; Finally save the result. (cond ((zerop (tn-offset r)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst r)))))))))) @@ -1155,8 +1152,8 @@ (unless (zerop (tn-offset x)) (inst fxch x) ; x to top of stack (unless (location= x y) - (inst fst x))) ; maybe save it - (inst ,inst) ; clobber st0 + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. (unless (zerop (tn-offset y)) (inst fst y)))))) @@ -1224,7 +1221,6 @@ (y :scs (long-reg))) (:arg-types long-float long-float)) - (define-vop ( safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst y))))))) @@ -2935,12 +2930,6 @@ (:arg-types double-float) (:result-types double-float) (:policy :fast-safe) - ;; FIXME: PENTIUM isn't used on the *FEATURES* list of the CMU CL I based - ;; SBCL on, even when it is running on a Pentium. Find out what's going - ;; on here and see what the proper value should be. (Perhaps just use the - ;; apparently-conservative value of T always?) For more confusion, see also - ;; apparently-reversed-sense test for the FLOG1P-PENTIUM vop below. - (:guard #!+pentium nil #!-pentium t) (:note "inline log1p function") (:ignore temp) (:generator 5 @@ -2994,12 +2983,11 @@ (:arg-types double-float) (:result-types double-float) (:policy :fast-safe) - ;; FIXME: See comments on DEFINE-VOP FLOG1P :GUARD above. 
- (:guard #!+pentium t #!-pentium nil) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) (:note "inline log1p with limited x range function") (:vop-var vop) (:save-p :compute-only) - (:generator 5 + (:generator 4 (note-this-location vop :internal-error) (sc-case x (double-reg @@ -3232,11 +3220,8 @@ (case (tn-offset r) ((0 1)) (t (inst fstd r))))) - -) ; progn #!-long-float - +) ; PROGN #!-LONG-FLOAT - #!+long-float (progn @@ -3268,12 +3253,11 @@ (inst fst x))) ; maybe save it (inst ,op) ; clobber st0 (cond ((zerop (tn-offset y)) - (when (policy node (or (= debug 3) (> safety speed))) - (inst wait))) + (maybe-fp-wait node)) (t (inst fst y))))))) - ;; Quick versions of fsin and fcos that require the argument to be + ;; Quick versions of FSIN and FCOS that require the argument to be ;; within range 2^63. (frob fsin-quick %sin-quick fsin) (frob fcos-quick %cos-quick fcos) @@ -4320,10 +4304,9 @@ ((0 1)) (t (inst fstd r))))) -) ; progn #!+long-float - +) ; PROGN #!+LONG-FLOAT -;;;; Complex float VOPs +;;;; complex float VOPs (define-vop (make-complex-single-float) (:translate complex) @@ -4517,7 +4500,7 @@ (1 (ea-for-clf-imag-desc x))))))) (with-empty-tn@fp-top(r) (inst fldl ea)))) - (t (error "Complex-float-value VOP failure"))))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) (define-vop (realpart/complex-single-float complex-float-value) (:translate realpart) @@ -4580,12 +4563,10 @@ (:result-types long-float) (:note "complex float imagpart") (:variant 1)) - -;;; A hack dummy VOP to bias the representation selection of its -;;; argument towards a FP register which can help avoid consing at -;;; inappropriate locations. 
- +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations (defknown double-float-reg-bias (double-float) (values)) (define-vop (double-float-reg-bias) (:translate double-float-reg-bias) @@ -4595,7 +4576,6 @@ (:note "inline dummy FP register bias") (:ignore x) (:generator 0)) - (defknown single-float-reg-bias (single-float) (values)) (define-vop (single-float-reg-bias) (:translate single-float-reg-bias)