+(macrolet ((frob (op cost commutativep
+ duplicate-inst op-inst real-move-inst complex-move-inst
+ real-sc real-constant-sc real-type
+ complex-sc complex-constant-sc complex-type
+ real-complex-name complex-real-name)
+ (cond ((not duplicate-inst) ; simple case
+ `(flet ((load-into (r x)
+ (sc-case x
+ (,real-constant-sc
+ (inst ,real-move-inst r
+ (register-inline-constant (tn-value x))))
+ (,complex-constant-sc
+ (inst ,complex-move-inst r
+ (register-inline-constant (tn-value x))))
+ (t (move r x)))))
+ ,(when real-complex-name
+ `(define-vop (,real-complex-name float-op)
+ (:translate ,op)
+ (:args (x :scs (,real-sc ,real-constant-sc)
+ :target r
+ :load-if (not (sc-is x ,real-constant-sc)))
+ (y :scs (,complex-sc ,complex-constant-sc)
+ ,@(when commutativep '(:target r))
+ :load-if (not (sc-is y ,complex-constant-sc))))
+ (:arg-types ,real-type ,complex-type)
+ (:results (r :scs (,complex-sc)
+ ,@(unless commutativep '(:from (:argument 0)))))
+ (:result-types ,complex-type)
+ (:generator ,cost
+ ,(when commutativep
+ `(when (location= y r)
+ (rotatef x y)))
+ (load-into r x)
+ (when (sc-is y ,real-constant-sc ,complex-constant-sc)
+ (setf y (register-inline-constant
+ :aligned (tn-value y))))
+ (inst ,op-inst r y))))
+
+ ,(when complex-real-name
+ `(define-vop (,complex-real-name float-op)
+ (:translate ,op)
+ (:args (x :scs (,complex-sc ,complex-constant-sc)
+ :target r
+ :load-if (not (sc-is x ,complex-constant-sc)))
+ (y :scs (,real-sc ,real-constant-sc)
+ ,@(when commutativep '(:target r))
+ :load-if (not (sc-is y ,real-constant-sc))))
+ (:arg-types ,complex-type ,real-type)
+ (:results (r :scs (,complex-sc)
+ ,@(unless commutativep '(:from (:argument 0)))))
+ (:result-types ,complex-type)
+ (:generator ,cost
+ ,(when commutativep
+ `(when (location= y r)
+ (rotatef x y)))
+ (load-into r x)
+ (when (sc-is y ,real-constant-sc ,complex-constant-sc)
+ (setf y (register-inline-constant
+ :aligned (tn-value y))))
+ (inst ,op-inst r y))))))
+ (commutativep ; must duplicate, but commutative
+ `(progn
+ ,(when real-complex-name
+ `(define-vop (,real-complex-name float-op)
+ (:translate ,op)
+ (:args (x :scs (,real-sc ,real-constant-sc)
+ :target dup
+ :load-if (not (sc-is x ,real-constant-sc)))
+ (y :scs (,complex-sc ,complex-constant-sc)
+ :target r
+ :to :result
+ :load-if (not (sc-is y ,complex-constant-sc))))
+ (:arg-types ,real-type ,complex-type)
+ (:temporary (:sc ,complex-sc :target r
+ :from (:argument 0)
+ :to :result)
+ dup)
+ (:results (r :scs (,complex-sc)))
+ (:result-types ,complex-type)
+ (:generator ,cost
+ (if (sc-is x ,real-constant-sc)
+ (inst ,complex-move-inst dup
+ (register-inline-constant
+ (complex (tn-value x) (tn-value x))))
+ (let ((real x))
+ ,duplicate-inst))
+ ;; safe: dup /= y
+ (when (location= dup r)
+ (rotatef dup y))
+ (if (sc-is y ,complex-constant-sc)
+ (inst ,complex-move-inst r
+ (register-inline-constant (tn-value y)))
+ (move r y))
+ (when (sc-is dup ,complex-constant-sc)
+ (setf dup (register-inline-constant
+ :aligned (tn-value dup))))
+ (inst ,op-inst r dup))))
+
+ ,(when complex-real-name
+ `(define-vop (,complex-real-name float-op)
+ (:translate ,op)
+ (:args (x :scs (,complex-sc ,complex-constant-sc)
+ :target r
+ :to :result
+ :load-if (not (sc-is x ,complex-constant-sc)))
+ (y :scs (,real-sc ,real-constant-sc)
+ :target dup
+ :load-if (not (sc-is y ,real-constant-sc))))
+ (:arg-types ,complex-type ,real-type)
+ (:temporary (:sc ,complex-sc :target r
+ :from (:argument 1)
+ :to :result)
+ dup)
+ (:results (r :scs (,complex-sc)))
+ (:result-types ,complex-type)
+ (:generator ,cost
+ (if (sc-is y ,real-constant-sc)
+ (inst ,complex-move-inst dup
+ (register-inline-constant
+ (complex (tn-value y) (tn-value y))))
+ (let ((real y))
+ ,duplicate-inst))
+ (when (location= dup r)
+ (rotatef x dup))
+ (if (sc-is x ,complex-constant-sc)
+ (inst ,complex-move-inst r
+ (register-inline-constant (tn-value x)))
+ (move r x))
+ (when (sc-is dup ,complex-constant-sc)
+ (setf dup (register-inline-constant
+ :aligned (tn-value dup))))
+ (inst ,op-inst r dup))))))
+ (t ; duplicate, not commutative
+ `(progn
+ ,(when real-complex-name
+ `(define-vop (,real-complex-name float-op)
+ (:translate ,op)
+ (:args (x :scs (,real-sc ,real-constant-sc)
+ :target r
+ :load-if (not (sc-is x ,real-constant-sc)))
+ (y :scs (,complex-sc ,complex-constant-sc)
+ :to :result
+ :load-if (not (sc-is y ,complex-constant-sc))))
+ (:arg-types ,real-type ,complex-type)
+ (:results (r :scs (,complex-sc) :from (:argument 0)))
+ (:result-types ,complex-type)
+ (:generator ,cost
+ (if (sc-is x ,real-constant-sc)
+ (inst ,complex-move-inst dup
+ (register-inline-constant
+ (complex (tn-value x) (tn-value x))))
+ (let ((real x)
+ (dup r))
+ ,duplicate-inst))
+ (when (sc-is y ,complex-constant-sc)
+ (setf y (register-inline-constant
+ :aligned (tn-value y))))
+ (inst ,op-inst r y))))
+
+ ,(when complex-real-name
+ `(define-vop (,complex-real-name float-op)
+ (:translate ,op)
+ (:args (x :scs (,complex-sc)
+ :target r
+ :to :eval)
+ (y :scs (,real-sc ,real-constant-sc)
+ :target dup
+ :load-if (not (sc-is y ,complex-constant-sc))))
+ (:arg-types ,complex-type ,real-type)
+ (:temporary (:sc ,complex-sc :from (:argument 1))
+ dup)
+ (:results (r :scs (,complex-sc) :from :eval))
+ (:result-types ,complex-type)
+ (:generator ,cost
+ (if (sc-is y ,real-constant-sc)
+ (setf dup (register-inline-constant
+ :aligned (complex (tn-value y)
+ (tn-value y))))
+ (let ((real y))
+ ,duplicate-inst))
+ (move r x)
+ (inst ,op-inst r dup))))))))
+ (def-real-complex-op (op commutativep duplicatep
+ single-inst single-real-complex-name single-complex-real-name single-cost
+ double-inst double-real-complex-name double-complex-real-name double-cost)
+ `(progn
+ (frob ,op ,single-cost ,commutativep
+ ,(and duplicatep
+ `(progn
+ (move dup real)
+ (inst unpcklps dup dup)))
+ ,single-inst movss movq
+ single-reg fp-single-immediate single-float
+ complex-single-reg fp-complex-single-immediate complex-single-float
+ ,single-real-complex-name ,single-complex-real-name)
+ (frob ,op ,double-cost ,commutativep
+ ,(and duplicatep
+ `(progn
+ (move dup real)
+ (inst unpcklpd dup dup)))
+ ,double-inst movsd movapd
+ double-reg fp-double-immediate double-float
+ complex-double-reg fp-complex-double-immediate complex-double-float
+ ,double-real-complex-name ,double-complex-real-name))))
+ (def-real-complex-op + t nil
+ addps +/real-complex-single-float +/complex-real-single-float 3
+ addpd +/real-complex-double-float +/complex-real-double-float 4)
+ (def-real-complex-op - nil nil
+ subps -/real-complex-single-float -/complex-real-single-float 3
+ subpd -/real-complex-double-float -/complex-real-double-float 4)
+ (def-real-complex-op * t t
+ mulps */real-complex-single-float */complex-real-single-float 4
+ mulpd */real-complex-double-float */complex-real-double-float 5)
+ (def-real-complex-op / nil t
+ nil nil nil nil
+ divpd nil //complex-real-double-float 19))
+
+(define-vop (//complex-real-single-float float-op)
+ (:translate /)
+ (:args (x :scs (complex-single-reg fp-complex-single-immediate fp-complex-single-zero)
+ :to (:result 0)
+ :target r
+ :load-if (not (sc-is x fp-complex-single-immediate fp-complex-single-zero)))
+ (y :scs (single-reg fp-single-immediate fp-single-zero)
+ :target dup
+ :load-if (not (sc-is y fp-single-immediate fp-single-zero))))
+ (:arg-types complex-single-float single-float)
+ (:temporary (:sc complex-single-reg :from (:argument 1)) dup)
+ (:results (r :scs (complex-single-reg)))
+ (:result-types complex-single-float)
+ (:generator 12
+ (flet ((duplicate (x)
+ (let ((word (ldb (byte 64 0)
+ (logior (ash (single-float-bits (imagpart x)) 32)
+ (ldb (byte 32 0)
+ (single-float-bits (realpart x)))))))
+ (register-inline-constant :oword (logior (ash word 64) word)))))
+ (sc-case y
+ (fp-single-immediate
+ (setf dup (duplicate (complex (tn-value y) (tn-value y)))))
+ (fp-single-zero
+ (inst xorps dup dup))
+ (t (move dup y)
+ (inst shufps dup dup #b00000000)))
+ (sc-case x
+ (fp-complex-single-immediate
+ (inst movaps r (duplicate (tn-value x))))
+ (fp-complex-single-zero
+ (inst xorps r r))
+ (t
+ (move r x)
+ (inst unpcklpd r r)))
+ (inst divps r dup)
+ (inst movq r r))))
+
+;; Complex multiplication
+;; r := rx * ry - ix * iy
+;; i := rx * iy + ix * ry
+;;
+;; Transpose for SIMDness
+;; rx*ry rx*iy
+;; -ix*iy +ix*ry
+;;
+;; [rx rx] * [ry iy]
+;;+ [ix ix] * [-iy ry]
+;; [r i]
+
+(macrolet ((define-complex-* (name cost type sc tmp-p &body body)
+ `(define-vop (,name float-op)
+ (:translate *)
+ (:args (x :scs (,sc) :target r)
+ (y :scs (,sc) :target copy-y))
+ (:arg-types ,type ,type)
+ (:temporary (:sc ,sc) imag)
+ (:temporary (:sc ,sc :from :eval) copy-y)
+ ,@(when tmp-p
+ `((:temporary (:sc ,sc) xmm)))
+ (:results (r :scs (,sc) :from :eval))
+ (:result-types ,type)
+ (:generator ,cost
+ (when (or (location= x copy-y)
+ (location= y r))
+ (rotatef x y))
+ ,@body))))
+ (define-complex-* */complex-single-float 20
+ complex-single-float complex-single-reg t
+ (inst xorps xmm xmm)
+ (move r x)
+ (inst unpcklps r r)
+ (move imag r)
+ (inst unpckhpd imag xmm)
+ (inst unpcklpd r xmm)
+ (move copy-y y) ; y == r only if y == x == r
+ (setf y copy-y)
+
+ (inst mulps r y)
+
+ (inst shufps y y #b11110001)
+ (inst xorps y (register-inline-constant :oword (ash 1 31)))
+
+ (inst mulps imag y)
+ (inst addps r imag))
+ (define-complex-* */complex-double-float 25
+ complex-double-float complex-double-reg nil
+ (move imag x)
+ (move r x)
+ (move copy-y y)
+ (setf y copy-y)
+ (inst unpcklpd r r)
+ (inst unpckhpd imag imag)
+
+ (inst mulpd r y)
+
+ (inst shufpd y y #b01)
+ (inst xorpd y (register-inline-constant :oword (ash 1 63)))
+
+ (inst mulpd imag y)
+ (inst addpd r imag)))
+
+(define-vop (fsqrt)
+ (:args (x :scs (double-reg)))
+ (:results (y :scs (double-reg)))
+ (:translate %sqrt)
+ (:policy :fast-safe)
+ (:arg-types double-float)
+ (:result-types double-float)
+ (:note "inline float arithmetic")
+ (:vop-var vop)
+ (:save-p :compute-only)
+ (:generator 1
+ (unless (location= x y)
+ (inst xorpd y y))
+ (note-this-location vop :internal-error)
+ (inst sqrtsd y x)))