From a157ed0be79751f85b8243c06102eea95af06aa3 Mon Sep 17 00:00:00 2001 From: Paul Khuong Date: Thu, 25 Jun 2009 15:37:05 +0000 Subject: [PATCH] 1.0.29.44: Complex float improvements * On all platforms: - Slightly more stable complex-complex float (double and single) division; - New transform for real-complex division; - complex-real and real-complex float addition and subtraction behave as though the real was first upgraded to a complex, thus losing the sign of any imaginary zero. * On x86-64 - Complexes floats are represented packed in a single SSE register; - VOPs for all four arithmetic operations, complex-complex, but also complex-real and real-complex, except for complex-complex and real-complex division; - VOPs for =, negate and conjugate of complexes (complex-real and complex-complex); - VOPs for EQL of floats (real and complexes). - Full register moves for float values in SSE registers should also speed scalar operations up. --- base-target-features.lisp-expr | 12 + make-config.sh | 2 +- package-data-list.lisp-expr | 3 + src/compiler/assem.lisp | 2 +- src/compiler/float-tran.lisp | 76 ++++- src/compiler/generic/genesis.lisp | 15 +- src/compiler/generic/objdef.lisp | 6 +- src/compiler/generic/vm-tran.lisp | 3 +- src/compiler/srctran.lisp | 8 +- src/compiler/x86-64/array.lisp | 94 +----- src/compiler/x86-64/cell.lisp | 90 +---- src/compiler/x86-64/float.lisp | 655 +++++++++++++++++++++++++++---------- src/compiler/x86-64/insts.lisp | 6 + src/compiler/x86-64/macros.lisp | 10 +- src/compiler/x86-64/vm.lisp | 38 +-- src/runtime/print.c | 8 + tests/arith.pure.lisp | 1 - tests/float.pure.lisp | 79 +++++ 18 files changed, 736 insertions(+), 372 deletions(-) diff --git a/base-target-features.lisp-expr b/base-target-features.lisp-expr index ba12b0d..f389507 100644 --- a/base-target-features.lisp-expr +++ b/base-target-features.lisp-expr @@ -155,6 +155,18 @@ ;; ; :cycle-counter + ;; Enabled automatically for platforms which implement complex arithmetic + ;; VOPs. Such platforms should implement real-complex, complex-real and + ;; complex-complex addition and subtractions (for complex-single-float + ;; and complex-double-float). They should also also implement complex-real + ;; and real-complex multiplication, complex-real division, and + ;; sb!vm::swap-complex, which swaps the real and imaginary parts. + ;; Finally, they should implement conjugate and complex-real, real-complex + ;; and complex-complex CL:= (complex-complex EQL would usually be a good + ;; idea). + ;; + ; :complex-float-vops + ;; Peter Van Eynde's increase-bulletproofness code for CMU CL ;; ;; Some of the code which was #+high-security before the fork has now diff --git a/make-config.sh b/make-config.sh index a43a059..468c79f 100644 --- a/make-config.sh +++ b/make-config.sh @@ -296,7 +296,7 @@ elif [ "$sbcl_arch" = "x86-64" ]; then printf ' :compare-and-swap-vops :unwind-to-frame-and-call-vop :raw-instance-init-vops' >> $ltf printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf printf ' :stack-allocatable-lists :stack-allocatable-fixed-objects' >> $ltf - printf ' :alien-callbacks :cycle-counter' >> $ltf + printf ' :alien-callbacks :cycle-counter :complex-float-vops' >> $ltf elif [ "$sbcl_arch" = "mips" ]; then printf ' :linkage-table' >> $ltf printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index ed9736c..a7edc0d 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -2460,7 +2460,10 @@ structure representations" #!+long-float "COMPLEX-LONG-FLOAT-WIDETAG" #!+long-float "COMPLEX-LONG-REG-SC-NUMBER" #!+long-float "COMPLEX-LONG-STACK-SC-NUMBER" + #!-x86-64 #!-x86-64 "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT" + #!+x86-64 + "COMPLEX-SINGLE-FLOAT-DATA-SLOT" "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG" "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER" "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" diff --git a/src/compiler/assem.lisp b/src/compiler/assem.lisp index 797fd83..5fa14f2 100644 --- a/src/compiler/assem.lisp +++ b/src/compiler/assem.lisp @@ -107,7 +107,7 @@ #!+sb-dyncount (collect-dynamic-statistics nil)) (sb!c::defprinter (segment) - name) + type) (declaim (inline segment-current-index)) (defun segment-current-index (segment) diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp index 681aa48..c44d149 100644 --- a/src/compiler/float-tran.lisp +++ b/src/compiler/float-tran.lisp @@ -1178,27 +1178,45 @@ ;;; of complex operation VOPs. (macrolet ((frob (type) `(progn + (deftransform complex ((r) (,type)) + '(complex r ,(coerce 0 type))) + (deftransform complex ((r i) (,type (and real (not ,type)))) + '(complex r (truly-the ,type (coerce i ',type)))) + (deftransform complex ((r i) ((and real (not ,type)) ,type)) + '(complex (truly-the ,type (coerce r ',type)) i)) ;; negation + #!-complex-float-vops (deftransform %negate ((z) ((complex ,type)) *) '(complex (%negate (realpart z)) (%negate (imagpart z)))) ;; complex addition and subtraction + #!-complex-float-vops (deftransform + ((w z) ((complex ,type) (complex ,type)) *) '(complex (+ (realpart w) (realpart z)) (+ (imagpart w) (imagpart z)))) + #!-complex-float-vops (deftransform - ((w z) ((complex ,type) (complex ,type)) *) '(complex (- (realpart w) (realpart z)) (- (imagpart w) (imagpart z)))) ;; Add and subtract a complex and a real. + #!-complex-float-vops (deftransform + ((w z) ((complex ,type) real) *) - '(complex (+ (realpart w) z) (imagpart w))) + `(complex (+ (realpart w) z) + (+ (imagpart w) ,(coerce 0 ',type)))) + #!-complex-float-vops (deftransform + ((z w) (real (complex ,type)) *) - '(complex (+ (realpart w) z) (imagpart w))) + `(complex (+ (realpart w) z) + (+ (imagpart w) ,(coerce 0 ',type)))) ;; Add and subtract a real and a complex number. + #!-complex-float-vops (deftransform - ((w z) ((complex ,type) real) *) - '(complex (- (realpart w) z) (imagpart w))) + `(complex (- (realpart w) z) + (- (imagpart w) ,(coerce 0 ',type)))) + #!-complex-float-vops (deftransform - ((z w) (real (complex ,type)) *) - '(complex (- z (realpart w)) (- (imagpart w)))) + `(complex (- z (realpart w)) + (- ,(coerce 0 ',type) (imagpart w)))) ;; Multiply and divide two complex numbers. + #!-complex-float-vops (deftransform * ((x y) ((complex ,type) (complex ,type)) *) '(let* ((rx (realpart x)) (ix (imagpart x)) @@ -1207,39 +1225,81 @@ (complex (- (* rx ry) (* ix iy)) (+ (* rx iy) (* ix ry))))) (deftransform / ((x y) ((complex ,type) (complex ,type)) *) + #!-complex-float-vops '(let* ((rx (realpart x)) (ix (imagpart x)) (ry (realpart y)) (iy (imagpart y))) (if (> (abs ry) (abs iy)) (let* ((r (/ iy ry)) - (dn (* ry (+ 1 (* r r))))) + (dn (+ ry (* r iy)))) (complex (/ (+ rx (* ix r)) dn) (/ (- ix (* rx r)) dn))) (let* ((r (/ ry iy)) - (dn (* iy (+ 1 (* r r))))) + (dn (+ iy (* r ry)))) (complex (/ (+ (* rx r) ix) dn) - (/ (- (* ix r) rx) dn)))))) + (/ (- (* ix r) rx) dn))))) + #!+complex-float-vops + `(let* ((cs (conjugate (sb!vm::swap-complex x))) + (ry (realpart y)) + (iy (imagpart y))) + (if (> (abs ry) (abs iy)) + (let* ((r (/ iy ry)) + (dn (+ ry (* r iy)))) + (/ (+ x (* cs r)) dn)) + (let* ((r (/ ry iy)) + (dn (+ iy (* r ry)))) + (/ (+ (* x r) cs) dn))))) ;; Multiply a complex by a real or vice versa. + #!-complex-float-vops (deftransform * ((w z) ((complex ,type) real) *) '(complex (* (realpart w) z) (* (imagpart w) z))) + #!-complex-float-vops (deftransform * ((z w) (real (complex ,type)) *) '(complex (* (realpart w) z) (* (imagpart w) z))) - ;; Divide a complex by a real. + ;; Divide a complex by a real or vice versa. + #!-complex-float-vops (deftransform / ((w z) ((complex ,type) real) *) '(complex (/ (realpart w) z) (/ (imagpart w) z))) + (deftransform / ((x y) (,type (complex ,type)) *) + #!-complex-float-vops + '(let* ((ry (realpart y)) + (iy (imagpart y))) + (if (> (abs ry) (abs iy)) + (let* ((r (/ iy ry)) + (dn (+ ry (* r iy)))) + (complex (/ x dn) + (/ (- (* x r)) dn))) + (let* ((r (/ ry iy)) + (dn (+ iy (* r ry)))) + (complex (/ (* x r) dn) + (/ (- x) dn))))) + #!+complex-float-vops + '(let* ((ry (realpart y)) + (iy (imagpart y))) + (if (> (abs ry) (abs iy)) + (let* ((r (/ iy ry)) + (dn (+ ry (* r iy)))) + (/ (complex x (- (* x r))) dn)) + (let* ((r (/ ry iy)) + (dn (+ iy (* r ry)))) + (/ (complex (* x r) (- x)) dn))))) ;; conjugate of complex number + #!-complex-float-vops (deftransform conjugate ((z) ((complex ,type)) *) '(complex (realpart z) (- (imagpart z)))) ;; CIS (deftransform cis ((z) ((,type)) *) '(complex (cos z) (sin z))) ;; comparison + #!-complex-float-vops (deftransform = ((w z) ((complex ,type) (complex ,type)) *) '(and (= (realpart w) (realpart z)) (= (imagpart w) (imagpart z)))) + #!-complex-float-vops (deftransform = ((w z) ((complex ,type) real) *) '(and (= (realpart w) z) (zerop (imagpart w)))) + #!-complex-float-vops (deftransform = ((w z) (real (complex ,type)) *) '(and (= (realpart z) w) (zerop (imagpart z))))))) diff --git a/src/compiler/generic/genesis.lisp b/src/compiler/generic/genesis.lisp index bb2887c..4978acc 100644 --- a/src/compiler/generic/genesis.lisp +++ b/src/compiler/generic/genesis.lisp @@ -735,10 +735,17 @@ core and return a descriptor to it." (let ((des (allocate-unboxed-object *dynamic* sb!vm:n-word-bits (1- sb!vm:complex-single-float-size) sb!vm:complex-single-float-widetag))) - (write-wordindexed des sb!vm:complex-single-float-real-slot - (make-random-descriptor (single-float-bits (realpart num)))) - (write-wordindexed des sb!vm:complex-single-float-imag-slot - (make-random-descriptor (single-float-bits (imagpart num)))) + #!-x86-64 + (progn + (write-wordindexed des sb!vm:complex-single-float-real-slot + (make-random-descriptor (single-float-bits (realpart num)))) + (write-wordindexed des sb!vm:complex-single-float-imag-slot + (make-random-descriptor (single-float-bits (imagpart num))))) + #!+x86-64 + (write-wordindexed des sb!vm:complex-single-float-data-slot + (make-random-descriptor + (logior (ldb (byte 32 0) (single-float-bits (realpart num))) + (ash (single-float-bits (imagpart num)) 32)))) des)) (defun complex-double-float-to-core (num) diff --git a/src/compiler/generic/objdef.lisp b/src/compiler/generic/objdef.lisp index b4d231b..d768bca 100644 --- a/src/compiler/generic/objdef.lisp +++ b/src/compiler/generic/objdef.lisp @@ -342,13 +342,17 @@ (define-primitive-object (complex-single-float :lowtag other-pointer-lowtag :widetag complex-single-float-widetag) + #!+x86-64 + (data :c-type "struct { float data[2]; } ") + #!-x86-64 (real :c-type "float") + #!-x86-64 (imag :c-type "float")) (define-primitive-object (complex-double-float :lowtag other-pointer-lowtag :widetag complex-double-float-widetag) - #!-x86-64 (filler) + (filler) (real :c-type "double" :length #!-x86-64 2 #!+x86-64 1) (imag :c-type "double" :length #!-x86-64 2 #!+x86-64 1)) diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp index 5b37876..4614221 100644 --- a/src/compiler/generic/vm-tran.lisp +++ b/src/compiler/generic/vm-tran.lisp @@ -584,10 +584,11 @@ (values))) ;;;; transforms for EQL of floating point values - +#!-x86-64 (deftransform eql ((x y) (single-float single-float)) '(= (single-float-bits x) (single-float-bits y))) +#!-x86-64 (deftransform eql ((x y) (double-float double-float)) '(and (= (double-float-low-bits x) (double-float-low-bits y)) (= (double-float-high-bits x) (double-float-high-bits y)))) diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp index d2f472c..db475c1 100644 --- a/src/compiler/srctran.lisp +++ b/src/compiler/srctran.lisp @@ -3499,7 +3499,13 @@ (cond ((or (and (csubtypep x-type (specifier-type 'float)) (csubtypep y-type (specifier-type 'float))) (and (csubtypep x-type (specifier-type '(complex float))) - (csubtypep y-type (specifier-type '(complex float))))) + (csubtypep y-type (specifier-type '(complex float)))) + #!+complex-float-vops + (and (csubtypep x-type (specifier-type '(or single-float (complex single-float)))) + (csubtypep y-type (specifier-type '(or single-float (complex single-float))))) + #!+complex-float-vops + (and (csubtypep x-type (specifier-type '(or double-float (complex double-float)))) + (csubtypep y-type (specifier-type '(or double-float (complex double-float)))))) ;; They are both floats. Leave as = so that -0.0 is ;; handled correctly. (give-up-ir1-transform)) diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp index bf50ef8..2dc0a40 100644 --- a/src/compiler/x86-64/array.lisp +++ b/src/compiler/x86-64/array.lisp @@ -394,8 +394,7 @@ (move dword-index index) (inst shr dword-index 1) (inst movss (make-ea-for-float-ref object dword-index offset 4) value) - (unless (location= result value) - (inst movss result value)))) + (move result value))) (define-vop (data-vector-set-c-with-offset/simple-array-single-float) (:note "inline array store") @@ -412,8 +411,7 @@ (:result-types single-float) (:generator 4 (inst movss (make-ea-for-float-ref object index offset 4) value) - (unless (location= result value) - (inst movss result value)))) + (move result value))) (define-vop (data-vector-ref-with-offset/simple-array-double-float) (:note "inline array access") @@ -460,8 +458,7 @@ (:result-types double-float) (:generator 20 (inst movsd (make-ea-for-float-ref object index offset 8) value) - (unless (location= result value) - (inst movsd result value)))) + (move result value))) (define-vop (data-vector-set-c-with-offset/simple-array-double-float) (:note "inline array store") @@ -478,8 +475,7 @@ (:result-types double-float) (:generator 19 (inst movsd (make-ea-for-float-ref object index offset 8) value) - (unless (location= result value) - (inst movsd result value)))) + (move result value))) ;;; complex float variants @@ -497,11 +493,7 @@ (:results (value :scs (complex-single-reg))) (:result-types complex-single-float) (:generator 5 - (let ((real-tn (complex-single-reg-real-tn value))) - (inst movss real-tn (make-ea-for-float-ref object index offset 8))) - (let ((imag-tn (complex-single-reg-imag-tn value))) - (inst movss imag-tn (make-ea-for-float-ref object index offset 8 - :complex-offset 4))))) + (inst movq value (make-ea-for-float-ref object index offset 8)))) (define-vop (data-vector-ref-c-with-offset/simple-array-complex-single-float) (:note "inline array access") @@ -515,11 +507,7 @@ (:results (value :scs (complex-single-reg))) (:result-types complex-single-float) (:generator 4 - (let ((real-tn (complex-single-reg-real-tn value))) - (inst movss real-tn (make-ea-for-float-ref object index offset 8))) - (let ((imag-tn (complex-single-reg-imag-tn value))) - (inst movss imag-tn (make-ea-for-float-ref object index offset 8 - :complex-offset 4))))) + (inst movq value (make-ea-for-float-ref object index offset 8)))) (define-vop (data-vector-set-with-offset/simple-array-complex-single-float) (:note "inline array store") @@ -536,18 +524,8 @@ (:results (result :scs (complex-single-reg))) (:result-types complex-single-float) (:generator 5 - (let ((value-real (complex-single-reg-real-tn value)) - (result-real (complex-single-reg-real-tn result))) - (inst movss (make-ea-for-float-ref object index offset 8) value-real) - (unless (location= value-real result-real) - (inst movss result-real value-real))) - (let ((value-imag (complex-single-reg-imag-tn value)) - (result-imag (complex-single-reg-imag-tn result))) - (inst movss (make-ea-for-float-ref object index offset 8 - :complex-offset 4) - value-imag) - (unless (location= value-imag result-imag) - (inst movss result-imag value-imag))))) + (move result value) + (inst movq (make-ea-for-float-ref object index offset 8) value))) (define-vop (data-vector-set-c-with-offset/simple-array-complex-single-float) (:note "inline array store") @@ -563,18 +541,8 @@ (:results (result :scs (complex-single-reg))) (:result-types complex-single-float) (:generator 4 - (let ((value-real (complex-single-reg-real-tn value)) - (result-real (complex-single-reg-real-tn result))) - (inst movss (make-ea-for-float-ref object index offset 8) value-real) - (unless (location= value-real result-real) - (inst movss result-real value-real))) - (let ((value-imag (complex-single-reg-imag-tn value)) - (result-imag (complex-single-reg-imag-tn result))) - (inst movss (make-ea-for-float-ref object index offset 8 - :complex-offset 4) - value-imag) - (unless (location= value-imag result-imag) - (inst movss result-imag value-imag))))) + (move result value) + (inst movq (make-ea-for-float-ref object index offset 8) value))) (define-vop (data-vector-ref-with-offset/simple-array-complex-double-float) (:note "inline array access") @@ -589,11 +557,7 @@ (:results (value :scs (complex-double-reg))) (:result-types complex-double-float) (:generator 7 - (let ((real-tn (complex-double-reg-real-tn value))) - (inst movsd real-tn (make-ea-for-float-ref object index offset 16 :scale 2))) - (let ((imag-tn (complex-double-reg-imag-tn value))) - (inst movsd imag-tn (make-ea-for-float-ref object index offset 16 :scale 2 - :complex-offset 8))))) + (inst movapd value (make-ea-for-float-ref object index offset 16 :scale 2)))) (define-vop (data-vector-ref-c-with-offset/simple-array-complex-double-float) (:note "inline array access") @@ -607,11 +571,7 @@ (:results (value :scs (complex-double-reg))) (:result-types complex-double-float) (:generator 6 - (let ((real-tn (complex-double-reg-real-tn value))) - (inst movsd real-tn (make-ea-for-float-ref object index offset 16 :scale 2))) - (let ((imag-tn (complex-double-reg-imag-tn value))) - (inst movsd imag-tn (make-ea-for-float-ref object index offset 16 :scale 2 - :complex-offset 8))))) + (inst movapd value (make-ea-for-float-ref object index offset 16 :scale 2)))) (define-vop (data-vector-set-with-offset/simple-array-complex-double-float) (:note "inline array store") @@ -628,19 +588,8 @@ (:results (result :scs (complex-double-reg))) (:result-types complex-double-float) (:generator 20 - (let ((value-real (complex-double-reg-real-tn value)) - (result-real (complex-double-reg-real-tn result))) - (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2) - value-real) - (unless (location= value-real result-real) - (inst movsd result-real value-real))) - (let ((value-imag (complex-double-reg-imag-tn value)) - (result-imag (complex-double-reg-imag-tn result))) - (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2 - :complex-offset 8) - value-imag) - (unless (location= value-imag result-imag) - (inst movsd result-imag value-imag))))) + (inst movapd (make-ea-for-float-ref object index offset 16 :scale 2) value) + (move result value))) (define-vop (data-vector-set-c-with-offset/simple-array-complex-double-float) (:note "inline array store") @@ -656,19 +605,8 @@ (:results (result :scs (complex-double-reg))) (:result-types complex-double-float) (:generator 19 - (let ((value-real (complex-double-reg-real-tn value)) - (result-real (complex-double-reg-real-tn result))) - (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2) - value-real) - (unless (location= value-real result-real) - (inst movsd result-real value-real))) - (let ((value-imag (complex-double-reg-imag-tn value)) - (result-imag (complex-double-reg-imag-tn result))) - (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2 - :complex-offset 8) - value-imag) - (unless (location= value-imag result-imag) - (inst movsd result-imag value-imag))))) + (inst movapd (make-ea-for-float-ref object index offset 16 :scale 2) value) + (move result value))) diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp index b7687f7..632e19e 100644 --- a/src/compiler/x86-64/cell.lisp +++ b/src/compiler/x86-64/cell.lisp @@ -662,8 +662,7 @@ (inst shl tmp 3) (inst sub tmp index) (inst movss (make-ea-for-raw-slot object index tmp) value) - (unless (location= result value) - (inst movss result value)))) + (move result value))) (define-vop (raw-instance-set-c/single) (:translate %raw-instance-set/single) @@ -682,8 +681,7 @@ (loadw tmp object 0 instance-pointer-lowtag) (inst shr tmp n-widetag-bits) (inst movss (make-ea-for-raw-slot object index tmp) value) - (unless (location= result value) - (inst movss result value)))) + (move result value))) (define-vop (raw-instance-init/single) (:args (object :scs (descriptor-reg)) @@ -741,8 +739,7 @@ (inst shl tmp 3) (inst sub tmp index) (inst movsd (make-ea-for-raw-slot object index tmp) value) - (unless (location= result value) - (inst movsd result value)))) + (move result value))) (define-vop (raw-instance-set-c/double) (:translate %raw-instance-set/double) @@ -761,8 +758,7 @@ (loadw tmp object 0 instance-pointer-lowtag) (inst shr tmp n-widetag-bits) (inst movsd (make-ea-for-raw-slot object index tmp) value) - (unless (location= result value) - (inst movsd result value)))) + (move result value))) (define-vop (raw-instance-init/double) (:args (object :scs (descriptor-reg)) @@ -786,10 +782,7 @@ (inst shr tmp n-widetag-bits) (inst shl tmp 3) (inst sub tmp index) - (let ((real-tn (complex-single-reg-real-tn value))) - (inst movss real-tn (make-ea-for-raw-slot object index tmp))) - (let ((imag-tn (complex-single-reg-imag-tn value))) - (inst movss imag-tn (make-ea-for-raw-slot object index tmp 4))))) + (inst movq value (make-ea-for-raw-slot object index tmp)))) (define-vop (raw-instance-ref-c/complex-single) (:translate %raw-instance-ref/complex-single) @@ -805,10 +798,7 @@ (:generator 4 (loadw tmp object 0 instance-pointer-lowtag) (inst shr tmp n-widetag-bits) - (let ((real-tn (complex-single-reg-real-tn value))) - (inst movss real-tn (make-ea-for-raw-slot object index tmp))) - (let ((imag-tn (complex-single-reg-imag-tn value))) - (inst movss imag-tn (make-ea-for-raw-slot object index tmp 4))))) + (inst movq value (make-ea-for-raw-slot object index tmp)))) (define-vop (raw-instance-set/complex-single) (:translate %raw-instance-set/complex-single) @@ -825,16 +815,8 @@ (inst shr tmp n-widetag-bits) (inst shl tmp 3) (inst sub tmp index) - (let ((value-real (complex-single-reg-real-tn value)) - (result-real (complex-single-reg-real-tn result))) - (inst movss (make-ea-for-raw-slot object index tmp) value-real) - (unless (location= value-real result-real) - (inst movss result-real value-real))) - (let ((value-imag (complex-single-reg-imag-tn value)) - (result-imag (complex-single-reg-imag-tn result))) - (inst movss (make-ea-for-raw-slot object index tmp 4) value-imag) - (unless (location= value-imag result-imag) - (inst movss result-imag value-imag))))) + (move result value) + (inst movq (make-ea-for-raw-slot object index tmp) value))) (define-vop (raw-instance-set-c/complex-single) (:translate %raw-instance-set/complex-single) @@ -852,16 +834,8 @@ (:generator 4 (loadw tmp object 0 instance-pointer-lowtag) (inst shr tmp n-widetag-bits) - (let ((value-real (complex-single-reg-real-tn value)) - (result-real (complex-single-reg-real-tn result))) - (inst movss (make-ea-for-raw-slot object index tmp) value-real) - (unless (location= value-real result-real) - (inst movss result-real value-real))) - (let ((value-imag (complex-single-reg-imag-tn value)) - (result-imag (complex-single-reg-imag-tn result))) - (inst movss (make-ea-for-raw-slot object index tmp 4) value-imag) - (unless (location= value-imag result-imag) - (inst movss result-imag value-imag))))) + (move result value) + (inst movq (make-ea-for-raw-slot object index tmp) value))) (define-vop (raw-instance-init/complex-single) (:args (object :scs (descriptor-reg)) @@ -869,10 +843,7 @@ (:arg-types * complex-single-float) (:info instance-length index) (:generator 4 - (let ((value-real (complex-single-reg-real-tn value))) - (inst movss (make-ea-for-raw-slot object index instance-length) value-real)) - (let ((value-imag (complex-single-reg-imag-tn value))) - (inst movss (make-ea-for-raw-slot object index instance-length 4) value-imag)))) + (inst movq (make-ea-for-raw-slot object index instance-length) value))) (define-vop (raw-instance-ref/complex-double) (:translate %raw-instance-ref/complex-double) @@ -888,10 +859,7 @@ (inst shr tmp n-widetag-bits) (inst shl tmp 3) (inst sub tmp index) - (let ((real-tn (complex-double-reg-real-tn value))) - (inst movsd real-tn (make-ea-for-raw-slot object index tmp -8))) - (let ((imag-tn (complex-double-reg-imag-tn value))) - (inst movsd imag-tn (make-ea-for-raw-slot object index tmp))))) + (inst movdqu value (make-ea-for-raw-slot object index tmp -8)))) (define-vop (raw-instance-ref-c/complex-double) (:translate %raw-instance-ref/complex-double) @@ -907,10 +875,7 @@ (:generator 4 (loadw tmp object 0 instance-pointer-lowtag) (inst shr tmp n-widetag-bits) - (let ((real-tn (complex-double-reg-real-tn value))) - (inst movsd real-tn (make-ea-for-raw-slot object index tmp -8))) - (let ((imag-tn (complex-double-reg-imag-tn value))) - (inst movsd imag-tn (make-ea-for-raw-slot object index tmp))))) + (inst movdqu value (make-ea-for-raw-slot object index tmp -8)))) (define-vop (raw-instance-set/complex-double) (:translate %raw-instance-set/complex-double) @@ -927,16 +892,8 @@ (inst shr tmp n-widetag-bits) (inst shl tmp 3) (inst sub tmp index) - (let ((value-real (complex-double-reg-real-tn value)) - (result-real (complex-double-reg-real-tn result))) - (inst movsd (make-ea-for-raw-slot object index tmp -8) value-real) - (unless (location= value-real result-real) - (inst movsd result-real value-real))) - (let ((value-imag (complex-double-reg-imag-tn value)) - (result-imag (complex-double-reg-imag-tn result))) - (inst movsd (make-ea-for-raw-slot object index tmp) value-imag) - (unless (location= value-imag result-imag) - (inst movsd result-imag value-imag))))) + (move result value) + (inst movdqu (make-ea-for-raw-slot object index tmp -8) value))) (define-vop (raw-instance-set-c/complex-double) (:translate %raw-instance-set/complex-double) @@ -954,16 +911,8 @@ (:generator 4 (loadw tmp object 0 instance-pointer-lowtag) (inst shr tmp n-widetag-bits) - (let ((value-real (complex-double-reg-real-tn value)) - (result-real (complex-double-reg-real-tn result))) - (inst movsd (make-ea-for-raw-slot object index tmp -8) value-real) - (unless (location= value-real result-real) - (inst movsd result-real value-real))) - (let ((value-imag (complex-double-reg-imag-tn value)) - (result-imag (complex-double-reg-imag-tn result))) - (inst movsd (make-ea-for-raw-slot object index tmp) value-imag) - (unless (location= value-imag result-imag) - (inst movsd result-imag value-imag))))) + (move result value) + (inst movdqu (make-ea-for-raw-slot object index tmp -8) value))) (define-vop (raw-instance-init/complex-double) (:args (object :scs (descriptor-reg)) @@ -971,7 +920,4 @@ (:arg-types * complex-double-float) (:info instance-length index) (:generator 4 - (let ((value-real (complex-double-reg-real-tn value))) - (inst movsd (make-ea-for-raw-slot object index instance-length -8) value-real)) - (let ((value-imag (complex-double-reg-imag-tn value))) - (inst movsd (make-ea-for-raw-slot object index instance-length) value-imag)))) + (inst movdqu (make-ea-for-raw-slot object index instance-length -8) value))) diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp index 23eef99..286889a 100644 --- a/src/compiler/x86-64/float.lisp +++ b/src/compiler/x86-64/float.lisp @@ -19,10 +19,15 @@ (defun ea-for-df-desc (tn) (ea-for-xf-desc tn double-float-value-slot)) ;; complex floats + (defun ea-for-csf-data-desc (tn) + (ea-for-xf-desc tn complex-single-float-data-slot)) (defun ea-for-csf-real-desc (tn) - (ea-for-xf-desc tn complex-single-float-real-slot)) + (ea-for-xf-desc tn complex-single-float-data-slot)) (defun ea-for-csf-imag-desc (tn) - (ea-for-xf-desc tn complex-single-float-imag-slot)) + (ea-for-xf-desc tn (+ complex-single-float-data-slot 1/2))) + + (defun ea-for-cdf-data-desc (tn) + (ea-for-xf-desc tn complex-double-float-real-slot)) (defun ea-for-cdf-real-desc (tn) (ea-for-xf-desc tn complex-double-float-real-slot)) (defun ea-for-cdf-imag-desc (tn) @@ -40,7 +45,6 @@ ;;; complex float stack EAs (macrolet ((ea-for-cxf-stack (tn kind slot &optional base) - (declare (ignore kind)) `(make-ea :qword :base ,base :disp (frame-byte-offset @@ -50,16 +54,28 @@ ((= (tn-offset ,base) rbp-offset) 0) (t (error "Unexpected offset."))) - (ecase ,slot (:real 0) (:imag 1))))))) + (ecase ,kind + (:single + (ecase ,slot + (:real 0) + (:imag -1/2))) + (:double + (ecase ,slot + (:real 1) + (:imag 0))))))))) + (defun ea-for-csf-data-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :single :real base)) (defun ea-for-csf-real-stack (tn &optional (base rbp-tn)) (ea-for-cxf-stack tn :single :real base)) (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn)) (ea-for-cxf-stack tn :single :imag base)) + + (defun ea-for-cdf-data-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :real base)) (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn)) (ea-for-cxf-stack tn :double :real base)) (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn)) (ea-for-cxf-stack tn :double :imag base))) - ;;;; move functions @@ -67,11 +83,13 @@ (define-move-fun (load-fp-zero 1) (vop x y) ((fp-single-zero) (single-reg) - (fp-double-zero) (double-reg)) + (fp-double-zero) (double-reg) + (fp-complex-single-zero) (complex-single-reg) + (fp-complex-double-zero) (complex-double-reg)) (identity x) (sc-case y - (single-reg (inst xorps y y)) - (double-reg (inst xorpd y y)))) + ((single-reg complex-single-reg) (inst xorps y y)) + ((double-reg complex-double-reg) (inst xorpd y y)))) (define-move-fun (load-single 2) (vop x y) ((single-stack) (single-reg)) @@ -94,49 +112,22 @@ ;;;; complex float move functions -(defun complex-single-reg-real-tn (x) - (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) - :offset (tn-offset x))) -(defun complex-single-reg-imag-tn (x) - (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) - :offset (1+ (tn-offset x)))) - -(defun complex-double-reg-real-tn (x) - (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) - :offset (tn-offset x))) -(defun complex-double-reg-imag-tn (x) - (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) - :offset (1+ (tn-offset x)))) - ;;; X is source, Y is destination. (define-move-fun (load-complex-single 2) (vop x y) ((complex-single-stack) (complex-single-reg)) - (let ((real-tn (complex-single-reg-real-tn y))) - (inst movss real-tn (ea-for-csf-real-stack x))) - (let ((imag-tn (complex-single-reg-imag-tn y))) - (inst movss imag-tn (ea-for-csf-imag-stack x)))) + (inst movq y (ea-for-csf-data-stack x))) (define-move-fun (store-complex-single 2) (vop x y) ((complex-single-reg) (complex-single-stack)) - (let ((real-tn (complex-single-reg-real-tn x)) - (imag-tn (complex-single-reg-imag-tn x))) - (inst movss (ea-for-csf-real-stack y) real-tn) - (inst movss (ea-for-csf-imag-stack y) imag-tn))) + (inst movq (ea-for-csf-data-stack y) x)) (define-move-fun (load-complex-double 2) (vop x y) ((complex-double-stack) (complex-double-reg)) - (let ((real-tn (complex-double-reg-real-tn y))) - (inst movsd real-tn (ea-for-cdf-real-stack x))) - (let ((imag-tn (complex-double-reg-imag-tn y))) - (inst movsd imag-tn (ea-for-cdf-imag-stack x)))) + (inst movupd y (ea-for-cdf-data-stack x))) (define-move-fun (store-complex-double 2) (vop x y) ((complex-double-reg) (complex-double-stack)) - (let ((real-tn (complex-double-reg-real-tn x)) - (imag-tn (complex-double-reg-imag-tn x))) - (inst movsd (ea-for-cdf-real-stack y) real-tn) - (inst movsd (ea-for-cdf-imag-stack y) imag-tn))) - + (inst movupd (ea-for-cdf-data-stack y) x)) ;;;; move VOPs @@ -151,43 +142,12 @@ :load-if (not (location= x y)))) (:note "float move") (:generator 0 - (unless (location= y x) - (inst movq y x)))) + (move y x))) (define-move-vop ,vop :move (,sc) (,sc))))) (frob single-move single-reg) - (frob double-move double-reg)) - -;;; complex float register to register moves -(define-vop (complex-float-move) - (:args (x :target y :load-if (not (location= x y)))) - (:results (y :load-if (not (location= x y)))) - (:note "complex float move") - (:generator 0 - (unless (location= x y) - ;; Note the complex-float-regs are aligned to every second - ;; float register so there is not need to worry about overlap. - ;; (It would be better to put the imagpart in the top half of the - ;; register, or something, but let's worry about that later) - (let ((x-real (complex-single-reg-real-tn x)) - (y-real (complex-single-reg-real-tn y))) - (inst movq y-real x-real)) - (let ((x-imag (complex-single-reg-imag-tn x)) - (y-imag (complex-single-reg-imag-tn y))) - (inst movq y-imag x-imag))))) - -(define-vop (complex-single-move complex-float-move) - (:args (x :scs (complex-single-reg) :target y - :load-if (not (location= x y)))) - (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))) -(define-move-vop complex-single-move :move - (complex-single-reg) (complex-single-reg)) - -(define-vop (complex-double-move complex-float-move) - (:args (x :scs (complex-double-reg) - :target y :load-if (not (location= x y)))) - (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))) -(define-move-vop complex-double-move :move - (complex-double-reg) (complex-double-reg)) + (frob double-move double-reg) + (frob complex-single-move complex-single-reg) + (frob complex-double-move complex-double-reg)) ;;; Move from float to a descriptor reg. allocating a new float @@ -252,10 +212,7 @@ complex-single-float-widetag complex-single-float-size node) - (let ((real-tn (complex-single-reg-real-tn x))) - (inst movss (ea-for-csf-real-desc y) real-tn)) - (let ((imag-tn (complex-single-reg-imag-tn x))) - (inst movss (ea-for-csf-imag-desc y) imag-tn))))) + (inst movq (ea-for-csf-data-desc y) x)))) (define-move-vop move-from-complex-single :move (complex-single-reg) (descriptor-reg)) @@ -269,10 +226,7 @@ complex-double-float-widetag complex-double-float-size node) - (let ((real-tn (complex-double-reg-real-tn x))) - (inst movsd (ea-for-cdf-real-desc y) real-tn)) - (let ((imag-tn (complex-double-reg-imag-tn x))) - (inst movsd (ea-for-cdf-imag-desc y) imag-tn))))) + (inst movapd (ea-for-cdf-data-desc y) x)))) (define-move-vop move-from-complex-double :move (complex-double-reg) (descriptor-reg)) @@ -284,20 +238,11 @@ (:results (y :scs (,sc))) (:note "pointer to complex float coercion") (:generator 2 - (let ((real-tn (complex-double-reg-real-tn y))) - ,@(ecase - format - (:single - '((inst movss real-tn (ea-for-csf-real-desc x)))) - (:double - '((inst movsd real-tn (ea-for-cdf-real-desc x)))))) - (let ((imag-tn (complex-double-reg-imag-tn y))) - ,@(ecase - format - (:single - '((inst movss imag-tn (ea-for-csf-imag-desc x)))) - (:double - '((inst movsd imag-tn (ea-for-cdf-imag-desc x)))))))) + ,(ecase format + (:single + '(inst movq y (ea-for-csf-data-desc x))) + (:double + '(inst movapd y (ea-for-cdf-data-desc x)))))) (define-move-vop ,name :move (descriptor-reg) (,sc))))) (frob move-to-complex-single complex-single-reg :single) (frob move-to-complex-double complex-double-reg :double)) @@ -319,8 +264,7 @@ (:generator ,(case format (:single 2) (:double 3) ) (sc-case y (,sc - (unless (location= x y) - (inst movq y x))) + (move y x)) (,stack-sc (if (= (tn-offset fp) esp-offset) (let* ((offset (* (tn-offset y) n-word-bytes)) @@ -351,32 +295,13 @@ (:generator ,(ecase format (:single 2) (:double 3)) (sc-case y (,sc - (unless (location= x y) - (let ((x-real (complex-double-reg-real-tn x)) - (y-real (complex-double-reg-real-tn y))) - (inst movsd y-real x-real)) - (let ((x-imag (complex-double-reg-imag-tn x)) - (y-imag (complex-double-reg-imag-tn y))) - (inst movsd y-imag x-imag)))) + (move y x)) (,stack-sc - (let ((real-tn (complex-double-reg-real-tn x))) - ,@(ecase format - (:single - '((inst movss - (ea-for-csf-real-stack y fp) - real-tn))) - (:double - '((inst movsd - (ea-for-cdf-real-stack y fp) - real-tn))))) - (let ((imag-tn (complex-double-reg-imag-tn x))) - ,@(ecase format - (:single - '((inst movss - (ea-for-csf-imag-stack y fp) imag-tn))) - (:double - '((inst movsd - (ea-for-cdf-imag-stack y fp) imag-tn))))))))) + ,(ecase format + (:single + '(inst movq (ea-for-csf-data-stack y fp) x)) + (:double + '(inst movupd (ea-for-cdf-data-stack y fp) x))))))) (define-move-vop ,name :move-arg (,sc descriptor-reg) (,sc))))) (frob move-complex-single-float-arg @@ -408,9 +333,11 @@ (:arg-types ,ptype ,ptype) (:result-types ,ptype)))) (frob single-float-op single-reg single-float) - (frob double-float-op double-reg double-float)) + (frob double-float-op double-reg double-float) + (frob complex-single-float-op complex-single-reg complex-single-float) + (frob complex-double-float-op complex-double-reg complex-double-float)) -(macrolet ((generate (movinst opinst commutative) +(macrolet ((generate (opinst commutative) `(progn (cond ((location= x r) @@ -418,29 +345,280 @@ ((and ,commutative (location= y r)) (inst ,opinst y x)) ((not (location= r y)) - (inst ,movinst r x) + (move r x) (inst ,opinst r y)) (t - (inst ,movinst tmp x) + (move tmp x) (inst ,opinst tmp y) - (inst ,movinst r tmp))))) - (frob (op sinst sname scost dinst dname dcost commutative) + (move r tmp))))) + (frob (op sinst sname scost dinst dname dcost commutative + &optional csinst csname cscost cdinst cdname cdcost) `(progn (define-vop (,sname single-float-op) (:translate ,op) (:temporary (:sc single-reg) tmp) (:generator ,scost - (generate movss ,sinst ,commutative))) + (generate ,sinst ,commutative))) (define-vop (,dname double-float-op) (:translate ,op) - (:temporary (:sc single-reg) tmp) + (:temporary (:sc double-reg) tmp) (:generator ,dcost - (generate movsd ,dinst ,commutative)))))) - (frob + addss +/single-float 2 addsd +/double-float 2 t) - (frob - subss -/single-float 2 subsd -/double-float 2 nil) + (generate ,dinst ,commutative))) + ,(when csinst + `(define-vop (,csname complex-single-float-op) + (:translate ,op) + (:temporary (:sc complex-single-reg) tmp) + (:generator ,cscost + (generate ,csinst ,commutative)))) + ,(when cdinst + `(define-vop (,cdname complex-double-float-op) + (:translate ,op) + (:temporary (:sc complex-double-reg) tmp) + (:generator ,cdcost + (generate ,cdinst ,commutative))))))) + (frob + addss +/single-float 2 addsd +/double-float 2 t + addps +/complex-single-float 3 addpd +/complex-double-float 3) + (frob - subss -/single-float 2 subsd -/double-float 2 nil + subps -/complex-single-float 3 subpd -/complex-double-float 3) (frob * mulss */single-float 4 mulsd */double-float 5 t) (frob / divss //single-float 12 divsd //double-float 19 nil)) +(macrolet ((frob (op cost commutativep + duplicate-inst op-inst + real-sc real-type complex-sc complex-type + real-complex-name complex-real-name) + (cond ((not duplicate-inst) ; simple case + `(progn + ,(when real-complex-name + `(define-vop (,real-complex-name float-op) + (:translate ,op) + (:args (x :scs (,real-sc) :target r) + (y :scs (,complex-sc) + ,@(when commutativep '(:target r)))) + (:arg-types ,real-type ,complex-type) + (:results (r :scs (,complex-sc) + ,@(unless commutativep '(:from (:argument 0))))) + (:result-types ,complex-type) + (:generator ,cost + ,(when commutativep + `(when (location= y r) + (rotatef x y))) + (move r x) + (inst ,op-inst r y)))) + + ,(when complex-real-name + `(define-vop (,complex-real-name float-op) + (:translate ,op) + (:args (x :scs (,complex-sc) :target r) + (y :scs (,real-sc) + ,@(when commutativep '(:target r)))) + (:arg-types ,complex-type ,real-type) + (:results (r :scs (,complex-sc) + ,@(unless commutativep '(:from (:argument 0))))) + (:result-types ,complex-type) + (:generator ,cost + ,(when commutativep + `(when (location= y r) + (rotatef x y))) + (move r x) + (inst ,op-inst r y)))))) + (commutativep ; must duplicate, but commutative + `(progn + ,(when real-complex-name + `(define-vop (,real-complex-name float-op) + (:translate ,op) + (:args (x :scs (,real-sc) :target dup) + (y :scs (,complex-sc) :target r + :to :result)) + (:arg-types ,real-type ,complex-type) + (:temporary (:sc ,complex-sc :target r + :from (:argument 0) + :to :result) + dup) + (:results (r :scs (,complex-sc))) + (:result-types ,complex-type) + (:generator ,cost + (let ((real x)) + ,duplicate-inst) + ;; safe: dup /= y + (when (location= dup r) + (rotatef dup y)) + (move r y) + (inst ,op-inst r dup)))) + + ,(when complex-real-name + `(define-vop (,complex-real-name float-op) + (:translate ,op) + (:args (x :scs (,complex-sc) :target r + :to :result) + (y :scs (,real-sc) :target dup)) + (:arg-types ,complex-type ,real-type) + (:temporary (:sc ,complex-sc :target r + :from (:argument 1) + :to :result) + dup) + (:results (r :scs (,complex-sc))) + (:result-types ,complex-type) + (:generator ,cost + (let ((real y)) + ,duplicate-inst) + (when (location= dup r) + (rotatef x dup)) + (move r x) + (inst ,op-inst r dup)))))) + (t ; duplicate, not commutative + `(progn + ,(when real-complex-name + `(define-vop (,real-complex-name float-op) + (:translate ,op) + (:args (x :scs (,real-sc) + :target r) + (y :scs (,complex-sc) :to :result)) + (:arg-types ,real-type ,complex-type) + (:results (r :scs (,complex-sc) :from (:argument 0))) + (:result-types ,complex-type) + (:generator ,cost + (let ((real x) + (dup r)) + ,duplicate-inst) + (inst ,op-inst r y)))) + + ,(when complex-real-name + `(define-vop (,complex-real-name float-op) + (:translate ,op) + (:args (x :scs (,complex-sc) :target r + :to :eval) + (y :scs (,real-sc) :target dup)) + (:arg-types ,complex-type ,real-type) + (:temporary (:sc ,complex-sc :from (:argument 1)) + dup) + (:results (r :scs (,complex-sc) :from :eval)) + (:result-types ,complex-type) + (:generator ,cost + (let ((real y)) + ,duplicate-inst) + (move r x) + (inst ,op-inst r dup)))))))) + (def-real-complex-op (op commutativep duplicatep + single-inst single-real-complex-name single-complex-real-name single-cost + double-inst double-real-complex-name double-complex-real-name double-cost) + `(progn + (frob ,op ,single-cost ,commutativep + ,(and duplicatep + `(progn + (move dup real) + (inst unpcklps dup dup))) + ,single-inst + single-reg single-float complex-single-reg complex-single-float + ,single-real-complex-name ,single-complex-real-name) + (frob ,op ,double-cost ,commutativep + ,(and duplicatep + `(progn + (move dup real) + (inst unpcklpd dup dup))) + ,double-inst + double-reg double-float complex-double-reg complex-double-float + ,double-real-complex-name ,double-complex-real-name)))) + (def-real-complex-op + t nil + addps +/real-complex-single-float +/complex-real-single-float 3 + addpd +/real-complex-double-float +/complex-real-double-float 4) + (def-real-complex-op - nil nil + subps -/real-complex-single-float -/complex-real-single-float 3 + subpd -/real-complex-double-float -/complex-real-double-float 4) + (def-real-complex-op * t t + mulps */real-complex-single-float */complex-real-single-float 4 + mulpd */real-complex-double-float */complex-real-double-float 5) + (def-real-complex-op / nil t + nil nil nil nil + divpd nil //complex-real-double-float 19)) + +(define-vop (//complex-real-single-float float-op) + (:translate /) + (:args (x :scs (complex-single-reg) + :to (:result 0) + :target r) + (y :scs (single-reg) :target dup)) + (:arg-types complex-single-float single-float) + (:temporary (:sc complex-single-reg :from (:argument 1)) dup) + (:results (r :scs (complex-single-reg))) + (:result-types complex-single-float) + (:generator 12 + (move dup y) + (inst shufps dup dup #b00000000) + (move r x) + (inst unpcklpd r r) + (inst divps r dup) + (inst movq r r))) + +;; Complex multiplication +;; r := rx * ry - ix * iy +;; i := rx * iy + ix * ry +;; +;; Transpose for SIMDness +;; rx*ry rx*iy +;; -ix*iy +ix*ry +;; +;; [rx rx] * [ry iy] +;;+ [ix ix] * [-iy ry] +;; [r i] + +(macrolet ((define-complex-* (name cost type sc &body body) + `(define-vop (,name float-op) + (:translate *) + (:args (x :scs (,sc) :target r) + (y :scs (,sc) :target copy-y)) + (:arg-types ,type ,type) + (:temporary (:sc any-reg) hex8) + (:temporary (:sc ,sc) imag) + (:temporary (:sc ,sc :from :eval) copy-y) + (:temporary (:sc ,sc) xmm) + (:results (r :scs (,sc) :from :eval)) + (:result-types ,type) + (:generator ,cost + (when (or (location= x copy-y) + (location= y r)) + (rotatef x y)) + ,@body)))) + (define-complex-* */complex-single-float 20 complex-single-float complex-single-reg + (inst xorps xmm xmm) + (move r x) + (inst unpcklps r r) + (move imag r) + (inst unpckhpd imag xmm) + (inst unpcklpd r xmm) + (move copy-y y) ; y == r only if y == x == r + (setf y copy-y) + + (inst lea hex8 (make-ea :qword :disp 1)) + (inst rol hex8 31) + (inst movd xmm hex8) + + (inst mulps r y) + + (inst shufps y y #b11110001) + (inst xorps y xmm) + + (inst mulps imag y) + (inst addps r imag)) + (define-complex-* */complex-double-float 25 complex-double-float complex-double-reg + (move imag x) + (move r x) + (move copy-y y) + (setf y copy-y) + (inst unpcklpd r r) + (inst unpckhpd imag imag) + (inst lea hex8 (make-ea :qword :disp 1)) + (inst ror hex8 1) ; #x8000000000000000 + (inst movd xmm hex8) + + (inst mulpd r y) + + (inst shufpd y y #b01) + (inst xorpd y xmm) + + (inst mulpd imag y) + (inst addpd r imag))) + (define-vop (fsqrt) (:args (x :scs (double-reg))) (:results (y :scs (double-reg))) @@ -474,19 +652,41 @@ ;; we should be able to do this better. what we ;; really would like to do is use the target as the ;; temp whenever it's not also the source - (unless (location= x y) - (inst movq y x)) + (move y x) ,@body)))) (frob (%negate/double-float %negate double-reg double-float) (inst lea hex8 (make-ea :qword :disp 1)) (inst ror hex8 1) ; #x8000000000000000 (inst movd xmm hex8) (inst xorpd y xmm)) + (frob (%negate/complex-double-float %negate complex-double-reg complex-double-float) + (inst lea hex8 (make-ea :qword :disp 1)) + (inst ror hex8 1) ; #x8000000000000000 + (inst movd xmm hex8) + (inst unpcklpd xmm xmm) + (inst xorpd y xmm)) + (frob (conjugate/complex-double-float conjugate complex-double-reg complex-double-float) + (inst lea hex8 (make-ea :qword :disp 1)) + (inst ror hex8 1) ; #x8000000000000000 + (inst movd xmm hex8) + (inst shufpd xmm xmm #b01) + (inst xorpd y xmm)) (frob (%negate/single-float %negate single-reg single-float) (inst lea hex8 (make-ea :qword :disp 1)) (inst rol hex8 31) (inst movd xmm hex8) (inst xorps y xmm)) + (frob (%negate/complex-single-float %negate complex-single-reg complex-single-float) + (inst lea hex8 (make-ea :qword :disp 1)) + (inst rol hex8 31) + (inst movd xmm hex8) + (inst unpcklps xmm xmm) + (inst xorps y xmm)) + (frob (conjugate/complex-single-float conjugate complex-single-reg complex-single-float) + (inst lea hex8 (make-ea :qword :disp 1)) + (inst ror hex8 1) ; #x8000000000000000 + (inst movd xmm hex8) + (inst xorpd y xmm)) (frob (abs/double-float abs double-reg double-float) (inst mov hex8 -1) (inst shr hex8 1) @@ -497,6 +697,7 @@ (inst shr hex8 33) (inst movd xmm hex8) (inst andps y xmm))) + ;;;; comparison @@ -506,6 +707,32 @@ (:save-p :compute-only) (:note "inline float comparison")) +;;; EQL +(macrolet ((define-float-eql (name cost sc type) + `(define-vop (,name float-compare) + (:translate eql) + (:args (x :scs (,sc) :target mask) + (y :scs (,sc) :target mask)) + (:arg-types ,type ,type) + (:temporary (:sc ,sc :from :eval) mask) + (:temporary (:sc any-reg) bits) + (:conditional :e) + (:generator ,cost + (when (location= y mask) + (rotatef x y)) + (move mask x) + (inst pcmpeqd mask y) + (inst movmskps bits mask) + (inst cmp bits #b1111))))) + (define-float-eql eql/single-float 4 + single-reg single-float) + (define-float-eql eql/double-float 4 + double-reg double-float) + (define-float-eql eql/complex-double-float 5 + complex-double-reg complex-double-float) + (define-float-eql eql/complex-single-float 5 + complex-single-reg complex-single-float)) + ;;; comiss and comisd can cope with one or other arg in memory: we ;;; could (should, indeed) extend these to cope with descriptor args ;;; and stack args @@ -518,7 +745,7 @@ (:arg-types double-float double-float)) (define-vop (=/single-float single-float-compare) - (:translate =) + (:translate =) (:info) (:conditional not :p :ne) (:vop-var vop) @@ -530,7 +757,7 @@ )) (define-vop (=/double-float double-float-compare) - (:translate =) + (:translate =) (:info) (:conditional not :p :ne) (:vop-var vop) @@ -538,6 +765,42 @@ (note-this-location vop :internal-error) (inst comisd x y))) +(macrolet ((define-complex-float-= (complex-complex-name complex-real-name real-complex-name + real-sc real-type complex-sc complex-type + cmp-inst mask-inst mask) + `(progn + (define-vop (,complex-complex-name float-compare) + (:translate =) + (:args (x :scs (,complex-sc) :target cmp) + (y :scs (,complex-sc) :target cmp)) + (:arg-types ,complex-type ,complex-type) + (:temporary (:sc ,complex-sc :from :eval) cmp) + (:temporary (:sc unsigned-reg) bits) + (:info) + (:conditional :e) + (:generator 3 + (when (location= y cmp) + (rotatef x y)) + (move cmp x) + (note-this-location vop :internal-error) + (inst ,cmp-inst :eq cmp y) + (inst ,mask-inst bits cmp) + (inst cmp bits ,mask))) + (define-vop (,complex-real-name ,complex-complex-name) + (:args (x :scs (,complex-sc) :target cmp) + (y :scs (,real-sc) :target cmp)) + (:arg-types ,complex-type ,real-type)) + (define-vop (,real-complex-name ,complex-complex-name) + (:args (x :scs (,real-sc) :target cmp) + (y :scs (,complex-sc) :target cmp)) + (:arg-types ,real-type ,complex-type))))) + (define-complex-float-= =/complex-single-float =/complex-real-single-float =/real-complex-single-float + single-reg single-float complex-single-reg complex-single-float + cmpps movmskps #b1111) + (define-complex-float-= =/complex-double-float =/complex-real-double-float =/real-complex-double-float + double-reg double-float complex-double-reg complex-double-float + cmppd movmskpd #b11)) + (define-vop (data.data[0]); +#else printf("%g", ((struct complex_single_float *)native_pointer(obj))->real); +#endif NEWLINE_OR_RETURN; +#ifdef LISP_FEATURE_X86_64 + printf("%g", ((struct complex_single_float *)native_pointer(obj))->data.data[1]); +#else printf("%g", ((struct complex_single_float *)native_pointer(obj))->imag); +#endif break; #endif diff --git a/tests/arith.pure.lisp b/tests/arith.pure.lisp index f98ba64..aa39251 100644 --- a/tests/arith.pure.lisp +++ b/tests/arith.pure.lisp @@ -294,4 +294,3 @@ ;; 1.0 had a broken ATANH on win32 (with-test (:name :atanh) (assert (= (atanh 0.9d0) 1.4722194895832204d0))) - diff --git a/tests/float.pure.lisp b/tests/float.pure.lisp index bea74a8..08513dd 100644 --- a/tests/float.pure.lisp +++ b/tests/float.pure.lisp @@ -233,3 +233,82 @@ (assert (eql 0.0d0 (funcall f 123.0d0 0.0))) (assert (eql 0.0d0 (funcall f 123.0d0 0.0d0))) (assert (eql 0.0d0 (funcall f 123.0 0.0d0))))) + + +;; 1.0.29.xFIXMEx introduces a ton of changes for complex floats +;; on x86-64. Huge test of doom to help catch weird corner +;; cases. +(with-test (:name :complex-floats) + (labels ((equal-enough (x y) + (cond ((eql x y)) + ((or (complexp x) + (complexp y)) + (or (eql (coerce x '(complex double-float)) + (coerce y '(complex double-float))) + (and (equal-enough (realpart x) (realpart y)) + (equal-enough (imagpart x) (imagpart y))))) + ((numberp x) + (or (eql (coerce x 'double-float) (coerce y 'double-float)) + (< (abs (- x y)) 1d-5))))) + (reflections (x) + (values x + (conjugate x) + (complex (- (realpart x)) (imagpart x)) + (- x))) + (compute (x y r) + (list (+ x y) (+ r x) (+ x r) + (- x y) (- r x) (- x r) + (* x y) (* x r) (* r x) + (unless (zerop y) + (/ x y)) + (unless (zerop r) + (/ x r)) + (unless (zerop x) + (/ r x)) + (conjugate x) (conjugate r) + (- x) + (complex r) (complex r r) (complex 0 r) + (= x y) (= r x) (= y r) (= x (complex 0 r)) + (eql x y) (eql x (complex r)) (eql y (complex r)) + (eql x (complex r r)) (eql y (complex 0 r)))) + (compute-all (x y r) + (multiple-value-bind (x1 x2 x3 x4) (reflections x) + (multiple-value-bind (y1 y2 y3 y4) (reflections y) + #.(let ((form '(list))) + (dolist (x '(x1 x2 x3 x4) (reverse form)) + (dolist (y '(y1 y2 y3 y4)) + (push `(list ,x ,y r + (append (compute ,x ,y r) + (compute ,x ,y (- r)))) + form)))))))) + (declare (inline reflections compute compute-all)) + (let* ((reals '(0 1 2)) + (complexes '#.(let ((reals '(0 1 2)) + (cpx '())) + (dolist (x reals (nreverse cpx)) + (dolist (y reals) + (push (complex x y) cpx))))) + (val ())) + (declare (notinline every)) + (dolist (r reals (nreverse val)) + (dolist (x complexes) + (dolist (y complexes) + (let ((value (compute-all x y r)) + (single (compute-all (coerce x '(complex single-float)) + (coerce y '(complex single-float)) + (coerce r 'single-float))) + (double (compute-all (coerce x '(complex double-float)) + (coerce y '(complex double-float)) + (coerce r 'double-float)))) + (assert (every (lambda (pos ref single double) + (every (lambda (ref single double) + (or (and (equal-enough ref single) + (equal-enough ref double)) + (and (not (numberp single)) ;; -ve 0s + (equal-enough single double)))) + (fourth ref) (fourth single) (fourth double))) + '((0 0) (0 1) (0 2) (0 3) + (1 0) (1 1) (1 2) (1 3) + (2 0) (2 1) (2 2) (2 3) + (3 0) (3 1) (3 2) (3 3)) + value single double))))))))) -- 1.7.10.4