From a157ed0be79751f85b8243c06102eea95af06aa3 Mon Sep 17 00:00:00 2001
From: Paul Khuong <pvk@pvk.ca>
Date: Thu, 25 Jun 2009 15:37:05 +0000
Subject: [PATCH] 1.0.29.44: Complex float improvements

* On all platforms:
 - Slightly more stable complex-complex float (double and single)
   division;
 - New transform for real-complex division;
 - complex-real and real-complex float addition and subtraction
   behave as though the real was first upgraded to a complex, thus
   losing the sign of any imaginary zero.

* On x86-64
 - Complexes floats are represented packed in a single SSE register;
 - VOPs for all four arithmetic operations, complex-complex, but also
   complex-real and real-complex, except for complex-complex and
   real-complex division;
 - VOPs for =, negate and conjugate of complexes (complex-real and
   complex-complex);
 - VOPs for EQL of floats (real and complexes).
 - Full register moves for float values in SSE registers should also
   speed scalar operations up.
---
 base-target-features.lisp-expr    |   12 +
 make-config.sh                    |    2 +-
 package-data-list.lisp-expr       |    3 +
 src/compiler/assem.lisp           |    2 +-
 src/compiler/float-tran.lisp      |   76 ++++-
 src/compiler/generic/genesis.lisp |   15 +-
 src/compiler/generic/objdef.lisp  |    6 +-
 src/compiler/generic/vm-tran.lisp |    3 +-
 src/compiler/srctran.lisp         |    8 +-
 src/compiler/x86-64/array.lisp    |   94 +-----
 src/compiler/x86-64/cell.lisp     |   90 +----
 src/compiler/x86-64/float.lisp    |  655 +++++++++++++++++++++++++++----------
 src/compiler/x86-64/insts.lisp    |    6 +
 src/compiler/x86-64/macros.lisp   |   10 +-
 src/compiler/x86-64/vm.lisp       |   38 +--
 src/runtime/print.c               |    8 +
 tests/arith.pure.lisp             |    1 -
 tests/float.pure.lisp             |   79 +++++
 18 files changed, 736 insertions(+), 372 deletions(-)

diff --git a/base-target-features.lisp-expr b/base-target-features.lisp-expr
index ba12b0d..f389507 100644
--- a/base-target-features.lisp-expr
+++ b/base-target-features.lisp-expr
@@ -155,6 +155,18 @@
  ;;
  ; :cycle-counter
 
+ ;; Enabled automatically for platforms which implement complex arithmetic
+ ;; VOPs. Such platforms should implement real-complex, complex-real and
+ ;; complex-complex addition and subtractions (for complex-single-float
+ ;; and complex-double-float). They should also also implement complex-real
+ ;; and real-complex multiplication, complex-real division, and
+ ;; sb!vm::swap-complex, which swaps the real and imaginary parts.
+ ;; Finally, they should implement conjugate and complex-real, real-complex
+ ;; and complex-complex CL:= (complex-complex EQL would usually be a good
+ ;; idea).
+ ;;
+ ; :complex-float-vops
+
  ;; Peter Van Eynde's increase-bulletproofness code for CMU CL
  ;;
  ;; Some of the code which was #+high-security before the fork has now
diff --git a/make-config.sh b/make-config.sh
index a43a059..468c79f 100644
--- a/make-config.sh
+++ b/make-config.sh
@@ -296,7 +296,7 @@ elif [ "$sbcl_arch" = "x86-64" ]; then
     printf ' :compare-and-swap-vops :unwind-to-frame-and-call-vop :raw-instance-init-vops' >> $ltf
     printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf
     printf ' :stack-allocatable-lists :stack-allocatable-fixed-objects' >> $ltf
-    printf ' :alien-callbacks :cycle-counter' >> $ltf
+    printf ' :alien-callbacks :cycle-counter :complex-float-vops' >> $ltf
 elif [ "$sbcl_arch" = "mips" ]; then
     printf ' :linkage-table' >> $ltf
     printf ' :stack-allocatable-closures :stack-allocatable-vectors' >> $ltf
diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr
index ed9736c..a7edc0d 100644
--- a/package-data-list.lisp-expr
+++ b/package-data-list.lisp-expr
@@ -2460,7 +2460,10 @@ structure representations"
                #!+long-float "COMPLEX-LONG-FLOAT-WIDETAG"
                #!+long-float "COMPLEX-LONG-REG-SC-NUMBER"
                #!+long-float "COMPLEX-LONG-STACK-SC-NUMBER"
+               #!-x86-64 #!-x86-64
                "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT"
+               #!+x86-64
+               "COMPLEX-SINGLE-FLOAT-DATA-SLOT"
                "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG"
                "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER"
                "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG"
diff --git a/src/compiler/assem.lisp b/src/compiler/assem.lisp
index 797fd83..5fa14f2 100644
--- a/src/compiler/assem.lisp
+++ b/src/compiler/assem.lisp
@@ -107,7 +107,7 @@
   #!+sb-dyncount
   (collect-dynamic-statistics nil))
 (sb!c::defprinter (segment)
-  name)
+  type)
 
 (declaim (inline segment-current-index))
 (defun segment-current-index (segment)
diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp
index 681aa48..c44d149 100644
--- a/src/compiler/float-tran.lisp
+++ b/src/compiler/float-tran.lisp
@@ -1178,27 +1178,45 @@
 ;;; of complex operation VOPs.
 (macrolet ((frob (type)
              `(progn
+                (deftransform complex ((r) (,type))
+                  '(complex r ,(coerce 0 type)))
+                (deftransform complex ((r i) (,type (and real (not ,type))))
+                  '(complex r (truly-the ,type (coerce i ',type))))
+                (deftransform complex ((r i) ((and real (not ,type)) ,type))
+                  '(complex (truly-the ,type (coerce r ',type)) i))
                ;; negation
+                #!-complex-float-vops
                (deftransform %negate ((z) ((complex ,type)) *)
                  '(complex (%negate (realpart z)) (%negate (imagpart z))))
                ;; complex addition and subtraction
+               #!-complex-float-vops
                (deftransform + ((w z) ((complex ,type) (complex ,type)) *)
                  '(complex (+ (realpart w) (realpart z))
                            (+ (imagpart w) (imagpart z))))
+               #!-complex-float-vops
                (deftransform - ((w z) ((complex ,type) (complex ,type)) *)
                  '(complex (- (realpart w) (realpart z))
                            (- (imagpart w) (imagpart z))))
                ;; Add and subtract a complex and a real.
+               #!-complex-float-vops
                (deftransform + ((w z) ((complex ,type) real) *)
-                 '(complex (+ (realpart w) z) (imagpart w)))
+                 `(complex (+ (realpart w) z)
+                           (+ (imagpart w) ,(coerce 0 ',type))))
+               #!-complex-float-vops
                (deftransform + ((z w) (real (complex ,type)) *)
-                 '(complex (+ (realpart w) z) (imagpart w)))
+                 `(complex (+ (realpart w) z)
+                           (+ (imagpart w) ,(coerce 0 ',type))))
                ;; Add and subtract a real and a complex number.
+               #!-complex-float-vops
                (deftransform - ((w z) ((complex ,type) real) *)
-                 '(complex (- (realpart w) z) (imagpart w)))
+                 `(complex (- (realpart w) z)
+                           (- (imagpart w) ,(coerce 0 ',type))))
+               #!-complex-float-vops
                (deftransform - ((z w) (real (complex ,type)) *)
-                 '(complex (- z (realpart w)) (- (imagpart w))))
+                 `(complex (- z (realpart w))
+                           (- ,(coerce 0 ',type) (imagpart w))))
                ;; Multiply and divide two complex numbers.
+               #!-complex-float-vops
                (deftransform * ((x y) ((complex ,type) (complex ,type)) *)
                  '(let* ((rx (realpart x))
                          (ix (imagpart x))
@@ -1207,39 +1225,81 @@
                     (complex (- (* rx ry) (* ix iy))
                              (+ (* rx iy) (* ix ry)))))
                (deftransform / ((x y) ((complex ,type) (complex ,type)) *)
+                 #!-complex-float-vops
                  '(let* ((rx (realpart x))
                          (ix (imagpart x))
                          (ry (realpart y))
                          (iy (imagpart y)))
                     (if (> (abs ry) (abs iy))
                         (let* ((r (/ iy ry))
-                               (dn (* ry (+ 1 (* r r)))))
+                               (dn (+ ry (* r iy))))
                           (complex (/ (+ rx (* ix r)) dn)
                                    (/ (- ix (* rx r)) dn)))
                         (let* ((r (/ ry iy))
-                               (dn (* iy (+ 1 (* r r)))))
+                               (dn (+ iy (* r ry))))
                           (complex (/ (+ (* rx r) ix) dn)
-                                   (/ (- (* ix r) rx) dn))))))
+                                   (/ (- (* ix r) rx) dn)))))
+                 #!+complex-float-vops
+                 `(let* ((cs (conjugate (sb!vm::swap-complex x)))
+                         (ry (realpart y))
+                         (iy (imagpart y)))
+                    (if (> (abs ry) (abs iy))
+                        (let* ((r (/ iy ry))
+                               (dn (+ ry (* r iy))))
+                          (/ (+ x (* cs r)) dn))
+                        (let* ((r (/ ry iy))
+                               (dn (+ iy (* r ry))))
+                          (/ (+ (* x r) cs) dn)))))
                ;; Multiply a complex by a real or vice versa.
+               #!-complex-float-vops
                (deftransform * ((w z) ((complex ,type) real) *)
                  '(complex (* (realpart w) z) (* (imagpart w) z)))
+               #!-complex-float-vops
                (deftransform * ((z w) (real (complex ,type)) *)
                  '(complex (* (realpart w) z) (* (imagpart w) z)))
-               ;; Divide a complex by a real.
+               ;; Divide a complex by a real or vice versa.
+               #!-complex-float-vops
                (deftransform / ((w z) ((complex ,type) real) *)
                  '(complex (/ (realpart w) z) (/ (imagpart w) z)))
+               (deftransform / ((x y) (,type (complex ,type)) *)
+                 #!-complex-float-vops
+                 '(let* ((ry (realpart y))
+                         (iy (imagpart y)))
+                    (if (> (abs ry) (abs iy))
+                        (let* ((r (/ iy ry))
+                               (dn (+ ry (* r iy))))
+                          (complex (/ x dn)
+                                   (/ (- (* x r)) dn)))
+                        (let* ((r (/ ry iy))
+                               (dn (+ iy (* r ry))))
+                          (complex (/ (* x r) dn)
+                                   (/ (- x) dn)))))
+                 #!+complex-float-vops
+                 '(let* ((ry (realpart y))
+                         (iy (imagpart y)))
+                   (if (> (abs ry) (abs iy))
+                       (let* ((r (/ iy ry))
+                              (dn (+ ry (* r iy))))
+                         (/ (complex x (- (* x r))) dn))
+                       (let* ((r (/ ry iy))
+                              (dn (+ iy (* r ry))))
+                         (/ (complex (* x r) (- x)) dn)))))
                ;; conjugate of complex number
+               #!-complex-float-vops
                (deftransform conjugate ((z) ((complex ,type)) *)
                  '(complex (realpart z) (- (imagpart z))))
                ;; CIS
                (deftransform cis ((z) ((,type)) *)
                  '(complex (cos z) (sin z)))
                ;; comparison
+               #!-complex-float-vops
                (deftransform = ((w z) ((complex ,type) (complex ,type)) *)
                  '(and (= (realpart w) (realpart z))
                        (= (imagpart w) (imagpart z))))
+               #!-complex-float-vops
                (deftransform = ((w z) ((complex ,type) real) *)
                  '(and (= (realpart w) z) (zerop (imagpart w))))
+               #!-complex-float-vops
                (deftransform = ((w z) (real (complex ,type)) *)
                  '(and (= (realpart z) w) (zerop (imagpart z)))))))
 
diff --git a/src/compiler/generic/genesis.lisp b/src/compiler/generic/genesis.lisp
index bb2887c..4978acc 100644
--- a/src/compiler/generic/genesis.lisp
+++ b/src/compiler/generic/genesis.lisp
@@ -735,10 +735,17 @@ core and return a descriptor to it."
   (let ((des (allocate-unboxed-object *dynamic* sb!vm:n-word-bits
                                       (1- sb!vm:complex-single-float-size)
                                       sb!vm:complex-single-float-widetag)))
-    (write-wordindexed des sb!vm:complex-single-float-real-slot
-                   (make-random-descriptor (single-float-bits (realpart num))))
-    (write-wordindexed des sb!vm:complex-single-float-imag-slot
-                   (make-random-descriptor (single-float-bits (imagpart num))))
+    #!-x86-64
+    (progn
+      (write-wordindexed des sb!vm:complex-single-float-real-slot
+                         (make-random-descriptor (single-float-bits (realpart num))))
+      (write-wordindexed des sb!vm:complex-single-float-imag-slot
+                         (make-random-descriptor (single-float-bits (imagpart num)))))
+    #!+x86-64
+    (write-wordindexed des sb!vm:complex-single-float-data-slot
+                       (make-random-descriptor
+                        (logior (ldb (byte 32 0) (single-float-bits (realpart num)))
+                                (ash (single-float-bits (imagpart num)) 32))))
     des))
 
 (defun complex-double-float-to-core (num)
diff --git a/src/compiler/generic/objdef.lisp b/src/compiler/generic/objdef.lisp
index b4d231b..d768bca 100644
--- a/src/compiler/generic/objdef.lisp
+++ b/src/compiler/generic/objdef.lisp
@@ -342,13 +342,17 @@
 (define-primitive-object (complex-single-float
                           :lowtag other-pointer-lowtag
                           :widetag complex-single-float-widetag)
+  #!+x86-64
+  (data :c-type "struct { float data[2]; } ")
+  #!-x86-64
   (real :c-type "float")
+  #!-x86-64
   (imag :c-type "float"))
 
 (define-primitive-object (complex-double-float
                           :lowtag other-pointer-lowtag
                           :widetag complex-double-float-widetag)
-  #!-x86-64 (filler)
+  (filler)
   (real :c-type "double" :length #!-x86-64 2 #!+x86-64 1)
   (imag :c-type "double" :length #!-x86-64 2 #!+x86-64 1))
 
diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp
index 5b37876..4614221 100644
--- a/src/compiler/generic/vm-tran.lisp
+++ b/src/compiler/generic/vm-tran.lisp
@@ -584,10 +584,11 @@
      (values)))
 
 ;;;; transforms for EQL of floating point values
-
+#!-x86-64
 (deftransform eql ((x y) (single-float single-float))
   '(= (single-float-bits x) (single-float-bits y)))
 
+#!-x86-64
 (deftransform eql ((x y) (double-float double-float))
   '(and (= (double-float-low-bits x) (double-float-low-bits y))
         (= (double-float-high-bits x) (double-float-high-bits y))))
diff --git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp
index d2f472c..db475c1 100644
--- a/src/compiler/srctran.lisp
+++ b/src/compiler/srctran.lisp
@@ -3499,7 +3499,13 @@
     (cond ((or (and (csubtypep x-type (specifier-type 'float))
                     (csubtypep y-type (specifier-type 'float)))
                (and (csubtypep x-type (specifier-type '(complex float)))
-                    (csubtypep y-type (specifier-type '(complex float)))))
+                    (csubtypep y-type (specifier-type '(complex float))))
+               #!+complex-float-vops
+               (and (csubtypep x-type (specifier-type '(or single-float (complex single-float))))
+                    (csubtypep y-type (specifier-type '(or single-float (complex single-float)))))
+               #!+complex-float-vops
+               (and (csubtypep x-type (specifier-type '(or double-float (complex double-float))))
+                    (csubtypep y-type (specifier-type '(or double-float (complex double-float))))))
            ;; They are both floats. Leave as = so that -0.0 is
            ;; handled correctly.
            (give-up-ir1-transform))
diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp
index bf50ef8..2dc0a40 100644
--- a/src/compiler/x86-64/array.lisp
+++ b/src/compiler/x86-64/array.lisp
@@ -394,8 +394,7 @@
    (move dword-index index)
    (inst shr dword-index 1)
    (inst movss (make-ea-for-float-ref object dword-index offset 4) value)
-   (unless (location= result value)
-     (inst movss result value))))
+   (move result value)))
 
 (define-vop (data-vector-set-c-with-offset/simple-array-single-float)
   (:note "inline array store")
@@ -412,8 +411,7 @@
   (:result-types single-float)
   (:generator 4
    (inst movss (make-ea-for-float-ref object index offset 4) value)
-   (unless (location= result value)
-     (inst movss result value))))
+   (move result value)))
 
 (define-vop (data-vector-ref-with-offset/simple-array-double-float)
   (:note "inline array access")
@@ -460,8 +458,7 @@
   (:result-types double-float)
   (:generator 20
    (inst movsd (make-ea-for-float-ref object index offset 8) value)
-   (unless (location= result value)
-     (inst movsd result value))))
+   (move result value)))
 
 (define-vop (data-vector-set-c-with-offset/simple-array-double-float)
   (:note "inline array store")
@@ -478,8 +475,7 @@
   (:result-types double-float)
   (:generator 19
    (inst movsd (make-ea-for-float-ref object index offset 8) value)
-   (unless (location= result value)
-     (inst movsd result value))))
+   (move result value)))
 
 
 ;;; complex float variants
@@ -497,11 +493,7 @@
   (:results (value :scs (complex-single-reg)))
   (:result-types complex-single-float)
   (:generator 5
-    (let ((real-tn (complex-single-reg-real-tn value)))
-      (inst movss real-tn (make-ea-for-float-ref object index offset 8)))
-    (let ((imag-tn (complex-single-reg-imag-tn value)))
-      (inst movss imag-tn (make-ea-for-float-ref object index offset 8
-                                                 :complex-offset 4)))))
+    (inst movq value (make-ea-for-float-ref object index offset 8))))
 
 (define-vop (data-vector-ref-c-with-offset/simple-array-complex-single-float)
   (:note "inline array access")
@@ -515,11 +507,7 @@
   (:results (value :scs (complex-single-reg)))
   (:result-types complex-single-float)
   (:generator 4
-    (let ((real-tn (complex-single-reg-real-tn value)))
-      (inst movss real-tn (make-ea-for-float-ref object index offset 8)))
-    (let ((imag-tn (complex-single-reg-imag-tn value)))
-      (inst movss imag-tn (make-ea-for-float-ref object index offset 8
-                                                 :complex-offset 4)))))
+    (inst movq value (make-ea-for-float-ref object index offset 8))))
 
 (define-vop (data-vector-set-with-offset/simple-array-complex-single-float)
   (:note "inline array store")
@@ -536,18 +524,8 @@
   (:results (result :scs (complex-single-reg)))
   (:result-types complex-single-float)
   (:generator 5
-    (let ((value-real (complex-single-reg-real-tn value))
-          (result-real (complex-single-reg-real-tn result)))
-      (inst movss (make-ea-for-float-ref object index offset 8) value-real)
-      (unless (location= value-real result-real)
-        (inst movss result-real value-real)))
-    (let ((value-imag (complex-single-reg-imag-tn value))
-          (result-imag (complex-single-reg-imag-tn result)))
-      (inst movss (make-ea-for-float-ref object index offset 8
-                                         :complex-offset 4)
-            value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movss result-imag value-imag)))))
+    (move result value)
+    (inst movq (make-ea-for-float-ref object index offset 8) value)))
 
 (define-vop (data-vector-set-c-with-offset/simple-array-complex-single-float)
   (:note "inline array store")
@@ -563,18 +541,8 @@
   (:results (result :scs (complex-single-reg)))
   (:result-types complex-single-float)
   (:generator 4
-    (let ((value-real (complex-single-reg-real-tn value))
-          (result-real (complex-single-reg-real-tn result)))
-      (inst movss (make-ea-for-float-ref object index offset 8) value-real)
-      (unless (location= value-real result-real)
-        (inst movss result-real value-real)))
-    (let ((value-imag (complex-single-reg-imag-tn value))
-          (result-imag (complex-single-reg-imag-tn result)))
-      (inst movss (make-ea-for-float-ref object index offset 8
-                                         :complex-offset 4)
-            value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movss result-imag value-imag)))))
+    (move result value)
+    (inst movq (make-ea-for-float-ref object index offset 8) value)))
 
 (define-vop (data-vector-ref-with-offset/simple-array-complex-double-float)
   (:note "inline array access")
@@ -589,11 +557,7 @@
   (:results (value :scs (complex-double-reg)))
   (:result-types complex-double-float)
   (:generator 7
-    (let ((real-tn (complex-double-reg-real-tn value)))
-      (inst movsd real-tn (make-ea-for-float-ref object index offset 16 :scale 2)))
-    (let ((imag-tn (complex-double-reg-imag-tn value)))
-      (inst movsd imag-tn (make-ea-for-float-ref object index offset 16 :scale 2
-                                                 :complex-offset 8)))))
+    (inst movapd value (make-ea-for-float-ref object index offset 16 :scale 2))))
 
 (define-vop (data-vector-ref-c-with-offset/simple-array-complex-double-float)
   (:note "inline array access")
@@ -607,11 +571,7 @@
   (:results (value :scs (complex-double-reg)))
   (:result-types complex-double-float)
   (:generator 6
-    (let ((real-tn (complex-double-reg-real-tn value)))
-      (inst movsd real-tn (make-ea-for-float-ref object index offset 16 :scale 2)))
-    (let ((imag-tn (complex-double-reg-imag-tn value)))
-      (inst movsd imag-tn (make-ea-for-float-ref object index offset 16 :scale 2
-                                                 :complex-offset 8)))))
+    (inst movapd value (make-ea-for-float-ref object index offset 16 :scale 2))))
 
 (define-vop (data-vector-set-with-offset/simple-array-complex-double-float)
   (:note "inline array store")
@@ -628,19 +588,8 @@
   (:results (result :scs (complex-double-reg)))
   (:result-types complex-double-float)
   (:generator 20
-    (let ((value-real (complex-double-reg-real-tn value))
-          (result-real (complex-double-reg-real-tn result)))
-      (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2)
-            value-real)
-      (unless (location= value-real result-real)
-        (inst movsd result-real value-real)))
-    (let ((value-imag (complex-double-reg-imag-tn value))
-          (result-imag (complex-double-reg-imag-tn result)))
-      (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2
-                                                 :complex-offset 8)
-            value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movsd result-imag value-imag)))))
+    (inst movapd (make-ea-for-float-ref object index offset 16 :scale 2) value)
+    (move result value)))
 
 (define-vop (data-vector-set-c-with-offset/simple-array-complex-double-float)
   (:note "inline array store")
@@ -656,19 +605,8 @@
   (:results (result :scs (complex-double-reg)))
   (:result-types complex-double-float)
   (:generator 19
-    (let ((value-real (complex-double-reg-real-tn value))
-          (result-real (complex-double-reg-real-tn result)))
-      (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2)
-            value-real)
-      (unless (location= value-real result-real)
-        (inst movsd result-real value-real)))
-    (let ((value-imag (complex-double-reg-imag-tn value))
-          (result-imag (complex-double-reg-imag-tn result)))
-      (inst movsd (make-ea-for-float-ref object index offset 16 :scale 2
-                                                 :complex-offset 8)
-            value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movsd result-imag value-imag)))))
+    (inst movapd (make-ea-for-float-ref object index offset 16 :scale 2) value)
+    (move result value)))
 
 
 
diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp
index b7687f7..632e19e 100644
--- a/src/compiler/x86-64/cell.lisp
+++ b/src/compiler/x86-64/cell.lisp
@@ -662,8 +662,7 @@
     (inst shl tmp 3)
     (inst sub tmp index)
     (inst movss (make-ea-for-raw-slot object index tmp) value)
-   (unless (location= result value)
-     (inst movss result value))))
+    (move result value)))
 
 (define-vop (raw-instance-set-c/single)
   (:translate %raw-instance-set/single)
@@ -682,8 +681,7 @@
     (loadw tmp object 0 instance-pointer-lowtag)
     (inst shr tmp n-widetag-bits)
     (inst movss (make-ea-for-raw-slot object index tmp) value)
-   (unless (location= result value)
-     (inst movss result value))))
+    (move result value)))
 
 (define-vop (raw-instance-init/single)
   (:args (object :scs (descriptor-reg))
@@ -741,8 +739,7 @@
     (inst shl tmp 3)
     (inst sub tmp index)
     (inst movsd (make-ea-for-raw-slot object index tmp) value)
-   (unless (location= result value)
-     (inst movsd result value))))
+    (move result value)))
 
 (define-vop (raw-instance-set-c/double)
   (:translate %raw-instance-set/double)
@@ -761,8 +758,7 @@
     (loadw tmp object 0 instance-pointer-lowtag)
     (inst shr tmp n-widetag-bits)
     (inst movsd (make-ea-for-raw-slot object index tmp) value)
-   (unless (location= result value)
-     (inst movsd result value))))
+    (move result value)))
 
 (define-vop (raw-instance-init/double)
   (:args (object :scs (descriptor-reg))
@@ -786,10 +782,7 @@
     (inst shr tmp n-widetag-bits)
     (inst shl tmp 3)
     (inst sub tmp index)
-    (let ((real-tn (complex-single-reg-real-tn value)))
-      (inst movss real-tn (make-ea-for-raw-slot object index tmp)))
-    (let ((imag-tn (complex-single-reg-imag-tn value)))
-      (inst movss imag-tn (make-ea-for-raw-slot object index tmp 4)))))
+    (inst movq value (make-ea-for-raw-slot object index tmp))))
 
 (define-vop (raw-instance-ref-c/complex-single)
   (:translate %raw-instance-ref/complex-single)
@@ -805,10 +798,7 @@
   (:generator 4
     (loadw tmp object 0 instance-pointer-lowtag)
     (inst shr tmp n-widetag-bits)
-    (let ((real-tn (complex-single-reg-real-tn value)))
-      (inst movss real-tn (make-ea-for-raw-slot object index tmp)))
-    (let ((imag-tn (complex-single-reg-imag-tn value)))
-      (inst movss imag-tn (make-ea-for-raw-slot object index tmp 4)))))
+    (inst movq value (make-ea-for-raw-slot object index tmp))))
 
 (define-vop (raw-instance-set/complex-single)
   (:translate %raw-instance-set/complex-single)
@@ -825,16 +815,8 @@
     (inst shr tmp n-widetag-bits)
     (inst shl tmp 3)
     (inst sub tmp index)
-    (let ((value-real (complex-single-reg-real-tn value))
-          (result-real (complex-single-reg-real-tn result)))
-      (inst movss (make-ea-for-raw-slot object index tmp) value-real)
-      (unless (location= value-real result-real)
-        (inst movss result-real value-real)))
-    (let ((value-imag (complex-single-reg-imag-tn value))
-          (result-imag (complex-single-reg-imag-tn result)))
-      (inst movss (make-ea-for-raw-slot object index tmp 4) value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movss result-imag value-imag)))))
+    (move result value)
+    (inst movq (make-ea-for-raw-slot object index tmp) value)))
 
 (define-vop (raw-instance-set-c/complex-single)
   (:translate %raw-instance-set/complex-single)
@@ -852,16 +834,8 @@
   (:generator 4
     (loadw tmp object 0 instance-pointer-lowtag)
     (inst shr tmp n-widetag-bits)
-    (let ((value-real (complex-single-reg-real-tn value))
-          (result-real (complex-single-reg-real-tn result)))
-      (inst movss (make-ea-for-raw-slot object index tmp) value-real)
-      (unless (location= value-real result-real)
-        (inst movss result-real value-real)))
-    (let ((value-imag (complex-single-reg-imag-tn value))
-          (result-imag (complex-single-reg-imag-tn result)))
-      (inst movss (make-ea-for-raw-slot object index tmp 4) value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movss result-imag value-imag)))))
+    (move result value)
+    (inst movq (make-ea-for-raw-slot object index tmp) value)))
 
 (define-vop (raw-instance-init/complex-single)
   (:args (object :scs (descriptor-reg))
@@ -869,10 +843,7 @@
   (:arg-types * complex-single-float)
   (:info instance-length index)
   (:generator 4
-    (let ((value-real (complex-single-reg-real-tn value)))
-      (inst movss (make-ea-for-raw-slot object index instance-length) value-real))
-    (let ((value-imag (complex-single-reg-imag-tn value)))
-      (inst movss (make-ea-for-raw-slot object index instance-length 4) value-imag))))
+    (inst movq (make-ea-for-raw-slot object index instance-length) value)))
 
 (define-vop (raw-instance-ref/complex-double)
   (:translate %raw-instance-ref/complex-double)
@@ -888,10 +859,7 @@
     (inst shr tmp n-widetag-bits)
     (inst shl tmp 3)
     (inst sub tmp index)
-    (let ((real-tn (complex-double-reg-real-tn value)))
-      (inst movsd real-tn (make-ea-for-raw-slot object index tmp -8)))
-    (let ((imag-tn (complex-double-reg-imag-tn value)))
-      (inst movsd imag-tn (make-ea-for-raw-slot object index tmp)))))
+    (inst movdqu value (make-ea-for-raw-slot object index tmp -8))))
 
 (define-vop (raw-instance-ref-c/complex-double)
   (:translate %raw-instance-ref/complex-double)
@@ -907,10 +875,7 @@
   (:generator 4
     (loadw tmp object 0 instance-pointer-lowtag)
     (inst shr tmp n-widetag-bits)
-    (let ((real-tn (complex-double-reg-real-tn value)))
-      (inst movsd real-tn (make-ea-for-raw-slot object index tmp -8)))
-    (let ((imag-tn (complex-double-reg-imag-tn value)))
-      (inst movsd imag-tn (make-ea-for-raw-slot object index tmp)))))
+    (inst movdqu value (make-ea-for-raw-slot object index tmp -8))))
 
 (define-vop (raw-instance-set/complex-double)
   (:translate %raw-instance-set/complex-double)
@@ -927,16 +892,8 @@
     (inst shr tmp n-widetag-bits)
     (inst shl tmp 3)
     (inst sub tmp index)
-    (let ((value-real (complex-double-reg-real-tn value))
-          (result-real (complex-double-reg-real-tn result)))
-      (inst movsd (make-ea-for-raw-slot object index tmp -8) value-real)
-      (unless (location= value-real result-real)
-        (inst movsd result-real value-real)))
-    (let ((value-imag (complex-double-reg-imag-tn value))
-          (result-imag (complex-double-reg-imag-tn result)))
-      (inst movsd (make-ea-for-raw-slot object index tmp) value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movsd result-imag value-imag)))))
+    (move result value)
+    (inst movdqu (make-ea-for-raw-slot object index tmp -8) value)))
 
 (define-vop (raw-instance-set-c/complex-double)
   (:translate %raw-instance-set/complex-double)
@@ -954,16 +911,8 @@
   (:generator 4
     (loadw tmp object 0 instance-pointer-lowtag)
     (inst shr tmp n-widetag-bits)
-    (let ((value-real (complex-double-reg-real-tn value))
-          (result-real (complex-double-reg-real-tn result)))
-      (inst movsd (make-ea-for-raw-slot object index tmp -8) value-real)
-      (unless (location= value-real result-real)
-        (inst movsd result-real value-real)))
-    (let ((value-imag (complex-double-reg-imag-tn value))
-          (result-imag (complex-double-reg-imag-tn result)))
-      (inst movsd (make-ea-for-raw-slot object index tmp) value-imag)
-      (unless (location= value-imag result-imag)
-        (inst movsd result-imag value-imag)))))
+    (move result value)
+    (inst movdqu (make-ea-for-raw-slot object index tmp -8) value)))
 
 (define-vop (raw-instance-init/complex-double)
   (:args (object :scs (descriptor-reg))
@@ -971,7 +920,4 @@
   (:arg-types * complex-double-float)
   (:info instance-length index)
   (:generator 4
-    (let ((value-real (complex-double-reg-real-tn value)))
-      (inst movsd (make-ea-for-raw-slot object index instance-length -8) value-real))
-    (let ((value-imag (complex-double-reg-imag-tn value)))
-      (inst movsd (make-ea-for-raw-slot object index instance-length) value-imag))))
+    (inst movdqu (make-ea-for-raw-slot object index instance-length -8) value)))
diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp
index 23eef99..286889a 100644
--- a/src/compiler/x86-64/float.lisp
+++ b/src/compiler/x86-64/float.lisp
@@ -19,10 +19,15 @@
   (defun ea-for-df-desc (tn)
     (ea-for-xf-desc tn double-float-value-slot))
   ;; complex floats
+  (defun ea-for-csf-data-desc (tn)
+    (ea-for-xf-desc tn complex-single-float-data-slot))
   (defun ea-for-csf-real-desc (tn)
-    (ea-for-xf-desc tn complex-single-float-real-slot))
+    (ea-for-xf-desc tn complex-single-float-data-slot))
   (defun ea-for-csf-imag-desc (tn)
-    (ea-for-xf-desc tn complex-single-float-imag-slot))
+    (ea-for-xf-desc tn (+ complex-single-float-data-slot 1/2)))
+
+  (defun ea-for-cdf-data-desc (tn)
+    (ea-for-xf-desc tn complex-double-float-real-slot))
   (defun ea-for-cdf-real-desc (tn)
     (ea-for-xf-desc tn complex-double-float-real-slot))
   (defun ea-for-cdf-imag-desc (tn)
@@ -40,7 +45,6 @@
 
 ;;; complex float stack EAs
 (macrolet ((ea-for-cxf-stack (tn kind slot &optional base)
-             (declare (ignore kind))
              `(make-ea
                :qword :base ,base
                :disp (frame-byte-offset
@@ -50,16 +54,28 @@
                              ((= (tn-offset ,base) rbp-offset)
                               0)
                              (t (error "Unexpected offset.")))
-                       (ecase ,slot (:real 0) (:imag 1)))))))
+                       (ecase ,kind
+                         (:single
+                            (ecase ,slot
+                              (:real 0)
+                              (:imag -1/2)))
+                         (:double
+                            (ecase ,slot
+                              (:real 1)
+                              (:imag 0)))))))))
+  (defun ea-for-csf-data-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :single :real base))
   (defun ea-for-csf-real-stack (tn &optional (base rbp-tn))
     (ea-for-cxf-stack tn :single :real base))
   (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn))
     (ea-for-cxf-stack tn :single :imag base))
+
+  (defun ea-for-cdf-data-stack (tn &optional (base rbp-tn))
+    (ea-for-cxf-stack tn :double :real base))
   (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn))
     (ea-for-cxf-stack tn :double :real base))
   (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn))
     (ea-for-cxf-stack tn :double :imag base)))
-
 
 ;;;; move functions
 
@@ -67,11 +83,13 @@
 
 (define-move-fun (load-fp-zero 1) (vop x y)
   ((fp-single-zero) (single-reg)
-   (fp-double-zero) (double-reg))
+   (fp-double-zero) (double-reg)
+   (fp-complex-single-zero) (complex-single-reg)
+   (fp-complex-double-zero) (complex-double-reg))
   (identity x)
   (sc-case y
-    (single-reg (inst xorps y y))
-    (double-reg (inst xorpd y y))))
+    ((single-reg complex-single-reg) (inst xorps y y))
+    ((double-reg complex-double-reg) (inst xorpd y y))))
 
 (define-move-fun (load-single 2) (vop x y)
   ((single-stack) (single-reg))
@@ -94,49 +112,22 @@
 
 ;;;; complex float move functions
 
-(defun complex-single-reg-real-tn (x)
-  (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg)
-                  :offset (tn-offset x)))
-(defun complex-single-reg-imag-tn (x)
-  (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg)
-                  :offset (1+ (tn-offset x))))
-
-(defun complex-double-reg-real-tn (x)
-  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg)
-                  :offset (tn-offset x)))
-(defun complex-double-reg-imag-tn (x)
-  (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg)
-                  :offset (1+ (tn-offset x))))
-
 ;;; X is source, Y is destination.
 (define-move-fun (load-complex-single 2) (vop x y)
   ((complex-single-stack) (complex-single-reg))
-  (let ((real-tn (complex-single-reg-real-tn y)))
-    (inst movss real-tn (ea-for-csf-real-stack x)))
-  (let ((imag-tn (complex-single-reg-imag-tn y)))
-    (inst movss imag-tn (ea-for-csf-imag-stack x))))
+  (inst movq y (ea-for-csf-data-stack x)))
 
 (define-move-fun (store-complex-single 2) (vop x y)
   ((complex-single-reg) (complex-single-stack))
-  (let ((real-tn (complex-single-reg-real-tn x))
-        (imag-tn (complex-single-reg-imag-tn x)))
-    (inst movss (ea-for-csf-real-stack y) real-tn)
-    (inst movss (ea-for-csf-imag-stack y) imag-tn)))
+  (inst movq (ea-for-csf-data-stack y) x))
 
 (define-move-fun (load-complex-double 2) (vop x y)
   ((complex-double-stack) (complex-double-reg))
-  (let ((real-tn (complex-double-reg-real-tn y)))
-    (inst movsd real-tn (ea-for-cdf-real-stack x)))
-  (let ((imag-tn (complex-double-reg-imag-tn y)))
-    (inst movsd imag-tn (ea-for-cdf-imag-stack x))))
+  (inst movupd y (ea-for-cdf-data-stack x)))
 
 (define-move-fun (store-complex-double 2) (vop x y)
   ((complex-double-reg) (complex-double-stack))
-  (let ((real-tn (complex-double-reg-real-tn x))
-        (imag-tn (complex-double-reg-imag-tn x)))
-    (inst movsd (ea-for-cdf-real-stack y) real-tn)
-    (inst movsd (ea-for-cdf-imag-stack y) imag-tn)))
-
+  (inst movupd (ea-for-cdf-data-stack y) x))
 
 ;;;; move VOPs
 
@@ -151,43 +142,12 @@
                                :load-if (not (location= x y))))
                   (:note "float move")
                   (:generator 0
-                    (unless (location= y x)
-                      (inst movq y x))))
+                    (move y x)))
                 (define-move-vop ,vop :move (,sc) (,sc)))))
   (frob single-move single-reg)
-  (frob double-move double-reg))
-
-;;; complex float register to register moves
-(define-vop (complex-float-move)
-  (:args (x :target y :load-if (not (location= x y))))
-  (:results (y :load-if (not (location= x y))))
-  (:note "complex float move")
-  (:generator 0
-     (unless (location= x y)
-       ;; Note the complex-float-regs are aligned to every second
-       ;; float register so there is not need to worry about overlap.
-       ;; (It would be better to put the imagpart in the top half of the
-       ;; register, or something, but let's worry about that later)
-       (let ((x-real (complex-single-reg-real-tn x))
-             (y-real (complex-single-reg-real-tn y)))
-         (inst movq y-real x-real))
-       (let ((x-imag (complex-single-reg-imag-tn x))
-             (y-imag (complex-single-reg-imag-tn y)))
-         (inst movq y-imag x-imag)))))
-
-(define-vop (complex-single-move complex-float-move)
-  (:args (x :scs (complex-single-reg) :target y
-            :load-if (not (location= x y))))
-  (:results (y :scs (complex-single-reg) :load-if (not (location= x y)))))
-(define-move-vop complex-single-move :move
-  (complex-single-reg) (complex-single-reg))
-
-(define-vop (complex-double-move complex-float-move)
-  (:args (x :scs (complex-double-reg)
-            :target y :load-if (not (location= x y))))
-  (:results (y :scs (complex-double-reg) :load-if (not (location= x y)))))
-(define-move-vop complex-double-move :move
-  (complex-double-reg) (complex-double-reg))
+  (frob double-move double-reg)
+  (frob complex-single-move complex-single-reg)
+  (frob complex-double-move complex-double-reg))
 
 
 ;;; Move from float to a descriptor reg. allocating a new float
@@ -252,10 +212,7 @@
                              complex-single-float-widetag
                              complex-single-float-size
                              node)
-       (let ((real-tn (complex-single-reg-real-tn x)))
-         (inst movss (ea-for-csf-real-desc y) real-tn))
-       (let ((imag-tn (complex-single-reg-imag-tn x)))
-         (inst movss (ea-for-csf-imag-desc y) imag-tn)))))
+       (inst movq (ea-for-csf-data-desc y) x))))
 (define-move-vop move-from-complex-single :move
   (complex-single-reg) (descriptor-reg))
 
@@ -269,10 +226,7 @@
                              complex-double-float-widetag
                              complex-double-float-size
                              node)
-       (let ((real-tn (complex-double-reg-real-tn x)))
-         (inst movsd (ea-for-cdf-real-desc y) real-tn))
-       (let ((imag-tn (complex-double-reg-imag-tn x)))
-         (inst movsd (ea-for-cdf-imag-desc y) imag-tn)))))
+       (inst movapd (ea-for-cdf-data-desc y) x))))
 (define-move-vop move-from-complex-double :move
   (complex-double-reg) (descriptor-reg))
 
@@ -284,20 +238,11 @@
                   (:results (y :scs (,sc)))
                   (:note "pointer to complex float coercion")
                   (:generator 2
-                    (let ((real-tn (complex-double-reg-real-tn y)))
-                      ,@(ecase
-                         format
-                         (:single
-                          '((inst movss real-tn (ea-for-csf-real-desc x))))
-                         (:double
-                          '((inst movsd real-tn (ea-for-cdf-real-desc x))))))
-                    (let ((imag-tn (complex-double-reg-imag-tn y)))
-                      ,@(ecase
-                         format
-                         (:single
-                          '((inst movss imag-tn (ea-for-csf-imag-desc x))))
-                         (:double
-                          '((inst movsd imag-tn (ea-for-cdf-imag-desc x))))))))
+                    ,(ecase format
+                      (:single
+                         '(inst movq y (ea-for-csf-data-desc x)))
+                      (:double
+                         '(inst movapd y (ea-for-cdf-data-desc x))))))
                 (define-move-vop ,name :move (descriptor-reg) (,sc)))))
   (frob move-to-complex-single complex-single-reg :single)
   (frob move-to-complex-double complex-double-reg :double))
@@ -319,8 +264,7 @@
                   (:generator ,(case format (:single 2) (:double 3) )
                     (sc-case y
                       (,sc
-                       (unless (location= x y)
-                         (inst movq y x)))
+                       (move y x))
                       (,stack-sc
                        (if (= (tn-offset fp) esp-offset)
                            (let* ((offset (* (tn-offset y) n-word-bytes))
@@ -351,32 +295,13 @@
                   (:generator ,(ecase format (:single 2) (:double 3))
                     (sc-case y
                       (,sc
-                       (unless (location= x y)
-                         (let ((x-real (complex-double-reg-real-tn x))
-                               (y-real (complex-double-reg-real-tn y)))
-                           (inst movsd y-real x-real))
-                         (let ((x-imag (complex-double-reg-imag-tn x))
-                               (y-imag (complex-double-reg-imag-tn y)))
-                           (inst movsd y-imag x-imag))))
+                       (move y x))
                       (,stack-sc
-                       (let ((real-tn (complex-double-reg-real-tn x)))
-                         ,@(ecase format
-                                  (:single
-                                   '((inst movss
-                                      (ea-for-csf-real-stack y fp)
-                                      real-tn)))
-                                  (:double
-                                   '((inst movsd
-                                      (ea-for-cdf-real-stack y fp)
-                                      real-tn)))))
-                       (let ((imag-tn (complex-double-reg-imag-tn x)))
-                         ,@(ecase format
-                                  (:single
-                                   '((inst movss
-                                      (ea-for-csf-imag-stack y fp) imag-tn)))
-                                  (:double
-                                   '((inst movsd
-                                      (ea-for-cdf-imag-stack y fp) imag-tn)))))))))
+                       ,(ecase format
+                          (:single
+                             '(inst movq (ea-for-csf-data-stack y fp) x))
+                          (:double
+                             '(inst movupd (ea-for-cdf-data-stack y fp) x)))))))
                 (define-move-vop ,name :move-arg
                   (,sc descriptor-reg) (,sc)))))
   (frob move-complex-single-float-arg
@@ -408,9 +333,11 @@
                 (:arg-types ,ptype ,ptype)
                 (:result-types ,ptype))))
   (frob single-float-op single-reg single-float)
-  (frob double-float-op double-reg double-float))
+  (frob double-float-op double-reg double-float)
+  (frob complex-single-float-op complex-single-reg complex-single-float)
+  (frob complex-double-float-op complex-double-reg complex-double-float))
 
-(macrolet ((generate (movinst opinst commutative)
+(macrolet ((generate (opinst commutative)
              `(progn
                 (cond
                   ((location= x r)
@@ -418,29 +345,280 @@
                   ((and ,commutative (location= y r))
                    (inst ,opinst y x))
                   ((not (location= r y))
-                   (inst ,movinst r x)
+                   (move r x)
                    (inst ,opinst r y))
                   (t
-                   (inst ,movinst tmp x)
+                   (move tmp x)
                    (inst ,opinst tmp y)
-                   (inst ,movinst r tmp)))))
-           (frob (op sinst sname scost dinst dname dcost commutative)
+                   (move r tmp)))))
+           (frob (op sinst sname scost dinst dname dcost commutative
+                     &optional csinst csname cscost cdinst cdname cdcost)
              `(progn
                 (define-vop (,sname single-float-op)
                     (:translate ,op)
                   (:temporary (:sc single-reg) tmp)
                   (:generator ,scost
-                    (generate movss ,sinst ,commutative)))
+                    (generate ,sinst ,commutative)))
                 (define-vop (,dname double-float-op)
                   (:translate ,op)
-                  (:temporary (:sc single-reg) tmp)
+                  (:temporary (:sc double-reg) tmp)
                   (:generator ,dcost
-                    (generate movsd ,dinst ,commutative))))))
-  (frob + addss +/single-float 2 addsd +/double-float 2 t)
-  (frob - subss -/single-float 2 subsd -/double-float 2 nil)
+                    (generate ,dinst ,commutative)))
+                ,(when csinst
+                   `(define-vop (,csname complex-single-float-op)
+                      (:translate ,op)
+                      (:temporary (:sc complex-single-reg) tmp)
+                      (:generator ,cscost
+                        (generate ,csinst ,commutative))))
+                ,(when cdinst
+                   `(define-vop (,cdname complex-double-float-op)
+                      (:translate ,op)
+                      (:temporary (:sc complex-double-reg) tmp)
+                      (:generator ,cdcost
+                        (generate ,cdinst ,commutative)))))))
+  (frob + addss +/single-float 2 addsd +/double-float 2 t
+        addps +/complex-single-float 3 addpd +/complex-double-float 3)
+  (frob - subss -/single-float 2 subsd -/double-float 2 nil
+        subps -/complex-single-float 3 subpd -/complex-double-float 3)
   (frob * mulss */single-float 4 mulsd */double-float 5 t)
   (frob / divss //single-float 12 divsd //double-float 19 nil))
 
+(macrolet ((frob (op cost commutativep
+                     duplicate-inst op-inst
+                     real-sc real-type complex-sc complex-type
+                     real-complex-name complex-real-name)
+             (cond ((not duplicate-inst) ; simple case
+                    `(progn
+                       ,(when real-complex-name
+                          `(define-vop (,real-complex-name float-op)
+                             (:translate ,op)
+                             (:args (x :scs (,real-sc)    :target r)
+                                    (y :scs (,complex-sc)
+                                       ,@(when commutativep '(:target r))))
+                             (:arg-types ,real-type ,complex-type)
+                             (:results (r :scs (,complex-sc)
+                                          ,@(unless commutativep '(:from (:argument 0)))))
+                             (:result-types ,complex-type)
+                             (:generator ,cost
+                               ,(when commutativep
+                                  `(when (location= y r)
+                                     (rotatef x y)))
+                               (move r x)
+                               (inst ,op-inst r y))))
+
+                       ,(when complex-real-name
+                          `(define-vop (,complex-real-name float-op)
+                             (:translate ,op)
+                             (:args (x :scs (,complex-sc) :target r)
+                                    (y :scs (,real-sc)
+                                       ,@(when commutativep '(:target r))))
+                             (:arg-types ,complex-type ,real-type)
+                             (:results (r :scs (,complex-sc)
+                                          ,@(unless commutativep '(:from (:argument 0)))))
+                             (:result-types ,complex-type)
+                             (:generator ,cost
+                               ,(when commutativep
+                                  `(when (location= y r)
+                                     (rotatef x y)))
+                               (move r x)
+                               (inst ,op-inst r y))))))
+                   (commutativep ; must duplicate, but commutative
+                    `(progn
+                       ,(when real-complex-name
+                          `(define-vop (,real-complex-name float-op)
+                             (:translate ,op)
+                             (:args (x :scs (,real-sc)    :target dup)
+                                    (y :scs (,complex-sc) :target r
+                                       :to  :result))
+                             (:arg-types ,real-type ,complex-type)
+                             (:temporary (:sc ,complex-sc :target r
+                                          :from (:argument 0)
+                                          :to   :result)
+                                         dup)
+                             (:results (r :scs (,complex-sc)))
+                             (:result-types ,complex-type)
+                             (:generator ,cost
+                                (let ((real x))
+                                  ,duplicate-inst)
+                                ;; safe: dup /= y
+                                (when (location= dup r)
+                                  (rotatef dup y))
+                                (move r y)
+                                (inst ,op-inst r dup))))
+
+                       ,(when complex-real-name
+                          `(define-vop (,complex-real-name float-op)
+                             (:translate ,op)
+                             (:args (x :scs (,complex-sc) :target r
+                                       :to  :result)
+                                    (y :scs (,real-sc)    :target dup))
+                             (:arg-types ,complex-type ,real-type)
+                             (:temporary (:sc ,complex-sc :target r
+                                          :from (:argument 1)
+                                          :to :result)
+                                         dup)
+                             (:results (r :scs (,complex-sc)))
+                             (:result-types ,complex-type)
+                             (:generator ,cost
+                                (let ((real y))
+                                  ,duplicate-inst)
+                                (when (location= dup r)
+                                  (rotatef x dup))
+                                (move r x)
+                                (inst ,op-inst r dup))))))
+                   (t ; duplicate, not commutative
+                    `(progn
+                       ,(when real-complex-name
+                          `(define-vop (,real-complex-name float-op)
+                             (:translate ,op)
+                             (:args (x :scs (,real-sc)
+                                       :target r)
+                                    (y :scs (,complex-sc) :to :result))
+                             (:arg-types ,real-type ,complex-type)
+                             (:results (r :scs (,complex-sc) :from (:argument 0)))
+                             (:result-types ,complex-type)
+                             (:generator ,cost
+                               (let ((real x)
+                                     (dup  r))
+                                 ,duplicate-inst)
+                               (inst ,op-inst r y))))
+
+                       ,(when complex-real-name
+                          `(define-vop (,complex-real-name float-op)
+                             (:translate ,op)
+                             (:args (x :scs (,complex-sc) :target r
+                                       :to :eval)
+                                    (y :scs (,real-sc)    :target dup))
+                             (:arg-types ,complex-type ,real-type)
+                             (:temporary (:sc ,complex-sc :from (:argument 1))
+                                         dup)
+                             (:results (r :scs (,complex-sc) :from :eval))
+                             (:result-types ,complex-type)
+                             (:generator ,cost
+                               (let ((real y))
+                                 ,duplicate-inst)
+                               (move r x)
+                               (inst ,op-inst r dup))))))))
+           (def-real-complex-op (op commutativep duplicatep
+                                    single-inst single-real-complex-name single-complex-real-name single-cost
+                                    double-inst double-real-complex-name double-complex-real-name double-cost)
+               `(progn
+                  (frob ,op ,single-cost ,commutativep
+                        ,(and duplicatep
+                              `(progn
+                                 (move dup real)
+                                 (inst unpcklps dup dup)))
+                        ,single-inst
+                        single-reg single-float complex-single-reg complex-single-float
+                        ,single-real-complex-name ,single-complex-real-name)
+                  (frob ,op ,double-cost ,commutativep
+                        ,(and duplicatep
+                              `(progn
+                                 (move dup real)
+                                 (inst unpcklpd dup dup)))
+                        ,double-inst
+                        double-reg double-float complex-double-reg complex-double-float
+                        ,double-real-complex-name ,double-complex-real-name))))
+  (def-real-complex-op + t nil
+    addps +/real-complex-single-float +/complex-real-single-float 3
+    addpd +/real-complex-double-float +/complex-real-double-float 4)
+  (def-real-complex-op - nil nil
+    subps -/real-complex-single-float -/complex-real-single-float 3
+    subpd -/real-complex-double-float -/complex-real-double-float 4)
+  (def-real-complex-op * t t
+    mulps */real-complex-single-float */complex-real-single-float 4
+    mulpd */real-complex-double-float */complex-real-double-float 5)
+  (def-real-complex-op / nil t
+    nil nil nil nil
+    divpd nil //complex-real-double-float 19))
+
+(define-vop (//complex-real-single-float float-op)
+  (:translate /)
+  (:args (x :scs (complex-single-reg)
+            :to (:result 0)
+            :target r)
+         (y :scs (single-reg) :target dup))
+  (:arg-types complex-single-float single-float)
+  (:temporary (:sc complex-single-reg :from (:argument 1)) dup)
+  (:results (r :scs (complex-single-reg)))
+  (:result-types complex-single-float)
+  (:generator 12
+    (move dup y)
+    (inst shufps dup dup #b00000000)
+    (move r x)
+    (inst unpcklpd r r)
+    (inst divps r dup)
+    (inst movq r r)))
+
+;; Complex multiplication
+;; r := rx * ry - ix * iy
+;; i := rx * iy + ix * ry
+;;
+;; Transpose for SIMDness
+;;  rx*ry    rx*iy
+;; -ix*iy   +ix*ry
+;;
+;;  [rx rx] * [ry iy]
+;;+ [ix ix] * [-iy ry]
+;;       [r i]
+
+(macrolet ((define-complex-* (name cost type sc &body body)
+               `(define-vop (,name float-op)
+                  (:translate *)
+                  (:args (x :scs (,sc) :target r)
+                         (y :scs (,sc) :target copy-y))
+                  (:arg-types ,type ,type)
+                  (:temporary (:sc any-reg) hex8)
+                  (:temporary (:sc ,sc) imag)
+                  (:temporary (:sc ,sc :from :eval) copy-y)
+                  (:temporary (:sc ,sc) xmm)
+                  (:results (r :scs (,sc) :from :eval))
+                  (:result-types ,type)
+                  (:generator ,cost
+                    (when (or (location= x copy-y)
+                              (location= y r))
+                      (rotatef x y))
+                    ,@body))))
+  (define-complex-* */complex-single-float 20 complex-single-float complex-single-reg
+    (inst xorps xmm xmm)
+    (move r x)
+    (inst unpcklps r r)
+    (move imag r)
+    (inst unpckhpd imag xmm)
+    (inst unpcklpd r    xmm)
+    (move copy-y y)  ; y == r only if y == x == r
+    (setf y copy-y)
+
+    (inst lea hex8 (make-ea :qword :disp 1))
+    (inst rol hex8 31)
+    (inst movd xmm hex8)
+
+    (inst mulps r y)
+
+    (inst shufps y y #b11110001)
+    (inst xorps y xmm)
+
+    (inst mulps imag y)
+    (inst addps r imag))
+  (define-complex-* */complex-double-float 25 complex-double-float complex-double-reg
+    (move imag x)
+    (move r x)
+    (move copy-y y)
+    (setf y copy-y)
+    (inst unpcklpd r r)
+    (inst unpckhpd imag imag)
+    (inst lea hex8 (make-ea :qword :disp 1))
+    (inst ror hex8 1)               ; #x8000000000000000
+    (inst movd xmm hex8)
+
+    (inst mulpd r y)
+
+    (inst shufpd y y #b01)
+    (inst xorpd y xmm)
+
+    (inst mulpd imag y)
+    (inst addpd r imag)))
+
 (define-vop (fsqrt)
   (:args (x :scs (double-reg)))
   (:results (y :scs (double-reg)))
@@ -474,19 +652,41 @@
                             ;; we should be able to do this better.  what we
                             ;; really would like to do is use the target as the
                             ;; temp whenever it's not also the source
-                            (unless (location= x y)
-                              (inst movq y x))
+                            (move y x)
                             ,@body))))
   (frob (%negate/double-float %negate double-reg double-float)
         (inst lea hex8 (make-ea :qword :disp 1))
         (inst ror hex8 1)               ; #x8000000000000000
         (inst movd xmm hex8)
         (inst xorpd y xmm))
+  (frob (%negate/complex-double-float %negate complex-double-reg complex-double-float)
+        (inst lea hex8 (make-ea :qword :disp 1))
+        (inst ror hex8 1)               ; #x8000000000000000
+        (inst movd xmm hex8)
+        (inst unpcklpd xmm xmm)
+        (inst xorpd y xmm))
+  (frob (conjugate/complex-double-float conjugate complex-double-reg complex-double-float)
+        (inst lea hex8 (make-ea :qword :disp 1))
+        (inst ror hex8 1)               ; #x8000000000000000
+        (inst movd xmm hex8)
+        (inst shufpd xmm xmm #b01)
+        (inst xorpd y xmm))
   (frob (%negate/single-float %negate single-reg single-float)
         (inst lea hex8 (make-ea :qword :disp 1))
         (inst rol hex8 31)
         (inst movd xmm hex8)
         (inst xorps y xmm))
+  (frob (%negate/complex-single-float %negate complex-single-reg complex-single-float)
+        (inst lea hex8 (make-ea :qword :disp 1))
+        (inst rol hex8 31)
+        (inst movd xmm hex8)
+        (inst unpcklps xmm xmm)
+        (inst xorps y xmm))
+  (frob (conjugate/complex-single-float conjugate complex-single-reg complex-single-float)
+        (inst lea hex8 (make-ea :qword :disp 1))
+        (inst ror hex8 1)               ; #x8000000000000000
+        (inst movd xmm hex8)
+        (inst xorpd y xmm))
   (frob (abs/double-float abs  double-reg double-float)
         (inst mov hex8 -1)
         (inst shr hex8 1)
@@ -497,6 +697,7 @@
         (inst shr hex8 33)
         (inst movd xmm hex8)
         (inst andps y xmm)))
+
 
 ;;;; comparison
 
@@ -506,6 +707,32 @@
   (:save-p :compute-only)
   (:note "inline float comparison"))
 
+;;; EQL
+(macrolet ((define-float-eql (name cost sc type)
+               `(define-vop (,name float-compare)
+                  (:translate eql)
+                  (:args (x :scs (,sc) :target mask)
+                         (y :scs (,sc) :target mask))
+                  (:arg-types ,type ,type)
+                  (:temporary (:sc ,sc :from :eval) mask)
+                  (:temporary (:sc any-reg) bits)
+                  (:conditional :e)
+                  (:generator ,cost
+                    (when (location= y mask)
+                      (rotatef x y))
+                    (move mask x)
+                    (inst pcmpeqd mask y)
+                    (inst movmskps bits mask)
+                    (inst cmp bits #b1111)))))
+  (define-float-eql eql/single-float 4
+    single-reg single-float)
+  (define-float-eql eql/double-float 4
+    double-reg double-float)
+  (define-float-eql eql/complex-double-float 5
+    complex-double-reg complex-double-float)
+  (define-float-eql eql/complex-single-float 5
+    complex-single-reg complex-single-float))
+
 ;;; comiss and comisd can cope with one or other arg in memory: we
 ;;; could (should, indeed) extend these to cope with descriptor args
 ;;; and stack args
@@ -518,7 +745,7 @@
   (:arg-types double-float double-float))
 
 (define-vop (=/single-float single-float-compare)
-    (:translate =)
+  (:translate =)
   (:info)
   (:conditional not :p :ne)
   (:vop-var vop)
@@ -530,7 +757,7 @@
     ))
 
 (define-vop (=/double-float double-float-compare)
-    (:translate =)
+  (:translate =)
   (:info)
   (:conditional not :p :ne)
   (:vop-var vop)
@@ -538,6 +765,42 @@
     (note-this-location vop :internal-error)
     (inst comisd x y)))
 
+(macrolet ((define-complex-float-= (complex-complex-name complex-real-name real-complex-name
+                                    real-sc real-type complex-sc complex-type
+                                    cmp-inst mask-inst mask)
+               `(progn
+                  (define-vop (,complex-complex-name float-compare)
+                    (:translate =)
+                    (:args (x :scs (,complex-sc) :target cmp)
+                           (y :scs (,complex-sc) :target cmp))
+                    (:arg-types ,complex-type ,complex-type)
+                    (:temporary (:sc ,complex-sc :from :eval) cmp)
+                    (:temporary (:sc unsigned-reg) bits)
+                    (:info)
+                    (:conditional :e)
+                    (:generator 3
+                      (when (location= y cmp)
+                        (rotatef x y))
+                      (move cmp x)
+                      (note-this-location vop :internal-error)
+                      (inst ,cmp-inst :eq cmp y)
+                      (inst ,mask-inst bits cmp)
+                      (inst cmp bits ,mask)))
+                  (define-vop (,complex-real-name ,complex-complex-name)
+                    (:args (x :scs (,complex-sc) :target cmp)
+                           (y :scs (,real-sc)    :target cmp))
+                    (:arg-types ,complex-type ,real-type))
+                  (define-vop (,real-complex-name ,complex-complex-name)
+                    (:args (x :scs (,real-sc)    :target cmp)
+                           (y :scs (,complex-sc) :target cmp))
+                    (:arg-types ,real-type ,complex-type)))))
+  (define-complex-float-= =/complex-single-float =/complex-real-single-float =/real-complex-single-float
+      single-reg single-float complex-single-reg complex-single-float
+    cmpps movmskps #b1111)
+  (define-complex-float-= =/complex-double-float =/complex-real-double-float =/real-complex-double-float
+      double-reg double-float complex-double-reg complex-double-float
+    cmppd movmskpd #b11))
+
 (define-vop (<double-float double-float-compare)
   (:translate <)
   (:info)
@@ -766,69 +1029,72 @@
 
 (define-vop (make-complex-single-float)
   (:translate complex)
-  (:args (real :scs (single-reg) :to :result :target r
-               :load-if (not (location= real r)))
-         (imag :scs (single-reg) :to :save))
+  (:args (real :scs (single-reg fp-single-zero)
+               :target r
+               :load-if (not (sc-is real fp-single-zero)))
+         (imag :scs (single-reg fp-single-zero)
+               :load-if (not (sc-is imag fp-single-zero))))
   (:arg-types single-float single-float)
-  (:results (r :scs (complex-single-reg) :from (:argument 0)
-               :load-if (not (sc-is r complex-single-stack))))
+  (:results (r :scs (complex-single-reg) :from (:argument 0)))
   (:result-types complex-single-float)
   (:note "inline complex single-float creation")
   (:policy :fast-safe)
   (:generator 5
-    (sc-case r
-      (complex-single-reg
-       (let ((r-real (complex-single-reg-real-tn r)))
-         (unless (location= real r-real)
-           (inst movss r-real real)))
-       (let ((r-imag (complex-single-reg-imag-tn r)))
-         (unless (location= imag r-imag)
-           (inst movss r-imag imag))))
-      (complex-single-stack
-       (unless (location= real r)
-         (inst movss (ea-for-csf-real-stack r) real))
-       (inst movss (ea-for-csf-imag-stack r) imag)))))
+    (cond ((sc-is real fp-single-zero)
+           (inst xorps r r)
+           (unless (sc-is imag fp-single-zero)
+             (inst unpcklps r imag)))
+          ((location= real imag)
+           (move r real)
+           (inst unpcklps r r))
+          (t
+           (move r real)
+           (unless (sc-is imag fp-single-zero)
+             (inst unpcklps r imag))))))
 
 (define-vop (make-complex-double-float)
   (:translate complex)
-  (:args (real :scs (double-reg) :target r
-               :load-if (not (location= real r)))
-         (imag :scs (double-reg) :to :save))
+  (:args (real :scs (double-reg fp-double-zero)
+               :target r
+               :load-if (not (sc-is real fp-double-zero)))
+         (imag :scs (double-reg fp-double-zero)
+               :load-if (not (sc-is imag fp-double-zero))))
   (:arg-types double-float double-float)
-  (:results (r :scs (complex-double-reg) :from (:argument 0)
-               :load-if (not (sc-is r complex-double-stack))))
+  (:results (r :scs (complex-double-reg) :from (:argument 0)))
   (:result-types complex-double-float)
   (:note "inline complex double-float creation")
   (:policy :fast-safe)
   (:generator 5
-    (sc-case r
-      (complex-double-reg
-       (let ((r-real (complex-double-reg-real-tn r)))
-         (unless (location= real r-real)
-           (inst movsd r-real real)))
-       (let ((r-imag (complex-double-reg-imag-tn r)))
-         (unless (location= imag r-imag)
-           (inst movsd r-imag imag))))
-      (complex-double-stack
-       (unless (location= real r)
-         (inst movsd (ea-for-cdf-real-stack r) real))
-       (inst movsd (ea-for-cdf-imag-stack r) imag)))))
+    (cond ((sc-is real fp-double-zero)
+           (inst xorpd r r)
+           (unless (sc-is imag fp-double-zero)
+             (inst unpcklpd r imag)))
+          ((location= real imag)
+           (move r real)
+           (inst unpcklpd r r))
+          (t
+           (move r real)
+           (unless (sc-is imag fp-double-zero)
+             (inst unpcklpd r imag))))))
 
 (define-vop (complex-float-value)
   (:args (x :target r))
+  (:temporary (:sc complex-double-reg) zero)
   (:results (r))
   (:variant-vars offset)
   (:policy :fast-safe)
   (:generator 3
-    (cond ((sc-is x complex-single-reg complex-double-reg)
-           (let ((value-tn
-                  (make-random-tn :kind :normal
-                                  :sc (sc-or-lose 'double-reg)
-                                  :offset (+ offset (tn-offset x)))))
-             (unless (location= value-tn r)
-               (if (sc-is x complex-single-reg)
-                   (inst movss r value-tn)
-                   (inst movsd r value-tn)))))
+    (cond ((sc-is x complex-double-reg)
+           (move r x)
+           (inst xorpd zero zero)
+           (ecase offset
+             (0 (inst unpcklpd r zero))
+             (1 (inst unpckhpd r zero))))
+          ((sc-is x complex-single-reg)
+           (move r x)
+           (ecase offset
+             (0 (inst shufps r r #b11111100))
+             (1 (inst shufps r r #b11111101))))
           ((sc-is r single-reg)
            (let ((ea (sc-case x
                        (complex-single-stack
@@ -915,3 +1181,30 @@
   (:note "inline dummy FP register bias")
   (:ignore x)
   (:generator 0))
+
+(defknown swap-complex ((complex float)) (complex float)
+    (foldable flushable movable always-translatable))
+(defoptimizer (swap-complex derive-type) ((x))
+  (sb!c::lvar-type x))
+(defun swap-complex (x)
+  (complex (imagpart x) (realpart x)))
+(define-vop (swap-complex-single-float)
+  (:translate swap-complex)
+  (:policy :fast-safe)
+  (:args (x :scs (complex-single-reg) :target r))
+  (:arg-types complex-single-float)
+  (:results (r :scs (complex-single-reg)))
+  (:result-types complex-single-float)
+  (:generator 2
+     (move r x)
+     (inst shufps r r #b11110001)))
+(define-vop (swap-complex-double-float)
+  (:translate swap-complex)
+  (:policy :fast-safe)
+  (:args (x :scs (complex-double-reg) :target r))
+  (:arg-types complex-double-float)
+  (:results (r :scs (complex-double-reg)))
+  (:result-types complex-double-float)
+  (:generator 2
+     (move r x)
+     (inst shufpd r r #b01)))
diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp
index 927ce8d..1f3cc0b 100644
--- a/src/compiler/x86-64/insts.lisp
+++ b/src/compiler/x86-64/insts.lisp
@@ -1681,6 +1681,8 @@
         :float)
        (#.*double-sc-names*
         :double)
+       (#.*complex-sc-names*
+        :complex)
        (t
         (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing))))))
     (ea
@@ -3150,6 +3152,10 @@
   (define-regular-sse-inst subps    nil  #x5c)
   (define-regular-sse-inst subsd    #xf2 #x5c)
   (define-regular-sse-inst subss    #xf3 #x5c)
+  (define-regular-sse-inst unpckhpd #x66 #x15)
+  (define-regular-sse-inst unpckhps nil  #x15)
+  (define-regular-sse-inst unpcklpd #x66 #x14)
+  (define-regular-sse-inst unpcklps nil  #x14)
   ;; integer arithmetic
   (define-regular-sse-inst paddb    #x66 #xfc)
   (define-regular-sse-inst paddw    #x66 #xfd)
diff --git a/src/compiler/x86-64/macros.lisp b/src/compiler/x86-64/macros.lisp
index a80b73e..018b43c 100644
--- a/src/compiler/x86-64/macros.lisp
+++ b/src/compiler/x86-64/macros.lisp
@@ -20,10 +20,12 @@
               (n-src src))
     `(unless (location= ,n-dst ,n-src)
        (sc-case ,n-dst
-         (single-reg
-          (inst movss ,n-dst ,n-src))
-         (double-reg
-          (inst movsd ,n-dst ,n-src))
+         ((single-reg complex-single-reg)
+          (aver (xmm-register-p ,n-src))
+          (inst movaps ,n-dst ,n-src))
+         ((double-reg complex-double-reg)
+          (aver (xmm-register-p ,n-src))
+          (inst movapd ,n-dst ,n-src))
          (t
           (inst mov ,n-dst ,n-src))))))
 
diff --git a/src/compiler/x86-64/vm.lisp b/src/compiler/x86-64/vm.lisp
index c946adb..78c2df3 100644
--- a/src/compiler/x86-64/vm.lisp
+++ b/src/compiler/x86-64/vm.lisp
@@ -217,6 +217,8 @@
 
   (fp-single-zero immediate-constant)
   (fp-double-zero immediate-constant)
+  (fp-complex-single-zero immediate-constant)
+  (fp-complex-double-zero immediate-constant)
 
   (immediate immediate-constant)
 
@@ -235,7 +237,7 @@
   (sap-stack stack)                     ; System area pointers.
   (single-stack stack)                  ; single-floats
   (double-stack stack)
-  (complex-single-stack stack :element-size 2)  ; complex-single-floats
+  (complex-single-stack stack)  ; complex-single-floats
   (complex-double-stack stack :element-size 2)  ; complex-double-floats
 
 
@@ -325,29 +327,27 @@
 
   ;; non-descriptor SINGLE-FLOATs
   (single-reg float-registers
-              :locations #.(loop for i from 0 below 15 collect i)
+              :locations #.*float-regs*
               :constant-scs (fp-single-zero)
               :save-p t
               :alternate-scs (single-stack))
 
   ;; non-descriptor DOUBLE-FLOATs
   (double-reg float-registers
-              :locations #.(loop for i from 0 below 15 collect i)
+              :locations #.*float-regs*
               :constant-scs (fp-double-zero)
               :save-p t
               :alternate-scs (double-stack))
 
   (complex-single-reg float-registers
-                      :locations #.(loop for i from 0 to 14 by 2 collect i)
-                      :element-size 2
-                      :constant-scs ()
+                      :locations #.*float-regs*
+                      :constant-scs (fp-complex-single-zero)
                       :save-p t
                       :alternate-scs (complex-single-stack))
 
   (complex-double-reg float-registers
-                      :locations #.(loop for i from 0 to 14 by 2 collect i)
-                      :element-size 2
-                      :constant-scs ()
+                      :locations #.*float-regs*
+                      :constant-scs (fp-complex-double-zero)
                       :save-p t
                       :alternate-scs (complex-double-stack))
 
@@ -369,6 +369,8 @@
 ;;; These are used to (at least) determine operand size.
 (defparameter *float-sc-names* '(single-reg))
 (defparameter *double-sc-names* '(double-reg double-stack))
+(defparameter *complex-sc-names* '(complex-single-reg complex-single-stack
+                                   complex-double-reg complex-double-stack))
 ) ; EVAL-WHEN
 
 ;;;; miscellaneous TNs for the various registers
@@ -413,16 +415,6 @@
   (make-random-tn :kind :normal :sc (sc-or-lose 'unsigned-reg )
                   :offset r12-offset))
 
-(defparameter fp-single-zero-tn
-  (make-random-tn :kind :normal
-                  :sc (sc-or-lose 'single-reg)
-                  :offset 15))
-
-(defparameter fp-double-zero-tn
-  (make-random-tn :kind :normal
-                  :sc (sc-or-lose 'double-reg)
-                  :offset 15))
-
 ;;; If value can be represented as an immediate constant, then return
 ;;; the appropriate SC number, otherwise return NIL.
 (!def-vm-support-routine immediate-constant-sc (value)
@@ -440,6 +432,14 @@
     (double-float
      (if (eql value 0d0)
          (sc-number-or-lose 'fp-double-zero )
+         nil))
+    ((complex single-float)
+     (if (eql value (complex 0f0 0f0))
+         (sc-number-or-lose 'fp-complex-single-zero)
+         nil))
+    ((complex double-float)
+     (if (eql value (complex 0d0 0d0))
+         (sc-number-or-lose 'fp-complex-double-zero)
          nil))))
 
 
diff --git a/src/runtime/print.c b/src/runtime/print.c
index 833ec91..6f3b505 100644
--- a/src/runtime/print.c
+++ b/src/runtime/print.c
@@ -549,9 +549,17 @@ static void print_otherptr(lispobj obj)
 #ifdef COMPLEX_SINGLE_FLOAT_WIDETAG
             case COMPLEX_SINGLE_FLOAT_WIDETAG:
                 NEWLINE_OR_RETURN;
+#ifdef LISP_FEATURE_X86_64
+                printf("%g", ((struct complex_single_float *)native_pointer(obj))->data.data[0]);
+#else
                 printf("%g", ((struct complex_single_float *)native_pointer(obj))->real);
+#endif
                 NEWLINE_OR_RETURN;
+#ifdef LISP_FEATURE_X86_64
+                printf("%g", ((struct complex_single_float *)native_pointer(obj))->data.data[1]);
+#else
                 printf("%g", ((struct complex_single_float *)native_pointer(obj))->imag);
+#endif
                 break;
 #endif
 
diff --git a/tests/arith.pure.lisp b/tests/arith.pure.lisp
index f98ba64..aa39251 100644
--- a/tests/arith.pure.lisp
+++ b/tests/arith.pure.lisp
@@ -294,4 +294,3 @@
 ;; 1.0 had a broken ATANH on win32
 (with-test (:name :atanh)
   (assert (= (atanh 0.9d0) 1.4722194895832204d0)))
-
diff --git a/tests/float.pure.lisp b/tests/float.pure.lisp
index bea74a8..08513dd 100644
--- a/tests/float.pure.lisp
+++ b/tests/float.pure.lisp
@@ -233,3 +233,82 @@
     (assert (eql 0.0d0 (funcall f 123.0d0 0.0)))
     (assert (eql 0.0d0 (funcall f 123.0d0 0.0d0)))
     (assert (eql 0.0d0 (funcall f 123.0 0.0d0)))))
+
+
+;; 1.0.29.xFIXMEx introduces a ton of changes for complex floats
+;; on x86-64. Huge test of doom to help catch weird corner
+;; cases.
+(with-test (:name :complex-floats)
+  (labels ((equal-enough (x y)
+             (cond ((eql x y))
+                   ((or (complexp x)
+                        (complexp y))
+                    (or (eql (coerce x '(complex double-float))
+                             (coerce y '(complex double-float)))
+                        (and (equal-enough (realpart x) (realpart y))
+                             (equal-enough (imagpart x) (imagpart y)))))
+                   ((numberp x)
+                    (or (eql (coerce x 'double-float) (coerce y 'double-float))
+                        (< (abs (- x y))  1d-5)))))
+           (reflections (x)
+             (values x
+                     (conjugate x)
+                     (complex (- (realpart x)) (imagpart x))
+                     (- x)))
+           (compute (x y r)
+             (list (+ x y) (+ r x) (+ x r)
+                   (- x y) (- r x) (- x r)
+                   (* x y) (* x r) (* r x)
+                   (unless (zerop y)
+                     (/ x y))
+                   (unless (zerop r)
+                     (/ x r))
+                   (unless (zerop x)
+                     (/ r x))
+                   (conjugate x) (conjugate r)
+                   (- x)
+                   (complex r) (complex r r) (complex 0 r)
+                   (= x y) (= r x) (= y r) (= x (complex 0 r))
+                   (eql x y) (eql x (complex r)) (eql y (complex r))
+                   (eql x (complex r r)) (eql y (complex 0 r))))
+           (compute-all (x y r)
+             (multiple-value-bind (x1 x2 x3 x4) (reflections x)
+               (multiple-value-bind (y1 y2 y3 y4) (reflections y)
+                 #.(let ((form '(list)))
+                     (dolist (x '(x1 x2 x3 x4) (reverse form))
+                       (dolist (y '(y1 y2 y3 y4))
+                         (push `(list ,x ,y r
+                                      (append (compute ,x ,y r)
+                                              (compute ,x ,y (- r))))
+                               form))))))))
+    (declare (inline reflections compute compute-all))
+    (let* ((reals     '(0 1 2))
+           (complexes '#.(let ((reals '(0 1 2))
+                               (cpx   '()))
+                           (dolist (x reals (nreverse cpx))
+                             (dolist (y reals)
+                               (push (complex x y) cpx)))))
+           (val       ()))
+      (declare (notinline every))
+      (dolist (r reals (nreverse val))
+        (dolist (x complexes)
+          (dolist (y complexes)
+            (let ((value  (compute-all x y r))
+                  (single (compute-all (coerce x '(complex single-float))
+                                       (coerce y '(complex single-float))
+                                       (coerce r 'single-float)))
+                  (double (compute-all (coerce x '(complex double-float))
+                                       (coerce y '(complex double-float))
+                                       (coerce r 'double-float))))
+              (assert (every (lambda (pos ref single double)
+                               (every (lambda (ref single double)
+                                        (or (and (equal-enough ref single)
+                                                 (equal-enough ref double))
+                                            (and (not (numberp single)) ;; -ve 0s
+                                                 (equal-enough single double))))
+                                      (fourth ref) (fourth single) (fourth double)))
+                             '((0 0) (0 1) (0 2) (0 3)
+                               (1 0) (1 1) (1 2) (1 3)
+                               (2 0) (2 1) (2 2) (2 3)
+                               (3 0) (3 1) (3 2) (3 3))
+                             value single double)))))))))
-- 
1.7.10.4