src/compiler/x86-64/float.lisp

   1 ;;;; floating point support for the x86
   2
   3 ;;;; This software is part of the SBCL system. See the README file for
   4 ;;;; more information.
   5 ;;;;
   6 ;;;; This software is derived from the CMU CL system, which was
   7 ;;;; written at Carnegie Mellon University and released into the
   8 ;;;; public domain. The software is in the public domain and is
   9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
  10 ;;;; files for more information.
  11
  12 (in-package "SB!VM")
  13 \f
  14 (macrolet ((ea-for-xf-desc (tn slot)
  15              `(make-ea
  16                :qword :base ,tn
  17                :disp (- (* ,slot n-word-bytes)
  18                         other-pointer-lowtag))))
  19   (defun ea-for-df-desc (tn)
  20     (ea-for-xf-desc tn double-float-value-slot))
  21   ;; complex floats
  22   (defun ea-for-csf-data-desc (tn)
  23     (ea-for-xf-desc tn complex-single-float-data-slot))
  24   (defun ea-for-csf-real-desc (tn)
  25     (ea-for-xf-desc tn complex-single-float-data-slot))
  26   (defun ea-for-csf-imag-desc (tn)
  27     (ea-for-xf-desc tn (+ complex-single-float-data-slot 1/2)))
  28
  29   (defun ea-for-cdf-data-desc (tn)
  30     (ea-for-xf-desc tn complex-double-float-real-slot))
  31   (defun ea-for-cdf-real-desc (tn)
  32     (ea-for-xf-desc tn complex-double-float-real-slot))
  33   (defun ea-for-cdf-imag-desc (tn)
  34     (ea-for-xf-desc tn complex-double-float-imag-slot)))
  35
  36 (macrolet ((ea-for-xf-stack (tn kind)
  37              (declare (ignore kind))
  38              `(make-ea
  39                :qword :base rbp-tn
  40                :disp (frame-byte-offset (tn-offset ,tn)))))
  41   (defun ea-for-sf-stack (tn)
  42     (ea-for-xf-stack tn :single))
  43   (defun ea-for-df-stack (tn)
  44     (ea-for-xf-stack tn :double)))
  45
  46 ;;; complex float stack EAs
  47 (macrolet ((ea-for-cxf-stack (tn kind slot &optional base)
  48              `(make-ea
  49                :qword :base ,base
  50                :disp (frame-byte-offset
  51                       (+ (tn-offset ,tn)
  52                        (cond ((= (tn-offset ,base) rsp-offset)
  53                               sp->fp-offset)
  54                              ((= (tn-offset ,base) rbp-offset)
  55                               0)
  56                              (t (error "Unexpected offset.")))
  57                        (ecase ,kind
  58                          (:single
  59                             (ecase ,slot
  60                               (:real 0)
  61                               (:imag -1/2)))
  62                          (:double
  63                             (ecase ,slot
  64                               (:real 1)
  65                               (:imag 0)))))))))
  66   (defun ea-for-csf-data-stack (tn &optional (base rbp-tn))
  67     (ea-for-cxf-stack tn :single :real base))
  68   (defun ea-for-csf-real-stack (tn &optional (base rbp-tn))
  69     (ea-for-cxf-stack tn :single :real base))
  70   (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn))
  71     (ea-for-cxf-stack tn :single :imag base))
  72
  73   (defun ea-for-cdf-data-stack (tn &optional (base rbp-tn))
  74     (ea-for-cxf-stack tn :double :real base))
  75   (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn))
  76     (ea-for-cxf-stack tn :double :real base))
  77   (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn))
  78     (ea-for-cxf-stack tn :double :imag base)))
  79 \f
  80 ;;;; move functions
  81
  82 ;;; X is source, Y is destination.
  83
  84 (define-move-fun (load-fp-zero 1) (vop x y)
  85   ((fp-single-zero) (single-reg)
  86    (fp-double-zero) (double-reg)
  87    (fp-complex-single-zero) (complex-single-reg)
  88    (fp-complex-double-zero) (complex-double-reg))
  89   (identity x)
  90   (sc-case y
  91     ((single-reg complex-single-reg) (inst xorps y y))
  92     ((double-reg complex-double-reg) (inst xorpd y y))))
  93
  94 (define-move-fun (load-single 2) (vop x y)
  95   ((single-stack) (single-reg))
  96   (inst movss y (ea-for-sf-stack x)))
  97
  98 (define-move-fun (store-single 2) (vop x y)
  99   ((single-reg) (single-stack))
 100   (inst movss (ea-for-sf-stack y) x))
 101
 102 (define-move-fun (load-double 2) (vop x y)
 103   ((double-stack) (double-reg))
 104   (inst movsd y (ea-for-df-stack x)))
 105
 106 (define-move-fun (store-double 2) (vop x y)
 107   ((double-reg) (double-stack))
 108   (inst movsd  (ea-for-df-stack y) x))
 109
 110 (eval-when (:compile-toplevel :execute)
 111   (setf *read-default-float-format* 'single-float))
 112 \f
 113 ;;;; complex float move functions
 114
 115 ;;; X is source, Y is destination.
 116 (define-move-fun (load-complex-single 2) (vop x y)
 117   ((complex-single-stack) (complex-single-reg))
 118   (inst movq y (ea-for-csf-data-stack x)))
 119
 120 (define-move-fun (store-complex-single 2) (vop x y)
 121   ((complex-single-reg) (complex-single-stack))
 122   (inst movq (ea-for-csf-data-stack y) x))
 123
 124 (define-move-fun (load-complex-double 2) (vop x y)
 125   ((complex-double-stack) (complex-double-reg))
 126   (inst movupd y (ea-for-cdf-data-stack x)))
 127
 128 (define-move-fun (store-complex-double 2) (vop x y)
 129   ((complex-double-reg) (complex-double-stack))
 130   (inst movupd (ea-for-cdf-data-stack y) x))
 131 \f
 132 ;;;; move VOPs
 133
 134 ;;; float register to register moves
 135 (macrolet ((frob (vop sc)
 136              `(progn
 137                 (define-vop (,vop)
 138                   (:args (x :scs (,sc)
 139                             :target y
 140                             :load-if (not (location= x y))))
 141                   (:results (y :scs (,sc)
 142                                :load-if (not (location= x y))))
 143                   (:note "float move")
 144                   (:generator 0
 145                     (move y x)))
 146                 (define-move-vop ,vop :move (,sc) (,sc)))))
 147   (frob single-move single-reg)
 148   (frob double-move double-reg)
 149   (frob complex-single-move complex-single-reg)
 150   (frob complex-double-move complex-double-reg))
 151
 152 \f
 153 ;;; Move from float to a descriptor reg. allocating a new float
 154 ;;; object in the process.
 155 (define-vop (move-from-single)
 156   (:args (x :scs (single-reg) :to :save))
 157   (:results (y :scs (descriptor-reg)))
 158   (:note "float to pointer coercion")
 159   (:generator 4
 160     (inst movd y x)
 161     (inst shl y 32)
 162     (inst or y single-float-widetag)))
 163
 164 (define-move-vop move-from-single :move
 165   (single-reg) (descriptor-reg))
 166
 167 (define-vop (move-from-double)
 168   (:args (x :scs (double-reg) :to :save))
 169   (:results (y :scs (descriptor-reg)))
 170   (:node-var node)
 171   (:note "float to pointer coercion")
 172   (:generator 13
 173      (with-fixed-allocation (y
 174                              double-float-widetag
 175                              double-float-size
 176                              node)
 177        (inst movsd (ea-for-df-desc y) x))))
 178 (define-move-vop move-from-double :move
 179   (double-reg) (descriptor-reg))
 180
 181 ;;; Move from a descriptor to a float register.
 182 (define-vop (move-to-single)
 183   (:args (x :scs (descriptor-reg) :target tmp))
 184   (:temporary (:sc unsigned-reg) tmp)
 185   (:results (y :scs (single-reg)))
 186   (:note "pointer to float coercion")
 187   (:generator 2
 188     (move tmp x)
 189     (inst shr tmp 32)
 190     (inst movd y tmp)))
 191
 192 (define-move-vop move-to-single :move (descriptor-reg) (single-reg))
 193
 194 (define-vop (move-to-double)
 195   (:args (x :scs (descriptor-reg)))
 196   (:results (y :scs (double-reg)))
 197   (:note "pointer to float coercion")
 198   (:generator 2
 199     (inst movsd y (ea-for-df-desc x))))
 200 (define-move-vop move-to-double :move (descriptor-reg) (double-reg))
 201
 202 \f
 203 ;;; Move from complex float to a descriptor reg. allocating a new
 204 ;;; complex float object in the process.
 205 (define-vop (move-from-complex-single)
 206   (:args (x :scs (complex-single-reg) :to :save))
 207   (:results (y :scs (descriptor-reg)))
 208   (:node-var node)
 209   (:note "complex float to pointer coercion")
 210   (:generator 13
 211      (with-fixed-allocation (y
 212                              complex-single-float-widetag
 213                              complex-single-float-size
 214                              node)
 215        (inst movq (ea-for-csf-data-desc y) x))))
 216 (define-move-vop move-from-complex-single :move
 217   (complex-single-reg) (descriptor-reg))
 218
 219 (define-vop (move-from-complex-double)
 220   (:args (x :scs (complex-double-reg) :to :save))
 221   (:results (y :scs (descriptor-reg)))
 222   (:node-var node)
 223   (:note "complex float to pointer coercion")
 224   (:generator 13
 225      (with-fixed-allocation (y
 226                              complex-double-float-widetag
 227                              complex-double-float-size
 228                              node)
 229        (inst movapd (ea-for-cdf-data-desc y) x))))
 230 (define-move-vop move-from-complex-double :move
 231   (complex-double-reg) (descriptor-reg))
 232
 233 ;;; Move from a descriptor to a complex float register.
 234 (macrolet ((frob (name sc format)
 235              `(progn
 236                 (define-vop (,name)
 237                   (:args (x :scs (descriptor-reg)))
 238                   (:results (y :scs (,sc)))
 239                   (:note "pointer to complex float coercion")
 240                   (:generator 2
 241                     ,(ecase format
 242                       (:single
 243                          '(inst movq y (ea-for-csf-data-desc x)))
 244                       (:double
 245                          '(inst movapd y (ea-for-cdf-data-desc x))))))
 246                 (define-move-vop ,name :move (descriptor-reg) (,sc)))))
 247   (frob move-to-complex-single complex-single-reg :single)
 248   (frob move-to-complex-double complex-double-reg :double))
 249 \f
 250 ;;;; the move argument vops
 251 ;;;;
 252 ;;;; Note these are also used to stuff fp numbers onto the c-call
 253 ;;;; stack so the order is different than the lisp-stack.
 254
 255 ;;; the general MOVE-ARG VOP
 256 (macrolet ((frob (name sc stack-sc format)
 257              `(progn
 258                 (define-vop (,name)
 259                   (:args (x :scs (,sc) :target y)
 260                          (fp :scs (any-reg)
 261                              :load-if (not (sc-is y ,sc))))
 262                   (:results (y))
 263                   (:note "float argument move")
 264                   (:generator ,(case format (:single 2) (:double 3) )
 265                     (sc-case y
 266                       (,sc
 267                        (move y x))
 268                       (,stack-sc
 269                        (if (= (tn-offset fp) esp-offset)
 270                            (let* ((offset (* (tn-offset y) n-word-bytes))
 271                                   (ea (make-ea :dword :base fp :disp offset)))
 272                              ,@(ecase format
 273                                       (:single '((inst movss ea x)))
 274                                       (:double '((inst movsd ea x)))))
 275                            (let ((ea (make-ea
 276                                       :dword :base fp
 277                                       :disp (frame-byte-offset (tn-offset y)))))
 278                              ,@(ecase format
 279                                  (:single '((inst movss ea x)))
 280                                  (:double '((inst movsd ea x))))))))))
 281                 (define-move-vop ,name :move-arg
 282                   (,sc descriptor-reg) (,sc)))))
 283   (frob move-single-float-arg single-reg single-stack :single)
 284   (frob move-double-float-arg double-reg double-stack :double))
 285
 286 ;;;; complex float MOVE-ARG VOP
 287 (macrolet ((frob (name sc stack-sc format)
 288              `(progn
 289                 (define-vop (,name)
 290                   (:args (x :scs (,sc) :target y)
 291                          (fp :scs (any-reg)
 292                              :load-if (not (sc-is y ,sc))))
 293                   (:results (y))
 294                   (:note "complex float argument move")
 295                   (:generator ,(ecase format (:single 2) (:double 3))
 296                     (sc-case y
 297                       (,sc
 298                        (move y x))
 299                       (,stack-sc
 300                        ,(ecase format
 301                           (:single
 302                              '(inst movq (ea-for-csf-data-stack y fp) x))
 303                           (:double
 304                              '(inst movupd (ea-for-cdf-data-stack y fp) x)))))))
 305                 (define-move-vop ,name :move-arg
 306                   (,sc descriptor-reg) (,sc)))))
 307   (frob move-complex-single-float-arg
 308         complex-single-reg complex-single-stack :single)
 309   (frob move-complex-double-float-arg
 310         complex-double-reg complex-double-stack :double))
 311
 312 (define-move-vop move-arg :move-arg
 313   (single-reg double-reg
 314    complex-single-reg complex-double-reg)
 315   (descriptor-reg))
 316
 317 \f
 318 ;;;; arithmetic VOPs
 319
 320 (define-vop (float-op)
 321   (:args (x) (y))
 322   (:results (r))
 323   (:policy :fast-safe)
 324   (:note "inline float arithmetic")
 325   (:vop-var vop)
 326   (:save-p :compute-only))
 327
 328 (macrolet ((frob (name sc ptype)
 329              `(define-vop (,name float-op)
 330                 (:args (x :scs (,sc) :target r)
 331                        (y :scs (,sc)))
 332                 (:results (r :scs (,sc)))
 333                 (:arg-types ,ptype ,ptype)
 334                 (:result-types ,ptype))))
 335   (frob single-float-op single-reg single-float)
 336   (frob double-float-op double-reg double-float)
 337   (frob complex-single-float-op complex-single-reg complex-single-float)
 338   (frob complex-double-float-op complex-double-reg complex-double-float))
 339
 340 (macrolet ((generate (opinst commutative)
 341              `(progn
 342                 (cond
 343                   ((location= x r)
 344                    (inst ,opinst x y))
 345                   ((and ,commutative (location= y r))
 346                    (inst ,opinst y x))
 347                   ((not (location= r y))
 348                    (move r x)
 349                    (inst ,opinst r y))
 350                   (t
 351                    (move tmp x)
 352                    (inst ,opinst tmp y)
 353                    (move r tmp)))))
 354            (frob (op sinst sname scost dinst dname dcost commutative
 355                      &optional csinst csname cscost cdinst cdname cdcost)
 356              `(progn
 357                 (define-vop (,sname single-float-op)
 358                     (:translate ,op)
 359                   (:temporary (:sc single-reg) tmp)
 360                   (:generator ,scost
 361                     (generate ,sinst ,commutative)))
 362                 (define-vop (,dname double-float-op)
 363                   (:translate ,op)
 364                   (:temporary (:sc double-reg) tmp)
 365                   (:generator ,dcost
 366                     (generate ,dinst ,commutative)))
 367                 ,(when csinst
 368                    `(define-vop (,csname complex-single-float-op)
 369                       (:translate ,op)
 370                       (:temporary (:sc complex-single-reg) tmp)
 371                       (:generator ,cscost
 372                         (generate ,csinst ,commutative))))
 373                 ,(when cdinst
 374                    `(define-vop (,cdname complex-double-float-op)
 375                       (:translate ,op)
 376                       (:temporary (:sc complex-double-reg) tmp)
 377                       (:generator ,cdcost
 378                         (generate ,cdinst ,commutative)))))))
 379   (frob + addss +/single-float 2 addsd +/double-float 2 t
 380         addps +/complex-single-float 3 addpd +/complex-double-float 3)
 381   (frob - subss -/single-float 2 subsd -/double-float 2 nil
 382         subps -/complex-single-float 3 subpd -/complex-double-float 3)
 383   (frob * mulss */single-float 4 mulsd */double-float 5 t)
 384   (frob / divss //single-float 12 divsd //double-float 19 nil))
 385
 386 (macrolet ((frob (op cost commutativep
 387                      duplicate-inst op-inst
 388                      real-sc real-type complex-sc complex-type
 389                      real-complex-name complex-real-name)
 390              (cond ((not duplicate-inst) ; simple case
 391                     `(progn
 392                        ,(when real-complex-name
 393                           `(define-vop (,real-complex-name float-op)
 394                              (:translate ,op)
 395                              (:args (x :scs (,real-sc)    :target r)
 396                                     (y :scs (,complex-sc)
 397                                        ,@(when commutativep '(:target r))))
 398                              (:arg-types ,real-type ,complex-type)
 399                              (:results (r :scs (,complex-sc)
 400                                           ,@(unless commutativep '(:from (:argument 0)))))
 401                              (:result-types ,complex-type)
 402                              (:generator ,cost
 403                                ,(when commutativep
 404                                   `(when (location= y r)
 405                                      (rotatef x y)))
 406                                (move r x)
 407                                (inst ,op-inst r y))))
 408
 409                        ,(when complex-real-name
 410                           `(define-vop (,complex-real-name float-op)
 411                              (:translate ,op)
 412                              (:args (x :scs (,complex-sc) :target r)
 413                                     (y :scs (,real-sc)
 414                                        ,@(when commutativep '(:target r))))
 415                              (:arg-types ,complex-type ,real-type)
 416                              (:results (r :scs (,complex-sc)
 417                                           ,@(unless commutativep '(:from (:argument 0)))))
 418                              (:result-types ,complex-type)
 419                              (:generator ,cost
 420                                ,(when commutativep
 421                                   `(when (location= y r)
 422                                      (rotatef x y)))
 423                                (move r x)
 424                                (inst ,op-inst r y))))))
 425                    (commutativep ; must duplicate, but commutative
 426                     `(progn
 427                        ,(when real-complex-name
 428                           `(define-vop (,real-complex-name float-op)
 429                              (:translate ,op)
 430                              (:args (x :scs (,real-sc)    :target dup)
 431                                     (y :scs (,complex-sc) :target r
 432                                        :to  :result))
 433                              (:arg-types ,real-type ,complex-type)
 434                              (:temporary (:sc ,complex-sc :target r
 435                                           :from (:argument 0)
 436                                           :to   :result)
 437                                          dup)
 438                              (:results (r :scs (,complex-sc)))
 439                              (:result-types ,complex-type)
 440                              (:generator ,cost
 441                                 (let ((real x))
 442                                   ,duplicate-inst)
 443                                 ;; safe: dup /= y
 444                                 (when (location= dup r)
 445                                   (rotatef dup y))
 446                                 (move r y)
 447                                 (inst ,op-inst r dup))))
 448
 449                        ,(when complex-real-name
 450                           `(define-vop (,complex-real-name float-op)
 451                              (:translate ,op)
 452                              (:args (x :scs (,complex-sc) :target r
 453                                        :to  :result)
 454                                     (y :scs (,real-sc)    :target dup))
 455                              (:arg-types ,complex-type ,real-type)
 456                              (:temporary (:sc ,complex-sc :target r
 457                                           :from (:argument 1)
 458                                           :to :result)
 459                                          dup)
 460                              (:results (r :scs (,complex-sc)))
 461                              (:result-types ,complex-type)
 462                              (:generator ,cost
 463                                 (let ((real y))
 464                                   ,duplicate-inst)
 465                                 (when (location= dup r)
 466                                   (rotatef x dup))
 467                                 (move r x)
 468                                 (inst ,op-inst r dup))))))
 469                    (t ; duplicate, not commutative
 470                     `(progn
 471                        ,(when real-complex-name
 472                           `(define-vop (,real-complex-name float-op)
 473                              (:translate ,op)
 474                              (:args (x :scs (,real-sc)
 475                                        :target r)
 476                                     (y :scs (,complex-sc) :to :result))
 477                              (:arg-types ,real-type ,complex-type)
 478                              (:results (r :scs (,complex-sc) :from (:argument 0)))
 479                              (:result-types ,complex-type)
 480                              (:generator ,cost
 481                                (let ((real x)
 482                                      (dup  r))
 483                                  ,duplicate-inst)
 484                                (inst ,op-inst r y))))
 485
 486                        ,(when complex-real-name
 487                           `(define-vop (,complex-real-name float-op)
 488                              (:translate ,op)
 489                              (:args (x :scs (,complex-sc) :target r
 490                                        :to :eval)
 491                                     (y :scs (,real-sc)    :target dup))
 492                              (:arg-types ,complex-type ,real-type)
 493                              (:temporary (:sc ,complex-sc :from (:argument 1))
 494                                          dup)
 495                              (:results (r :scs (,complex-sc) :from :eval))
 496                              (:result-types ,complex-type)
 497                              (:generator ,cost
 498                                (let ((real y))
 499                                  ,duplicate-inst)
 500                                (move r x)
 501                                (inst ,op-inst r dup))))))))
 502            (def-real-complex-op (op commutativep duplicatep
 503                                     single-inst single-real-complex-name single-complex-real-name single-cost
 504                                     double-inst double-real-complex-name double-complex-real-name double-cost)
 505                `(progn
 506                   (frob ,op ,single-cost ,commutativep
 507                         ,(and duplicatep
 508                               `(progn
 509                                  (move dup real)
 510                                  (inst unpcklps dup dup)))
 511                         ,single-inst
 512                         single-reg single-float complex-single-reg complex-single-float
 513                         ,single-real-complex-name ,single-complex-real-name)
 514                   (frob ,op ,double-cost ,commutativep
 515                         ,(and duplicatep
 516                               `(progn
 517                                  (move dup real)
 518                                  (inst unpcklpd dup dup)))
 519                         ,double-inst
 520                         double-reg double-float complex-double-reg complex-double-float
 521                         ,double-real-complex-name ,double-complex-real-name))))
 522   (def-real-complex-op + t nil
 523     addps +/real-complex-single-float +/complex-real-single-float 3
 524     addpd +/real-complex-double-float +/complex-real-double-float 4)
 525   (def-real-complex-op - nil nil
 526     subps -/real-complex-single-float -/complex-real-single-float 3
 527     subpd -/real-complex-double-float -/complex-real-double-float 4)
 528   (def-real-complex-op * t t
 529     mulps */real-complex-single-float */complex-real-single-float 4
 530     mulpd */real-complex-double-float */complex-real-double-float 5)
 531   (def-real-complex-op / nil t
 532     nil nil nil nil
 533     divpd nil //complex-real-double-float 19))
 534
 535 (define-vop (//complex-real-single-float float-op)
 536   (:translate /)
 537   (:args (x :scs (complex-single-reg)
 538             :to (:result 0)
 539             :target r)
 540          (y :scs (single-reg) :target dup))
 541   (:arg-types complex-single-float single-float)
 542   (:temporary (:sc complex-single-reg :from (:argument 1)) dup)
 543   (:results (r :scs (complex-single-reg)))
 544   (:result-types complex-single-float)
 545   (:generator 12
 546     (move dup y)
 547     (inst shufps dup dup #b00000000)
 548     (move r x)
 549     (inst unpcklpd r r)
 550     (inst divps r dup)
 551     (inst movq r r)))
 552
 553 ;; Complex multiplication
 554 ;; r := rx * ry - ix * iy
 555 ;; i := rx * iy + ix * ry
 556 ;;
 557 ;; Transpose for SIMDness
 558 ;;  rx*ry    rx*iy
 559 ;; -ix*iy   +ix*ry
 560 ;;
 561 ;;  [rx rx] * [ry iy]
 562 ;;+ [ix ix] * [-iy ry]
 563 ;;       [r i]
 564
 565 (macrolet ((define-complex-* (name cost type sc &body body)
 566                `(define-vop (,name float-op)
 567                   (:translate *)
 568                   (:args (x :scs (,sc) :target r)
 569                          (y :scs (,sc) :target copy-y))
 570                   (:arg-types ,type ,type)
 571                   (:temporary (:sc any-reg) hex8)
 572                   (:temporary (:sc ,sc) imag)
 573                   (:temporary (:sc ,sc :from :eval) copy-y)
 574                   (:temporary (:sc ,sc) xmm)
 575                   (:results (r :scs (,sc) :from :eval))
 576                   (:result-types ,type)
 577                   (:generator ,cost
 578                     (when (or (location= x copy-y)
 579                               (location= y r))
 580                       (rotatef x y))
 581                     ,@body))))
 582   (define-complex-* */complex-single-float 20 complex-single-float complex-single-reg
 583     (inst xorps xmm xmm)
 584     (move r x)
 585     (inst unpcklps r r)
 586     (move imag r)
 587     (inst unpckhpd imag xmm)
 588     (inst unpcklpd r    xmm)
 589     (move copy-y y)  ; y == r only if y == x == r
 590     (setf y copy-y)
 591
 592     (inst lea hex8 (make-ea :qword :disp 1))
 593     (inst rol hex8 31)
 594     (inst movd xmm hex8)
 595
 596     (inst mulps r y)
 597
 598     (inst shufps y y #b11110001)
 599     (inst xorps y xmm)
 600
 601     (inst mulps imag y)
 602     (inst addps r imag))
 603   (define-complex-* */complex-double-float 25 complex-double-float complex-double-reg
 604     (move imag x)
 605     (move r x)
 606     (move copy-y y)
 607     (setf y copy-y)
 608     (inst unpcklpd r r)
 609     (inst unpckhpd imag imag)
 610     (inst lea hex8 (make-ea :qword :disp 1))
 611     (inst ror hex8 1)               ; #x8000000000000000
 612     (inst movd xmm hex8)
 613
 614     (inst mulpd r y)
 615
 616     (inst shufpd y y #b01)
 617     (inst xorpd y xmm)
 618
 619     (inst mulpd imag y)
 620     (inst addpd r imag)))
 621
 622 (define-vop (fsqrt)
 623   (:args (x :scs (double-reg)))
 624   (:results (y :scs (double-reg)))
 625   (:translate %sqrt)
 626   (:policy :fast-safe)
 627   (:arg-types double-float)
 628   (:result-types double-float)
 629   (:note "inline float arithmetic")
 630   (:vop-var vop)
 631   (:save-p :compute-only)
 632   (:generator 1
 633      (note-this-location vop :internal-error)
 634      (inst sqrtsd y x)))
 635 \f
 636 (macrolet ((frob ((name translate sc type) &body body)
 637              `(define-vop (,name)
 638                   (:args (x :scs (,sc)))
 639                 (:results (y :scs (,sc)))
 640                 (:translate ,translate)
 641                 (:policy :fast-safe)
 642                 (:arg-types ,type)
 643                 (:result-types ,type)
 644                 (:temporary (:sc any-reg) hex8)
 645                 (:temporary
 646                  (:sc ,sc) xmm)
 647                 (:note "inline float arithmetic")
 648                 (:vop-var vop)
 649                 (:save-p :compute-only)
 650                 (:generator 1
 651                             (note-this-location vop :internal-error)
 652                             ;; we should be able to do this better.  what we
 653                             ;; really would like to do is use the target as the
 654                             ;; temp whenever it's not also the source
 655                             (move y x)
 656                             ,@body))))
 657   (frob (%negate/double-float %negate double-reg double-float)
 658         (inst lea hex8 (make-ea :qword :disp 1))
 659         (inst ror hex8 1)               ; #x8000000000000000
 660         (inst movd xmm hex8)
 661         (inst xorpd y xmm))
 662   (frob (%negate/complex-double-float %negate complex-double-reg complex-double-float)
 663         (inst lea hex8 (make-ea :qword :disp 1))
 664         (inst ror hex8 1)               ; #x8000000000000000
 665         (inst movd xmm hex8)
 666         (inst unpcklpd xmm xmm)
 667         (inst xorpd y xmm))
 668   (frob (conjugate/complex-double-float conjugate complex-double-reg complex-double-float)
 669         (inst lea hex8 (make-ea :qword :disp 1))
 670         (inst ror hex8 1)               ; #x8000000000000000
 671         (inst movd xmm hex8)
 672         (inst shufpd xmm xmm #b01)
 673         (inst xorpd y xmm))
 674   (frob (%negate/single-float %negate single-reg single-float)
 675         (inst lea hex8 (make-ea :qword :disp 1))
 676         (inst rol hex8 31)
 677         (inst movd xmm hex8)
 678         (inst xorps y xmm))
 679   (frob (%negate/complex-single-float %negate complex-single-reg complex-single-float)
 680         (inst lea hex8 (make-ea :qword :disp 1))
 681         (inst rol hex8 31)
 682         (inst movd xmm hex8)
 683         (inst unpcklps xmm xmm)
 684         (inst xorps y xmm))
 685   (frob (conjugate/complex-single-float conjugate complex-single-reg complex-single-float)
 686         (inst lea hex8 (make-ea :qword :disp 1))
 687         (inst ror hex8 1)               ; #x8000000000000000
 688         (inst movd xmm hex8)
 689         (inst xorpd y xmm))
 690   (frob (abs/double-float abs  double-reg double-float)
 691         (inst mov hex8 -1)
 692         (inst shr hex8 1)
 693         (inst movd xmm hex8)
 694         (inst andpd y xmm))
 695   (frob (abs/single-float abs  single-reg single-float)
 696         (inst mov hex8 -1)
 697         (inst shr hex8 33)
 698         (inst movd xmm hex8)
 699         (inst andps y xmm)))
 700
 701 \f
 702 ;;;; comparison
 703
 704 (define-vop (float-compare)
 705   (:policy :fast-safe)
 706   (:vop-var vop)
 707   (:save-p :compute-only)
 708   (:note "inline float comparison"))
 709
 710 ;;; EQL
 711 (macrolet ((define-float-eql (name cost sc type)
 712                `(define-vop (,name float-compare)
 713                   (:translate eql)
 714                   (:args (x :scs (,sc) :target mask)
 715                          (y :scs (,sc) :target mask))
 716                   (:arg-types ,type ,type)
 717                   (:temporary (:sc ,sc :from :eval) mask)
 718                   (:temporary (:sc any-reg) bits)
 719                   (:conditional :e)
 720                   (:generator ,cost
 721                     (when (location= y mask)
 722                       (rotatef x y))
 723                     (move mask x)
 724                     (inst pcmpeqd mask y)
 725                     (inst movmskps bits mask)
 726                     (inst cmp bits #b1111)))))
 727   (define-float-eql eql/single-float 4
 728     single-reg single-float)
 729   (define-float-eql eql/double-float 4
 730     double-reg double-float)
 731   (define-float-eql eql/complex-double-float 5
 732     complex-double-reg complex-double-float)
 733   (define-float-eql eql/complex-single-float 5
 734     complex-single-reg complex-single-float))
 735
 736 ;;; comiss and comisd can cope with one or other arg in memory: we
 737 ;;; could (should, indeed) extend these to cope with descriptor args
 738 ;;; and stack args
 739
 740 (define-vop (single-float-compare float-compare)
 741   (:args (x :scs (single-reg)) (y :scs (single-reg)))
 742   (:arg-types single-float single-float))
 743 (define-vop (double-float-compare float-compare)
 744   (:args (x :scs (double-reg)) (y :scs (double-reg)))
 745   (:arg-types double-float double-float))
 746
 747 (define-vop (=/single-float single-float-compare)
 748   (:translate =)
 749   (:info)
 750   (:conditional not :p :ne)
 751   (:vop-var vop)
 752   (:generator 3
 753     (note-this-location vop :internal-error)
 754     (inst comiss x y)
 755     ;; if PF&CF, there was a NaN involved => not equal
 756     ;; otherwise, ZF => equal
 757     ))
 758
 759 (define-vop (=/double-float double-float-compare)
 760   (:translate =)
 761   (:info)
 762   (:conditional not :p :ne)
 763   (:vop-var vop)
 764   (:generator 3
 765     (note-this-location vop :internal-error)
 766     (inst comisd x y)))
 767
 768 (macrolet ((define-complex-float-= (complex-complex-name complex-real-name real-complex-name
 769                                     real-sc real-type complex-sc complex-type
 770                                     cmp-inst mask-inst mask)
 771                `(progn
 772                   (define-vop (,complex-complex-name float-compare)
 773                     (:translate =)
 774                     (:args (x :scs (,complex-sc) :target cmp)
 775                            (y :scs (,complex-sc) :target cmp))
 776                     (:arg-types ,complex-type ,complex-type)
 777                     (:temporary (:sc ,complex-sc :from :eval) cmp)
 778                     (:temporary (:sc unsigned-reg) bits)
 779                     (:info)
 780                     (:conditional :e)
 781                     (:generator 3
 782                       (when (location= y cmp)
 783                         (rotatef x y))
 784                       (move cmp x)
 785                       (note-this-location vop :internal-error)
 786                       (inst ,cmp-inst :eq cmp y)
 787                       (inst ,mask-inst bits cmp)
 788                       (inst cmp bits ,mask)))
 789                   (define-vop (,complex-real-name ,complex-complex-name)
 790                     (:args (x :scs (,complex-sc) :target cmp)
 791                            (y :scs (,real-sc)    :target cmp))
 792                     (:arg-types ,complex-type ,real-type))
 793                   (define-vop (,real-complex-name ,complex-complex-name)
 794                     (:args (x :scs (,real-sc)    :target cmp)
 795                            (y :scs (,complex-sc) :target cmp))
 796                     (:arg-types ,real-type ,complex-type)))))
 797   (define-complex-float-= =/complex-single-float =/complex-real-single-float =/real-complex-single-float
 798       single-reg single-float complex-single-reg complex-single-float
 799     cmpps movmskps #b1111)
 800   (define-complex-float-= =/complex-double-float =/complex-real-double-float =/real-complex-double-float
 801       double-reg double-float complex-double-reg complex-double-float
 802     cmppd movmskpd #b11))
 803
 804 (define-vop (<double-float double-float-compare)
 805   (:translate <)
 806   (:info)
 807   (:conditional not :p :nc)
 808   (:generator 3
 809     (inst comisd x y)))
 810
 811 (define-vop (<single-float single-float-compare)
 812   (:translate <)
 813   (:info)
 814   (:conditional not :p :nc)
 815   (:generator 3
 816     (inst comiss x y)))
 817
 818 (define-vop (>double-float double-float-compare)
 819   (:translate >)
 820   (:info)
 821   (:conditional not :p :na)
 822   (:generator 3
 823     (inst comisd x y)))
 824
 825 (define-vop (>single-float single-float-compare)
 826   (:translate >)
 827   (:info)
 828   (:conditional not :p :na)
 829   (:generator 3
 830     (inst comiss x y)))
 831
 832
 833 \f
 834 ;;;; conversion
 835
 836 (macrolet ((frob (name translate inst to-sc to-type)
 837              `(define-vop (,name)
 838                 (:args (x :scs (signed-stack signed-reg) :target temp))
 839                 (:temporary (:sc signed-stack) temp)
 840                 (:results (y :scs (,to-sc)))
 841                 (:arg-types signed-num)
 842                 (:result-types ,to-type)
 843                 (:policy :fast-safe)
 844                 (:note "inline float coercion")
 845                 (:translate ,translate)
 846                 (:vop-var vop)
 847                 (:save-p :compute-only)
 848                 (:generator 5
 849                   (sc-case x
 850                     (signed-reg
 851                      (inst mov temp x)
 852                      (note-this-location vop :internal-error)
 853                      (inst ,inst y temp))
 854                     (signed-stack
 855                      (note-this-location vop :internal-error)
 856                      (inst ,inst y x)))))))
 857   (frob %single-float/signed %single-float cvtsi2ss single-reg single-float)
 858   (frob %double-float/signed %double-float cvtsi2sd double-reg double-float))
 859
 860 (macrolet ((frob (name translate inst from-sc from-type to-sc to-type)
 861              `(define-vop (,name)
 862                (:args (x :scs (,from-sc) :target y))
 863                (:results (y :scs (,to-sc)))
 864                (:arg-types ,from-type)
 865                (:result-types ,to-type)
 866                (:policy :fast-safe)
 867                (:note "inline float coercion")
 868                (:translate ,translate)
 869                (:vop-var vop)
 870                (:save-p :compute-only)
 871                (:generator 2
 872                 (note-this-location vop :internal-error)
 873                 (inst ,inst y x)))))
 874   (frob %single-float/double-float %single-float cvtsd2ss double-reg
 875         double-float single-reg single-float)
 876
 877   (frob %double-float/single-float %double-float cvtss2sd
 878         single-reg single-float double-reg double-float))
 879
 880 (macrolet ((frob (trans inst from-sc from-type round-p)
 881              (declare (ignore round-p))
 882              `(define-vop (,(symbolicate trans "/" from-type))
 883                (:args (x :scs (,from-sc)))
 884                (:temporary (:sc any-reg) temp-reg)
 885                (:results (y :scs (signed-reg)))
 886                (:arg-types ,from-type)
 887                (:result-types signed-num)
 888                (:translate ,trans)
 889                (:policy :fast-safe)
 890                (:note "inline float truncate")
 891                (:vop-var vop)
 892                (:save-p :compute-only)
 893                (:generator 5
 894                  (sc-case y
 895                           (signed-stack
 896                            (inst ,inst temp-reg x)
 897                            (move y temp-reg))
 898                           (signed-reg
 899                            (inst ,inst y x)
 900                            ))))))
 901   (frob %unary-truncate cvttss2si single-reg single-float nil)
 902   (frob %unary-truncate cvttsd2si double-reg double-float nil)
 903
 904   (frob %unary-round cvtss2si single-reg single-float t)
 905   (frob %unary-round cvtsd2si double-reg double-float t))
 906
 907 (define-vop (make-single-float)
 908   (:args (bits :scs (signed-reg) :target res
 909                :load-if (not (or (and (sc-is bits signed-stack)
 910                                       (sc-is res single-reg))
 911                                  (and (sc-is bits signed-stack)
 912                                       (sc-is res single-stack)
 913                                       (location= bits res))))))
 914   (:results (res :scs (single-reg single-stack)))
 915   (:arg-types signed-num)
 916   (:result-types single-float)
 917   (:translate make-single-float)
 918   (:policy :fast-safe)
 919   (:vop-var vop)
 920   (:generator 4
 921     (sc-case res
 922        (single-stack
 923         (sc-case bits
 924           (signed-reg
 925            (inst mov res bits))
 926           (signed-stack
 927            (aver (location= bits res)))))
 928        (single-reg
 929         (sc-case bits
 930           (signed-reg
 931            (inst movd res bits))
 932           (signed-stack
 933            (inst movd res bits)))))))
 934
 935 (define-vop (make-double-float)
 936   (:args (hi-bits :scs (signed-reg))
 937          (lo-bits :scs (unsigned-reg)))
 938   (:results (res :scs (double-reg)))
 939   (:temporary (:sc unsigned-reg) temp)
 940   (:arg-types signed-num unsigned-num)
 941   (:result-types double-float)
 942   (:translate make-double-float)
 943   (:policy :fast-safe)
 944   (:vop-var vop)
 945   (:generator 2
 946     (move temp hi-bits)
 947     (inst shl temp 32)
 948     (inst or temp lo-bits)
 949     (inst movd res temp)))
 950
 951 (define-vop (single-float-bits)
 952   (:args (float :scs (single-reg descriptor-reg)
 953                 :load-if (not (sc-is float single-stack))))
 954   (:results (bits :scs (signed-reg)))
 955   (:temporary (:sc signed-stack :from :argument :to :result) stack-temp)
 956   (:arg-types single-float)
 957   (:result-types signed-num)
 958   (:translate single-float-bits)
 959   (:policy :fast-safe)
 960   (:vop-var vop)
 961   (:generator 4
 962     (sc-case bits
 963       (signed-reg
 964        (sc-case float
 965          (single-reg
 966           (inst movss stack-temp float)
 967           (move bits stack-temp))
 968          (single-stack
 969           (move bits float))
 970          (descriptor-reg
 971           (move bits float)
 972           (inst shr bits 32))))
 973       (signed-stack
 974        (sc-case float
 975          (single-reg
 976           (inst movss bits float)))))
 977     ;; Sign-extend
 978     (inst shl bits 32)
 979     (inst sar bits 32)))
 980
 981 (define-vop (double-float-high-bits)
 982   (:args (float :scs (double-reg descriptor-reg)
 983                 :load-if (not (sc-is float double-stack))))
 984   (:results (hi-bits :scs (signed-reg)))
 985   (:temporary (:sc signed-stack :from :argument :to :result) temp)
 986   (:arg-types double-float)
 987   (:result-types signed-num)
 988   (:translate double-float-high-bits)
 989   (:policy :fast-safe)
 990   (:vop-var vop)
 991   (:generator 5
 992      (sc-case float
 993        (double-reg
 994         (inst movsd temp float)
 995         (move hi-bits temp))
 996        (double-stack
 997         (loadw hi-bits ebp-tn (frame-word-offset (tn-offset float))))
 998        (descriptor-reg
 999         (loadw hi-bits float double-float-value-slot
1000                other-pointer-lowtag)))
1001      (inst sar hi-bits 32)))
1002
1003 (define-vop (double-float-low-bits)
1004   (:args (float :scs (double-reg descriptor-reg)
1005                 :load-if (not (sc-is float double-stack))))
1006   (:results (lo-bits :scs (unsigned-reg)))
1007   (:temporary (:sc signed-stack :from :argument :to :result) temp)
1008   (:arg-types double-float)
1009   (:result-types unsigned-num)
1010   (:translate double-float-low-bits)
1011   (:policy :fast-safe)
1012   (:vop-var vop)
1013   (:generator 5
1014      (sc-case float
1015        (double-reg
1016         (inst movsd temp float)
1017         (move lo-bits temp))
1018        (double-stack
1019         (loadw lo-bits ebp-tn (frame-word-offset (tn-offset float))))
1020        (descriptor-reg
1021         (loadw lo-bits float double-float-value-slot
1022                other-pointer-lowtag)))
1023      (inst shl lo-bits 32)
1024      (inst shr lo-bits 32)))
1025
1026 \f
1027
1028 ;;;; complex float VOPs
1029
1030 (define-vop (make-complex-single-float)
1031   (:translate complex)
1032   (:args (real :scs (single-reg fp-single-zero)
1033                :target r
1034                :load-if (not (sc-is real fp-single-zero)))
1035          (imag :scs (single-reg fp-single-zero)
1036                :load-if (not (sc-is imag fp-single-zero))))
1037   (:arg-types single-float single-float)
1038   (:results (r :scs (complex-single-reg) :from (:argument 0)))
1039   (:result-types complex-single-float)
1040   (:note "inline complex single-float creation")
1041   (:policy :fast-safe)
1042   (:generator 5
1043     (cond ((sc-is real fp-single-zero)
1044            (inst xorps r r)
1045            (unless (sc-is imag fp-single-zero)
1046              (inst unpcklps r imag)))
1047           ((location= real imag)
1048            (move r real)
1049            (inst unpcklps r r))
1050           (t
1051            (move r real)
1052            (unless (sc-is imag fp-single-zero)
1053              (inst unpcklps r imag))))))
1054
1055 (define-vop (make-complex-double-float)
1056   (:translate complex)
1057   (:args (real :scs (double-reg fp-double-zero)
1058                :target r
1059                :load-if (not (sc-is real fp-double-zero)))
1060          (imag :scs (double-reg fp-double-zero)
1061                :load-if (not (sc-is imag fp-double-zero))))
1062   (:arg-types double-float double-float)
1063   (:results (r :scs (complex-double-reg) :from (:argument 0)))
1064   (:result-types complex-double-float)
1065   (:note "inline complex double-float creation")
1066   (:policy :fast-safe)
1067   (:generator 5
1068     (cond ((sc-is real fp-double-zero)
1069            (inst xorpd r r)
1070            (unless (sc-is imag fp-double-zero)
1071              (inst unpcklpd r imag)))
1072           ((location= real imag)
1073            (move r real)
1074            (inst unpcklpd r r))
1075           (t
1076            (move r real)
1077            (unless (sc-is imag fp-double-zero)
1078              (inst unpcklpd r imag))))))
1079
1080 (define-vop (complex-float-value)
1081   (:args (x :target r))
1082   (:temporary (:sc complex-double-reg) zero)
1083   (:results (r))
1084   (:variant-vars offset)
1085   (:policy :fast-safe)
1086   (:generator 3
1087     (cond ((sc-is x complex-double-reg)
1088            (move r x)
1089            (inst xorpd zero zero)
1090            (ecase offset
1091              (0 (inst unpcklpd r zero))
1092              (1 (inst unpckhpd r zero))))
1093           ((sc-is x complex-single-reg)
1094            (move r x)
1095            (ecase offset
1096              (0 (inst shufps r r #b11111100))
1097              (1 (inst shufps r r #b11111101))))
1098           ((sc-is r single-reg)
1099            (let ((ea (sc-case x
1100                        (complex-single-stack
1101                         (ecase offset
1102                           (0 (ea-for-csf-real-stack x))
1103                           (1 (ea-for-csf-imag-stack x))))
1104                        (descriptor-reg
1105                         (ecase offset
1106                           (0 (ea-for-csf-real-desc x))
1107                           (1 (ea-for-csf-imag-desc x)))))))
1108              (inst movss r ea)))
1109           ((sc-is r double-reg)
1110            (let ((ea (sc-case x
1111                        (complex-double-stack
1112                         (ecase offset
1113                           (0 (ea-for-cdf-real-stack x))
1114                           (1 (ea-for-cdf-imag-stack x))))
1115                        (descriptor-reg
1116                         (ecase offset
1117                           (0 (ea-for-cdf-real-desc x))
1118                           (1 (ea-for-cdf-imag-desc x)))))))
1119              (inst movsd r ea)))
1120           (t (error "COMPLEX-FLOAT-VALUE VOP failure")))))
1121
1122 (define-vop (realpart/complex-single-float complex-float-value)
1123   (:translate realpart)
1124   (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg)
1125             :target r))
1126   (:arg-types complex-single-float)
1127   (:results (r :scs (single-reg)))
1128   (:result-types single-float)
1129   (:note "complex float realpart")
1130   (:variant 0))
1131
1132 (define-vop (realpart/complex-double-float complex-float-value)
1133   (:translate realpart)
1134   (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg)
1135             :target r))
1136   (:arg-types complex-double-float)
1137   (:results (r :scs (double-reg)))
1138   (:result-types double-float)
1139   (:note "complex float realpart")
1140   (:variant 0))
1141
1142 (define-vop (imagpart/complex-single-float complex-float-value)
1143   (:translate imagpart)
1144   (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg)
1145             :target r))
1146   (:arg-types complex-single-float)
1147   (:results (r :scs (single-reg)))
1148   (:result-types single-float)
1149   (:note "complex float imagpart")
1150   (:variant 1))
1151
1152 (define-vop (imagpart/complex-double-float complex-float-value)
1153   (:translate imagpart)
1154   (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg)
1155             :target r))
1156   (:arg-types complex-double-float)
1157   (:results (r :scs (double-reg)))
1158   (:result-types double-float)
1159   (:note "complex float imagpart")
1160   (:variant 1))
1161
1162 \f
1163 ;;; hack dummy VOPs to bias the representation selection of their
1164 ;;; arguments towards a FP register, which can help avoid consing at
1165 ;;; inappropriate locations
1166 (defknown double-float-reg-bias (double-float) (values))
1167 (define-vop (double-float-reg-bias)
1168   (:translate double-float-reg-bias)
1169   (:args (x :scs (double-reg double-stack) :load-if nil))
1170   (:arg-types double-float)
1171   (:policy :fast-safe)
1172   (:note "inline dummy FP register bias")
1173   (:ignore x)
1174   (:generator 0))
1175 (defknown single-float-reg-bias (single-float) (values))
1176 (define-vop (single-float-reg-bias)
1177   (:translate single-float-reg-bias)
1178   (:args (x :scs (single-reg single-stack) :load-if nil))
1179   (:arg-types single-float)
1180   (:policy :fast-safe)
1181   (:note "inline dummy FP register bias")
1182   (:ignore x)
1183   (:generator 0))
1184
1185 (defknown swap-complex ((complex float)) (complex float)
1186     (foldable flushable movable always-translatable))
1187 (defoptimizer (swap-complex derive-type) ((x))
1188   (sb!c::lvar-type x))
1189 (defun swap-complex (x)
1190   (complex (imagpart x) (realpart x)))
1191 (define-vop (swap-complex-single-float)
1192   (:translate swap-complex)
1193   (:policy :fast-safe)
1194   (:args (x :scs (complex-single-reg) :target r))
1195   (:arg-types complex-single-float)
1196   (:results (r :scs (complex-single-reg)))
1197   (:result-types complex-single-float)
1198   (:generator 2
1199      (move r x)
1200      (inst shufps r r #b11110001)))
1201 (define-vop (swap-complex-double-float)
1202   (:translate swap-complex)
1203   (:policy :fast-safe)
1204   (:args (x :scs (complex-double-reg) :target r))
1205   (:arg-types complex-double-float)
1206   (:results (r :scs (complex-double-reg)))
1207   (:result-types complex-double-float)
1208   (:generator 2
1209      (move r x)
1210      (inst shufpd r r #b01)))