-;;; Divide double, reverse, destination st(i):
-;;; st(i) = st(0) / st(i).
-;;;
-;;; ASM386 syntax: FDIVR ST(i), ST
-;;; Gdb syntax: fdiv %st,%st(i)
-(define-instruction fdivr-sti (segment destination)
- (:printer floating-point-fp ((op '(#b100 #b110))))
- (:emitter
- (aver (fp-reg-tn-p destination))
- (emit-byte segment #b11011100)
- (emit-fp-op segment destination #b110)))
-
-;;; Exchange fr0 with fr(n). (There is no double precision variant.)
-(define-instruction fxch (segment source)
- (:printer floating-point-fp ((op '(#b001 #b001))))
- (:emitter
- (unless (and (tn-p source)
- (eq (sb-name (sc-sb (tn-sc source))) 'float-registers))
- (cl:break))
- (emit-byte segment #b11011001)
- (emit-fp-op segment source #b001)))
-
-;;; Push 32-bit integer to st0.
-(define-instruction fild (segment source)
- (:printer floating-point ((op '(#b011 #b000))))
- (:emitter
- (and (not (fp-reg-tn-p source))
- (maybe-emit-rex-for-ea segment source nil))
- (emit-byte segment #b11011011)
- (emit-fp-op segment source #b000)))
-
-;;; Push 64-bit integer to st0.
-(define-instruction fildl (segment source)
- (:printer floating-point ((op '(#b111 #b101))))
- (:emitter
- (and (not (fp-reg-tn-p source))
- (maybe-emit-rex-for-ea segment source nil))
- (emit-byte segment #b11011111)
- (emit-fp-op segment source #b101)))
-
-;;; Store 32-bit integer.
-(define-instruction fist (segment dest)
- (:printer floating-point ((op '(#b011 #b010))))
- (:emitter
- (and (not (fp-reg-tn-p dest))
- (maybe-emit-rex-for-ea segment dest nil))
- (emit-byte segment #b11011011)
- (emit-fp-op segment dest #b010)))
-
-;;; Store and pop 32-bit integer.
-(define-instruction fistp (segment dest)
- (:printer floating-point ((op '(#b011 #b011))))
- (:emitter
- (and (not (fp-reg-tn-p dest))
- (maybe-emit-rex-for-ea segment dest nil))
- (emit-byte segment #b11011011)
- (emit-fp-op segment dest #b011)))
-
-;;; Store and pop 64-bit integer.
-(define-instruction fistpl (segment dest)
- (:printer floating-point ((op '(#b111 #b111))))
- (:emitter
- (and (not (fp-reg-tn-p dest))
- (maybe-emit-rex-for-ea segment dest nil))
- (emit-byte segment #b11011111)
- (emit-fp-op segment dest #b111)))
-
-;;; Store single from st(0) and pop.
-(define-instruction fstp (segment dest)
- (:printer floating-point ((op '(#b001 #b011))))
- (:emitter
- (cond ((fp-reg-tn-p dest)
- (emit-byte segment #b11011101)
- (emit-fp-op segment dest #b011))
- (t
- (maybe-emit-rex-for-ea segment dest nil)
- (emit-byte segment #b11011001)
- (emit-fp-op segment dest #b011)))))
-
-;;; Store double from st(0) and pop.
-(define-instruction fstpd (segment dest)
- (:printer floating-point ((op '(#b101 #b011))))
- (:printer floating-point-fp ((op '(#b101 #b011))))
- (:emitter
- (cond ((fp-reg-tn-p dest)
- (emit-byte segment #b11011101)
- (emit-fp-op segment dest #b011))
- (t
- (maybe-emit-rex-for-ea segment dest nil)
- (emit-byte segment #b11011101)
- (emit-fp-op segment dest #b011)))))
-
-;;; Store long from st(0) and pop.
-(define-instruction fstpl (segment dest)
- (:printer floating-point ((op '(#b011 #b111))))
- (:emitter
- (and (not (fp-reg-tn-p dest))
- (maybe-emit-rex-for-ea segment dest nil))
- (emit-byte segment #b11011011)
- (emit-fp-op segment dest #b111)))
-
-;;; Decrement stack-top pointer.
-(define-instruction fdecstp (segment)
- (:printer floating-point-no ((op #b10110)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110110)))
-
-;;; Increment stack-top pointer.
-(define-instruction fincstp (segment)
- (:printer floating-point-no ((op #b10111)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110111)))
-
-;;; Free fp register.
-(define-instruction ffree (segment dest)
- (:printer floating-point-fp ((op '(#b101 #b000))))
- (:emitter
- (and (not (fp-reg-tn-p dest))
- (maybe-emit-rex-for-ea segment dest nil))
- (emit-byte segment #b11011101)
- (emit-fp-op segment dest #b000)))
-
-(define-instruction fabs (segment)
- (:printer floating-point-no ((op #b00001)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11100001)))
-
-(define-instruction fchs (segment)
- (:printer floating-point-no ((op #b00000)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11100000)))
-
-(define-instruction frndint(segment)
- (:printer floating-point-no ((op #b11100)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111100)))
-
-;;; Initialize NPX.
-(define-instruction fninit(segment)
- (:printer floating-point-5 ((op #b00011)))
- (:emitter
- (emit-byte segment #b11011011)
- (emit-byte segment #b11100011)))
-
-;;; Store Status Word to AX.
-(define-instruction fnstsw(segment)
- (:printer floating-point-st ((op #b00000)))
- (:emitter
- (emit-byte segment #b11011111)
- (emit-byte segment #b11100000)))
-
-;;; Load Control Word.
-;;;
-;;; src must be a memory location
-(define-instruction fldcw(segment src)
- (:printer floating-point ((op '(#b001 #b101))))
- (:emitter
- (and (not (fp-reg-tn-p src))
- (maybe-emit-rex-for-ea segment src nil))
- (emit-byte segment #b11011001)
- (emit-fp-op segment src #b101)))
-
-;;; Store Control Word.
-(define-instruction fnstcw(segment dst)
- (:printer floating-point ((op '(#b001 #b111))))
- (:emitter
- (and (not (fp-reg-tn-p dst))
- (maybe-emit-rex-for-ea segment dst nil))
- (emit-byte segment #b11011001)
- (emit-fp-op segment dst #b111)))
-
-;;; Store FP Environment.
-(define-instruction fstenv(segment dst)
- (:printer floating-point ((op '(#b001 #b110))))
- (:emitter
- (and (not (fp-reg-tn-p dst))
- (maybe-emit-rex-for-ea segment dst nil))
- (emit-byte segment #b11011001)
- (emit-fp-op segment dst #b110)))
-
-;;; Restore FP Environment.
-(define-instruction fldenv(segment src)
- (:printer floating-point ((op '(#b001 #b100))))
- (:emitter
- (and (not (fp-reg-tn-p src))
- (maybe-emit-rex-for-ea segment src nil))
- (emit-byte segment #b11011001)
- (emit-fp-op segment src #b100)))
-
-;;; Save FP State.
-(define-instruction fsave(segment dst)
- (:printer floating-point ((op '(#b101 #b110))))
- (:emitter
- (and (not (fp-reg-tn-p dst))
- (maybe-emit-rex-for-ea segment dst nil))
- (emit-byte segment #b11011101)
- (emit-fp-op segment dst #b110)))
-
-;;; Restore FP State.
-(define-instruction frstor(segment src)
- (:printer floating-point ((op '(#b101 #b100))))
- (:emitter
- (and (not (fp-reg-tn-p src))
- (maybe-emit-rex-for-ea segment src nil))
- (emit-byte segment #b11011101)
- (emit-fp-op segment src #b100)))
-
-;;; Clear exceptions.
-(define-instruction fnclex(segment)
- (:printer floating-point-5 ((op #b00010)))
- (:emitter
- (emit-byte segment #b11011011)
- (emit-byte segment #b11100010)))
-
-;;; comparison
-(define-instruction fcom (segment src)
- (:printer floating-point ((op '(#b000 #b010))))
- (:emitter
- (and (not (fp-reg-tn-p src))
- (maybe-emit-rex-for-ea segment src nil))
- (emit-byte segment #b11011000)
- (emit-fp-op segment src #b010)))
-
-(define-instruction fcomd (segment src)
- (:printer floating-point ((op '(#b100 #b010))))
- (:printer floating-point-fp ((op '(#b000 #b010))))
- (:emitter
- (if (fp-reg-tn-p src)
- (emit-byte segment #b11011000)
- (progn
- (maybe-emit-rex-for-ea segment src nil)
- (emit-byte segment #b11011100)))
- (emit-fp-op segment src #b010)))
-
-;;; Compare ST1 to ST0, popping the stack twice.
-(define-instruction fcompp (segment)
- (:printer floating-point-3 ((op '(#b110 #b011001))))
- (:emitter
- (emit-byte segment #b11011110)
- (emit-byte segment #b11011001)))
-
-;;; unordered comparison
-(define-instruction fucom (segment src)
- (:printer floating-point-fp ((op '(#b101 #b100))))
- (:emitter
- (aver (fp-reg-tn-p src))
- (emit-byte segment #b11011101)
- (emit-fp-op segment src #b100)))
-
-(define-instruction ftst (segment)
- (:printer floating-point-no ((op #b00100)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11100100)))
-
-;;;; 80387 specials
-
-(define-instruction fsqrt(segment)
- (:printer floating-point-no ((op #b11010)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111010)))
-
-(define-instruction fscale(segment)
- (:printer floating-point-no ((op #b11101)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111101)))
-
-(define-instruction fxtract(segment)
- (:printer floating-point-no ((op #b10100)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110100)))
-
-(define-instruction fsin(segment)
- (:printer floating-point-no ((op #b11110)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111110)))
-
-(define-instruction fcos(segment)
- (:printer floating-point-no ((op #b11111)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111111)))
-
-(define-instruction fprem1(segment)
- (:printer floating-point-no ((op #b10101)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110101)))
-
-(define-instruction fprem(segment)
- (:printer floating-point-no ((op #b11000)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111000)))
-
-(define-instruction fxam (segment)
- (:printer floating-point-no ((op #b00101)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11100101)))
-
-;;; These do push/pop to stack and need special handling
-;;; in any VOPs that use them. See the book.
-
-;;; st0 <- st1*log2(st0)
-(define-instruction fyl2x(segment) ; pops stack
- (:printer floating-point-no ((op #b10001)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110001)))
-
-(define-instruction fyl2xp1(segment)
- (:printer floating-point-no ((op #b11001)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11111001)))
-
-(define-instruction f2xm1(segment)
- (:printer floating-point-no ((op #b10000)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110000)))
-
-(define-instruction fptan(segment) ; st(0) <- 1; st(1) <- tan
- (:printer floating-point-no ((op #b10010)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110010)))
-
-(define-instruction fpatan(segment) ; POPS STACK
- (:printer floating-point-no ((op #b10011)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11110011)))
-
-;;;; loading constants
-
-(define-instruction fldz(segment)
- (:printer floating-point-no ((op #b01110)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11101110)))
-
-(define-instruction fld1(segment)
- (:printer floating-point-no ((op #b01000)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11101000)))
-
-(define-instruction fldpi(segment)
- (:printer floating-point-no ((op #b01011)))
- (:emitter
- (emit-byte segment #b11011001)
- (emit-byte segment #b11101011)))
-
-(define-instruction fldl2t(segment)
- (:printer floating-point-no ((op #b01001)))
+;; Return a one- or two-element list of printers for SSE instructions.
+;; The one-element list is used in the cases where the REX prefix is
+;; really a prefix and thus automatically supported, the two-element
+;; list is used when the REX prefix is used in an infix position.
+(eval-when (:compile-toplevel :execute)
+ (defun sse-inst-printer-list (inst-format-stem prefix opcode
+ &key more-fields printer)
+ (let ((fields `(,@(when prefix
+ `((prefix ,prefix)))
+ (op ,opcode)
+ ,@more-fields))
+ (inst-formats (if prefix
+ (list (symbolicate "EXT-" inst-format-stem)
+ (symbolicate "EXT-REX-" inst-format-stem))
+ (list inst-format-stem))))
+ (mapcar (lambda (inst-format)
+ `(,inst-format ,fields ,@(when printer
+ (list printer))))
+ inst-formats))))
+
+(defun emit-sse-inst (segment dst src prefix opcode
+ &key operand-size (remaining-bytes 0))
+ (when prefix
+ (emit-byte segment prefix))
+ (if operand-size
+ (maybe-emit-rex-for-ea segment src dst :operand-size operand-size)
+ (maybe-emit-rex-for-ea segment src dst))
+ (emit-byte segment #x0f)
+ (emit-byte segment opcode)
+ (emit-ea segment src (reg-tn-encoding dst) :remaining-bytes remaining-bytes))
+
+;; 0110 0110:0000 1111:0111 00gg: 11 010 xmmreg:imm8
+
+(defun emit-sse-inst-with-imm (segment dst/src imm
+ prefix opcode /i
+ &key operand-size)
+ (aver (<= 0 /i 7))
+ (when prefix
+ (emit-byte segment prefix))
+ (maybe-emit-rex-prefix segment operand-size dst/src nil nil)
+ (emit-byte segment #x0F)
+ (emit-byte segment opcode)
+ (emit-byte segment (logior (ash (logior #b11000 /i) 3)
+ (reg-tn-encoding dst/src)))
+ (emit-byte segment imm))
+
+(macrolet
+ ((define-imm-sse-instruction (name opcode /i)
+ `(define-instruction ,name (segment dst/src imm)
+ (:printer-list
+ ',(sse-inst-printer-list 'xmm-imm #x66 opcode
+ :more-fields `((/i ,/i))))
+ (:emitter
+ (emit-sse-inst-with-imm segment dst/src imm
+ #x66 ,opcode ,/i
+ :operand-size :do-not-set)))))
+ (define-imm-sse-instruction pslldq #x73 7)
+ (define-imm-sse-instruction psllw-imm #x71 6)
+ (define-imm-sse-instruction pslld-imm #x72 6)
+ (define-imm-sse-instruction psllq-imm #x73 6)
+
+ (define-imm-sse-instruction psraw-imm #x71 4)
+ (define-imm-sse-instruction psrad-imm #x72 4)
+
+ (define-imm-sse-instruction psrldq #x73 3)
+ (define-imm-sse-instruction psrlw-imm #x71 2)
+ (define-imm-sse-instruction psrld-imm #x72 2)
+ (define-imm-sse-instruction psrlq-imm #x73 2))
+
+;;; Emit an SSE instruction that has an XMM register as the destination
+;;; operand and for which the size of the operands is implicitly given
+;;; by the instruction.
+(defun emit-regular-sse-inst (segment dst src prefix opcode
+ &key (remaining-bytes 0))
+ (aver (xmm-register-p dst))
+ (emit-sse-inst segment dst src prefix opcode
+ :operand-size :do-not-set
+ :remaining-bytes remaining-bytes))
+
+;;; Instructions having an XMM register as the destination operand
+;;; and an XMM register or a memory location as the source operand.
+;;; The operand size is implicitly given by the instruction.
+
+(macrolet ((define-regular-sse-inst (name prefix opcode)
+ `(define-instruction ,name (segment dst src)
+ (:printer-list
+ ',(sse-inst-printer-list 'xmm-xmm/mem prefix opcode))
+ (:emitter
+ (emit-regular-sse-inst segment dst src ,prefix ,opcode)))))
+ ;; logical
+ (define-regular-sse-inst andpd #x66 #x54)
+ (define-regular-sse-inst andps nil #x54)
+ (define-regular-sse-inst andnpd #x66 #x55)
+ (define-regular-sse-inst andnps nil #x55)
+ (define-regular-sse-inst orpd #x66 #x56)
+ (define-regular-sse-inst orps nil #x56)
+ (define-regular-sse-inst pand #x66 #xdb)
+ (define-regular-sse-inst pandn #x66 #xdf)
+ (define-regular-sse-inst por #x66 #xeb)
+ (define-regular-sse-inst pxor #x66 #xef)
+ (define-regular-sse-inst xorpd #x66 #x57)
+ (define-regular-sse-inst xorps nil #x57)
+ ;; comparison
+ (define-regular-sse-inst comisd #x66 #x2f)
+ (define-regular-sse-inst comiss nil #x2f)
+ (define-regular-sse-inst ucomisd #x66 #x2e)
+ (define-regular-sse-inst ucomiss nil #x2e)
+ ;; integer comparison
+ (define-regular-sse-inst pcmpeqb #x66 #x74)
+ (define-regular-sse-inst pcmpeqw #x66 #x75)
+ (define-regular-sse-inst pcmpeqd #x66 #x76)
+ (define-regular-sse-inst pcmpgtb #x66 #x64)
+ (define-regular-sse-inst pcmpgtw #x66 #x65)
+ (define-regular-sse-inst pcmpgtd #x66 #x66)
+ ;; max/min
+ (define-regular-sse-inst maxpd #x66 #x5f)
+ (define-regular-sse-inst maxps nil #x5f)
+ (define-regular-sse-inst maxsd #xf2 #x5f)
+ (define-regular-sse-inst maxss #xf3 #x5f)
+ (define-regular-sse-inst minpd #x66 #x5d)
+ (define-regular-sse-inst minps nil #x5d)
+ (define-regular-sse-inst minsd #xf2 #x5d)
+ (define-regular-sse-inst minss #xf3 #x5d)
+ ;; integer max/min
+ (define-regular-sse-inst pmaxsw #x66 #xee)
+ (define-regular-sse-inst pmaxub #x66 #xde)
+ (define-regular-sse-inst pminsw #x66 #xea)
+ (define-regular-sse-inst pminub #x66 #xda)
+ ;; arithmetic
+ (define-regular-sse-inst addpd #x66 #x58)
+ (define-regular-sse-inst addps nil #x58)
+ (define-regular-sse-inst addsd #xf2 #x58)
+ (define-regular-sse-inst addss #xf3 #x58)
+ (define-regular-sse-inst divpd #x66 #x5e)
+ (define-regular-sse-inst divps nil #x5e)
+ (define-regular-sse-inst divsd #xf2 #x5e)
+ (define-regular-sse-inst divss #xf3 #x5e)
+ (define-regular-sse-inst mulpd #x66 #x59)
+ (define-regular-sse-inst mulps nil #x59)
+ (define-regular-sse-inst mulsd #xf2 #x59)
+ (define-regular-sse-inst mulss #xf3 #x59)
+ (define-regular-sse-inst rcpps nil #x53)
+ (define-regular-sse-inst rcpss #xf3 #x53)
+ (define-regular-sse-inst rsqrtps nil #x52)
+ (define-regular-sse-inst rsqrtss #xf3 #x52)
+ (define-regular-sse-inst sqrtpd #x66 #x51)
+ (define-regular-sse-inst sqrtps nil #x51)
+ (define-regular-sse-inst sqrtsd #xf2 #x51)
+ (define-regular-sse-inst sqrtss #xf3 #x51)
+ (define-regular-sse-inst subpd #x66 #x5c)
+ (define-regular-sse-inst subps nil #x5c)
+ (define-regular-sse-inst subsd #xf2 #x5c)
+ (define-regular-sse-inst subss #xf3 #x5c)
+ (define-regular-sse-inst unpckhpd #x66 #x15)
+ (define-regular-sse-inst unpckhps nil #x15)
+ (define-regular-sse-inst unpcklpd #x66 #x14)
+ (define-regular-sse-inst unpcklps nil #x14)
+ ;; integer arithmetic
+ (define-regular-sse-inst paddb #x66 #xfc)
+ (define-regular-sse-inst paddw #x66 #xfd)
+ (define-regular-sse-inst paddd #x66 #xfe)
+ (define-regular-sse-inst paddq #x66 #xd4)
+ (define-regular-sse-inst paddsb #x66 #xec)
+ (define-regular-sse-inst paddsw #x66 #xed)
+ (define-regular-sse-inst paddusb #x66 #xdc)
+ (define-regular-sse-inst paddusw #x66 #xdd)
+ (define-regular-sse-inst pavgb #x66 #xe0)
+ (define-regular-sse-inst pavgw #x66 #xe3)
+ (define-regular-sse-inst pmaddwd #x66 #xf5)
+ (define-regular-sse-inst pmulhuw #x66 #xe4)
+ (define-regular-sse-inst pmulhw #x66 #xe5)
+ (define-regular-sse-inst pmullw #x66 #xd5)
+ (define-regular-sse-inst pmuludq #x66 #xf4)
+ (define-regular-sse-inst psadbw #x66 #xf6)
+ (define-regular-sse-inst psllw #x66 #xf1)
+ (define-regular-sse-inst pslld #x66 #xf2)
+ (define-regular-sse-inst psllq #x66 #xf3)
+ (define-regular-sse-inst psraw #x66 #xe1)
+ (define-regular-sse-inst psrad #x66 #xe2)
+ (define-regular-sse-inst psrlw #x66 #xd1)
+ (define-regular-sse-inst psrld #x66 #xd2)
+ (define-regular-sse-inst psrlq #x66 #xd3)
+ (define-regular-sse-inst psubb #x66 #xf8)
+ (define-regular-sse-inst psubw #x66 #xf9)
+ (define-regular-sse-inst psubd #x66 #xfa)
+ (define-regular-sse-inst psubq #x66 #xfb)
+ (define-regular-sse-inst psubsb #x66 #xe8)
+ (define-regular-sse-inst psubsw #x66 #xe9)
+ (define-regular-sse-inst psubusb #x66 #xd8)
+ (define-regular-sse-inst psubusw #x66 #xd9)
+ ;; conversion
+ (define-regular-sse-inst cvtdq2pd #xf3 #xe6)
+ (define-regular-sse-inst cvtdq2ps nil #x5b)
+ (define-regular-sse-inst cvtpd2dq #xf2 #xe6)
+ (define-regular-sse-inst cvtpd2ps #x66 #x5a)
+ (define-regular-sse-inst cvtps2dq #x66 #x5b)
+ (define-regular-sse-inst cvtps2pd nil #x5a)
+ (define-regular-sse-inst cvtsd2ss #xf2 #x5a)
+ (define-regular-sse-inst cvtss2sd #xf3 #x5a)
+ (define-regular-sse-inst cvttpd2dq #x66 #xe6)
+ (define-regular-sse-inst cvttps2dq #xf3 #x5b)
+ ;; integer
+ (define-regular-sse-inst packsswb #x66 #x63)
+ (define-regular-sse-inst packssdw #x66 #x6b)
+ (define-regular-sse-inst packuswb #x66 #x67)
+ (define-regular-sse-inst punpckhbw #x66 #x68)
+ (define-regular-sse-inst punpckhwd #x66 #x69)
+ (define-regular-sse-inst punpckhdq #x66 #x6a)
+ (define-regular-sse-inst punpckhqdq #x66 #x6d)
+ (define-regular-sse-inst punpcklbw #x66 #x60)
+ (define-regular-sse-inst punpcklwd #x66 #x61)
+ (define-regular-sse-inst punpckldq #x66 #x62)
+ (define-regular-sse-inst punpcklqdq #x66 #x6c))
+
+(macrolet ((define-xmm-shuffle-sse-inst (name prefix opcode n-bits radix)
+ (let ((shuffle-pattern
+ (intern (format nil "SSE-SHUFFLE-PATTERN-~D-~D"
+ n-bits radix))))
+ `(define-instruction ,name (segment dst src pattern)
+ (:printer-list
+ ',(sse-inst-printer-list
+ 'xmm-xmm/mem prefix opcode
+ :more-fields `((imm nil :type ,shuffle-pattern))
+ :printer '(:name :tab reg ", " reg/mem ", " imm)))
+
+ (:emitter
+ (aver (typep pattern '(unsigned-byte ,n-bits)))
+ (emit-regular-sse-inst segment dst src ,prefix ,opcode
+ :remaining-bytes 1)
+ (emit-byte segment pattern))))))
+ (define-xmm-shuffle-sse-inst pshufd #x66 #x70 8 4)
+ (define-xmm-shuffle-sse-inst pshufhw #xf3 #x70 8 4)
+ (define-xmm-shuffle-sse-inst pshuflw #xf2 #x70 8 4)
+ (define-xmm-shuffle-sse-inst shufpd #x66 #xc6 2 2)
+ (define-xmm-shuffle-sse-inst shufps nil #xc6 8 4))
+
+;; MASKMOVDQU (dst is DS:RDI)
+(define-instruction maskmovdqu (segment src mask)
+ (:printer-list
+ (sse-inst-printer-list 'xmm-xmm/mem #x66 #xf7))
+ (:emitter
+ (aver (xmm-register-p src))
+ (aver (xmm-register-p mask))
+ (emit-regular-sse-inst segment src mask #x66 #xf7)))
+
+(macrolet ((define-comparison-sse-inst (name prefix opcode
+ name-prefix name-suffix)
+ `(define-instruction ,name (segment op x y)
+ (:printer-list
+ ',(sse-inst-printer-list
+ 'xmm-xmm/mem prefix opcode
+ :more-fields '((imm nil :type sse-condition-code))
+ :printer `(,name-prefix imm ,name-suffix
+ :tab reg ", " reg/mem)))
+ (:emitter
+ (let ((code (position op *sse-conditions*)))
+ (aver code)
+ (emit-regular-sse-inst segment x y ,prefix ,opcode
+ :remaining-bytes 1)
+ (emit-byte segment code))))))
+ (define-comparison-sse-inst cmppd #x66 #xc2 "CMP" "PD")
+ (define-comparison-sse-inst cmpps nil #xc2 "CMP" "PS")
+ (define-comparison-sse-inst cmpsd #xf2 #xc2 "CMP" "SD")
+ (define-comparison-sse-inst cmpss #xf3 #xc2 "CMP" "SS"))
+
+;;; MOVSD, MOVSS
+(macrolet ((define-movsd/ss-sse-inst (name prefix)
+ `(define-instruction ,name (segment dst src)
+ (:printer-list
+ ',(sse-inst-printer-list 'xmm-xmm/mem-dir
+ prefix #b0001000))
+ (:emitter
+ (cond ((xmm-register-p dst)
+ (emit-sse-inst segment dst src ,prefix #x10
+ :operand-size :do-not-set))
+ (t
+ (aver (xmm-register-p src))
+ (emit-sse-inst segment src dst ,prefix #x11
+ :operand-size :do-not-set)))))))
+ (define-movsd/ss-sse-inst movsd #xf2)
+ (define-movsd/ss-sse-inst movss #xf3))
+
+;;; Packed MOVs
+(macrolet ((define-mov-sse-inst (name prefix opcode-from opcode-to
+ &key force-to-mem reg-reg-name)
+ `(progn
+ ,(when reg-reg-name
+ `(define-instruction ,reg-reg-name (segment dst src)
+ (:emitter
+ (aver (xmm-register-p dst))
+ (aver (xmm-register-p src))
+ (emit-regular-sse-inst segment dst src
+ ,prefix ,opcode-from))))
+ (define-instruction ,name (segment dst src)
+ (:printer-list
+ '(,@(when opcode-from
+ (sse-inst-printer-list
+ 'xmm-xmm/mem prefix opcode-from))
+ ,@(sse-inst-printer-list
+ 'xmm-xmm/mem prefix opcode-to
+ :printer '(:name :tab reg/mem ", " reg))))
+ (:emitter
+ (cond ,@(when opcode-from
+ `(((xmm-register-p dst)
+ ,(when force-to-mem
+ `(aver (not (or (register-p src)
+ (xmm-register-p src)))))
+ (emit-regular-sse-inst
+ segment dst src ,prefix ,opcode-from))))
+ (t
+ (aver (xmm-register-p src))
+ ,(when force-to-mem
+ `(aver (not (or (register-p dst)
+ (xmm-register-p dst)))))
+ (emit-regular-sse-inst segment src dst
+ ,prefix ,opcode-to))))))))
+ ;; direction bit?
+ (define-mov-sse-inst movapd #x66 #x28 #x29)
+ (define-mov-sse-inst movaps nil #x28 #x29)
+ (define-mov-sse-inst movdqa #x66 #x6f #x7f)
+ (define-mov-sse-inst movdqu #xf3 #x6f #x7f)
+
+ ;; streaming
+ (define-mov-sse-inst movntdq #x66 nil #xe7 :force-to-mem t)
+ (define-mov-sse-inst movntpd #x66 nil #x2b :force-to-mem t)
+ (define-mov-sse-inst movntps nil nil #x2b :force-to-mem t)
+
+ ;; use movhps for movlhps and movlps for movhlps
+ (define-mov-sse-inst movhpd #x66 #x16 #x17 :force-to-mem t)
+ (define-mov-sse-inst movhps nil #x16 #x17 :reg-reg-name movlhps)
+ (define-mov-sse-inst movlpd #x66 #x12 #x13 :force-to-mem t)
+ (define-mov-sse-inst movlps nil #x12 #x13 :reg-reg-name movhlps)
+ (define-mov-sse-inst movupd #x66 #x10 #x11)
+ (define-mov-sse-inst movups nil #x10 #x11))
+
+;;; MOVQ
+(define-instruction movq (segment dst src)
+ (:printer-list
+ (append
+ (sse-inst-printer-list 'xmm-xmm/mem #xf3 #x7e)
+ (sse-inst-printer-list 'xmm-xmm/mem #x66 #xd6
+ :printer '(:name :tab reg/mem ", " reg))))
+ (:emitter
+ (cond ((xmm-register-p dst)
+ (emit-sse-inst segment dst src #xf3 #x7e
+ :operand-size :do-not-set))
+ (t
+ (aver (xmm-register-p src))
+ (emit-sse-inst segment src dst #x66 #xd6
+ :operand-size :do-not-set)))))
+
+;;; Instructions having an XMM register as the destination operand
+;;; and a general-purpose register or a memory location as the source
+;;; operand. The operand size is calculated from the source operand.
+
+;;; MOVD - Move a 32- or 64-bit value from a general-purpose register or
+;;; a memory location to the low order 32 or 64 bits of an XMM register
+;;; with zero extension or vice versa.
+;;; We do not support the MMX version of this instruction.
+(define-instruction movd (segment dst src)
+ (:printer-list
+ (append
+ (sse-inst-printer-list 'xmm-reg/mem #x66 #x6e)
+ (sse-inst-printer-list 'xmm-reg/mem #x66 #x7e
+ :printer '(:name :tab reg/mem ", " reg))))
+ (:emitter
+ (cond ((xmm-register-p dst)
+ (emit-sse-inst segment dst src #x66 #x6e))
+ (t
+ (aver (xmm-register-p src))
+ (emit-sse-inst segment src dst #x66 #x7e)))))
+
+(define-instruction pinsrw (segment dst src imm)
+ (:printer-list
+ (sse-inst-printer-list
+ 'xmm-reg/mem #x66 #xc4
+ :more-fields '((imm nil :type imm-byte))
+ :printer '(:name :tab reg ", " reg/mem ", " imm)))
+ (:emitter
+ (aver (xmm-register-p dst))
+ (let ((src-size (operand-size src)))
+ (aver (or (not (register-p src))
+ (eq src-size :qword) (eq src-size :dword)))
+ (emit-sse-inst segment dst src #x66 #xc4
+ :operand-size (if (register-p src) src-size :do-not-set)
+ :remaining-bytes 1))
+ (emit-byte segment imm)))
+
+(define-instruction pextrw (segment dst src imm)
+ (:printer-list
+ (sse-inst-printer-list
+ 'reg-xmm/mem #x66 #xc5
+ :more-fields '((imm nil :type imm-byte))
+ :printer '(:name :tab reg ", " reg/mem ", " imm)))