(defparameter *dword-reg-names*
#(eax ecx edx ebx esp ebp esi edi))
+;;; Disassembling x86 code needs to take into account little things
+;;; like instructions that have a byte/word length bit in their
+;;; encoding, prefixes to change the default word length for a single
+;;; instruction, and so on. Unfortunately, there is no easy way with
+;;; this disassembler framework to handle prefixes that will work
+;;; correctly in all cases, so we copy the x86-64 version which at
+;;; least can handle the code output by the compiler.
+;;;
+;;; Width information for an instruction and whether a segment
+;;; override prefix was seen is stored as an inst-prop on the dstate.
+;;; The inst-props are cleared automatically after each non-prefix
+;;; instruction, must be set by prefilters, and contain a single bit of
+;;; data each (presence/absence).
+
+;;; Return the operand size based on the prefixes and width bit from
+;;; the dstate.
+(defun inst-operand-size (dstate)
+ (declare (type sb!disassem:disassem-state dstate))
+ (cond ((sb!disassem:dstate-get-inst-prop dstate 'operand-size-8)
+ :byte)
+ ((sb!disassem:dstate-get-inst-prop dstate 'operand-size-16)
+ :word)
+ (t
+ +default-operand-size+)))
+
+;;; Return the operand size for a "word-sized" operand based on the
+;;; prefixes from the dstate.
+(defun inst-word-operand-size (dstate)
+ (declare (type sb!disassem:disassem-state dstate))
+ (if (sb!disassem:dstate-get-inst-prop dstate 'operand-size-16)
+ :word
+ :dword))
+
(defun print-reg-with-width (value width stream dstate)
(declare (ignore dstate))
(princ (aref (ecase width
(type stream stream)
(type sb!disassem:disassem-state dstate))
(print-reg-with-width value
- (sb!disassem:dstate-get-prop dstate 'width)
+ (inst-operand-size dstate)
stream
dstate))
(type stream stream)
(type sb!disassem:disassem-state dstate))
(print-reg-with-width value
- (or (sb!disassem:dstate-get-prop dstate 'word-width)
- +default-operand-size+)
+ (inst-word-operand-size dstate)
stream
dstate))
(declare (ignore dstate))
(sb!disassem:princ16 value stream))
+(defun maybe-print-segment-override (stream dstate)
+ (cond ((sb!disassem:dstate-get-inst-prop dstate 'fs-segment-prefix)
+ (princ "FS:" stream))
+ ((sb!disassem:dstate-get-inst-prop dstate 'gs-segment-prefix)
+ (princ "GS:" stream))))
+
;;; Returns either an integer, meaning a register, or a list of
;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component
;;; may be missing or nil to indicate that it's not used or has the
;;; This is a sort of bogus prefilter that just stores the info globally for
;;; other people to use; it probably never gets printed.
(defun prefilter-width (value dstate)
- (setf (sb!disassem:dstate-get-prop dstate 'width)
- (if (zerop value)
- :byte
- (let ((word-width
- ;; set by a prefix instruction
- (or (sb!disassem:dstate-get-prop dstate 'word-width)
- +default-operand-size+)))
- (when (not (eql word-width +default-operand-size+))
- ;; Reset it.
- (setf (sb!disassem:dstate-get-prop dstate 'word-width)
- +default-operand-size+))
- word-width))))
+ (declare (type bit value)
+ (type sb!disassem:disassem-state dstate))
+ (when (zerop value)
+ (sb!disassem:dstate-put-inst-prop dstate 'operand-size-8))
+ value)
+
+;;; This prefilter is used solely for its side effect, namely to put
+;;; the property OPERAND-SIZE-16 into the DSTATE.
+(defun prefilter-x66 (value dstate)
+ (declare (type (eql #x66) value)
+ (ignore value)
+ (type sb!disassem:disassem-state dstate))
+ (sb!disassem:dstate-put-inst-prop dstate 'operand-size-16))
+
+;;; This prefilter is used solely for its side effect, namely to put
+;;; one of the properties [FG]S-SEGMENT-PREFIX into the DSTATE.
+;;; Unlike PREFILTER-X66, this prefilter only catches the low bit of
+;;; the prefix byte.
+(defun prefilter-seg (value dstate)
+ (declare (type bit value)
+ (type sb!disassem:disassem-state dstate))
+ (sb!disassem:dstate-put-inst-prop
+ dstate (elt '(fs-segment-prefix gs-segment-prefix) value)))
(defun read-address (value dstate)
(declare (ignore value)) ; always nil anyway
:prefilter (lambda (value dstate)
(declare (ignore value)) ; always nil anyway
(sb!disassem:read-suffix
- (width-bits (sb!disassem:dstate-get-prop dstate 'width))
+ (width-bits (inst-operand-size dstate))
dstate)))
(sb!disassem:define-arg-type signed-imm-data
:prefilter (lambda (value dstate)
(declare (ignore value)) ; always nil anyway
- (let ((width (sb!disassem:dstate-get-prop dstate 'width)))
+ (let ((width (inst-operand-size dstate)))
(sb!disassem:read-signed-suffix (width-bits width) dstate))))
(sb!disassem:define-arg-type signed-imm-byte
(sb!disassem:define-arg-type imm-word
:prefilter (lambda (value dstate)
(declare (ignore value)) ; always nil anyway
- (let ((width
- (or (sb!disassem:dstate-get-prop dstate 'word-width)
- +default-operand-size+)))
+ (let ((width (inst-word-operand-size dstate)))
(sb!disassem:read-suffix (width-bits width) dstate))))
(sb!disassem:define-arg-type signed-imm-word
:prefilter (lambda (value dstate)
(declare (ignore value)) ; always nil anyway
- (let ((width
- (or (sb!disassem:dstate-get-prop dstate 'word-width)
- +default-operand-size+)))
+ (let ((width (inst-word-operand-size dstate)))
(sb!disassem:read-signed-suffix (width-bits width) dstate))))
;;; needed for the ret imm16 instruction
(sb!disassem:define-arg-type width
:prefilter #'prefilter-width
:printer (lambda (value stream dstate)
- (if;; (zerop value)
- (or (null value)
- (and (numberp value) (zerop value))) ; zzz jrd
- (princ 'b stream)
- (let ((word-width
- ;; set by a prefix instruction
- (or (sb!disassem:dstate-get-prop dstate 'word-width)
- +default-operand-size+)))
- (princ (schar (symbol-name word-width) 0) stream)))))
+ (declare (ignore value))
+ (princ (schar (symbol-name (inst-operand-size dstate)) 0)
+ stream)))
+
+;;; Used to capture the effect of the #x66 operand size override prefix.
+(sb!disassem:define-arg-type x66
+ :prefilter #'prefilter-x66)
+
+;;; Used to capture the effect of the #x64 and #x65 segment override
+;;; prefixes.
+(sb!disassem:define-arg-type seg
+ :prefilter #'prefilter-seg)
(eval-when (:compile-toplevel :load-toplevel :execute)
(defparameter *conditions*
(accum :type 'accum)
(imm))
+;;; Prefix instructions
+
+(sb!disassem:define-instruction-format (x66 8)
+ (x66 :field (byte 8 0) :type 'x66 :value #x66))
+
+(sb!disassem:define-instruction-format (seg 8)
+ (seg :field (byte 7 1) :value #x32)
+ (fsgs :field (byte 1 0) :type 'seg))
+
(sb!disassem:define-instruction-format (simple 8)
(op :field (byte 7 1))
(width :field (byte 1 0) :type 'width)
(accum :type 'accum)
(imm))
+(sb!disassem:define-instruction-format (two-bytes 16
+ :default-printer '(:name))
+ (op :fields (list (byte 8 0) (byte 8 8))))
+
;;; Same as simple, but with direction bit
(sb!disassem:define-instruction-format (simple-dir 8 :include 'simple)
(op :field (byte 6 2))
;; optional fields
(imm))
+;;; reg-no-width with #x0f prefix
+(sb!disassem:define-instruction-format (ext-reg-no-width 16
+ :default-printer '(:name :tab reg))
+ (prefix :field (byte 8 0) :value #b00001111)
+ (op :field (byte 5 11))
+ (reg :field (byte 3 8) :type 'reg))
+
;;; Same as reg/mem, but with a prefix of #b00001111
(sb!disassem:define-instruction-format (ext-reg/mem 24
:default-printer '(:name :tab reg/mem))
(r/m (cond (index #b100)
((null base) #b101)
(t (reg-tn-encoding base)))))
+ (when (and (fixup-p disp)
+ (label-p (fixup-offset disp)))
+ (aver (null base))
+ (aver (null index))
+ (return-from emit-ea (emit-ea segment disp reg allow-constants)))
(emit-mod-reg-r/m-byte segment mod reg r/m)
(when (= r/m #b100)
(let ((ss (1- (integer-length scale)))
(:dword
(emit-dword segment value))))
\f
+;;;; prefixes
+
+(define-instruction x66 (segment)
+ (:printer x66 () nil :print-name nil)
+ (:emitter
+ (bug "#X66 prefix used as a standalone instruction")))
+
+(defun emit-prefix (segment name)
+ (ecase name
+ ((nil))
+ (:lock
+ #!+sb-thread
+ (emit-byte segment #xf0))
+ (:fs
+ (emit-byte segment #x64))
+ (:gs
+ (emit-byte segment #x65))))
+
+(define-instruction fs (segment)
+ (:printer seg ((fsgs #b0)) nil :print-name nil)
+ (:emitter
+ (bug "FS prefix used as a standalone instruction")))
+
+(define-instruction gs (segment)
+ (:printer seg ((fsgs #b1)) nil :print-name nil)
+ (:emitter
+ (bug "GS prefix used as a standalone instruction")))
+
+(define-instruction lock (segment)
+ (:printer byte ((op #b11110000)) nil)
+ (:emitter
+ (bug "LOCK prefix used as a standalone instruction")))
+
+(define-instruction rep (segment)
+ (:emitter
+ (emit-byte segment #b11110011)))
+
+(define-instruction repe (segment)
+ (:printer byte ((op #b11110011)) nil)
+ (:emitter
+ (emit-byte segment #b11110011)))
+
+(define-instruction repne (segment)
+ (:printer byte ((op #b11110010)) nil)
+ (:emitter
+ (emit-byte segment #b11110010)))
+
;;;; general data transfer
-(define-instruction mov (segment dst src)
+(define-instruction mov (segment dst src &optional prefix)
;; immediate to register
(:printer reg ((op #b1011) (imm nil :type 'imm-data))
'(:name :tab reg ", " imm))
(:printer reg/mem-imm ((op '(#b1100011 #b000))))
(:emitter
+ (emit-prefix segment prefix)
(let ((size (matching-operand-size dst src)))
(maybe-emit-operand-size-prefix segment size)
(cond ((register-p dst)
(emit-ea segment src (reg-tn-encoding dst))))))))
(define-instruction movsx (segment dst src)
- (:printer ext-reg-reg/mem ((op #b1011111) (reg nil :type 'word-reg)))
+ (:printer ext-reg-reg/mem ((op #b1011111)
+ (reg nil :type 'word-reg)
+ (reg/mem nil :type 'sized-reg/mem)))
(:emitter (emit-move-with-extension segment dst src #b10111110)))
(define-instruction movzx (segment dst src)
- (:printer ext-reg-reg/mem ((op #b1011011) (reg nil :type 'word-reg)))
+ (:printer ext-reg-reg/mem ((op #b1011011)
+ (reg nil :type 'word-reg)
+ (reg/mem nil :type 'sized-reg/mem)))
(:emitter (emit-move-with-extension segment dst src #b10110110)))
-(define-instruction push (segment src)
+(define-instruction push (segment src &optional prefix)
;; register
(:printer reg-no-width ((op #b01010)))
;; register/memory
;; ### segment registers?
(:emitter
+ (emit-prefix segment prefix)
(cond ((integerp src)
(cond ((<= -128 src 127)
(emit-byte segment #b01101010)
(emit-byte segment #b10001101)
(emit-ea segment src (reg-tn-encoding dst))))
-(define-instruction cmpxchg (segment dst src)
+(define-instruction cmpxchg (segment dst src &optional prefix)
;; Register/Memory with Register.
(:printer ext-reg-reg/mem ((op #b1011000)) '(:name :tab reg/mem ", " reg))
(:emitter
(aver (register-p src))
+ (emit-prefix segment prefix)
(let ((size (matching-operand-size src dst)))
(maybe-emit-operand-size-prefix segment size)
(emit-byte segment #b00001111)
(emit-byte segment (if (eq size :byte) #b10110000 #b10110001))
(emit-ea segment dst (reg-tn-encoding src)))))
-\f
-
-(define-instruction fs-segment-prefix (segment)
+(define-instruction pause (segment)
+ (:printer two-bytes ((op '(#xf3 #x90))))
(:emitter
- (emit-byte segment #x64)))
-
+ (emit-byte segment #xf3)
+ (emit-byte segment #x90)))
+\f
;;;; flag control instructions
;;; CLC -- Clear Carry Flag.
;;;; arithmetic
(defun emit-random-arith-inst (name segment dst src opcode
- &optional allow-constants)
+ &optional allow-constants)
(let ((size (matching-operand-size dst src)))
(maybe-emit-operand-size-prefix segment size)
(cond
(reg/mem-imm ((op (#b1000000 ,subop))))
(reg/mem-imm ((op (#b1000001 ,subop))
(imm nil :type signed-imm-byte)))
- (reg-reg/mem-dir ((op ,(dpb subop (byte 3 1) #b000000))))))
- )
+ (reg-reg/mem-dir ((op ,(dpb subop (byte 3 1) #b000000)))))))
-(define-instruction add (segment dst src)
+(define-instruction add (segment dst src &optional prefix)
(:printer-list (arith-inst-printer-list #b000))
- (:emitter (emit-random-arith-inst "ADD" segment dst src #b000)))
+ (:emitter
+ (emit-prefix segment prefix)
+ (emit-random-arith-inst "ADD" segment dst src #b000)))
(define-instruction adc (segment dst src)
(:printer-list (arith-inst-printer-list #b010))
(:emitter (emit-random-arith-inst "ADC" segment dst src #b010)))
-(define-instruction sub (segment dst src)
+(define-instruction sub (segment dst src &optional prefix)
(:printer-list (arith-inst-printer-list #b101))
- (:emitter (emit-random-arith-inst "SUB" segment dst src #b101)))
+ (:emitter
+ (emit-prefix segment prefix)
+ (emit-random-arith-inst "SUB" segment dst src #b101)))
(define-instruction sbb (segment dst src)
(:printer-list (arith-inst-printer-list #b011))
(:emitter (emit-random-arith-inst "SBB" segment dst src #b011)))
-(define-instruction cmp (segment dst src)
+(define-instruction cmp (segment dst src &optional prefix)
(:printer-list (arith-inst-printer-list #b111))
- (:emitter (emit-random-arith-inst "CMP" segment dst src #b111 t)))
+ (:emitter
+ (emit-prefix segment prefix)
+ (emit-random-arith-inst "CMP" segment dst src #b111 t)))
(define-instruction inc (segment dst)
;; Register.
(emit-byte segment #b11010100)
(emit-byte segment #b00001010)))
+(define-instruction bswap (segment dst)
+ (:printer ext-reg-no-width ((op #b11001)))
+ (:emitter
+ (emit-byte segment #x0f)
+ (emit-byte-with-reg segment #b11001 (reg-tn-encoding dst))))
+
;;; CBW -- Convert Byte to Word. AX <- sign_xtnd(AL)
(define-instruction cbw (segment)
+ (:printer two-bytes ((op '(#b01100110 #b10011000))))
(:emitter
(maybe-emit-operand-size-prefix segment :word)
(emit-byte segment #b10011000)))
;;; CWDE -- Convert Word To Double Word Extened. EAX <- sign_xtnd(AX)
(define-instruction cwde (segment)
+ (:printer byte ((op #b10011000)))
(:emitter
(maybe-emit-operand-size-prefix segment :dword)
(emit-byte segment #b10011000)))
;;; CWD -- Convert Word to Double Word. DX:AX <- sign_xtnd(AX)
(define-instruction cwd (segment)
+ (:printer two-bytes ((op '(#b01100110 #b10011001))))
(:emitter
(maybe-emit-operand-size-prefix segment :word)
(emit-byte segment #b10011001)))
(maybe-emit-operand-size-prefix segment :dword)
(emit-byte segment #b10011001)))
-(define-instruction xadd (segment dst src)
+(define-instruction xadd (segment dst src &optional prefix)
;; Register/Memory with Register.
(:printer ext-reg-reg/mem ((op #b1100000)) '(:name :tab reg/mem ", " reg))
(:emitter
(aver (register-p src))
+ (emit-prefix segment prefix)
(let ((size (matching-operand-size src dst)))
(maybe-emit-operand-size-prefix segment size)
(emit-byte segment #b00001111)
(eval-when (:compile-toplevel :execute)
(defun double-shift-inst-printer-list (op)
- `(#+nil
- (ext-reg-reg/mem-imm ((op ,(logior op #b10))
- (imm nil :type signed-imm-byte)))
- (ext-reg-reg/mem ((op ,(logior op #b10)))
+ `((ext-reg-reg/mem ((op ,(logior op #b10)) (width 0)
+ (imm nil :type signed-imm-byte)))
+ (ext-reg-reg/mem ((op ,(logior op #b10)) (width 1))
(:name :tab reg/mem ", " reg ", " 'cl)))))
(define-instruction shld (segment dst src amt)
y))
((sc-is x control-stack)
(inst test (make-ea :byte :base ebp-tn
- :disp (- (* (1+ offset) n-word-bytes)))
+ :disp (frame-byte-offset offset))
y))
(t
(inst test x y)))))
(t
(inst test x y))))
-(define-instruction or (segment dst src)
+(define-instruction or (segment dst src &optional prefix)
(:printer-list
(arith-inst-printer-list #b001))
(:emitter
+ (emit-prefix segment prefix)
(emit-random-arith-inst "OR" segment dst src #b001)))
-(define-instruction xor (segment dst src)
+(define-instruction xor (segment dst src &optional prefix)
(:printer-list
(arith-inst-printer-list #b110))
(:emitter
+ (emit-prefix segment prefix)
(emit-random-arith-inst "XOR" segment dst src #b110)))
(define-instruction not (segment dst)
(:emitter
(emit-byte segment #b11010111)))
-(define-instruction rep (segment)
- (:emitter
- (emit-byte segment #b11110010)))
-
-(define-instruction repe (segment)
- (:printer byte ((op #b11110011)))
- (:emitter
- (emit-byte segment #b11110011)))
-
-(define-instruction repne (segment)
- (:printer byte ((op #b11110010)))
- (:emitter
- (emit-byte segment #b11110010)))
-
\f
;;;; bit manipulation
(:printer byte ((op #b11000010) (imm nil :type 'imm-word-16))
'(:name :tab imm))
(:emitter
- (cond (stack-delta
+ (cond ((and stack-delta (not (zerop stack-delta)))
(emit-byte segment #b11000010)
(emit-word segment stack-delta))
(t
;; Lisp (with (DESCRIBE 'BYTE-IMM-CODE)) than to definitively deduce
;; from first principles whether it's defined in some way that genesis
;; can't grok.
- (case #!-darwin (byte-imm-code chunk dstate)
- #!+darwin (word-imm-code chunk dstate)
+ (case #!-ud2-breakpoints (byte-imm-code chunk dstate)
+ #!+ud2-breakpoints (word-imm-code chunk dstate)
(#.error-trap
(nt "error trap")
(sb!disassem:handle-break-args #'snarf-error-junk stream dstate))
(define-instruction break (segment code)
(:declare (type (unsigned-byte 8) code))
- #!-darwin (:printer byte-imm ((op #b11001100)) '(:name :tab code)
- :control #'break-control)
- #!+darwin (:printer word-imm ((op #b0000101100001111)) '(:name :tab code)
- :control #'break-control)
+ #!-ud2-breakpoints (:printer byte-imm ((op #b11001100)) '(:name :tab code)
+ :control #'break-control)
+ #!+ud2-breakpoints (:printer word-imm ((op #b0000101100001111)) '(:name :tab code)
+ :control #'break-control)
(:emitter
- #!-darwin (emit-byte segment #b11001100)
+ #!-ud2-breakpoints (emit-byte segment #b11001100)
;; On darwin, trap handling via SIGTRAP is unreliable, therefore we
;; throw a sigill with 0x0b0f instead and check for this in the
;; SIGILL handler and pass it on to the sigtrap handler if
;; appropriate
- #!+darwin (emit-word segment #b0000101100001111)
+ #!+ud2-breakpoints (emit-word segment #b0000101100001111)
(emit-byte segment code)))
(define-instruction int (segment number)
(:printer byte ((op #b10011011)))
(:emitter
(emit-byte segment #b10011011)))
-
-(define-instruction lock (segment)
- (:printer byte ((op #b11110000)))
- (:emitter
- (emit-byte segment #b11110000)))
\f
;;;; miscellaneous hackery
(define-instruction fxch (segment source)
(:printer floating-point-fp ((op '(#b001 #b001))))
(:emitter
- (unless (and (tn-p source)
- (eq (sb-name (sc-sb (tn-sc source))) 'float-registers))
- (cl:break))
+ (aver (and (tn-p source)
+ (eq (sb-name (sc-sb (tn-sc source))) 'float-registers)))
(emit-byte segment #b11011001)
(emit-fp-op segment source #b001)))
(:emitter
(emit-byte segment #b11011001)
(emit-byte segment #b11101101)))
+
+;;;; Miscellany
+
+(define-instruction cpuid (segment)
+ (:printer two-bytes ((op '(#b00001111 #b10100010))))
+ (:emitter
+ (emit-byte segment #b00001111)
+ (emit-byte segment #b10100010)))
+
+(define-instruction rdtsc (segment)
+ (:printer two-bytes ((op '(#b00001111 #b00110001))))
+ (:emitter
+ (emit-byte segment #b00001111)
+ (emit-byte segment #b00110001)))
+
+;;;; Late VM definitions
+(defun canonicalize-inline-constant (constant)
+ (let ((first (car constant)))
+ (typecase first
+ (single-float (setf constant (list :single-float first)))
+ (double-float (setf constant (list :double-float first)))))
+ (destructuring-bind (type value) constant
+ (ecase type
+ ((:byte :word :dword)
+ (aver (integerp value))
+ (cons type value))
+ ((:base-char)
+ (aver (base-char-p value))
+ (cons :byte (char-code value)))
+ ((:character)
+ (aver (characterp value))
+ (cons :dword (char-code value)))
+ ((:single-float)
+ (aver (typep value 'single-float))
+ (cons :dword (ldb (byte 32 0) (single-float-bits value))))
+ ((:double-float-bits)
+ (aver (integerp value))
+ (cons :double-float (ldb (byte 64 0) value)))
+ ((:double-float)
+ (aver (typep value 'double-float))
+ (cons :double-float
+ (ldb (byte 64 0) (logior (ash (double-float-high-bits value) 32)
+ (double-float-low-bits value))))))))
+
+(defun inline-constant-value (constant)
+ (let ((label (gen-label))
+ (size (ecase (car constant)
+ ((:byte :word :dword) (car constant))
+ (:double-float :dword))))
+ (values label (make-ea size
+ :disp (make-fixup nil :code-object label)))))
+
+(defun emit-constant-segment-header (segment constants optimize)
+ (declare (ignore segment constants))
+ (loop repeat (if optimize 64 16) do (inst byte #x90)))
+
+(defun size-nbyte (size)
+ (ecase size
+ (:byte 1)
+ (:word 2)
+ (:dword 4)
+ (:double-float 8)))
+
+(defun sort-inline-constants (constants)
+ (stable-sort constants #'> :key (lambda (constant)
+ (size-nbyte (caar constant)))))
+
+(defun emit-inline-constant (constant label)
+ (let ((size (size-nbyte (car constant))))
+ (emit-alignment (integer-length (1- size)))
+ (emit-label label)
+ (let ((val (cdr constant)))
+ (loop repeat size
+ do (inst byte (ldb (byte 8 0) val))
+ (setf val (ash val -8))))))