From: Daniel Barlow Date: Sat, 26 Jun 2004 17:48:22 +0000 (+0000) Subject: Youn are not expected to understand this. I don't X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=4ebdc81b1a9c6dbed6e98b112afc8dd32b17a2dd;p=sbcl.git Youn are not expected to understand this. I don't --- diff --git a/src/compiler/x86-64/alloc.lisp b/src/compiler/x86-64/alloc.lisp new file mode 100644 index 0000000..bc7de22 --- /dev/null +++ b/src/compiler/x86-64/alloc.lisp @@ -0,0 +1,219 @@ +;;;; allocation VOPs for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; LIST and LIST* + +(define-vop (list-or-list*) + (:args (things :more t)) + (:temporary (:sc unsigned-reg) ptr temp) + (:temporary (:sc unsigned-reg :to (:result 0) :target result) res) + (:info num) + (:results (result :scs (descriptor-reg))) + (:variant-vars star) + (:policy :safe) + (:node-var node) + (:generator 0 + (cond ((zerop num) + ;; (move result nil-value) + (inst mov result nil-value)) + ((and star (= num 1)) + (move result (tn-ref-tn things))) + (t + (macrolet + ((store-car (tn list &optional (slot cons-car-slot)) + `(let ((reg + (sc-case ,tn + ((any-reg descriptor-reg) ,tn) + ((control-stack) + (move temp ,tn) + temp)))) + (storew reg ,list ,slot list-pointer-lowtag)))) + (let ((cons-cells (if star (1- num) num))) + (pseudo-atomic + (allocation res (* (pad-data-block cons-size) cons-cells) node) + (inst lea res + (make-ea :byte :base res :disp list-pointer-lowtag)) + (move ptr res) + (dotimes (i (1- cons-cells)) + (store-car (tn-ref-tn things) ptr) + (setf things (tn-ref-across things)) + (inst add ptr (pad-data-block cons-size)) + (storew ptr ptr (- cons-cdr-slot cons-size) + list-pointer-lowtag)) + (store-car (tn-ref-tn things) ptr) + (cond (star + (setf things (tn-ref-across things)) + (store-car (tn-ref-tn things) ptr cons-cdr-slot)) + (t + (storew nil-value ptr cons-cdr-slot + list-pointer-lowtag))) + (aver (null (tn-ref-across things))))) + (move result res)))))) + +(define-vop (list list-or-list*) + (:variant nil)) + +(define-vop (list* list-or-list*) + (:variant t)) + +;;;; special-purpose inline allocators + +(define-vop (allocate-code-object) + (:args (boxed-arg :scs (any-reg) :target boxed) + (unboxed-arg :scs (any-reg) :target unboxed)) + (:results (result :scs (descriptor-reg) :from :eval)) + (:temporary (:sc unsigned-reg :from (:argument 0)) boxed) + (:temporary (:sc unsigned-reg :from (:argument 1)) unboxed) + (:node-var node) + (:generator 100 + (move boxed boxed-arg) + (inst add boxed (fixnumize (1+ code-trace-table-offset-slot))) + (inst and boxed (lognot lowtag-mask)) + (move unboxed unboxed-arg) + (inst shr unboxed word-shift) + (inst add unboxed lowtag-mask) + (inst and unboxed (lognot lowtag-mask)) + (inst mov result boxed) + (inst add result unboxed) + (pseudo-atomic + (allocation result result node) + (inst lea result (make-ea :byte :base result :disp other-pointer-lowtag)) + (inst shl boxed (- n-widetag-bits word-shift)) + (inst or boxed code-header-widetag) + (storew boxed result 0 other-pointer-lowtag) + (storew unboxed result code-code-size-slot other-pointer-lowtag) + (storew nil-value result code-entry-points-slot other-pointer-lowtag)) + (storew nil-value result code-debug-info-slot other-pointer-lowtag))) + +(define-vop (make-fdefn) + (:policy :fast-safe) + (:translate make-fdefn) + (:args (name :scs (descriptor-reg) :to :eval)) + (:results (result :scs (descriptor-reg) :from :argument)) + (:node-var node) + (:generator 37 + (with-fixed-allocation (result fdefn-widetag fdefn-size node) + (storew name result fdefn-name-slot other-pointer-lowtag) + (storew nil-value result fdefn-fun-slot other-pointer-lowtag) + (storew (make-fixup (extern-alien-name "undefined_tramp") :foreign) + result fdefn-raw-addr-slot other-pointer-lowtag)))) + +(define-vop (make-closure) + (:args (function :to :save :scs (descriptor-reg))) + (:info length) + (:temporary (:sc any-reg) temp) + (:results (result :scs (descriptor-reg))) + (:node-var node) + (:generator 10 + (pseudo-atomic + (let ((size (+ length closure-info-offset))) + (allocation result (pad-data-block size) node) + (inst lea result + (make-ea :byte :base result :disp fun-pointer-lowtag)) + (storew (logior (ash (1- size) n-widetag-bits) closure-header-widetag) + result 0 fun-pointer-lowtag)) + (loadw temp function closure-fun-slot fun-pointer-lowtag) + (storew temp result closure-fun-slot fun-pointer-lowtag)))) + +;;; The compiler likes to be able to directly make value cells. +(define-vop (make-value-cell) + (:args (value :scs (descriptor-reg any-reg) :to :result)) + (:results (result :scs (descriptor-reg) :from :eval)) + (:node-var node) + (:generator 10 + (with-fixed-allocation + (result value-cell-header-widetag value-cell-size node)) + (storew value result value-cell-value-slot other-pointer-lowtag))) + +;;;; automatic allocators for primitive objects + +(define-vop (make-unbound-marker) + (:args) + (:results (result :scs (any-reg))) + (:generator 1 + (inst mov result unbound-marker-widetag))) + +(define-vop (fixed-alloc) + (:args) + (:info name words type lowtag) + (:ignore name) + (:results (result :scs (descriptor-reg))) + (:node-var node) + (:generator 50 + (pseudo-atomic + (allocation result (pad-data-block words) node) + (inst lea result (make-ea :byte :base result :disp lowtag)) + (when type + (storew (logior (ash (1- words) n-widetag-bits) type) + result + 0 + lowtag))))) + +(define-vop (var-alloc) + (:args (extra :scs (any-reg))) + (:arg-types positive-fixnum) + (:info name words type lowtag) + (:ignore name) + (:results (result :scs (descriptor-reg) :from (:eval 1))) + (:temporary (:sc any-reg :from :eval :to (:eval 1)) bytes) + (:temporary (:sc any-reg :from :eval :to :result) header) + (:node-var node) + (:generator 50 + (inst lea bytes + (make-ea :qword :base extra :disp (* (1+ words) n-word-bytes))) + (inst mov header bytes) + (inst shl header (- n-widetag-bits 2)) ; w+1 to length field + (inst lea header ; (w-1 << 8) | type + (make-ea :qword :base header :disp (+ (ash -2 n-widetag-bits) type))) + (inst and bytes (lognot lowtag-mask)) + (pseudo-atomic + (allocation result bytes node) + (inst lea result (make-ea :byte :base result :disp lowtag)) + (storew header result 0 lowtag)))) + +(define-vop (make-symbol) + (:policy :fast-safe) + (:translate make-symbol) + (:args (name :scs (descriptor-reg) :to :eval)) + (:temporary (:sc unsigned-reg :from :eval) temp) + (:results (result :scs (descriptor-reg) :from :argument)) + (:node-var node) + (:generator 37 + (with-fixed-allocation (result symbol-header-widetag symbol-size node) + (storew name result symbol-name-slot other-pointer-lowtag) + (storew unbound-marker-widetag + result + symbol-value-slot + other-pointer-lowtag) + ;; Set up a random hash value for the symbol. Perhaps the object + ;; address could be used for even faster and smaller code! + ;; FIXME: We don't mind the symbol hash not being repeatable, so + ;; we might as well add in the object address here, too. (Adding entropy + ;; is good, even if ANSI doesn't understand that.) + (inst imul temp + (make-fixup (extern-alien-name "fast_random_state") :foreign) + 1103515245) + (inst add temp 12345) + (inst mov (make-fixup (extern-alien-name "fast_random_state") :foreign) + temp) + ;; We want a positive fixnum for the hash value, so discard the LS bits. + ;; + ;; FIXME: OK, who wants to tell me (CSR) why these two + ;; instructions aren't replaced by (INST AND TEMP #x8FFFFFFC)? + ;; Are the following two instructions actually faster? Does the + ;; difference in behaviour really matter? + (inst shr temp 1) + (inst and temp #xfffffffc) + (storew temp result symbol-hash-slot other-pointer-lowtag) + (storew nil-value result symbol-plist-slot other-pointer-lowtag) + (storew nil-value result symbol-package-slot other-pointer-lowtag)))) diff --git a/src/compiler/x86-64/arith.lisp b/src/compiler/x86-64/arith.lisp new file mode 100644 index 0000000..eaef9b4 --- /dev/null +++ b/src/compiler/x86-64/arith.lisp @@ -0,0 +1,1705 @@ +;;;; the VM definition of arithmetic VOPs for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; unary operations + +(define-vop (fast-safe-arith-op) + (:policy :fast-safe) + (:effects) + (:affected)) + +(define-vop (fixnum-unop fast-safe-arith-op) + (:args (x :scs (any-reg) :target res)) + (:results (res :scs (any-reg))) + (:note "inline fixnum arithmetic") + (:arg-types tagged-num) + (:result-types tagged-num)) + +(define-vop (signed-unop fast-safe-arith-op) + (:args (x :scs (signed-reg) :target res)) + (:results (res :scs (signed-reg))) + (:note "inline (signed-byte 32) arithmetic") + (:arg-types signed-num) + (:result-types signed-num)) + +(define-vop (fast-negate/fixnum fixnum-unop) + (:translate %negate) + (:generator 1 + (move res x) + (inst neg res))) + +(define-vop (fast-negate/signed signed-unop) + (:translate %negate) + (:generator 2 + (move res x) + (inst neg res))) + +(define-vop (fast-lognot/fixnum fixnum-unop) + (:translate lognot) + (:generator 2 + (move res x) + (inst xor res (fixnumize -1)))) + +(define-vop (fast-lognot/signed signed-unop) + (:translate lognot) + (:generator 1 + (move res x) + (inst not res))) + +;;;; binary fixnum operations + +;;; Assume that any constant operand is the second arg... + +(define-vop (fast-fixnum-binop fast-safe-arith-op) + (:args (x :target r :scs (any-reg) + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg) + (sc-is r control-stack) + (location= x r)))) + (y :scs (any-reg control-stack))) + (:arg-types tagged-num tagged-num) + (:results (r :scs (any-reg) :from (:argument 0) + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg) + (sc-is r control-stack) + (location= x r))))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic")) + +(define-vop (fast-unsigned-binop fast-safe-arith-op) + (:args (x :target r :scs (unsigned-reg) + :load-if (not (and (sc-is x unsigned-stack) + (sc-is y unsigned-reg) + (sc-is r unsigned-stack) + (location= x r)))) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:results (r :scs (unsigned-reg) :from (:argument 0) + :load-if (not (and (sc-is x unsigned-stack) + (sc-is y unsigned-reg) + (sc-is r unsigned-stack) + (location= x r))))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic")) + +(define-vop (fast-signed-binop fast-safe-arith-op) + (:args (x :target r :scs (signed-reg) + :load-if (not (and (sc-is x signed-stack) + (sc-is y signed-reg) + (sc-is r signed-stack) + (location= x r)))) + (y :scs (signed-reg signed-stack))) + (:arg-types signed-num signed-num) + (:results (r :scs (signed-reg) :from (:argument 0) + :load-if (not (and (sc-is x signed-stack) + (sc-is y signed-reg) + (sc-is r signed-stack) + (location= x r))))) + (:result-types signed-num) + (:note "inline (signed-byte 32) arithmetic")) + +(define-vop (fast-fixnum-binop-c fast-safe-arith-op) + (:args (x :target r :scs (any-reg control-stack))) + (:info y) + (:arg-types tagged-num (:constant (signed-byte 29))) + (:results (r :scs (any-reg) + :load-if (not (location= x r)))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic")) + +(define-vop (fast-unsigned-binop-c fast-safe-arith-op) + (:args (x :target r :scs (unsigned-reg unsigned-stack))) + (:info y) + (:arg-types unsigned-num (:constant (unsigned-byte 32))) + (:results (r :scs (unsigned-reg) + :load-if (not (location= x r)))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic")) + +;; 32 not 64 because it's hard work loading 64 bit constants +(define-vop (fast-signed-binop-c fast-safe-arith-op) + (:args (x :target r :scs (signed-reg signed-stack))) + (:info y) + (:arg-types signed-num (:constant (signed-byte 32))) + (:results (r :scs (signed-reg) + :load-if (not (location= x r)))) + (:result-types signed-num) + (:note "inline (signed-byte 64) arithmetic")) + +(macrolet ((define-binop (translate untagged-penalty op) + `(progn + (define-vop (,(symbolicate "FAST-" translate "/FIXNUM=>FIXNUM") + fast-fixnum-binop) + (:translate ,translate) + (:generator 2 + (move r x) + (inst ,op r y))) + (define-vop (,(symbolicate 'fast- translate '-c/fixnum=>fixnum) + fast-fixnum-binop-c) + (:translate ,translate) + (:generator 1 + (move r x) + (inst ,op r (fixnumize y)))) + (define-vop (,(symbolicate "FAST-" translate "/SIGNED=>SIGNED") + fast-signed-binop) + (:translate ,translate) + (:generator ,(1+ untagged-penalty) + (move r x) + (inst ,op r y))) + (define-vop (,(symbolicate 'fast- translate '-c/signed=>signed) + fast-signed-binop-c) + (:translate ,translate) + (:generator ,untagged-penalty + (move r x) + (inst ,op r y))) + (define-vop (,(symbolicate "FAST-" + translate + "/UNSIGNED=>UNSIGNED") + fast-unsigned-binop) + (:translate ,translate) + (:generator ,(1+ untagged-penalty) + (move r x) + (inst ,op r y))) + (define-vop (,(symbolicate 'fast- + translate + '-c/unsigned=>unsigned) + fast-unsigned-binop-c) + (:translate ,translate) + (:generator ,untagged-penalty + (move r x) + (inst ,op r y)))))) + + ;;(define-binop + 4 add) + (define-binop - 4 sub) + (define-binop logand 2 and) + (define-binop logior 2 or) + (define-binop logxor 2 xor)) + +;;; Special handling of add on the x86; can use lea to avoid a +;;; register load, otherwise it uses add. +(define-vop (fast-+/fixnum=>fixnum fast-safe-arith-op) + (:translate +) + (:args (x :scs (any-reg) :target r + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg) + (sc-is r control-stack) + (location= x r)))) + (y :scs (any-reg control-stack))) + (:arg-types tagged-num tagged-num) + (:results (r :scs (any-reg) :from (:argument 0) + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg) + (sc-is r control-stack) + (location= x r))))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic") + (:generator 2 + (cond ((and (sc-is x any-reg) (sc-is y any-reg) (sc-is r any-reg) + (not (location= x r))) + (inst lea r (make-ea :qword :base x :index y :scale 1))) + (t + (move r x) + (inst add r y))))) + +(define-vop (fast-+-c/fixnum=>fixnum fast-safe-arith-op) + (:translate +) + (:args (x :target r :scs (any-reg control-stack))) + (:info y) + (:arg-types tagged-num (:constant (signed-byte 29))) + (:results (r :scs (any-reg) + :load-if (not (location= x r)))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic") + (:generator 1 + (cond ((and (sc-is x any-reg) (sc-is r any-reg) (not (location= x r))) + (inst lea r (make-ea :qword :base x :disp (fixnumize y)))) + (t + (move r x) + (inst add r (fixnumize y)))))) + +(define-vop (fast-+/signed=>signed fast-safe-arith-op) + (:translate +) + (:args (x :scs (signed-reg) :target r + :load-if (not (and (sc-is x signed-stack) + (sc-is y signed-reg) + (sc-is r signed-stack) + (location= x r)))) + (y :scs (signed-reg signed-stack))) + (:arg-types signed-num signed-num) + (:results (r :scs (signed-reg) :from (:argument 0) + :load-if (not (and (sc-is x signed-stack) + (sc-is y signed-reg) + (location= x r))))) + (:result-types signed-num) + (:note "inline (signed-byte 32) arithmetic") + (:generator 5 + (cond ((and (sc-is x signed-reg) (sc-is y signed-reg) (sc-is r signed-reg) + (not (location= x r))) + (inst lea r (make-ea :qword :base x :index y :scale 1))) + (t + (move r x) + (inst add r y))))) + + +;;;; Special logand cases: (logand signed unsigned) => unsigned + +(define-vop (fast-logand/signed-unsigned=>unsigned + fast-logand/unsigned=>unsigned) + (:args (x :target r :scs (signed-reg) + :load-if (not (and (sc-is x signed-stack) + (sc-is y unsigned-reg) + (sc-is r unsigned-stack) + (location= x r)))) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types signed-num unsigned-num)) + +(define-vop (fast-logand-c/signed-unsigned=>unsigned + fast-logand-c/unsigned=>unsigned) + (:args (x :target r :scs (signed-reg signed-stack))) + (:arg-types signed-num (:constant (unsigned-byte 32)))) + +(define-vop (fast-logand/unsigned-signed=>unsigned + fast-logand/unsigned=>unsigned) + (:args (x :target r :scs (unsigned-reg) + :load-if (not (and (sc-is x unsigned-stack) + (sc-is y signed-reg) + (sc-is r unsigned-stack) + (location= x r)))) + (y :scs (signed-reg signed-stack))) + (:arg-types unsigned-num signed-num)) + + +(define-vop (fast-+-c/signed=>signed fast-safe-arith-op) + (:translate +) + (:args (x :target r :scs (signed-reg signed-stack))) + (:info y) + (:arg-types signed-num (:constant (signed-byte 32))) + (:results (r :scs (signed-reg) + :load-if (not (location= x r)))) + (:result-types signed-num) + (:note "inline (signed-byte 32) arithmetic") + (:generator 4 + (cond ((and (sc-is x signed-reg) (sc-is r signed-reg) + (not (location= x r))) + (inst lea r (make-ea :qword :base x :disp y))) + (t + (move r x) + (if (= y 1) + (inst inc r) + (inst add r y)))))) + +(define-vop (fast-+/unsigned=>unsigned fast-safe-arith-op) + (:translate +) + (:args (x :scs (unsigned-reg) :target r + :load-if (not (and (sc-is x unsigned-stack) + (sc-is y unsigned-reg) + (sc-is r unsigned-stack) + (location= x r)))) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:results (r :scs (unsigned-reg) :from (:argument 0) + :load-if (not (and (sc-is x unsigned-stack) + (sc-is y unsigned-reg) + (sc-is r unsigned-stack) + (location= x r))))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic") + (:generator 5 + (cond ((and (sc-is x unsigned-reg) (sc-is y unsigned-reg) + (sc-is r unsigned-reg) (not (location= x r))) + (inst lea r (make-ea :qword :base x :index y :scale 1))) + (t + (move r x) + (inst add r y))))) + +(define-vop (fast-+-c/unsigned=>unsigned fast-safe-arith-op) + (:translate +) + (:args (x :target r :scs (unsigned-reg unsigned-stack))) + (:info y) + (:arg-types unsigned-num (:constant (unsigned-byte 32))) + (:results (r :scs (unsigned-reg) + :load-if (not (location= x r)))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic") + (:generator 4 + (cond ((and (sc-is x unsigned-reg) (sc-is r unsigned-reg) + (not (location= x r))) + (inst lea r (make-ea :qword :base x :disp y))) + (t + (move r x) + (if (= y 1) + (inst inc r) + (inst add r y)))))) + +;;;; multiplication and division + +(define-vop (fast-*/fixnum=>fixnum fast-safe-arith-op) + (:translate *) + ;; We need different loading characteristics. + (:args (x :scs (any-reg) :target r) + (y :scs (any-reg control-stack))) + (:arg-types tagged-num tagged-num) + (:results (r :scs (any-reg) :from (:argument 0))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic") + (:generator 4 + (move r x) + (inst sar r 3) + (inst imul r y))) + +(define-vop (fast-*-c/fixnum=>fixnum fast-safe-arith-op) + (:translate *) + ;; We need different loading characteristics. + (:args (x :scs (any-reg control-stack))) + (:info y) + (:arg-types tagged-num (:constant (signed-byte 29))) + (:results (r :scs (any-reg))) + (:result-types tagged-num) + (:note "inline fixnum arithmetic") + (:generator 3 + (inst imul r x y))) + +(define-vop (fast-*/signed=>signed fast-safe-arith-op) + (:translate *) + ;; We need different loading characteristics. + (:args (x :scs (signed-reg) :target r) + (y :scs (signed-reg signed-stack))) + (:arg-types signed-num signed-num) + (:results (r :scs (signed-reg) :from (:argument 0))) + (:result-types signed-num) + (:note "inline (signed-byte 32) arithmetic") + (:generator 5 + (move r x) + (inst imul r y))) + +(define-vop (fast-*-c/signed=>signed fast-safe-arith-op) + (:translate *) + ;; We need different loading characteristics. + (:args (x :scs (signed-reg signed-stack))) + (:info y) + (:arg-types signed-num (:constant (signed-byte 32))) + (:results (r :scs (signed-reg))) + (:result-types signed-num) + (:note "inline (signed-byte 32) arithmetic") + (:generator 4 + (inst imul r x y))) + +(define-vop (fast-*/unsigned=>unsigned fast-safe-arith-op) + (:translate *) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 0) :to :result) eax) + (:temporary (:sc unsigned-reg :offset edx-offset + :from :eval :to :result) edx) + (:ignore edx) + (:results (result :scs (unsigned-reg))) + (:result-types unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 6 + (move eax x) + (inst mul eax y) + (move result eax))) + + +(define-vop (fast-truncate/fixnum=>fixnum fast-safe-arith-op) + (:translate truncate) + (:args (x :scs (any-reg) :target eax) + (y :scs (any-reg control-stack))) + (:arg-types tagged-num tagged-num) + (:temporary (:sc signed-reg :offset eax-offset :target quo + :from (:argument 0) :to (:result 0)) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :target rem + :from (:argument 0) :to (:result 1)) edx) + (:results (quo :scs (any-reg)) + (rem :scs (any-reg))) + (:result-types tagged-num tagged-num) + (:note "inline fixnum arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 31 + (let ((zero (generate-error-code vop division-by-zero-error x y))) + (if (sc-is y any-reg) + (inst test y y) ; smaller instruction + (inst cmp y 0)) + (inst jmp :eq zero)) + (move eax x) + (inst cqo) + (inst idiv eax y) + (if (location= quo eax) + (inst shl eax 3) + (inst lea quo (make-ea :qword :index eax :scale 8))) + (move rem edx))) + +(define-vop (fast-truncate-c/fixnum=>fixnum fast-safe-arith-op) + (:translate truncate) + (:args (x :scs (any-reg) :target eax)) + (:info y) + (:arg-types tagged-num (:constant (signed-byte 29))) + (:temporary (:sc signed-reg :offset eax-offset :target quo + :from :argument :to (:result 0)) eax) + (:temporary (:sc any-reg :offset edx-offset :target rem + :from :eval :to (:result 1)) edx) + (:temporary (:sc any-reg :from :eval :to :result) y-arg) + (:results (quo :scs (any-reg)) + (rem :scs (any-reg))) + (:result-types tagged-num tagged-num) + (:note "inline fixnum arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 30 + (move eax x) + (inst cqo) + (inst mov y-arg (fixnumize y)) + (inst idiv eax y-arg) + (if (location= quo eax) + (inst shl eax 3) + (inst lea quo (make-ea :qword :index eax :scale 8))) + (move rem edx))) + +(define-vop (fast-truncate/unsigned=>unsigned fast-safe-arith-op) + (:translate truncate) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg signed-stack))) + (:arg-types unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :target quo + :from (:argument 0) :to (:result 0)) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :target rem + :from (:argument 0) :to (:result 1)) edx) + (:results (quo :scs (unsigned-reg)) + (rem :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 33 + (let ((zero (generate-error-code vop division-by-zero-error x y))) + (if (sc-is y unsigned-reg) + (inst test y y) ; smaller instruction + (inst cmp y 0)) + (inst jmp :eq zero)) + (move eax x) + (inst xor edx edx) + (inst div eax y) + (move quo eax) + (move rem edx))) + +(define-vop (fast-truncate-c/unsigned=>unsigned fast-safe-arith-op) + (:translate truncate) + (:args (x :scs (unsigned-reg) :target eax)) + (:info y) + (:arg-types unsigned-num (:constant (unsigned-byte 32))) + (:temporary (:sc unsigned-reg :offset eax-offset :target quo + :from :argument :to (:result 0)) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :target rem + :from :eval :to (:result 1)) edx) + (:temporary (:sc unsigned-reg :from :eval :to :result) y-arg) + (:results (quo :scs (unsigned-reg)) + (rem :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:note "inline (unsigned-byte 32) arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 32 + (move eax x) + (inst xor edx edx) + (inst mov y-arg y) + (inst div eax y-arg) + (move quo eax) + (move rem edx))) + +(define-vop (fast-truncate/signed=>signed fast-safe-arith-op) + (:translate truncate) + (:args (x :scs (signed-reg) :target eax) + (y :scs (signed-reg signed-stack))) + (:arg-types signed-num signed-num) + (:temporary (:sc signed-reg :offset eax-offset :target quo + :from (:argument 0) :to (:result 0)) eax) + (:temporary (:sc signed-reg :offset edx-offset :target rem + :from (:argument 0) :to (:result 1)) edx) + (:results (quo :scs (signed-reg)) + (rem :scs (signed-reg))) + (:result-types signed-num signed-num) + (:note "inline (signed-byte 32) arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 33 + (let ((zero (generate-error-code vop division-by-zero-error x y))) + (if (sc-is y signed-reg) + (inst test y y) ; smaller instruction + (inst cmp y 0)) + (inst jmp :eq zero)) + (move eax x) + (inst cqo) + (inst idiv eax y) + (move quo eax) + (move rem edx))) + +(define-vop (fast-truncate-c/signed=>signed fast-safe-arith-op) + (:translate truncate) + (:args (x :scs (signed-reg) :target eax)) + (:info y) + (:arg-types signed-num (:constant (signed-byte 32))) + (:temporary (:sc signed-reg :offset eax-offset :target quo + :from :argument :to (:result 0)) eax) + (:temporary (:sc signed-reg :offset edx-offset :target rem + :from :eval :to (:result 1)) edx) + (:temporary (:sc signed-reg :from :eval :to :result) y-arg) + (:results (quo :scs (signed-reg)) + (rem :scs (signed-reg))) + (:result-types signed-num signed-num) + (:note "inline (signed-byte 32) arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 32 + (move eax x) + (inst cqo) + (inst mov y-arg y) + (inst idiv eax y-arg) + (move quo eax) + (move rem edx))) + + + +;;;; Shifting +(define-vop (fast-ash-c/fixnum=>fixnum) + (:translate ash) + (:policy :fast-safe) + (:args (number :scs (any-reg) :target result + :load-if (not (and (sc-is number any-reg control-stack) + (sc-is result any-reg control-stack) + (location= number result))))) + (:info amount) + (:arg-types tagged-num (:constant integer)) + (:results (result :scs (any-reg) + :load-if (not (and (sc-is number control-stack) + (sc-is result control-stack) + (location= number result))))) + (:result-types tagged-num) + (:note "inline ASH") + (:generator 2 + (cond ((and (= amount 1) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 2))) + ((and (= amount 2) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 4))) + ((and (= amount 3) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 8))) + (t + (move result number) + (cond ((plusp amount) + ;; We don't have to worry about overflow because of the + ;; result type restriction. + (inst shl result amount)) + ((zerop amount) ) + ((< amount -63) + (inst xor result result)) + (t + ;; shift too far then back again, to zero tag bits + (inst sar result (- 3 amount)) + (inst lea result + (make-ea :qword :index result :scale 8)))))))) + + +(define-vop (fast-ash-left/fixnum=>fixnum) + (:translate ash) + (:args (number :scs (any-reg) :target result + :load-if (not (and (sc-is number control-stack) + (sc-is result control-stack) + (location= number result)))) + (amount :scs (unsigned-reg) :target ecx)) + (:arg-types tagged-num positive-fixnum) + (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx) + (:results (result :scs (any-reg) :from (:argument 0) + :load-if (not (and (sc-is number control-stack) + (sc-is result control-stack) + (location= number result))))) + (:result-types tagged-num) + (:policy :fast-safe) + (:note "inline ASH") + (:generator 3 + (move result number) + (move ecx amount) + ;; The result-type ensures us that this shift will not overflow. + (inst shl result :cl))) + +(define-vop (fast-ash-c/signed=>signed) + (:translate ash) + (:policy :fast-safe) + (:args (number :scs (signed-reg) :target result + :load-if (not (and (sc-is number signed-stack) + (sc-is result signed-stack) + (location= number result))))) + (:info amount) + (:arg-types signed-num (:constant integer)) + (:results (result :scs (signed-reg) + :load-if (not (and (sc-is number signed-stack) + (sc-is result signed-stack) + (location= number result))))) + (:result-types signed-num) + (:note "inline ASH") + (:generator 3 + (cond ((and (= amount 1) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 2))) + ((and (= amount 2) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 4))) + ((and (= amount 3) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 8))) + (t + (move result number) + (cond ((plusp amount) (inst shl result amount)) + (t (inst sar result (min 63 (- amount))))))))) + +(define-vop (fast-ash-c/unsigned=>unsigned) + (:translate ash) + (:policy :fast-safe) + (:args (number :scs (unsigned-reg) :target result + :load-if (not (and (sc-is number unsigned-stack) + (sc-is result unsigned-stack) + (location= number result))))) + (:info amount) + (:arg-types unsigned-num (:constant integer)) + (:results (result :scs (unsigned-reg) + :load-if (not (and (sc-is number unsigned-stack) + (sc-is result unsigned-stack) + (location= number result))))) + (:result-types unsigned-num) + (:note "inline ASH") + (:generator 3 + (cond ((and (= amount 1) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 2))) + ((and (= amount 2) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 4))) + ((and (= amount 3) (not (location= number result))) + (inst lea result (make-ea :qword :index number :scale 8))) + (t + (move result number) + (cond ((< -64 amount 64) ;; XXXX + ;; this code is used both in ASH and ASH-MOD32, so + ;; be careful + (if (plusp amount) + (inst shl result amount) + (inst shr result (- amount)))) + (t (if (sc-is result unsigned-reg) + (inst xor result result) + (inst mov result 0)))))))) + +(define-vop (fast-ash-left/signed=>signed) + (:translate ash) + (:args (number :scs (signed-reg) :target result + :load-if (not (and (sc-is number signed-stack) + (sc-is result signed-stack) + (location= number result)))) + (amount :scs (unsigned-reg) :target ecx)) + (:arg-types signed-num positive-fixnum) + (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx) + (:results (result :scs (signed-reg) :from (:argument 0) + :load-if (not (and (sc-is number signed-stack) + (sc-is result signed-stack) + (location= number result))))) + (:result-types signed-num) + (:policy :fast-safe) + (:note "inline ASH") + (:generator 4 + (move result number) + (move ecx amount) + (inst shl result :cl))) + +(define-vop (fast-ash-left/unsigned=>unsigned) + (:translate ash) + (:args (number :scs (unsigned-reg) :target result + :load-if (not (and (sc-is number unsigned-stack) + (sc-is result unsigned-stack) + (location= number result)))) + (amount :scs (unsigned-reg) :target ecx)) + (:arg-types unsigned-num positive-fixnum) + (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx) + (:results (result :scs (unsigned-reg) :from (:argument 0) + :load-if (not (and (sc-is number unsigned-stack) + (sc-is result unsigned-stack) + (location= number result))))) + (:result-types unsigned-num) + (:policy :fast-safe) + (:note "inline ASH") + (:generator 4 + (move result number) + (move ecx amount) + (inst shl result :cl))) + +(define-vop (fast-ash/signed=>signed) + (:translate ash) + (:policy :fast-safe) + (:args (number :scs (signed-reg) :target result) + (amount :scs (signed-reg) :target ecx)) + (:arg-types signed-num signed-num) + (:results (result :scs (signed-reg) :from (:argument 0))) + (:result-types signed-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:note "inline ASH") + (:generator 5 + (move result number) + (move ecx amount) + (inst or ecx ecx) + (inst jmp :ns positive) + (inst neg ecx) + (inst cmp ecx 63) + (inst jmp :be okay) + (inst mov ecx 63) + OKAY + (inst sar result :cl) + (inst jmp done) + + POSITIVE + ;; The result-type ensures us that this shift will not overflow. + (inst shl result :cl) + + DONE)) + +(define-vop (fast-ash/unsigned=>unsigned) + (:translate ash) + (:policy :fast-safe) + (:args (number :scs (unsigned-reg) :target result) + (amount :scs (signed-reg) :target ecx)) + (:arg-types unsigned-num signed-num) + (:results (result :scs (unsigned-reg) :from (:argument 0))) + (:result-types unsigned-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:note "inline ASH") + (:generator 5 + (move result number) + (move ecx amount) + (inst or ecx ecx) + (inst jmp :ns positive) + (inst neg ecx) + (inst cmp ecx 63) + (inst jmp :be okay) + (inst xor result result) + (inst jmp done) + OKAY + (inst shr result :cl) + (inst jmp done) + + POSITIVE + ;; The result-type ensures us that this shift will not overflow. + (inst shl result :cl) + + DONE)) + +(in-package "SB!C") + +(defknown %lea (integer integer (member 1 2 4 8 16) (signed-byte 64)) + integer + (foldable flushable movable)) + +(defoptimizer (%lea derive-type) ((base index scale disp)) + (when (and (constant-lvar-p scale) + (constant-lvar-p disp)) + (let ((scale (lvar-value scale)) + (disp (lvar-value disp)) + (base-type (lvar-type base)) + (index-type (lvar-type index))) + (when (and (numeric-type-p base-type) + (numeric-type-p index-type)) + (let ((base-lo (numeric-type-low base-type)) + (base-hi (numeric-type-high base-type)) + (index-lo (numeric-type-low index-type)) + (index-hi (numeric-type-high index-type))) + (make-numeric-type :class 'integer + :complexp :real + :low (when (and base-lo index-lo) + (+ base-lo (* index-lo scale) disp)) + :high (when (and base-hi index-hi) + (+ base-hi (* index-hi scale) disp)))))))) + +(defun %lea (base index scale disp) + (+ base (* index scale) disp)) + +(in-package "SB!VM") + +(define-vop (%lea/unsigned=>unsigned) + (:translate %lea) + (:policy :fast-safe) + (:args (base :scs (unsigned-reg)) + (index :scs (unsigned-reg))) + (:info scale disp) + (:arg-types unsigned-num unsigned-num + (:constant (member 1 2 4 8)) + (:constant (signed-byte 64))) + (:results (r :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 5 + (inst lea r (make-ea :qword :base base :index index + :scale scale :disp disp)))) + +(define-vop (%lea/signed=>signed) + (:translate %lea) + (:policy :fast-safe) + (:args (base :scs (signed-reg)) + (index :scs (signed-reg))) + (:info scale disp) + (:arg-types signed-num signed-num + (:constant (member 1 2 4 8)) + (:constant (signed-byte 64))) + (:results (r :scs (signed-reg))) + (:result-types signed-num) + (:generator 4 + (inst lea r (make-ea :qword :base base :index index + :scale scale :disp disp)))) + +(define-vop (%lea/fixnum=>fixnum) + (:translate %lea) + (:policy :fast-safe) + (:args (base :scs (any-reg)) + (index :scs (any-reg))) + (:info scale disp) + (:arg-types tagged-num tagged-num + (:constant (member 1 2 4 8)) + (:constant (signed-byte 64))) + (:results (r :scs (any-reg))) + (:result-types tagged-num) + (:generator 3 + (inst lea r (make-ea :qword :base base :index index + :scale scale :disp disp)))) + +;;; FIXME: before making knowledge of this too public, it needs to be +;;; fixed so that it's actually _faster_ than the non-CMOV version; at +;;; least on my Celeron-XXX laptop, this version is marginally slower +;;; than the above version with branches. -- CSR, 2003-09-04 +(define-vop (fast-cmov-ash/unsigned=>unsigned) + (:translate ash) + (:policy :fast-safe) + (:args (number :scs (unsigned-reg) :target result) + (amount :scs (signed-reg) :target ecx)) + (:arg-types unsigned-num signed-num) + (:results (result :scs (unsigned-reg) :from (:argument 0))) + (:result-types unsigned-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:temporary (:sc any-reg :from (:eval 0) :to (:eval 1)) zero) + (:note "inline ASH") + (:guard (member :cmov *backend-subfeatures*)) + (:generator 4 + (move result number) + (move ecx amount) + (inst or ecx ecx) + (inst jmp :ns positive) + (inst neg ecx) + (inst xor zero zero) + (inst shr result :cl) + (inst cmp ecx 63) + (inst cmov :nbe result zero) + (inst jmp done) + + POSITIVE + ;; The result-type ensures us that this shift will not overflow. + (inst shl result :cl) + + DONE)) + +;;; Note: documentation for this function is wrong - rtfm +(define-vop (signed-byte-64-len) + (:translate integer-length) + (:note "inline (signed-byte 32) integer-length") + (:policy :fast-safe) + (:args (arg :scs (signed-reg) :target res)) + (:arg-types signed-num) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 28 + (move res arg) + (inst cmp res 0) + (inst jmp :ge POS) + (inst not res) + POS + (inst bsr res res) + (inst jmp :z zero) + (inst inc res) + (inst jmp done) + ZERO + (inst xor res res) + DONE)) + +(define-vop (unsigned-byte-64-len) + (:translate integer-length) + (:note "inline (unsigned-byte 32) integer-length") + (:policy :fast-safe) + (:args (arg :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:generator 26 + (inst bsr res arg) + (inst jmp :z zero) + (inst inc res) + (inst jmp done) + ZERO + (inst xor res res) + DONE)) + + +(define-vop (unsigned-byte-64-count) + (:translate logcount) + (:note "inline (unsigned-byte 64) logcount") + (:policy :fast-safe) + (:args (arg :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:temporary (:sc unsigned-reg :from (:argument 0)) temp) + (:temporary (:sc unsigned-reg :from (:argument 0)) t1) + (:generator 60 + (move result arg) + + (inst mov temp result) + (inst shr temp 1) + (inst and result #x55555555) ; note these masks will restrict the + (inst and temp #x55555555) ; count to the lower half of arg + (inst add result temp) + + (inst mov temp result) + (inst shr temp 2) + (inst and result #x33333333) + (inst and temp #x33333333) + (inst add result temp) + + (inst mov temp result) + (inst shr temp 4) + (inst and result #x0f0f0f0f) + (inst and temp #x0f0f0f0f) + (inst add result temp) + + (inst mov temp result) + (inst shr temp 8) + (inst and result #x00ff00ff) + (inst and temp #x00ff00ff) + (inst add result temp) + + (inst mov temp result) + (inst shr temp 16) + (inst and result #x0000ffff) + (inst and temp #x0000ffff) + (inst add result temp) + + ;;; now do the upper half + (move t1 arg) + (inst bswap t1) + + (inst mov temp t1) + (inst shr temp 1) + (inst and t1 #x55555555) + (inst and temp #x55555555) + (inst add t1 temp) + + (inst mov temp t1) + (inst shr temp 2) + (inst and t1 #x33333333) + (inst and temp #x33333333) + (inst add t1 temp) + + (inst mov temp t1) + (inst shr temp 4) + (inst and t1 #x0f0f0f0f) + (inst and temp #x0f0f0f0f) + (inst add t1 temp) + + (inst mov temp t1) + (inst shr temp 8) + (inst and t1 #x00ff00ff) + (inst and temp #x00ff00ff) + (inst add t1 temp) + + (inst mov temp t1) + (inst shr temp 16) + (inst and t1 #x0000ffff) + (inst and temp #x0000ffff) + (inst add t1 temp) + (inst add result t1))) + + + +;;;; binary conditional VOPs + +(define-vop (fast-conditional) + (:conditional) + (:info target not-p) + (:effects) + (:affected) + (:policy :fast-safe)) + +;;; constant variants are declared for 32 bits not 64 bits, because +;;; loading a 64 bit constant is silly + +(define-vop (fast-conditional/fixnum fast-conditional) + (:args (x :scs (any-reg) + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg)))) + (y :scs (any-reg control-stack))) + (:arg-types tagged-num tagged-num) + (:note "inline fixnum comparison")) + +(define-vop (fast-conditional-c/fixnum fast-conditional/fixnum) + (:args (x :scs (any-reg control-stack))) + (:arg-types tagged-num (:constant (signed-byte 29))) + (:info target not-p y)) + +(define-vop (fast-conditional/signed fast-conditional) + (:args (x :scs (signed-reg) + :load-if (not (and (sc-is x signed-stack) + (sc-is y signed-reg)))) + (y :scs (signed-reg signed-stack))) + (:arg-types signed-num signed-num) + (:note "inline (signed-byte 32) comparison")) + +(define-vop (fast-conditional-c/signed fast-conditional/signed) + (:args (x :scs (signed-reg signed-stack))) + (:arg-types signed-num (:constant (signed-byte 32))) + (:info target not-p y)) + +(define-vop (fast-conditional/unsigned fast-conditional) + (:args (x :scs (unsigned-reg) + :load-if (not (and (sc-is x unsigned-stack) + (sc-is y unsigned-reg)))) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:note "inline (unsigned-byte 32) comparison")) + +(define-vop (fast-conditional-c/unsigned fast-conditional/unsigned) + (:args (x :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num (:constant (unsigned-byte 32))) + (:info target not-p y)) + + +(macrolet ((define-conditional-vop (tran cond unsigned not-cond not-unsigned) + `(progn + ,@(mapcar + (lambda (suffix cost signed) + `(define-vop (;; FIXME: These could be done more + ;; cleanly with SYMBOLICATE. + ,(intern (format nil "~:@(FAST-IF-~A~A~)" + tran suffix)) + ,(intern + (format nil "~:@(FAST-CONDITIONAL~A~)" + suffix))) + (:translate ,tran) + (:generator ,cost + (inst cmp x + ,(if (eq suffix '-c/fixnum) + '(fixnumize y) + 'y)) + (inst jmp (if not-p + ,(if signed + not-cond + not-unsigned) + ,(if signed + cond + unsigned)) + target)))) + '(/fixnum -c/fixnum /signed -c/signed /unsigned -c/unsigned) +; '(/fixnum /signed /unsigned) + '(4 3 6 5 6 5) + '(t t t t nil nil))))) + + (define-conditional-vop < :l :b :ge :ae) + (define-conditional-vop > :g :a :le :be)) + +(define-vop (fast-if-eql/signed fast-conditional/signed) + (:translate eql) + (:generator 6 + (inst cmp x y) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (fast-if-eql-c/signed fast-conditional-c/signed) + (:translate eql) + (:generator 5 + (cond ((and (sc-is x signed-reg) (zerop y)) + (inst test x x)) ; smaller instruction + (t + (inst cmp x y))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (fast-if-eql/unsigned fast-conditional/unsigned) + (:translate eql) + (:generator 6 + (inst cmp x y) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (fast-if-eql-c/unsigned fast-conditional-c/unsigned) + (:translate eql) + (:generator 5 + (cond ((and (sc-is x unsigned-reg) (zerop y)) + (inst test x x)) ; smaller instruction + (t + (inst cmp x y))) + (inst jmp (if not-p :ne :e) target))) + +;;; EQL/FIXNUM is funny because the first arg can be of any type, not just a +;;; known fixnum. + +;;; These versions specify a fixnum restriction on their first arg. We have +;;; also generic-eql/fixnum VOPs which are the same, but have no restriction on +;;; the first arg and a higher cost. The reason for doing this is to prevent +;;; fixnum specific operations from being used on word integers, spuriously +;;; consing the argument. + +(define-vop (fast-eql/fixnum fast-conditional) + (:args (x :scs (any-reg) + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg)))) + (y :scs (any-reg control-stack))) + (:arg-types tagged-num tagged-num) + (:note "inline fixnum comparison") + (:translate eql) + (:generator 4 + (inst cmp x y) + (inst jmp (if not-p :ne :e) target))) +(define-vop (generic-eql/fixnum fast-eql/fixnum) + (:args (x :scs (any-reg descriptor-reg) + :load-if (not (and (sc-is x control-stack) + (sc-is y any-reg)))) + (y :scs (any-reg control-stack))) + (:arg-types * tagged-num) + (:variant-cost 7)) + + +(define-vop (fast-eql-c/fixnum fast-conditional/fixnum) + (:args (x :scs (any-reg control-stack))) + (:arg-types tagged-num (:constant (signed-byte 29))) + (:info target not-p y) + (:translate eql) + (:generator 2 + (cond ((and (sc-is x any-reg) (zerop y)) + (inst test x x)) ; smaller instruction + (t + (inst cmp x (fixnumize y)))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (generic-eql-c/fixnum fast-eql-c/fixnum) + (:args (x :scs (any-reg descriptor-reg control-stack))) + (:arg-types * (:constant (signed-byte 29))) + (:variant-cost 6)) + +;;;; 32-bit logical operations + +(define-vop (merge-bits) + (:translate merge-bits) + (:args (shift :scs (signed-reg unsigned-reg) :target ecx) + (prev :scs (unsigned-reg) :target result) + (next :scs (unsigned-reg))) + (:arg-types tagged-num unsigned-num unsigned-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 0)) ecx) + (:results (result :scs (unsigned-reg) :from (:argument 1))) + (:result-types unsigned-num) + (:policy :fast-safe) + (:generator 4 + (move ecx shift) + (move result prev) + (inst shrd result next :cl))) + +(define-source-transform 64bit-logical-not (x) + `(logand (lognot (the (unsigned-byte 64) ,x)) #.(1- (ash 1 64)))) + +(deftransform 64bit-logical-and ((x y)) + '(logand x y)) + +(define-source-transform 64bit-logical-nand (x y) + `(64bit-logical-not (64bit-logical-and ,x ,y))) + +(deftransform 64bit-logical-or ((x y)) + '(logior x y)) + +(define-source-transform 64bit-logical-nor (x y) + `(64bit-logical-not (64bit-logical-or ,x ,y))) + +(deftransform 64bit-logical-xor ((x y)) + '(logxor x y)) + +(define-source-transform 64bit-logical-eqv (x y) + `(64bit-logical-not (64bit-logical-xor ,x ,y))) + +(define-source-transform 64bit-logical-orc1 (x y) + `(64bit-logical-or (64bit-logical-not ,x) ,y)) + +(define-source-transform 64bit-logical-orc2 (x y) + `(64bit-logical-or ,x (64bit-logical-not ,y))) + +(define-source-transform 64bit-logical-andc1 (x y) + `(64bit-logical-and (64bit-logical-not ,x) ,y)) + +(define-source-transform 64bit-logical-andc2 (x y) + `(64bit-logical-and ,x (64bit-logical-not ,y))) + +;;; Only the lower 6 bits of the shift amount are significant. +(define-vop (shift-towards-someplace) + (:policy :fast-safe) + (:args (num :scs (unsigned-reg) :target r) + (amount :scs (signed-reg) :target ecx)) + (:arg-types unsigned-num tagged-num) + (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx) + (:results (r :scs (unsigned-reg) :from (:argument 0))) + (:result-types unsigned-num)) + +(define-vop (shift-towards-start shift-towards-someplace) + (:translate shift-towards-start) + (:note "SHIFT-TOWARDS-START") + (:generator 1 + (move r num) + (move ecx amount) + (inst shr r :cl))) + +(define-vop (shift-towards-end shift-towards-someplace) + (:translate shift-towards-end) + (:note "SHIFT-TOWARDS-END") + (:generator 1 + (move r num) + (move ecx amount) + (inst shl r :cl))) + +;;;; Modular functions + +(define-modular-fun +-mod64 (x y) + 64) +(define-vop (fast-+-mod64/unsigned=>unsigned fast-+/unsigned=>unsigned) + (:translate +-mod64)) +(define-vop (fast-+-mod64-c/unsigned=>unsigned fast-+-c/unsigned=>unsigned) + (:translate +-mod64)) +(define-modular-fun --mod64 (x y) - 64) +(define-vop (fast---mod64/unsigned=>unsigned fast--/unsigned=>unsigned) + (:translate --mod64)) +(define-vop (fast---mod64-c/unsigned=>unsigned fast---c/unsigned=>unsigned) + (:translate --mod64)) + +(define-modular-fun *-mod64 (x y) * 64) +(define-vop (fast-*-mod64/unsigned=>unsigned fast-*/unsigned=>unsigned) + (:translate *-mod64)) +;;; (no -C variant as x86 MUL instruction doesn't take an immediate) + +(define-vop (fast-ash-left-mod64-c/unsigned=>unsigned + fast-ash-c/unsigned=>unsigned) + (:translate ash-left-mod64)) + +(in-package "SB!C") + +(defknown sb!vm::%lea-mod64 (integer integer (member 1 2 4 8) (signed-byte 64)) + (unsigned-byte 64) + (foldable flushable movable)) + +(define-modular-fun-optimizer %lea ((base index scale disp) :width width) + (when (and (<= width 64) + (constant-lvar-p scale) + (constant-lvar-p disp)) + (cut-to-width base width) + (cut-to-width index width) + 'sb!vm::%lea-mod64)) + +#+sb-xc-host +(defun sb!vm::%lea-mod64 (base index scale disp) + (ldb (byte 64 0) (%lea base index scale disp))) +#-sb-xc-host +(defun sb!vm::%lea-mod64 (base index scale disp) + (let ((base (logand base #xffffffffffffffff)) + (index (logand index #xffffffffffffffff))) + ;; can't use modular version of %LEA, as we only have VOPs for + ;; constant SCALE and DISP. + (ldb (byte 64 0) (+ base (* index scale) disp)))) + +(in-package "SB!VM") + +(define-vop (%lea-mod64/unsigned=>unsigned + %lea/unsigned=>unsigned) + (:translate %lea-mod64)) + +;;; logical operations +(define-modular-fun lognot-mod64 (x) lognot 64) +(define-vop (lognot-mod64/unsigned=>unsigned) + (:translate lognot-mod64) + (:args (x :scs (unsigned-reg unsigned-stack) :target r + :load-if (not (and (sc-is x unsigned-stack) + (sc-is r unsigned-stack) + (location= x r))))) + (:arg-types unsigned-num) + (:results (r :scs (unsigned-reg) + :load-if (not (and (sc-is x unsigned-stack) + (sc-is r unsigned-stack) + (location= x r))))) + (:result-types unsigned-num) + (:policy :fast-safe) + (:generator 1 + (move r x) + (inst not r))) + +(define-modular-fun logxor-mod64 (x y) logxor 64) +(define-vop (fast-logxor-mod64/unsigned=>unsigned + fast-logxor/unsigned=>unsigned) + (:translate logxor-mod64)) +(define-vop (fast-logxor-mod64-c/unsigned=>unsigned + fast-logxor-c/unsigned=>unsigned) + (:translate logxor-mod64)) + +(define-source-transform logeqv (&rest args) + (if (oddp (length args)) + `(logxor ,@args) + `(lognot (logxor ,@args)))) +(define-source-transform logandc1 (x y) + `(logand (lognot ,x) ,y)) +(define-source-transform logandc2 (x y) + `(logand ,x (lognot ,y))) +(define-source-transform logorc1 (x y) + `(logior (lognot ,x) ,y)) +(define-source-transform logorc2 (x y) + `(logior ,x (lognot ,y))) +(define-source-transform lognor (x y) + `(lognot (logior ,x ,y))) +(define-source-transform lognand (x y) + `(lognot (logand ,x ,y))) + +;;;; bignum stuff + +(define-vop (bignum-length get-header-data) + (:translate sb!bignum:%bignum-length) + (:policy :fast-safe)) + +(define-vop (bignum-set-length set-header-data) + (:translate sb!bignum:%bignum-set-length) + (:policy :fast-safe)) + +(define-full-reffer bignum-ref * bignum-digits-offset other-pointer-lowtag + (unsigned-reg) unsigned-num sb!bignum:%bignum-ref) + +(define-full-setter bignum-set * bignum-digits-offset other-pointer-lowtag + (unsigned-reg) unsigned-num sb!bignum:%bignum-set) + +(define-vop (digit-0-or-plus) + (:translate sb!bignum:%digit-0-or-plusp) + (:policy :fast-safe) + (:args (digit :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:conditional) + (:info target not-p) + (:generator 3 + (inst or digit digit) + (inst jmp (if not-p :s :ns) target))) + + +;;; For add and sub with carry the sc of carry argument is any-reg so +;;; the it may be passed as a fixnum or word and thus may be 0, 1, or +;;; 4. This is easy to deal with and may save a fixnum-word +;;; conversion. +(define-vop (add-w/carry) + (:translate sb!bignum:%add-with-carry) + (:policy :fast-safe) + (:args (a :scs (unsigned-reg) :target result) + (b :scs (unsigned-reg unsigned-stack) :to :eval) + (c :scs (any-reg) :target temp)) + (:arg-types unsigned-num unsigned-num positive-fixnum) + (:temporary (:sc any-reg :from (:argument 2) :to :eval) temp) + (:results (result :scs (unsigned-reg) :from (:argument 0)) + (carry :scs (unsigned-reg))) + (:result-types unsigned-num positive-fixnum) + (:generator 4 + (move result a) + (move temp c) + (inst neg temp) ; Set the carry flag to 0 if c=0 else to 1 + (inst adc result b) + (inst mov carry 0) + (inst adc carry carry))) + +;;; Note: the borrow is the oppostite of the x86 convention - 1 for no +;;; borrow and 0 for a borrow. +(define-vop (sub-w/borrow) + (:translate sb!bignum:%subtract-with-borrow) + (:policy :fast-safe) + (:args (a :scs (unsigned-reg) :to :eval :target result) + (b :scs (unsigned-reg unsigned-stack) :to :result) + (c :scs (any-reg control-stack))) + (:arg-types unsigned-num unsigned-num positive-fixnum) + (:results (result :scs (unsigned-reg) :from :eval) + (borrow :scs (unsigned-reg))) + (:result-types unsigned-num positive-fixnum) + (:generator 5 + (inst cmp c 1) ; Set the carry flag to 1 if c=0 else to 0 + (move result a) + (inst sbb result b) + (inst mov borrow 0) + (inst adc borrow borrow) + (inst xor borrow 1))) + + +(define-vop (bignum-mult-and-add-3-arg) + (:translate sb!bignum:%multiply-and-add) + (:policy :fast-safe) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack)) + (carry-in :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0) + :to (:result 1) :target lo) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (unsigned-reg)) + (lo :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:generator 20 + (move eax x) + (inst mul eax y) + (inst add eax carry-in) + (inst adc edx 0) + (move hi edx) + (move lo eax))) + +(define-vop (bignum-mult-and-add-4-arg) + (:translate sb!bignum:%multiply-and-add) + (:policy :fast-safe) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack)) + (prev :scs (unsigned-reg unsigned-stack)) + (carry-in :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0) + :to (:result 1) :target lo) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (unsigned-reg)) + (lo :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:generator 20 + (move eax x) + (inst mul eax y) + (inst add eax prev) + (inst adc edx 0) + (inst add eax carry-in) + (inst adc edx 0) + (move hi edx) + (move lo eax))) + + +(define-vop (bignum-mult) + (:translate sb!bignum:%multiply) + (:policy :fast-safe) + (:args (x :scs (unsigned-reg) :target eax) + (y :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0) + :to (:result 1) :target lo) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1) + :to (:result 0) :target hi) edx) + (:results (hi :scs (unsigned-reg)) + (lo :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:generator 20 + (move eax x) + (inst mul eax y) + (move hi edx) + (move lo eax))) + +(define-vop (bignum-lognot lognot-mod64/unsigned=>unsigned) + (:translate sb!bignum:%lognot)) + +(define-vop (fixnum-to-digit) + (:translate sb!bignum:%fixnum-to-digit) + (:policy :fast-safe) + (:args (fixnum :scs (any-reg control-stack) :target digit)) + (:arg-types tagged-num) + (:results (digit :scs (unsigned-reg) + :load-if (not (and (sc-is fixnum control-stack) + (sc-is digit unsigned-stack) + (location= fixnum digit))))) + (:result-types unsigned-num) + (:generator 1 + (move digit fixnum) + (inst sar digit 3))) + +(define-vop (bignum-floor) + (:translate sb!bignum:%floor) + (:policy :fast-safe) + (:args (div-high :scs (unsigned-reg) :target edx) + (div-low :scs (unsigned-reg) :target eax) + (divisor :scs (unsigned-reg unsigned-stack))) + (:arg-types unsigned-num unsigned-num unsigned-num) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 1) + :to (:result 0) :target quo) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 0) + :to (:result 1) :target rem) edx) + (:results (quo :scs (unsigned-reg)) + (rem :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:generator 300 + (move edx div-high) + (move eax div-low) + (inst div eax divisor) + (move quo eax) + (move rem edx))) + +(define-vop (signify-digit) + (:translate sb!bignum:%fixnum-digit-with-correct-sign) + (:policy :fast-safe) + (:args (digit :scs (unsigned-reg unsigned-stack) :target res)) + (:arg-types unsigned-num) + (:results (res :scs (any-reg signed-reg) + :load-if (not (and (sc-is digit unsigned-stack) + (sc-is res control-stack signed-stack) + (location= digit res))))) + (:result-types signed-num) + (:generator 1 + (move res digit) + (when (sc-is res any-reg control-stack) + (inst shl res 3)))) + +(define-vop (digit-ashr) + (:translate sb!bignum:%ashr) + (:policy :fast-safe) + (:args (digit :scs (unsigned-reg unsigned-stack) :target result) + (count :scs (unsigned-reg) :target ecx)) + (:arg-types unsigned-num positive-fixnum) + (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx) + (:results (result :scs (unsigned-reg) :from (:argument 0) + :load-if (not (and (sc-is result unsigned-stack) + (location= digit result))))) + (:result-types unsigned-num) + (:generator 1 + (move result digit) + (move ecx count) + (inst sar result :cl))) + +(define-vop (digit-lshr digit-ashr) + (:translate sb!bignum:%digit-logical-shift-right) + (:generator 1 + (move result digit) + (move ecx count) + (inst shr result :cl))) + +(define-vop (digit-ashl digit-ashr) + (:translate sb!bignum:%ashl) + (:generator 1 + (move result digit) + (move ecx count) + (inst shl result :cl))) + +;;;; static functions + +(define-static-fun two-arg-/ (x y) :translate /) + +(define-static-fun two-arg-gcd (x y) :translate gcd) +(define-static-fun two-arg-lcm (x y) :translate lcm) + +(define-static-fun two-arg-and (x y) :translate logand) +(define-static-fun two-arg-ior (x y) :translate logior) +(define-static-fun two-arg-xor (x y) :translate logxor) + + +(in-package "SB!C") + +;;; This is essentially a straight implementation of the algorithm in +;;; "Strength Reduction of Multiplications by Integer Constants", +;;; Youfeng Wu, ACM SIGPLAN Notices, Vol. 30, No.2, February 1995. +(defun basic-decompose-multiplication (arg num n-bits condensed) + (case (aref condensed 0) + (0 + (let ((tmp (min 3 (aref condensed 1)))) + (decf (aref condensed 1) tmp) + `(logand #xffffffff + (%lea ,arg + ,(decompose-multiplication + arg (ash (1- num) (- tmp)) (1- n-bits) (subseq condensed 1)) + ,(ash 1 tmp) 0)))) + ((1 2 3) + (let ((r0 (aref condensed 0))) + (incf (aref condensed 1) r0) + `(logand #xffffffff + (%lea ,(decompose-multiplication + arg (- num (ash 1 r0)) (1- n-bits) (subseq condensed 1)) + ,arg + ,(ash 1 r0) 0)))) + (t (let ((r0 (aref condensed 0))) + (setf (aref condensed 0) 0) + `(logand #xffffffff + (ash ,(decompose-multiplication + arg (ash num (- r0)) n-bits condensed) + ,r0)))))) + +(defun decompose-multiplication (arg num n-bits condensed) + (cond + ((= n-bits 0) 0) + ((= num 1) arg) + ((= n-bits 1) + `(logand #xffffffff (ash ,arg ,(1- (integer-length num))))) + ((let ((max 0) (end 0)) + (loop for i from 2 to (length condensed) + for j = (reduce #'+ (subseq condensed 0 i)) + when (and (> (- (* 2 i) 3 j) max) + (< (+ (ash 1 (1+ j)) + (ash (ldb (byte (- 64 (1+ j)) (1+ j)) num) + (1+ j))) + (ash 1 64))) + do (setq max (- (* 2 i) 3 j) + end i)) + (when (> max 0) + (let ((j (reduce #'+ (subseq condensed 0 end)))) + (let ((n2 (+ (ash 1 (1+ j)) + (ash (ldb (byte (- 64 (1+ j)) (1+ j)) num) (1+ j)))) + (n1 (1+ (ldb (byte (1+ j) 0) (lognot num))))) + `(logand #xffffffff + (- ,(optimize-multiply arg n2) ,(optimize-multiply arg n1)))))))) + ((dolist (i '(9 5 3)) + (when (integerp (/ num i)) + (when (< (logcount (/ num i)) (logcount num)) + (let ((x (gensym))) + (return `(let ((,x ,(optimize-multiply arg (/ num i)))) + (logand #xffffffff + (%lea ,x ,x (1- ,i) 0))))))))) + (t (basic-decompose-multiplication arg num n-bits condensed)))) + +(defun optimize-multiply (arg x) + (let* ((n-bits (logcount x)) + (condensed (make-array n-bits))) + (let ((count 0) (bit 0)) + (dotimes (i 64) + (cond ((logbitp i x) + (setf (aref condensed bit) count) + (setf count 1) + (incf bit)) + (t (incf count))))) + (decompose-multiplication arg x n-bits condensed))) + +(defun *-transformer (y) + (cond + (t (give-up-ir1-transform)) + ((= y (ash 1 (integer-length y))) + ;; there's a generic transform for y = 2^k + (give-up-ir1-transform)) + ((member y '(3 5 9)) + ;; we can do these multiplications directly using LEA + `(%lea x x ,(1- y) 0)) + ((member :pentium4 *backend-subfeatures*) + ;; the pentium4's multiply unit is reportedly very good + (give-up-ir1-transform)) + ;; FIXME: should make this more fine-grained. If nothing else, + ;; there should probably be a cutoff of about 9 instructions on + ;; pentium-class machines. + (t (optimize-multiply 'x y)))) + +(deftransform * ((x y) + ((unsigned-byte 64) (constant-arg (unsigned-byte 64))) + (unsigned-byte 64)) + "recode as leas, shifts and adds" + (let ((y (lvar-value y))) + (*-transformer y))) + +(deftransform sb!vm::*-mod64 + ((x y) ((unsigned-byte 64) (constant-arg (unsigned-byte 64))) + (unsigned-byte 64)) + "recode as leas, shifts and adds" + (let ((y (lvar-value y))) + (*-transformer y))) + +;;; FIXME: we should also be able to write an optimizer or two to +;;; convert (+ (* x 2) 17), (- (* x 9) 5) to a %LEA. diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp new file mode 100644 index 0000000..27daf0c --- /dev/null +++ b/src/compiler/x86-64/array.lisp @@ -0,0 +1,1396 @@ +;;;; array operations for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; allocator for the array header + +(define-vop (make-array-header) + (:translate make-array-header) + (:policy :fast-safe) + (:args (type :scs (any-reg)) + (rank :scs (any-reg))) + (:arg-types positive-fixnum positive-fixnum) + (:temporary (:sc any-reg :to :eval) bytes) + (:temporary (:sc any-reg :to :result) header) + (:results (result :scs (descriptor-reg) :from :eval)) + (:node-var node) + (:generator 13 + (inst lea bytes + (make-ea :qword :base rank + :disp (+ (* (1+ array-dimensions-offset) n-word-bytes) + lowtag-mask))) + (inst and bytes (lognot lowtag-mask)) + (inst lea header (make-ea :qword :base rank + :disp (fixnumize (1- array-dimensions-offset)))) + (inst shl header n-widetag-bits) + (inst or header type) + (inst shr header (1- n-widetag-bits)) ;XXX was naked 2, am guessing + (pseudo-atomic + (allocation result bytes node) + (inst lea result (make-ea :qword :base result :disp other-pointer-lowtag)) + (storew header result 0 other-pointer-lowtag)))) + +;;;; additional accessors and setters for the array header +(define-full-reffer %array-dimension * + array-dimensions-offset other-pointer-lowtag + (any-reg) positive-fixnum sb!kernel:%array-dimension) + +(define-full-setter %set-array-dimension * + array-dimensions-offset other-pointer-lowtag + (any-reg) positive-fixnum sb!kernel:%set-array-dimension) + +(define-vop (array-rank-vop) + (:translate sb!kernel:%array-rank) + (:policy :fast-safe) + (:args (x :scs (descriptor-reg))) + (:results (res :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 6 + (loadw res x 0 other-pointer-lowtag) + (inst shr res n-widetag-bits) + (inst sub res (1- array-dimensions-offset)))) + +;;;; bounds checking routine + +;;; Note that the immediate SC for the index argument is disabled +;;; because it is not possible to generate a valid error code SC for +;;; an immediate value. +;;; +;;; FIXME: As per the KLUDGE note explaining the :IGNORE-FAILURE-P +;;; flag in build-order.lisp-expr, compiling this file causes warnings +;;; Argument FOO to VOP CHECK-BOUND has SC restriction +;;; DESCRIPTOR-REG which is not allowed by the operand type: +;;; (:OR POSITIVE-FIXNUM) +;;; CSR's message "format ~/ /" on sbcl-devel 2002-03-12 contained +;;; a possible patch, described as +;;; Another patch is included more for information than anything -- +;;; removing the descriptor-reg SCs from the CHECK-BOUND vop in +;;; x86/array.lisp seems to allow that file to compile without error[*], +;;; and build; I haven't tested rebuilding capability, but I'd be +;;; surprised if there were a problem. I'm not certain that this is the +;;; correct fix, though, as the restrictions on the arguments to the VOP +;;; aren't the same as in the sparc and alpha ports, where, incidentally, +;;; the corresponding file builds without error currently. +;;; Since neither of us (CSR or WHN) was quite sure that this is the +;;; right thing, I've just recorded the patch here in hopes it might +;;; help when someone attacks this problem again: +;;; diff -u -r1.7 array.lisp +;;; --- src/compiler/x86/array.lisp 11 Oct 2001 14:05:26 -0000 1.7 +;;; +++ src/compiler/x86/array.lisp 12 Mar 2002 12:23:37 -0000 +;;; @@ -76,10 +76,10 @@ +;;; (:translate %check-bound) +;;; (:policy :fast-safe) +;;; (:args (array :scs (descriptor-reg)) +;;; - (bound :scs (any-reg descriptor-reg)) +;;; - (index :scs (any-reg descriptor-reg #+nil immediate) :target result)) +;;; + (bound :scs (any-reg)) +;;; + (index :scs (any-reg #+nil immediate) :target result)) +;;; (:arg-types * positive-fixnum tagged-num) +;;; - (:results (result :scs (any-reg descriptor-reg))) +;;; + (:results (result :scs (any-reg))) +;;; (:result-types positive-fixnum) +;;; (:vop-var vop) +;;; (:save-p :compute-only) +(define-vop (check-bound) + (:translate %check-bound) + (:policy :fast-safe) + (:args (array :scs (descriptor-reg)) + (bound :scs (any-reg descriptor-reg)) + (index :scs (any-reg descriptor-reg) :target result)) +; (:arg-types * positive-fixnum tagged-num) + (:results (result :scs (any-reg descriptor-reg))) + ; (:result-types positive-fixnum) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (let ((error (generate-error-code vop invalid-array-index-error + array bound index)) + (index (if (sc-is index immediate) + (fixnumize (tn-value index)) + index))) + (inst cmp bound index) + ;; We use below-or-equal even though it's an unsigned test, + ;; because negative indexes appear as large unsigned numbers. + ;; Therefore, we get the <0 and >=bound test all rolled into one. + (inst jmp :be error) + (unless (and (tn-p index) (location= result index)) + (inst mov result index))))) + +;;;; accessors/setters + +;;; variants built on top of WORD-INDEX-REF, etc. I.e., those vectors +;;; whose elements are represented in integer registers and are built +;;; out of 8, 16, or 32 bit elements. +(macrolet ((def-full-data-vector-frobs (type element-type &rest scs) + `(progn + (define-full-reffer ,(symbolicate "DATA-VECTOR-REF/" type) + ,type vector-data-offset other-pointer-lowtag ,scs + ,element-type data-vector-ref) + (define-full-setter ,(symbolicate "DATA-VECTOR-SET/" type) + ,type vector-data-offset other-pointer-lowtag ,scs + ,element-type data-vector-set))) + ) + (def-full-data-vector-frobs simple-vector * descriptor-reg any-reg) + (def-full-data-vector-frobs simple-array-unsigned-byte-32 unsigned-num + unsigned-reg) + (def-full-data-vector-frobs simple-array-unsigned-byte-64 unsigned-num + unsigned-reg) + (def-full-data-vector-frobs simple-array-signed-byte-61 tagged-num any-reg) + (def-full-data-vector-frobs simple-array-unsigned-byte-60 + positive-fixnum any-reg) + (def-full-data-vector-frobs simple-array-signed-byte-32 + signed-num signed-reg) + (def-full-data-vector-frobs simple-array-signed-byte-64 + signed-num signed-reg) + (def-full-data-vector-frobs simple-array-unsigned-byte-63 unsigned-num + unsigned-reg)) + +;;;; integer vectors whose elements are smaller than a byte, i.e., +;;;; bit, 2-bit, and 4-bit vectors + +(macrolet ((def-small-data-vector-frobs (type bits) + (let* ((elements-per-word (floor n-word-bits bits)) + (bit-shift (1- (integer-length elements-per-word)))) + `(progn + (define-vop (,(symbolicate 'data-vector-ref/ type)) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types ,type positive-fixnum) + (:results (result :scs (unsigned-reg) :from (:argument 0))) + (:result-types positive-fixnum) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) + (:generator 20 + (move ecx index) + (inst shr ecx ,bit-shift) + (inst mov result + (make-ea :qword :base object :index ecx :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))) + (move ecx index) + (inst and ecx ,(1- elements-per-word)) + ,@(unless (= bits 1) + `((inst shl ecx ,(1- (integer-length bits))))) + (inst shr result :cl) + (inst and result ,(1- (ash 1 bits))))) + (define-vop (,(symbolicate 'data-vector-ref-c/ type)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:arg-types ,type (:constant index)) + (:info index) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 15 + (multiple-value-bind (word extra) (floor index ,elements-per-word) + (loadw result object (+ word vector-data-offset) + other-pointer-lowtag) + (unless (zerop extra) + (inst shr result (* extra ,bits))) + (unless (= extra ,(1- elements-per-word)) + (inst and result ,(1- (ash 1 bits))))))) + (define-vop (,(symbolicate 'data-vector-set/ type)) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :target ptr) + (index :scs (unsigned-reg) :target ecx) + (value :scs (unsigned-reg immediate) :target result)) + (:arg-types ,type positive-fixnum positive-fixnum) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:temporary (:sc unsigned-reg) word-index) + (:temporary (:sc unsigned-reg :from (:argument 0)) ptr old) + (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) + ecx) + (:generator 25 + (move word-index index) + (inst shr word-index ,bit-shift) + (inst lea ptr + (make-ea :qword :base object :index word-index + :scale n-word-bytes + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))) + (loadw old ptr) + (move ecx index) + (inst and ecx ,(1- elements-per-word)) + ,@(unless (= bits 1) + `((inst shl ecx ,(1- (integer-length bits))))) + (inst ror old :cl) + (unless (and (sc-is value immediate) + (= (tn-value value) ,(1- (ash 1 bits)))) + (inst and old ,(lognot (1- (ash 1 bits))))) + (sc-case value + (immediate + (unless (zerop (tn-value value)) + (inst or old (logand (tn-value value) ,(1- (ash 1 bits)))))) + (unsigned-reg + (inst or old value))) + (inst rol old :cl) + (storew old ptr) + (sc-case value + (immediate + (inst mov result (tn-value value))) + (unsigned-reg + (move result value))))) + (define-vop (,(symbolicate 'data-vector-set-c/ type)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (unsigned-reg immediate) :target result)) + (:arg-types ,type (:constant index) positive-fixnum) + (:info index) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:temporary (:sc unsigned-reg :to (:result 0)) old) + (:generator 20 + (multiple-value-bind (word extra) (floor index ,elements-per-word) + (inst mov old + (make-ea :qword :base object + :disp (- (* (+ word vector-data-offset) + n-word-bytes) + other-pointer-lowtag))) + (sc-case value + (immediate + (let* ((value (tn-value value)) + (mask ,(1- (ash 1 bits))) + (shift (* extra ,bits))) + (unless (= value mask) + (inst and old (lognot (ash mask shift)))) + (unless (zerop value) + (inst or old (ash value shift))))) + (unsigned-reg + (let ((shift (* extra ,bits))) + (unless (zerop shift) + (inst ror old shift)) + (inst and old (lognot ,(1- (ash 1 bits)))) + (inst or old value) + (unless (zerop shift) + (inst rol old shift))))) + (inst mov (make-ea :dword :base object + :disp (- (* (+ word vector-data-offset) + n-word-bytes) + other-pointer-lowtag)) + old) + (sc-case value + (immediate + (inst mov result (tn-value value))) + (unsigned-reg + (move result value)))))))))) + (def-small-data-vector-frobs simple-bit-vector 1) + (def-small-data-vector-frobs simple-array-unsigned-byte-2 2) + (def-small-data-vector-frobs simple-array-unsigned-byte-4 4)) +;;; And the float variants. + +(define-vop (data-vector-ref/simple-array-single-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types simple-array-single-float positive-fixnum) + (:results (value :scs (single-reg))) + (:result-types single-float) + (:generator 5 + (with-empty-tn@fp-top(value) + (inst fld (make-ea :dword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag)))))) + +(define-vop (data-vector-ref-c/simple-array-single-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-single-float (:constant (signed-byte 61))) + (:results (value :scs (single-reg))) + (:result-types single-float) + (:generator 4 + (with-empty-tn@fp-top(value) + (inst fld (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 4 index)) + other-pointer-lowtag)))))) + +(define-vop (data-vector-set/simple-array-single-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs (single-reg) :target result)) + (:arg-types simple-array-single-float positive-fixnum single-float) + (:results (result :scs (single-reg))) + (:result-types single-float) + (:generator 5 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0. + (inst fst (make-ea :dword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fst result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fst (make-ea :dword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fst value)) + (t + ;; Neither value or result are in ST0 + (unless (location= value result) + (inst fst result)) + (inst fxch value))))))) + +(define-vop (data-vector-set-c/simple-array-single-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (single-reg) :target result)) + (:info index) + (:arg-types simple-array-single-float (:constant (signed-byte 29)) + single-float) + (:results (result :scs (single-reg))) + (:result-types single-float) + (:generator 4 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0. + (inst fst (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 4 index)) + other-pointer-lowtag))) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fst result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fst (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 4 index)) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fst value)) + (t + ;; Neither value or result are in ST0 + (unless (location= value result) + (inst fst result)) + (inst fxch value))))))) + +(define-vop (data-vector-ref/simple-array-double-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types simple-array-double-float positive-fixnum) + (:results (value :scs (double-reg))) + (:result-types double-float) + (:generator 7 + (with-empty-tn@fp-top(value) + (inst fldd (make-ea :dword :base object :index index :scale 2 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag)))))) + +(define-vop (data-vector-ref-c/simple-array-double-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-double-float (:constant (signed-byte 29))) + (:results (value :scs (double-reg))) + (:result-types double-float) + (:generator 6 + (with-empty-tn@fp-top(value) + (inst fldd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index)) + other-pointer-lowtag)))))) + +(define-vop (data-vector-set/simple-array-double-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs (double-reg) :target result)) + (:arg-types simple-array-double-float positive-fixnum double-float) + (:results (result :scs (double-reg))) + (:result-types double-float) + (:generator 20 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0. + (inst fstd (make-ea :dword :base object :index index :scale 2 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fstd result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fstd (make-ea :dword :base object :index index :scale 2 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fstd value)) + (t + ;; Neither value or result are in ST0 + (unless (location= value result) + (inst fstd result)) + (inst fxch value))))))) + +(define-vop (data-vector-set-c/simple-array-double-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (double-reg) :target result)) + (:info index) + (:arg-types simple-array-double-float (:constant (signed-byte 61)) + double-float) + (:results (result :scs (double-reg))) + (:result-types double-float) + (:generator 19 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0. + (inst fstd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index)) + other-pointer-lowtag))) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fstd result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fstd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index)) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fstd value)) + (t + ;; Neither value or result are in ST0 + (unless (location= value result) + (inst fstd result)) + (inst fxch value))))))) + + + +;;; complex float variants + +(define-vop (data-vector-ref/simple-array-complex-single-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types simple-array-complex-single-float positive-fixnum) + (:results (value :scs (complex-single-reg))) + (:result-types complex-single-float) + (:generator 5 + (let ((real-tn (complex-single-reg-real-tn value))) + (with-empty-tn@fp-top (real-tn) + (inst fld (make-ea :dword :base object :index index :scale 2 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))))) + (let ((imag-tn (complex-single-reg-imag-tn value))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (make-ea :dword :base object :index index :scale 2 + :disp (- (* (1+ vector-data-offset) + n-word-bytes) + other-pointer-lowtag))))))) + +(define-vop (data-vector-ref-c/simple-array-complex-single-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-complex-single-float (:constant (signed-byte 29))) + (:results (value :scs (complex-single-reg))) + (:result-types complex-single-float) + (:generator 4 + (let ((real-tn (complex-single-reg-real-tn value))) + (with-empty-tn@fp-top (real-tn) + (inst fld (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index)) + other-pointer-lowtag))))) + (let ((imag-tn (complex-single-reg-imag-tn value))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index) 4) + other-pointer-lowtag))))))) + +(define-vop (data-vector-set/simple-array-complex-single-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs (complex-single-reg) :target result)) + (:arg-types simple-array-complex-single-float positive-fixnum + complex-single-float) + (:results (result :scs (complex-single-reg))) + (:result-types complex-single-float) + (:generator 5 + (let ((value-real (complex-single-reg-real-tn value)) + (result-real (complex-single-reg-real-tn result))) + (cond ((zerop (tn-offset value-real)) + ;; Value is in ST0. + (inst fst (make-ea :dword :base object :index index :scale 2 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (unless (zerop (tn-offset result-real)) + ;; Value is in ST0 but not result. + (inst fst result-real))) + (t + ;; Value is not in ST0. + (inst fxch value-real) + (inst fst (make-ea :dword :base object :index index :scale 2 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result-real)) + ;; The result is in ST0. + (inst fst value-real)) + (t + ;; Neither value or result are in ST0 + (unless (location= value-real result-real) + (inst fst result-real)) + (inst fxch value-real)))))) + (let ((value-imag (complex-single-reg-imag-tn value)) + (result-imag (complex-single-reg-imag-tn result))) + (inst fxch value-imag) + (inst fst (make-ea :dword :base object :index index :scale 2 + :disp (- (+ (* vector-data-offset + n-word-bytes) + 4) + other-pointer-lowtag))) + (unless (location= value-imag result-imag) + (inst fst result-imag)) + (inst fxch value-imag)))) + +(define-vop (data-vector-set-c/simple-array-complex-single-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (complex-single-reg) :target result)) + (:info index) + (:arg-types simple-array-complex-single-float (:constant (signed-byte 61)) + complex-single-float) + (:results (result :scs (complex-single-reg))) + (:result-types complex-single-float) + (:generator 4 + (let ((value-real (complex-single-reg-real-tn value)) + (result-real (complex-single-reg-real-tn result))) + (cond ((zerop (tn-offset value-real)) + ;; Value is in ST0. + (inst fst (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index)) + other-pointer-lowtag))) + (unless (zerop (tn-offset result-real)) + ;; Value is in ST0 but not result. + (inst fst result-real))) + (t + ;; Value is not in ST0. + (inst fxch value-real) + (inst fst (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index)) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result-real)) + ;; The result is in ST0. + (inst fst value-real)) + (t + ;; Neither value or result are in ST0 + (unless (location= value-real result-real) + (inst fst result-real)) + (inst fxch value-real)))))) + (let ((value-imag (complex-single-reg-imag-tn value)) + (result-imag (complex-single-reg-imag-tn result))) + (inst fxch value-imag) + (inst fst (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 8 index) 4) + other-pointer-lowtag))) + (unless (location= value-imag result-imag) + (inst fst result-imag)) + (inst fxch value-imag)))) + + +(define-vop (data-vector-ref/simple-array-complex-double-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types simple-array-complex-double-float positive-fixnum) + (:results (value :scs (complex-double-reg))) + (:result-types complex-double-float) + (:generator 7 + (let ((real-tn (complex-double-reg-real-tn value))) + (with-empty-tn@fp-top (real-tn) + (inst fldd (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))))) + (let ((imag-tn (complex-double-reg-imag-tn value))) + (with-empty-tn@fp-top (imag-tn) + (inst fldd (make-ea :dword :base object :index index :scale 4 + :disp (- (+ (* vector-data-offset + n-word-bytes) + 8) + other-pointer-lowtag))))))) + +(define-vop (data-vector-ref-c/simple-array-complex-double-float) + (:note "inline array access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-complex-double-float (:constant (signed-byte 29))) + (:results (value :scs (complex-double-reg))) + (:result-types complex-double-float) + (:generator 6 + (let ((real-tn (complex-double-reg-real-tn value))) + (with-empty-tn@fp-top (real-tn) + (inst fldd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag))))) + (let ((imag-tn (complex-double-reg-imag-tn value))) + (with-empty-tn@fp-top (imag-tn) + (inst fldd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index) 8) + other-pointer-lowtag))))))) + +(define-vop (data-vector-set/simple-array-complex-double-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs (complex-double-reg) :target result)) + (:arg-types simple-array-complex-double-float positive-fixnum + complex-double-float) + (:results (result :scs (complex-double-reg))) + (:result-types complex-double-float) + (:generator 20 + (let ((value-real (complex-double-reg-real-tn value)) + (result-real (complex-double-reg-real-tn result))) + (cond ((zerop (tn-offset value-real)) + ;; Value is in ST0. + (inst fstd (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (unless (zerop (tn-offset result-real)) + ;; Value is in ST0 but not result. + (inst fstd result-real))) + (t + ;; Value is not in ST0. + (inst fxch value-real) + (inst fstd (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result-real)) + ;; The result is in ST0. + (inst fstd value-real)) + (t + ;; Neither value or result are in ST0 + (unless (location= value-real result-real) + (inst fstd result-real)) + (inst fxch value-real)))))) + (let ((value-imag (complex-double-reg-imag-tn value)) + (result-imag (complex-double-reg-imag-tn result))) + (inst fxch value-imag) + (inst fstd (make-ea :dword :base object :index index :scale 4 + :disp (- (+ (* vector-data-offset + n-word-bytes) + 8) + other-pointer-lowtag))) + (unless (location= value-imag result-imag) + (inst fstd result-imag)) + (inst fxch value-imag)))) + +(define-vop (data-vector-set-c/simple-array-complex-double-float) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (complex-double-reg) :target result)) + (:info index) + (:arg-types simple-array-complex-double-float (:constant (signed-byte 61)) + complex-double-float) + (:results (result :scs (complex-double-reg))) + (:result-types complex-double-float) + (:generator 19 + (let ((value-real (complex-double-reg-real-tn value)) + (result-real (complex-double-reg-real-tn result))) + (cond ((zerop (tn-offset value-real)) + ;; Value is in ST0. + (inst fstd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag))) + (unless (zerop (tn-offset result-real)) + ;; Value is in ST0 but not result. + (inst fstd result-real))) + (t + ;; Value is not in ST0. + (inst fxch value-real) + (inst fstd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag))) + (cond ((zerop (tn-offset result-real)) + ;; The result is in ST0. + (inst fstd value-real)) + (t + ;; Neither value or result are in ST0 + (unless (location= value-real result-real) + (inst fstd result-real)) + (inst fxch value-real)))))) + (let ((value-imag (complex-double-reg-imag-tn value)) + (result-imag (complex-double-reg-imag-tn result))) + (inst fxch value-imag) + (inst fstd (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index) 8) + other-pointer-lowtag))) + (unless (location= value-imag result-imag) + (inst fstd result-imag)) + (inst fxch value-imag)))) + + + + + + +;;; unsigned-byte-8 +(macrolet ((define-data-vector-frobs (ptype) + `(progn + (define-vop (,(symbolicate "DATA-VECTOR-REF/" ptype)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types ,ptype positive-fixnum) + (:results (value :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 5 + (inst movzx value + (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + (define-vop (,(symbolicate "DATA-VECTOR-REF-C/" ptype)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types ,ptype (:constant (signed-byte 61))) + (:results (value :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 4 + (inst movzx value + (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag))))) + (define-vop (,(symbolicate "DATA-VECTOR-SET/" ptype)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (unsigned-reg signed-reg) :target eax)) + (:arg-types ,ptype positive-fixnum positive-fixnum) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 5 + (move eax value) + (inst mov (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + al-tn) + (move result eax))) + (define-vop (,(symbolicate "DATA-VECTOR-SET-C/" ptype)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (unsigned-reg signed-reg) :target eax)) + (:info index) + (:arg-types ,ptype (:constant (signed-byte 61)) + positive-fixnum) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 4 + (move eax value) + (inst mov (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag)) + al-tn) + (move result eax)))))) + (define-data-vector-frobs simple-array-unsigned-byte-7) + (define-data-vector-frobs simple-array-unsigned-byte-8)) + +;;; unsigned-byte-16 +(macrolet ((define-data-vector-frobs (ptype) + `(progn + (define-vop (,(symbolicate "DATA-VECTOR-REF/" ptype)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types ,ptype positive-fixnum) + (:results (value :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 5 + (inst movzx value + (make-ea :word :base object :index index :scale 2 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + (define-vop (,(symbolicate "DATA-VECTOR-REF-C/" ptype)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types ,ptype (:constant (signed-byte 29))) + (:results (value :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 4 + (inst movzx value + (make-ea :word :base object + :disp (- (+ (* vector-data-offset n-word-bytes) (* 2 index)) + other-pointer-lowtag))))) + (define-vop (,(symbolicate "DATA-VECTOR-SET/" ptype)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (unsigned-reg signed-reg) :target eax)) + (:arg-types ,ptype positive-fixnum positive-fixnum) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 5 + (move eax value) + (inst mov (make-ea :word :base object :index index :scale 2 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + ax-tn) + (move result eax))) + + (define-vop (,(symbolicate "DATA-VECTOR-SET-C/" ptype)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (unsigned-reg signed-reg) :target eax)) + (:info index) + (:arg-types ,ptype (:constant (signed-byte 29)) + positive-fixnum) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 4 + (move eax value) + (inst mov (make-ea :word :base object + :disp (- (+ (* vector-data-offset n-word-bytes) + (* 2 index)) + other-pointer-lowtag)) + ax-tn) + (move result eax)))))) + (define-data-vector-frobs simple-array-unsigned-byte-15) + (define-data-vector-frobs simple-array-unsigned-byte-16)) + +(macrolet ((define-data-vector-frobs (ptype) + `(progn + (define-vop (,(symbolicate "DATA-VECTOR-REF/" ptype)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types ,ptype positive-fixnum) + (:results (value :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 5 + (inst movzxd value + (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + (define-vop (,(symbolicate "DATA-VECTOR-REF-C/" ptype)) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types ,ptype (:constant (signed-byte 61))) + (:results (value :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 4 + (inst movzxd value + (make-ea :dword :base object + :disp (- (+ (* vector-data-offset n-word-bytes) (* 4 index)) + other-pointer-lowtag))))) + (define-vop (,(symbolicate "DATA-VECTOR-SET/" ptype)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (unsigned-reg signed-reg) :target rax)) + (:arg-types ,ptype positive-fixnum positive-fixnum) + (:temporary (:sc unsigned-reg :offset rax-offset :target result + :from (:argument 2) :to (:result 0)) + rax) + (:results (result :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 5 + (move rax value) + (inst mov (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + eax-tn) + (move result rax))) + + (define-vop (,(symbolicate "DATA-VECTOR-SET-C/" ptype)) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (unsigned-reg signed-reg) :target rax)) + (:info index) + (:arg-types ,ptype (:constant (signed-byte 61)) + positive-fixnum) + (:temporary (:sc unsigned-reg :offset rax-offset :target result + :from (:argument 1) :to (:result 0)) + rax) + (:results (result :scs (unsigned-reg signed-reg))) + (:result-types positive-fixnum) + (:generator 4 + (move rax value) + (inst mov (make-ea :dword :base object + :disp (- (+ (* vector-data-offset n-word-bytes) + (* 4 index)) + other-pointer-lowtag)) + eax-tn) + (move result rax)))))) + (define-data-vector-frobs simple-array-unsigned-byte-32) + (define-data-vector-frobs simple-array-unsigned-byte-31)) + +;;; simple-string + +(define-vop (data-vector-ref/simple-base-string) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types simple-base-string positive-fixnum) + (:results (value :scs (base-char-reg))) + (:result-types base-char) + (:generator 5 + (inst mov value + (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-base-string) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-base-string (:constant (signed-byte 61))) + (:results (value :scs (base-char-reg))) + (:result-types base-char) + (:generator 4 + (inst mov value + (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag))))) + +(define-vop (data-vector-set/simple-base-string) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (base-char-reg) :target result)) + (:arg-types simple-base-string positive-fixnum base-char) + (:results (result :scs (base-char-reg))) + (:result-types base-char) + (:generator 5 + (inst mov (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + value) + (move result value))) + +(define-vop (data-vector-set/simple-base-string-c) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (base-char-reg))) + (:info index) + (:arg-types simple-base-string (:constant (signed-byte 61)) base-char) + (:results (result :scs (base-char-reg))) + (:result-types base-char) + (:generator 4 + (inst mov (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag)) + value) + (move result value))) + +;;; signed-byte-8 + +(define-vop (data-vector-ref/simple-array-signed-byte-8) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types simple-array-signed-byte-8 positive-fixnum) + (:results (value :scs (signed-reg))) + (:result-types tagged-num) + (:generator 5 + (inst movsx value + (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-array-signed-byte-8) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-signed-byte-8 (:constant (signed-byte 61))) + (:results (value :scs (signed-reg))) + (:result-types tagged-num) + (:generator 4 + (inst movsx value + (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag))))) + +(define-vop (data-vector-set/simple-array-signed-byte-8) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (signed-reg) :target eax)) + (:arg-types simple-array-signed-byte-8 positive-fixnum tagged-num) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (signed-reg))) + (:result-types tagged-num) + (:generator 5 + (move eax value) + (inst mov (make-ea :byte :base object :index index :scale 1 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + al-tn) + (move result eax))) + +(define-vop (data-vector-set-c/simple-array-signed-byte-8) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (signed-reg) :target eax)) + (:info index) + (:arg-types simple-array-signed-byte-8 (:constant (signed-byte 61)) + tagged-num) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (signed-reg))) + (:result-types tagged-num) + (:generator 4 + (move eax value) + (inst mov (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag)) + al-tn) + (move result eax))) + +;;; signed-byte-16 + +(define-vop (data-vector-ref/simple-array-signed-byte-16) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types simple-array-signed-byte-16 positive-fixnum) + (:results (value :scs (signed-reg))) + (:result-types tagged-num) + (:generator 5 + (inst movsx value + (make-ea :word :base object :index index :scale 2 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-array-signed-byte-16) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-signed-byte-16 (:constant (signed-byte 61))) + (:results (value :scs (signed-reg))) + (:result-types tagged-num) + (:generator 4 + (inst movsx value + (make-ea :word :base object + :disp (- (+ (* vector-data-offset n-word-bytes) + (* 2 index)) + other-pointer-lowtag))))) + +(define-vop (data-vector-set/simple-array-signed-byte-16) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (signed-reg) :target eax)) + (:arg-types simple-array-signed-byte-16 positive-fixnum tagged-num) + (:temporary (:sc signed-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (signed-reg))) + (:result-types tagged-num) + (:generator 5 + (move eax value) + (inst mov (make-ea :word :base object :index index :scale 2 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + ax-tn) + (move result eax))) + +(define-vop (data-vector-set-c/simple-array-signed-byte-16) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (signed-reg) :target eax)) + (:info index) + (:arg-types simple-array-signed-byte-16 (:constant (signed-byte 61)) tagged-num) + (:temporary (:sc signed-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (signed-reg))) + (:result-types tagged-num) + (:generator 4 + (move eax value) + (inst mov + (make-ea :word :base object + :disp (- (+ (* vector-data-offset n-word-bytes) + (* 2 index)) + other-pointer-lowtag)) + ax-tn) + (move result eax))) + + +(define-vop (data-vector-ref/simple-array-signed-byte-32) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:arg-types simple-array-signed-byte-32 positive-fixnum) + (:results (value :scs (signed-reg))) + (:result-types tagged-num) + (:generator 5 + (inst movsxd value + (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-array-signed-byte-32) + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-signed-byte-32 (:constant (signed-byte 61))) + (:results (value :scs (signed-reg))) + (:result-types tagged-num) + (:generator 4 + (inst movsxd value + (make-ea :dword :base object + :disp (- (+ (* vector-data-offset n-word-bytes) + (* 4 index)) + other-pointer-lowtag))))) + +(define-vop (data-vector-set/simple-array-signed-byte-32) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (index :scs (unsigned-reg) :to (:eval 0)) + (value :scs (signed-reg) :target eax)) + (:arg-types simple-array-signed-byte-32 positive-fixnum tagged-num) + (:temporary (:sc signed-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (signed-reg))) + (:result-types tagged-num) + (:generator 5 + (move eax value) + (inst mov (make-ea :dword :base object :index index :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + eax-tn) + (move result eax))) + +(define-vop (data-vector-set-c/simple-array-signed-byte-32) + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:eval 0)) + (value :scs (signed-reg) :target eax)) + (:info index) + (:arg-types simple-array-signed-byte-32 (:constant (signed-byte 61)) tagged-num) + (:temporary (:sc signed-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (signed-reg))) + (:result-types tagged-num) + (:generator 4 + (move eax value) + (inst mov + (make-ea :dword :base object + :disp (- (+ (* vector-data-offset n-word-bytes) + (* 4 index)) + other-pointer-lowtag)) + rax-tn) + (move result eax))) + +;;; These VOPs are used for implementing float slots in structures (whose raw +;;; data is an unsigned-32 vector). +(define-vop (raw-ref-single data-vector-ref/simple-array-single-float) + (:translate %raw-ref-single) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum)) +(define-vop (raw-ref-single-c data-vector-ref-c/simple-array-single-float) + (:translate %raw-ref-single) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)))) +(define-vop (raw-set-single data-vector-set/simple-array-single-float) + (:translate %raw-set-single) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum single-float)) +(define-vop (raw-set-single-c data-vector-set-c/simple-array-single-float) + (:translate %raw-set-single) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)) + single-float)) +(define-vop (raw-ref-double data-vector-ref/simple-array-double-float) + (:translate %raw-ref-double) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum)) +(define-vop (raw-ref-double-c data-vector-ref-c/simple-array-double-float) + (:translate %raw-ref-double) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)))) +(define-vop (raw-set-double data-vector-set/simple-array-double-float) + (:translate %raw-set-double) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum double-float)) +(define-vop (raw-set-double-c data-vector-set-c/simple-array-double-float) + (:translate %raw-set-double) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)) + double-float)) + + +;;;; complex-float raw structure slot accessors + +(define-vop (raw-ref-complex-single + data-vector-ref/simple-array-complex-single-float) + (:translate %raw-ref-complex-single) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum)) +(define-vop (raw-ref-complex-single-c + data-vector-ref-c/simple-array-complex-single-float) + (:translate %raw-ref-complex-single) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)))) +(define-vop (raw-set-complex-single + data-vector-set/simple-array-complex-single-float) + (:translate %raw-set-complex-single) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum complex-single-float)) +(define-vop (raw-set-complex-single-c + data-vector-set-c/simple-array-complex-single-float) + (:translate %raw-set-complex-single) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)) + complex-single-float)) +(define-vop (raw-ref-complex-double + data-vector-ref/simple-array-complex-double-float) + (:translate %raw-ref-complex-double) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum)) +(define-vop (raw-ref-complex-double-c + data-vector-ref-c/simple-array-complex-double-float) + (:translate %raw-ref-complex-double) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)))) +(define-vop (raw-set-complex-double + data-vector-set/simple-array-complex-double-float) + (:translate %raw-set-complex-double) + (:arg-types simple-array-unsigned-byte-32 positive-fixnum + complex-double-float)) +(define-vop (raw-set-complex-double-c + data-vector-set-c/simple-array-complex-double-float) + (:translate %raw-set-complex-double) + (:arg-types simple-array-unsigned-byte-32 (:constant (signed-byte 61)) + complex-double-float)) + + +;;; These vops are useful for accessing the bits of a vector +;;; irrespective of what type of vector it is. +(define-full-reffer raw-bits * 0 other-pointer-lowtag (unsigned-reg) + unsigned-num %raw-bits) +(define-full-setter set-raw-bits * 0 other-pointer-lowtag (unsigned-reg) + unsigned-num %set-raw-bits) + +;;;; miscellaneous array VOPs + +(define-vop (get-vector-subtype get-header-data)) +(define-vop (set-vector-subtype set-header-data)) diff --git a/src/compiler/x86-64/backend-parms.lisp b/src/compiler/x86-64/backend-parms.lisp new file mode 100644 index 0000000..a1802f5 --- /dev/null +++ b/src/compiler/x86-64/backend-parms.lisp @@ -0,0 +1,51 @@ +;;;; that part of the parms.lisp file from original CMU CL which is defined in +;;;; terms of the BACKEND structure +;;;; +;;;; FIXME: When we break up the BACKEND structure, this might be mergeable +;;;; back into the parms.lisp file. + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; compiler constants + +(def!constant +backend-fasl-file-implementation+ :x86) + +(setf *backend-register-save-penalty* 3) + +(setf *backend-byte-order* :little-endian) + +;;; KLUDGE: It would seem natural to set this by asking our C runtime +;;; code for it, but mostly we need it for GENESIS, which doesn't in +;;; general have our C runtime code running to ask, so instead we set +;;; it by hand. -- WHN 2001-04-15 +;;; +;;; Though note that POSIX specifies (as far as I can tell) +;;; +;;; sysconf(_SC_PAGE_SIZE); +;;; +;;; as a portable way of retrieving this information; a call to this +;;; could be made in grovel-headers (which, strictly speaking, would +;;; no longer solely be grovelling headers), though the question of +;;; how to make this information appear in GENESIS, which is built and +;;; run from host-1 files (which are made before grovel-headers runs) +;;; would remain. -- CSR, 2002-09-01 +(setf *backend-page-size* 4096) +;;; comment from CMU CL: +;;; +;;; in case we ever wanted to do this for Windows NT.. +;;; +;;; Windows NT uses a memory system granularity of 64K, which means +;;; everything that gets mapped must be a multiple of that. The real +;;; page size is 512, but that doesn't do us a whole lot of good. +;;; Effectively, the page size is 64K. +;;; +;;; would be: (setf *backend-page-size* 65536) diff --git a/src/compiler/x86-64/c-call.lisp b/src/compiler/x86-64/c-call.lisp new file mode 100644 index 0000000..7c1d468 --- /dev/null +++ b/src/compiler/x86-64/c-call.lisp @@ -0,0 +1,332 @@ +;;;; the VOPs and other necessary machine specific support +;;;; routines for call-out to C + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;; The MOVE-ARG vop is going to store args on the stack for +;; call-out. These tn's will be used for that. move-arg is normally +;; used for things going down the stack but C wants to have args +;; indexed in the positive direction. + +(defun my-make-wired-tn (prim-type-name sc-name offset) + (make-wired-tn (primitive-type-or-lose prim-type-name) + (sc-number-or-lose sc-name) + offset)) + +(defstruct (arg-state (:copier nil)) + (stack-frame-size 0)) + +(define-alien-type-method (integer :arg-tn) (type state) + (let ((stack-frame-size (arg-state-stack-frame-size state))) + (setf (arg-state-stack-frame-size state) (1+ stack-frame-size)) + (multiple-value-bind (ptype stack-sc) + (if (alien-integer-type-signed type) + (values 'signed-byte-64 'signed-stack) + (values 'unsigned-byte-64 'unsigned-stack)) + (my-make-wired-tn ptype stack-sc stack-frame-size)))) + +(define-alien-type-method (system-area-pointer :arg-tn) (type state) + (declare (ignore type)) + (let ((stack-frame-size (arg-state-stack-frame-size state))) + (setf (arg-state-stack-frame-size state) (1+ stack-frame-size)) + (my-make-wired-tn 'system-area-pointer + 'sap-stack + stack-frame-size))) + +#!+long-float +(define-alien-type-method (long-float :arg-tn) (type state) + (declare (ignore type)) + (let ((stack-frame-size (arg-state-stack-frame-size state))) + (setf (arg-state-stack-frame-size state) (+ stack-frame-size 3)) + (my-make-wired-tn 'long-float 'long-stack stack-frame-size))) + +(define-alien-type-method (double-float :arg-tn) (type state) + (declare (ignore type)) + (let ((stack-frame-size (arg-state-stack-frame-size state))) + (setf (arg-state-stack-frame-size state) (+ stack-frame-size 2)) + (my-make-wired-tn 'double-float 'double-stack stack-frame-size))) + +(define-alien-type-method (single-float :arg-tn) (type state) + (declare (ignore type)) + (let ((stack-frame-size (arg-state-stack-frame-size state))) + (setf (arg-state-stack-frame-size state) (1+ stack-frame-size)) + (my-make-wired-tn 'single-float 'single-stack stack-frame-size))) + +(defstruct (result-state (:copier nil)) + (num-results 0)) + +(defun result-reg-offset (slot) + (ecase slot + (0 eax-offset) + (1 edx-offset))) + +(define-alien-type-method (integer :result-tn) (type state) + (let ((num-results (result-state-num-results state))) + (setf (result-state-num-results state) (1+ num-results)) + (multiple-value-bind (ptype reg-sc) + (if (alien-integer-type-signed type) + (values 'signed-byte-64 'signed-reg) + (values 'unsigned-byte-64 'unsigned-reg)) + (my-make-wired-tn ptype reg-sc (result-reg-offset num-results))))) + +(define-alien-type-method (system-area-pointer :result-tn) (type state) + (declare (ignore type)) + (let ((num-results (result-state-num-results state))) + (setf (result-state-num-results state) (1+ num-results)) + (my-make-wired-tn 'system-area-pointer 'sap-reg + (result-reg-offset num-results)))) + +#!+long-float +(define-alien-type-method (long-float :result-tn) (type state) + (declare (ignore type)) + (let ((num-results (result-state-num-results state))) + (setf (result-state-num-results state) (1+ num-results)) + (my-make-wired-tn 'long-float 'long-reg (* num-results 2)))) + +(define-alien-type-method (double-float :result-tn) (type state) + (declare (ignore type)) + (let ((num-results (result-state-num-results state))) + (setf (result-state-num-results state) (1+ num-results)) + (my-make-wired-tn 'double-float 'double-reg (* num-results 2)))) + +(define-alien-type-method (single-float :result-tn) (type state) + (declare (ignore type)) + (let ((num-results (result-state-num-results state))) + (setf (result-state-num-results state) (1+ num-results)) + (my-make-wired-tn 'single-float 'single-reg (* num-results 2)))) + +(define-alien-type-method (values :result-tn) (type state) + (let ((values (alien-values-type-values type))) + (when (> (length values) 2) + (error "Too many result values from c-call.")) + (mapcar (lambda (type) + (invoke-alien-type-method :result-tn type state)) + values))) + +(!def-vm-support-routine make-call-out-tns (type) + (let ((arg-state (make-arg-state))) + (collect ((arg-tns)) + (dolist (arg-type (alien-fun-type-arg-types type)) + (arg-tns (invoke-alien-type-method :arg-tn arg-type arg-state))) + (values (my-make-wired-tn 'positive-fixnum 'any-reg esp-offset) + (* (arg-state-stack-frame-size arg-state) n-word-bytes) + (arg-tns) + (invoke-alien-type-method :result-tn + (alien-fun-type-result-type type) + (make-result-state)))))) + + +(deftransform %alien-funcall ((function type &rest args) * * :node node) + (aver (sb!c::constant-lvar-p type)) + (let* ((type (sb!c::lvar-value type)) + (env (sb!c::node-lexenv node)) + (arg-types (alien-fun-type-arg-types type)) + (result-type (alien-fun-type-result-type type))) + (aver (= (length arg-types) (length args))) + (if (or (some #'(lambda (type) + (and (alien-integer-type-p type) + (> (sb!alien::alien-integer-type-bits type) 64))) + arg-types) + (and (alien-integer-type-p result-type) + (> (sb!alien::alien-integer-type-bits result-type) 64))) + (collect ((new-args) (lambda-vars) (new-arg-types)) + (dolist (type arg-types) + (let ((arg (gensym))) + (lambda-vars arg) + (cond ((and (alien-integer-type-p type) + (> (sb!alien::alien-integer-type-bits type) 64)) + (new-args `(logand ,arg #xffffffff)) + (new-args `(ash ,arg -64)) + (new-arg-types (parse-alien-type '(unsigned 64) env)) + (if (alien-integer-type-signed type) + (new-arg-types (parse-alien-type '(signed 64) env)) + (new-arg-types (parse-alien-type '(unsigned 64) env)))) + (t + (new-args arg) + (new-arg-types type))))) + (cond ((and (alien-integer-type-p result-type) + (> (sb!alien::alien-integer-type-bits result-type) 64)) + (let ((new-result-type + (let ((sb!alien::*values-type-okay* t)) + (parse-alien-type + (if (alien-integer-type-signed result-type) + '(values (unsigned 64) (signed 64)) + '(values (unsigned 64) (unsigned 64))) + env)))) + `(lambda (function type ,@(lambda-vars)) + (declare (ignore type)) + (multiple-value-bind (low high) + (%alien-funcall function + ',(make-alien-fun-type + :arg-types (new-arg-types) + :result-type new-result-type) + ,@(new-args)) + (logior low (ash high 64)))))) + (t + `(lambda (function type ,@(lambda-vars)) + (declare (ignore type)) + (%alien-funcall function + ',(make-alien-fun-type + :arg-types (new-arg-types) + :result-type result-type) + ,@(new-args)))))) + (sb!c::give-up-ir1-transform)))) + + + + +(define-vop (foreign-symbol-address) + (:translate foreign-symbol-address) + (:policy :fast-safe) + (:args) + (:arg-types (:constant simple-base-string)) + (:info foreign-symbol) + (:results (res :scs (sap-reg))) + (:result-types system-area-pointer) + (:generator 2 + (inst lea res (make-fixup (extern-alien-name foreign-symbol) :foreign)))) + +(define-vop (call-out) + (:args (function :scs (sap-reg)) + (args :more t)) + (:results (results :more t)) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:temporary (:sc unsigned-reg :offset ecx-offset + :from :eval :to :result) ecx) + (:temporary (:sc unsigned-reg :offset edx-offset + :from :eval :to :result) edx) + (:node-var node) + (:vop-var vop) + (:save-p t) + (:ignore args ecx edx) + (:generator 0 + (cond ((policy node (> space speed)) + (move eax function) + (inst call (make-fixup (extern-alien-name "call_into_c") :foreign))) + (t + ;; Setup the NPX for C; all the FP registers need to be + ;; empty; pop them all. + (dotimes (i 8) + (inst fstp fr0-tn)) + + (inst call function) + ;; To give the debugger a clue. XX not really internal-error? + (note-this-location vop :internal-error) + + ;; Restore the NPX for lisp; ensure no regs are empty + (dotimes (i 7) + (inst fldz)) + + (if (and results + (location= (tn-ref-tn results) fr0-tn)) + ;; The return result is in fr0. + (inst fxch fr7-tn) ; move the result back to fr0 + (inst fldz)) ; insure no regs are empty + )))) + +(define-vop (alloc-number-stack-space) + (:info amount) + (:results (result :scs (sap-reg any-reg))) + (:generator 0 + (aver (location= result rsp-tn)) + (unless (zerop amount) + (let ((delta (logandc2 (+ amount 3) 3))) + (inst sub rsp-tn delta))) + (move result rsp-tn))) + +(define-vop (dealloc-number-stack-space) + (:info amount) + (:generator 0 + (unless (zerop amount) + (let ((delta (logandc2 (+ amount 3) 3))) + (inst add rsp-tn delta))))) + +(define-vop (alloc-alien-stack-space) + (:info amount) + #!+sb-thread (:temporary (:sc unsigned-reg) temp) + (:results (result :scs (sap-reg any-reg))) + #!+sb-thread + (:generator 0 + (aver (not (location= result rsp-tn))) + (unless (zerop amount) + (let ((delta (logandc2 (+ amount 3) 3))) + (inst mov temp + (make-ea :dword + :disp (+ nil-value + (static-symbol-offset '*alien-stack*) + (ash symbol-tls-index-slot word-shift) + (- other-pointer-lowtag)))) + (inst fs-segment-prefix) + (inst sub (make-ea :dword :scale 1 :index temp) delta))) + (load-tl-symbol-value result *alien-stack*)) + #!-sb-thread + (:generator 0 + (aver (not (location= result rsp-tn))) + (unless (zerop amount) + (let ((delta (logandc2 (+ amount 3) 3))) + (inst sub (make-ea :qword + :disp (+ nil-value + (static-symbol-offset '*alien-stack*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + delta))) + (load-symbol-value result *alien-stack*))) + +(define-vop (dealloc-alien-stack-space) + (:info amount) + #!+sb-thread (:temporary (:sc unsigned-reg) temp) + #!+sb-thread + (:generator 0 + (unless (zerop amount) + (let ((delta (logandc2 (+ amount 3) 3))) + (inst mov temp + (make-ea :dword + :disp (+ nil-value + (static-symbol-offset '*alien-stack*) + (ash symbol-tls-index-slot word-shift) + (- other-pointer-lowtag)))) + (inst fs-segment-prefix) + (inst add (make-ea :dword :scale 1 :index temp) delta)))) + #!-sb-thread + (:generator 0 + (unless (zerop amount) + (let ((delta (logandc2 (+ amount 3) 3))) + (inst add (make-ea :qword + :disp (+ nil-value + (static-symbol-offset '*alien-stack*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + delta))))) + +;;; these are not strictly part of the c-call convention, but are +;;; needed for the WITH-PRESERVED-POINTERS macro used for "locking +;;; down" lisp objects so that GC won't move them while foreign +;;; functions go to work. + +(define-vop (push-word-on-c-stack) + (:translate push-word-on-c-stack) + (:args (val :scs (sap-reg))) + (:policy :fast-safe) + (:arg-types system-area-pointer) + (:generator 2 + (inst push val))) + +(define-vop (pop-words-from-c-stack) + (:translate pop-words-from-c-stack) + (:args) + (:arg-types (:constant (unsigned-byte 60))) + (:info number) + (:policy :fast-safe) + (:generator 2 + (inst add rsp-tn (fixnumize number)))) + diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp new file mode 100644 index 0000000..f1ce595 --- /dev/null +++ b/src/compiler/x86-64/call.lisp @@ -0,0 +1,1358 @@ +;;;; function call for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; interfaces to IR2 conversion + +;;; Return a wired TN describing the N'th full call argument passing +;;; location. +(!def-vm-support-routine standard-arg-location (n) + (declare (type unsigned-byte n)) + (if (< n register-arg-count) + (make-wired-tn *backend-t-primitive-type* descriptor-reg-sc-number + (nth n *register-arg-offsets*)) + (make-wired-tn *backend-t-primitive-type* control-stack-sc-number n))) + +;;; Make a passing location TN for a local call return PC. +;;; +;;; Always wire the return PC location to the stack in its standard +;;; location. +(!def-vm-support-routine make-return-pc-passing-location (standard) + (declare (ignore standard)) + (make-wired-tn (primitive-type-or-lose 'system-area-pointer) + sap-stack-sc-number return-pc-save-offset)) + +;;; This is similar to MAKE-RETURN-PC-PASSING-LOCATION, but makes a +;;; location to pass OLD-FP in. +;;; +;;; This is wired in both the standard and the local-call conventions, +;;; because we want to be able to assume it's always there. Besides, +;;; the x86 doesn't have enough registers to really make it profitable +;;; to pass it in a register. +(!def-vm-support-routine make-old-fp-passing-location (standard) + (declare (ignore standard)) + (make-wired-tn *fixnum-primitive-type* control-stack-sc-number + ocfp-save-offset)) + +;;; Make the TNs used to hold OLD-FP and RETURN-PC within the current +;;; function. We treat these specially so that the debugger can find +;;; them at a known location. +;;; +;;; Without using a save-tn - which does not make much sense if it is +;;; wired to the stack? +(!def-vm-support-routine make-old-fp-save-location (physenv) + (physenv-debug-live-tn (make-wired-tn *fixnum-primitive-type* + control-stack-sc-number + ocfp-save-offset) + physenv)) +(!def-vm-support-routine make-return-pc-save-location (physenv) + (physenv-debug-live-tn + (make-wired-tn (primitive-type-or-lose 'system-area-pointer) + sap-stack-sc-number return-pc-save-offset) + physenv)) + +;;; Make a TN for the standard argument count passing location. We only +;;; need to make the standard location, since a count is never passed when we +;;; are using non-standard conventions. +(!def-vm-support-routine make-arg-count-location () + (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rcx-offset)) + +;;; Make a TN to hold the number-stack frame pointer. This is allocated +;;; once per component, and is component-live. +(!def-vm-support-routine make-nfp-tn () + (make-restricted-tn *fixnum-primitive-type* ignore-me-sc-number)) + +(!def-vm-support-routine make-stack-pointer-tn () + (make-normal-tn *fixnum-primitive-type*)) + +(!def-vm-support-routine make-number-stack-pointer-tn () + (make-restricted-tn *fixnum-primitive-type* ignore-me-sc-number)) + +;;; Return a list of TNs that can be used to represent an unknown-values +;;; continuation within a function. +(!def-vm-support-routine make-unknown-values-locations () + (list (make-stack-pointer-tn) + (make-normal-tn *fixnum-primitive-type*))) + +;;; This function is called by the ENTRY-ANALYZE phase, allowing +;;; VM-dependent initialization of the IR2-COMPONENT structure. We +;;; push placeholder entries in the CONSTANTS to leave room for +;;; additional noise in the code object header. +(!def-vm-support-routine select-component-format (component) + (declare (type component component)) + ;; The 1+ here is because for the x86 the first constant is a + ;; pointer to a list of fixups, or NIL if the code object has none. + ;; (If I understand correctly, the fixups are needed at GC copy + ;; time because the X86 code isn't relocatable.) + ;; + ;; KLUDGE: It'd be cleaner to have the fixups entry be a named + ;; element of the CODE (aka component) primitive object. However, + ;; it's currently a large, tricky, error-prone chore to change + ;; the layout of any primitive object, so for the foreseeable future + ;; we'll just live with this ugliness. -- WHN 2002-01-02 + (dotimes (i (1+ code-constants-offset)) + (vector-push-extend nil + (ir2-component-constants (component-info component)))) + (values)) + +;;;; frame hackery + +;;; This is used for setting up the Old-FP in local call. +(define-vop (current-fp) + (:results (val :scs (any-reg control-stack))) + (:generator 1 + (move val rbp-tn))) + +;;; We don't have a separate NFP, so we don't need to do anything here. +(define-vop (compute-old-nfp) + (:results (val)) + (:ignore val) + (:generator 1 + nil)) + +(define-vop (xep-allocate-frame) + (:info start-lab copy-more-arg-follows) + (:vop-var vop) + (:generator 1 + (align n-lowtag-bits) + (trace-table-entry trace-table-fun-prologue) + (emit-label start-lab) + ;; Skip space for the function header. + (inst simple-fun-header-word) + (dotimes (i (* n-word-bytes (1- simple-fun-code-offset))) + (inst byte 0)) + + ;; The start of the actual code. + ;; Save the return-pc. + (popw rbp-tn (- (1+ return-pc-save-offset))) + + ;; If copy-more-arg follows it will allocate the correct stack + ;; size. The stack is not allocated first here as this may expose + ;; args on the stack if they take up more space than the frame! + (unless copy-more-arg-follows + ;; The args fit within the frame so just allocate the frame. + (inst lea rsp-tn + (make-ea :qword :base rbp-tn + :disp (- (* n-word-bytes + (max 3 (sb-allocated-size 'stack))))))) + + (trace-table-entry trace-table-normal))) + +;;; This is emitted directly before either a known-call-local, call-local, +;;; or a multiple-call-local. All it does is allocate stack space for the +;;; callee (who has the same size stack as us). +(define-vop (allocate-frame) + (:results (res :scs (any-reg control-stack)) + (nfp)) + (:info callee) + (:ignore nfp callee) + (:generator 2 + (move res rsp-tn) + (inst sub rsp-tn (* n-word-bytes (sb-allocated-size 'stack))))) + +;;; Allocate a partial frame for passing stack arguments in a full +;;; call. NARGS is the number of arguments passed. We allocate at +;;; least 3 slots, because the XEP noise is going to want to use them +;;; before it can extend the stack. +(define-vop (allocate-full-call-frame) + (:info nargs) + (:results (res :scs (any-reg control-stack))) + (:generator 2 + (move res rsp-tn) + (inst sub rsp-tn (* (max nargs 3) n-word-bytes)))) + +;;; Emit code needed at the return-point from an unknown-values call +;;; for a fixed number of values. Values is the head of the TN-REF +;;; list for the locations that the values are to be received into. +;;; Nvals is the number of values that are to be received (should +;;; equal the length of Values). +;;; +;;; MOVE-TEMP is a DESCRIPTOR-REG TN used as a temporary. +;;; +;;; This code exploits the fact that in the unknown-values convention, +;;; a single value return returns at the return PC + 2, whereas a +;;; return of other than one value returns directly at the return PC. +;;; +;;; If 0 or 1 values are expected, then we just emit an instruction to +;;; reset the SP (which will only be executed when other than 1 value +;;; is returned.) +;;; +;;; In the general case we have to do three things: +;;; -- Default unsupplied register values. This need only be done +;;; when a single value is returned, since register values are +;;; defaulted by the called in the non-single case. +;;; -- Default unsupplied stack values. This needs to be done whenever +;;; there are stack values. +;;; -- Reset SP. This must be done whenever other than 1 value is +;;; returned, regardless of the number of values desired. +(defun default-unknown-values (vop values nvals) + (declare (type (or tn-ref null) values) + (type unsigned-byte nvals)) + (cond + ((<= nvals 1) + (note-this-location vop :single-value-return) + (inst mov rsp-tn rbx-tn)) + ((<= nvals register-arg-count) + (let ((regs-defaulted (gen-label))) + (note-this-location vop :unknown-return) + (inst jmp-short regs-defaulted) + ;; Default the unsupplied registers. + (let* ((2nd-tn-ref (tn-ref-across values)) + (2nd-tn (tn-ref-tn 2nd-tn-ref))) + (inst mov 2nd-tn nil-value) + (when (> nvals 2) + (loop + for tn-ref = (tn-ref-across 2nd-tn-ref) + then (tn-ref-across tn-ref) + for count from 2 below register-arg-count + do (inst mov (tn-ref-tn tn-ref) 2nd-tn)))) + (inst mov rbx-tn rsp-tn) + (emit-label regs-defaulted) + (inst mov rsp-tn rbx-tn))) + ((<= nvals 7) + ;; The number of bytes depends on the relative jump instructions. + ;; Best case is 31+(n-3)*14, worst case is 35+(n-3)*18. For + ;; NVALS=6 that is 73/89 bytes, and for NVALS=7 that is 87/107 + ;; bytes which is likely better than using the blt below. + (let ((regs-defaulted (gen-label)) + (defaulting-done (gen-label)) + (default-stack-slots (gen-label))) + (note-this-location vop :unknown-return) + ;; Branch off to the MV case. + (inst jmp-short regs-defaulted) + ;; Do the single value case. + ;; Default the register args + (inst mov rax-tn nil-value) + (do ((i 1 (1+ i)) + (val (tn-ref-across values) (tn-ref-across val))) + ((= i (min nvals register-arg-count))) + (inst mov (tn-ref-tn val) rax-tn)) + + ;; Fake other registers so it looks like we returned with all the + ;; registers filled in. + (move rbx-tn rsp-tn) + (inst push rdx-tn) + (inst jmp default-stack-slots) + + (emit-label regs-defaulted) + + (inst mov rax-tn nil-value) + (storew rdx-tn rbx-tn -1) + (collect ((defaults)) + (do ((i register-arg-count (1+ i)) + (val (do ((i 0 (1+ i)) + (val values (tn-ref-across val))) + ((= i register-arg-count) val)) + (tn-ref-across val))) + ((null val)) + (let ((default-lab (gen-label)) + (tn (tn-ref-tn val))) + (defaults (cons default-lab tn)) + + (inst cmp rcx-tn (fixnumize i)) + (inst jmp :be default-lab) + (loadw rdx-tn rbx-tn (- (1+ i))) + (inst mov tn rdx-tn))) + + (emit-label defaulting-done) + (loadw rdx-tn rbx-tn -1) + (move rsp-tn rbx-tn) + + (let ((defaults (defaults))) + (when defaults + (assemble (*elsewhere*) + (trace-table-entry trace-table-fun-prologue) + (emit-label default-stack-slots) + (dolist (default defaults) + (emit-label (car default)) + (inst mov (cdr default) rax-tn)) + (inst jmp defaulting-done) + (trace-table-entry trace-table-normal))))))) + (t + (let ((regs-defaulted (gen-label)) + (restore-edi (gen-label)) + (no-stack-args (gen-label)) + (default-stack-vals (gen-label)) + (count-okay (gen-label))) + (note-this-location vop :unknown-return) + ;; Branch off to the MV case. + (inst jmp-short regs-defaulted) + + ;; Default the register args, and set up the stack as if we + ;; entered the MV return point. + (inst mov rbx-tn rsp-tn) + (inst push rdx-tn) + (inst mov rdi-tn nil-value) + (inst push rdi-tn) + (inst mov rsi-tn rdi-tn) + ;; Compute a pointer to where to put the [defaulted] stack values. + (emit-label no-stack-args) + (inst lea rdi-tn + (make-ea :qword :base rbp-tn + :disp (* (- (1+ register-arg-count)) n-word-bytes))) + ;; Load RAX with NIL so we can quickly store it, and set up + ;; stuff for the loop. + (inst mov rax-tn nil-value) + (inst std) + (inst mov rcx-tn (- nvals register-arg-count)) + ;; Jump into the default loop. + (inst jmp default-stack-vals) + + ;; The regs are defaulted. We need to copy any stack arguments, + ;; and then default the remaining stack arguments. + (emit-label regs-defaulted) + ;; Save EDI. + (storew rdi-tn rbx-tn (- (1+ 1))) + ;; Compute the number of stack arguments, and if it's zero or + ;; less, don't copy any stack arguments. + (inst sub rcx-tn (fixnumize register-arg-count)) + (inst jmp :le no-stack-args) + + ;; Throw away any unwanted args. + (inst cmp rcx-tn (fixnumize (- nvals register-arg-count))) + (inst jmp :be count-okay) + (inst mov rcx-tn (fixnumize (- nvals register-arg-count))) + (emit-label count-okay) + ;; Save the number of stack values. + (inst mov rax-tn rcx-tn) + ;; Compute a pointer to where the stack args go. + (inst lea rdi-tn + (make-ea :qword :base rbp-tn + :disp (* (- (1+ register-arg-count)) n-word-bytes))) + ;; Save ESI, and compute a pointer to where the args come from. + (storew rsi-tn rbx-tn (- (1+ 2))) + (inst lea rsi-tn + (make-ea :qword :base rbx-tn + :disp (* (- (1+ register-arg-count)) n-word-bytes))) + ;; Do the copy. + (inst shr rcx-tn word-shift) ; make word count + (inst std) + (inst rep) + (inst movs :qword) + ;; Restore RSI. + (loadw rsi-tn rbx-tn (- (1+ 2))) + ;; Now we have to default the remaining args. Find out how many. + (inst sub rax-tn (fixnumize (- nvals register-arg-count))) + (inst neg rax-tn) + ;; If none, then just blow out of here. + (inst jmp :le restore-edi) + (inst mov rcx-tn rax-tn) + (inst shr rcx-tn word-shift) ; word count + ;; Load RAX with NIL for fast storing. + (inst mov rax-tn nil-value) + ;; Do the store. + (emit-label default-stack-vals) + (inst rep) + (inst stos rax-tn) + ;; Restore EDI, and reset the stack. + (emit-label restore-edi) + (loadw rdi-tn rbx-tn (- (1+ 1))) + (inst mov rsp-tn rbx-tn)))) + (values)) + +;;;; unknown values receiving + +;;; Emit code needed at the return point for an unknown-values call +;;; for an arbitrary number of values. +;;; +;;; We do the single and non-single cases with no shared code: there +;;; doesn't seem to be any potential overlap, and receiving a single +;;; value is more important efficiency-wise. +;;; +;;; When there is a single value, we just push it on the stack, +;;; returning the old SP and 1. +;;; +;;; When there is a variable number of values, we move all of the +;;; argument registers onto the stack, and return ARGS and NARGS. +;;; +;;; ARGS and NARGS are TNs wired to the named locations. We must +;;; explicitly allocate these TNs, since their lifetimes overlap with +;;; the results start and count. (Also, it's nice to be able to target +;;; them.) +(defun receive-unknown-values (args nargs start count) + (declare (type tn args nargs start count)) + (let ((variable-values (gen-label)) + (done (gen-label))) + (inst jmp-short variable-values) + + (cond ((location= start (first *register-arg-tns*)) + (inst push (first *register-arg-tns*)) + (inst lea start (make-ea :qword :base rsp-tn :disp 8))) + (t (inst mov start rsp-tn) + (inst push (first *register-arg-tns*)))) + (inst mov count (fixnumize 1)) + (inst jmp done) + + (emit-label variable-values) + ;; dtc: this writes the registers onto the stack even if they are + ;; not needed, only the number specified in rcx are used and have + ;; stack allocated to them. No harm is done. + (loop + for arg in *register-arg-tns* + for i downfrom -1 + do (storew arg args i)) + (move start args) + (move count nargs) + + (emit-label done)) + (values)) + +;;; VOP that can be inherited by unknown values receivers. The main thing this +;;; handles is allocation of the result temporaries. +(define-vop (unknown-values-receiver) + (:temporary (:sc descriptor-reg :offset rbx-offset + :from :eval :to (:result 0)) + values-start) + (:temporary (:sc any-reg :offset rcx-offset + :from :eval :to (:result 1)) + nvals) + (:results (start :scs (any-reg control-stack)) + (count :scs (any-reg control-stack)))) + +;;;; local call with unknown values convention return + +;;; Non-TR local call for a fixed number of values passed according to +;;; the unknown values convention. +;;; +;;; FP is the frame pointer in install before doing the call. +;;; +;;; NFP would be the number-stack frame pointer if we had a separate +;;; number stack. +;;; +;;; Args are the argument passing locations, which are specified only +;;; to terminate their lifetimes in the caller. +;;; +;;; VALUES are the return value locations (wired to the standard +;;; passing locations). NVALS is the number of values received. +;;; +;;; Save is the save info, which we can ignore since saving has been +;;; done. +;;; +;;; TARGET is a continuation pointing to the start of the called +;;; function. +(define-vop (call-local) + (:args (fp) + (nfp) + (args :more t)) + (:results (values :more t)) + (:save-p t) + (:move-args :local-call) + (:info arg-locs callee target nvals) + (:vop-var vop) + (:ignore nfp arg-locs args #+nil callee) + (:generator 5 + (trace-table-entry trace-table-call-site) + (move rbp-tn fp) + + (let ((ret-tn (callee-return-pc-tn callee))) + #+nil + (format t "*call-local ~S; tn-kind ~S; tn-save-tn ~S; its tn-kind ~S~%" + ret-tn (sb!c::tn-kind ret-tn) (sb!c::tn-save-tn ret-tn) + (sb!c::tn-kind (sb!c::tn-save-tn ret-tn))) + + ;; Is the return-pc on the stack or in a register? + (sc-case ret-tn + ((sap-stack) + #+nil (format t "*call-local: ret-tn on stack; offset=~S~%" + (tn-offset ret-tn)) + (storew (make-fixup nil :code-object return) + rbp-tn (- (1+ (tn-offset ret-tn))))) + ((sap-reg) + (inst lea ret-tn (make-fixup nil :code-object return))))) + + (note-this-location vop :call-site) + (inst jmp target) + RETURN + (default-unknown-values vop values nvals) + (trace-table-entry trace-table-normal))) + +;;; Non-TR local call for a variable number of return values passed according +;;; to the unknown values convention. The results are the start of the values +;;; glob and the number of values received. +(define-vop (multiple-call-local unknown-values-receiver) + (:args (fp) + (nfp) + (args :more t)) + (:save-p t) + (:move-args :local-call) + (:info save callee target) + (:ignore args save nfp #+nil callee) + (:vop-var vop) + (:generator 20 + (trace-table-entry trace-table-call-site) + (move rbp-tn fp) + + (let ((ret-tn (callee-return-pc-tn callee))) + #+nil + (format t "*multiple-call-local ~S; tn-kind ~S; tn-save-tn ~S; its tn-kind ~S~%" + ret-tn (sb!c::tn-kind ret-tn) (sb!c::tn-save-tn ret-tn) + (sb!c::tn-kind (sb!c::tn-save-tn ret-tn))) + + ;; Is the return-pc on the stack or in a register? + (sc-case ret-tn + ((sap-stack) + #+nil (format t "*multiple-call-local: ret-tn on stack; offset=~S~%" + (tn-offset ret-tn)) + ;; Stack + (storew (make-fixup nil :code-object return) + rbp-tn (- (1+ (tn-offset ret-tn))))) + ((sap-reg) + ;; Register + (inst lea ret-tn (make-fixup nil :code-object return))))) + + (note-this-location vop :call-site) + (inst jmp target) + RETURN + (note-this-location vop :unknown-return) + (receive-unknown-values values-start nvals start count) + (trace-table-entry trace-table-normal))) + +;;;; local call with known values return + +;;; Non-TR local call with known return locations. Known-value return +;;; works just like argument passing in local call. +;;; +;;; Note: we can't use normal load-tn allocation for the fixed args, +;;; since all registers may be tied up by the more operand. Instead, +;;; we use MAYBE-LOAD-STACK-TN. +(define-vop (known-call-local) + (:args (fp) + (nfp) + (args :more t)) + (:results (res :more t)) + (:move-args :local-call) + (:save-p t) + (:info save callee target) + (:ignore args res save nfp #+nil callee) + (:vop-var vop) + (:generator 5 + (trace-table-entry trace-table-call-site) + (move rbp-tn fp) + + (let ((ret-tn (callee-return-pc-tn callee))) + + #+nil + (format t "*known-call-local ~S; tn-kind ~S; tn-save-tn ~S; its tn-kind ~S~%" + ret-tn (sb!c::tn-kind ret-tn) (sb!c::tn-save-tn ret-tn) + (sb!c::tn-kind (sb!c::tn-save-tn ret-tn))) + + ;; Is the return-pc on the stack or in a register? + (sc-case ret-tn + ((sap-stack) + #+nil (format t "*known-call-local: ret-tn on stack; offset=~S~%" + (tn-offset ret-tn)) + ;; Stack + (storew (make-fixup nil :code-object return) + rbp-tn (- (1+ (tn-offset ret-tn))))) + ((sap-reg) + ;; Register + (inst lea ret-tn (make-fixup nil :code-object return))))) + + (note-this-location vop :call-site) + (inst jmp target) + RETURN + (note-this-location vop :known-return) + (trace-table-entry trace-table-normal))) + +;;; Return from known values call. We receive the return locations as +;;; arguments to terminate their lifetimes in the returning function. We +;;; restore FP and CSP and jump to the Return-PC. +;;; +;;; We can assume we know exactly where old-fp and return-pc are because +;;; make-old-fp-save-location and make-return-pc-save-location always +;;; return the same place. +#+nil +(define-vop (known-return) + (:args (old-fp) + (return-pc :scs (any-reg immediate-stack) :target rpc) + (vals :more t)) + (:move-args :known-return) + (:info val-locs) + (:temporary (:sc unsigned-reg :from (:argument 1)) rpc) + (:ignore val-locs vals) + (:vop-var vop) + (:generator 6 + (trace-table-entry trace-table-fun-epilogue) + ;; Save the return-pc in a register 'cause the frame-pointer is + ;; going away. Note this not in the usual stack location so we + ;; can't use RET + (move rpc return-pc) + ;; Restore the stack. + (move rsp-tn rbp-tn) + ;; Restore the old fp. We know OLD-FP is going to be in its stack + ;; save slot, which is a different frame that than this one, + ;; so we don't have to worry about having just cleared + ;; most of the stack. + (move rbp-tn old-fp) + (inst jmp rpc) + (trace-table-entry trace-table-normal))) + +;;; From Douglas Crosher +;;; Return from known values call. We receive the return locations as +;;; arguments to terminate their lifetimes in the returning function. We +;;; restore FP and CSP and jump to the Return-PC. +;;; +;;; The old-fp may be either in a register or on the stack in its +;;; standard save locations - slot 0. +;;; +;;; The return-pc may be in a register or on the stack in any slot. +(define-vop (known-return) + (:args (old-fp) + (return-pc) + (vals :more t)) + (:move-args :known-return) + (:info val-locs) + (:ignore val-locs vals) + (:vop-var vop) + (:generator 6 + (trace-table-entry trace-table-fun-epilogue) + ;; return-pc may be either in a register or on the stack. + (sc-case return-pc + ((sap-reg) + (sc-case old-fp + ((control-stack) + (cond ((zerop (tn-offset old-fp)) + ;; Zot all of the stack except for the old-fp. + (inst lea rsp-tn (make-ea :qword :base rbp-tn + :disp (- (* (1+ ocfp-save-offset) + n-word-bytes)))) + ;; Restore the old fp from its save location on the stack, + ;; and zot the stack. + (inst pop rbp-tn)) + + (t + (cerror "Continue anyway" + "VOP return-local doesn't work if old-fp (in slot ~ + ~S) is not in slot 0" + (tn-offset old-fp))))) + + ((any-reg descriptor-reg) + ;; Zot all the stack. + (move rsp-tn rbp-tn) + ;; Restore the old-fp. + (move rbp-tn old-fp))) + + ;; Return; return-pc is in a register. + (inst jmp return-pc)) + + ((sap-stack) + (inst lea rsp-tn + (make-ea :qword :base rbp-tn + :disp (- (* (1+ (tn-offset return-pc)) n-word-bytes)))) + (move rbp-tn old-fp) + (inst ret (* (tn-offset return-pc) n-word-bytes)))) + + (trace-table-entry trace-table-normal))) + +;;;; full call +;;; +;;; There is something of a cross-product effect with full calls. +;;; Different versions are used depending on whether we know the +;;; number of arguments or the name of the called function, and +;;; whether we want fixed values, unknown values, or a tail call. +;;; +;;; In full call, the arguments are passed creating a partial frame on +;;; the stack top and storing stack arguments into that frame. On +;;; entry to the callee, this partial frame is pointed to by FP. + +;;; This macro helps in the definition of full call VOPs by avoiding +;;; code replication in defining the cross-product VOPs. +;;; +;;; NAME is the name of the VOP to define. +;;; +;;; NAMED is true if the first argument is an fdefinition object whose +;;; definition is to be called. +;;; +;;; RETURN is either :FIXED, :UNKNOWN or :TAIL: +;;; -- If :FIXED, then the call is for a fixed number of values, returned in +;;; the standard passing locations (passed as result operands). +;;; -- If :UNKNOWN, then the result values are pushed on the stack, and the +;;; result values are specified by the Start and Count as in the +;;; unknown-values continuation representation. +;;; -- If :TAIL, then do a tail-recursive call. No values are returned. +;;; The Old-Fp and Return-PC are passed as the second and third arguments. +;;; +;;; In non-tail calls, the pointer to the stack arguments is passed as +;;; the last fixed argument. If Variable is false, then the passing +;;; locations are passed as a more arg. Variable is true if there are +;;; a variable number of arguments passed on the stack. Variable +;;; cannot be specified with :TAIL return. TR variable argument call +;;; is implemented separately. +;;; +;;; In tail call with fixed arguments, the passing locations are +;;; passed as a more arg, but there is no new-FP, since the arguments +;;; have been set up in the current frame. +(macrolet ((define-full-call (name named return variable) + (aver (not (and variable (eq return :tail)))) + `(define-vop (,name + ,@(when (eq return :unknown) + '(unknown-values-receiver))) + (:args + ,@(unless (eq return :tail) + '((new-fp :scs (any-reg) :to (:argument 1)))) + + (fun :scs (descriptor-reg control-stack) + :target rax :to (:argument 0)) + + ,@(when (eq return :tail) + '((old-fp) + (return-pc))) + + ,@(unless variable '((args :more t :scs (descriptor-reg))))) + + ,@(when (eq return :fixed) + '((:results (values :more t)))) + + (:save-p ,(if (eq return :tail) :compute-only t)) + + ,@(unless (or (eq return :tail) variable) + '((:move-args :full-call))) + + (:vop-var vop) + (:info + ,@(unless (or variable (eq return :tail)) '(arg-locs)) + ,@(unless variable '(nargs)) + ,@(when (eq return :fixed) '(nvals))) + + (:ignore + ,@(unless (or variable (eq return :tail)) '(arg-locs)) + ,@(unless variable '(args))) + + ;; We pass either the fdefn object (for named call) or + ;; the actual function object (for unnamed call) in + ;; RAX. With named call, closure-tramp will replace it + ;; with the real function and invoke the real function + ;; for closures. Non-closures do not need this value, + ;; so don't care what shows up in it. + (:temporary + (:sc descriptor-reg + :offset rax-offset + :from (:argument 0) + :to :eval) + rax) + + ;; We pass the number of arguments in RCX. + (:temporary (:sc unsigned-reg :offset rcx-offset :to :eval) rcx) + + ;; With variable call, we have to load the + ;; register-args out of the (new) stack frame before + ;; doing the call. Therefore, we have to tell the + ;; lifetime stuff that we need to use them. + ,@(when variable + (mapcar (lambda (name offset) + `(:temporary (:sc descriptor-reg + :offset ,offset + :from (:argument 0) + :to :eval) + ,name)) + *register-arg-names* *register-arg-offsets*)) + + ,@(when (eq return :tail) + '((:temporary (:sc unsigned-reg + :from (:argument 1) + :to (:argument 2)) + old-fp-tmp))) + + (:generator ,(+ (if named 5 0) + (if variable 19 1) + (if (eq return :tail) 0 10) + 15 + (if (eq return :unknown) 25 0)) + (trace-table-entry trace-table-call-site) + + ;; This has to be done before the frame pointer is + ;; changed! RAX stores the 'lexical environment' needed + ;; for closures. + (move rax fun) + + + ,@(if variable + ;; For variable call, compute the number of + ;; arguments and move some of the arguments to + ;; registers. + (collect ((noise)) + ;; Compute the number of arguments. + (noise '(inst mov rcx new-fp)) + (noise '(inst sub rcx rsp-tn)) + ;; Move the necessary args to registers, + ;; this moves them all even if they are + ;; not all needed. + (loop + for name in *register-arg-names* + for index downfrom -1 + do (noise `(loadw ,name new-fp ,index))) + (noise)) + '((if (zerop nargs) + (inst xor rcx rcx) + (inst mov rcx (fixnumize nargs))))) + ,@(cond ((eq return :tail) + '(;; Python has figured out what frame we should + ;; return to so might as well use that clue. + ;; This seems really important to the + ;; implementation of things like + ;; (without-interrupts ...) + ;; + ;; dtc; Could be doing a tail call from a + ;; known-local-call etc in which the old-fp + ;; or ret-pc are in regs or in non-standard + ;; places. If the passing location were + ;; wired to the stack in standard locations + ;; then these moves will be un-necessary; + ;; this is probably best for the x86. + (sc-case old-fp + ((control-stack) + (unless (= ocfp-save-offset + (tn-offset old-fp)) + ;; FIXME: FORMAT T for stale + ;; diagnostic output (several of + ;; them around here), ick + (format t "** tail-call old-fp not S0~%") + (move old-fp-tmp old-fp) + (storew old-fp-tmp + rbp-tn + (- (1+ ocfp-save-offset))))) + ((any-reg descriptor-reg) + (format t "** tail-call old-fp in reg not S0~%") + (storew old-fp + rbp-tn + (- (1+ ocfp-save-offset))))) + + ;; For tail call, we have to push the + ;; return-pc so that it looks like we CALLed + ;; drspite the fact that we are going to JMP. + (inst push return-pc) + )) + (t + ;; For non-tail call, we have to save our + ;; frame pointer and install the new frame + ;; pointer. We can't load stack tns after this + ;; point. + `(;; Python doesn't seem to allocate a frame + ;; here which doesn't leave room for the + ;; ofp/ret stuff. + + ;; The variable args are on the stack and + ;; become the frame, but there may be <3 + ;; args and 3 stack slots are assumed + ;; allocate on the call. So need to ensure + ;; there are at least 3 slots. This hack + ;; just adds 3 more. + ,(if variable + '(inst sub rsp-tn (fixnumize 3))) + + ;; Save the fp + (storew rbp-tn new-fp (- (1+ ocfp-save-offset))) + + (move rbp-tn new-fp) ; NB - now on new stack frame. + ))) + + (note-this-location vop :call-site) + + (inst ,(if (eq return :tail) 'jmp 'call) + (make-ea :qword :base rax + :disp ,(if named + '(- (* fdefn-raw-addr-slot + n-word-bytes) + other-pointer-lowtag) + '(- (* closure-fun-slot n-word-bytes) + fun-pointer-lowtag)))) + ,@(ecase return + (:fixed + '((default-unknown-values vop values nvals))) + (:unknown + '((note-this-location vop :unknown-return) + (receive-unknown-values values-start nvals start count))) + (:tail)) + (trace-table-entry trace-table-normal))))) + + (define-full-call call nil :fixed nil) + (define-full-call call-named t :fixed nil) + (define-full-call multiple-call nil :unknown nil) + (define-full-call multiple-call-named t :unknown nil) + (define-full-call tail-call nil :tail nil) + (define-full-call tail-call-named t :tail nil) + + (define-full-call call-variable nil :fixed t) + (define-full-call multiple-call-variable nil :unknown t)) + +;;; This is defined separately, since it needs special code that BLT's +;;; the arguments down. All the real work is done in the assembly +;;; routine. We just set things up so that it can find what it needs. +(define-vop (tail-call-variable) + (:args (args :scs (any-reg control-stack) :target rsi) + (function :scs (descriptor-reg control-stack) :target rax) + (old-fp) + (ret-addr)) + (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 0)) rsi) + (:temporary (:sc unsigned-reg :offset rax-offset :from (:argument 1)) rax) +; (:ignore ret-addr old-fp) + (:generator 75 + ;; Move these into the passing locations if they are not already there. + (move rsi args) + (move rax function) + + ;; The following assumes that the return-pc and old-fp are on the + ;; stack in their standard save locations - Check this. + (unless (and (sc-is old-fp control-stack) + (= (tn-offset old-fp) ocfp-save-offset)) + (error "tail-call-variable: ocfp not on stack in standard save location?")) + (unless (and (sc-is ret-addr sap-stack) + (= (tn-offset ret-addr) return-pc-save-offset)) + (error "tail-call-variable: ret-addr not on stack in standard save location?")) + + + ;; And jump to the assembly routine. + (inst jmp (make-fixup 'tail-call-variable :assembly-routine)))) + +;;;; unknown values return + +;;; Return a single-value using the Unknown-Values convention. Specifically, +;;; we jump to clear the stack and jump to return-pc+3. +;;; +;;; We require old-fp to be in a register, because we want to reset RSP before +;;; restoring RBP. If old-fp were still on the stack, it could get clobbered +;;; by a signal. +;;; +;;; pfw--get wired-tn conflicts sometimes if register sc specd for args +;;; having problems targeting args to regs -- using temps instead. +(define-vop (return-single) + (:args (old-fp) + (return-pc) + (value)) + (:temporary (:sc unsigned-reg) ofp) + (:temporary (:sc unsigned-reg) ret) + (:ignore value) + (:generator 6 + (trace-table-entry trace-table-fun-epilogue) + (move ret return-pc) + ;; Clear the control stack + (move ofp old-fp) + ;; Adjust the return address for the single value return. + (inst add ret 3) + ;; Restore the frame pointer. + (move rsp-tn rbp-tn) + (move rbp-tn ofp) + ;; Out of here. + (inst jmp ret))) + +;;; Do unknown-values return of a fixed (other than 1) number of +;;; values. The VALUES are required to be set up in the standard +;;; passing locations. NVALS is the number of values returned. +;;; +;;; Basically, we just load RCX with the number of values returned and +;;; RBX with a pointer to the values, set RSP to point to the end of +;;; the values, and jump directly to return-pc. +(define-vop (return) + (:args (old-fp) + (return-pc :to (:eval 1)) + (values :more t)) + (:ignore values) + (:info nvals) + + ;; In the case of other than one value, we need these registers to + ;; tell the caller where they are and how many there are. + (:temporary (:sc unsigned-reg :offset rbx-offset) rbx) + (:temporary (:sc unsigned-reg :offset rcx-offset) rcx) + + ;; We need to stretch the lifetime of return-pc past the argument + ;; registers so that we can default the argument registers without + ;; trashing return-pc. + (:temporary (:sc unsigned-reg :offset (first *register-arg-offsets*) + :from :eval) a0) + (:temporary (:sc unsigned-reg :offset (second *register-arg-offsets*) + :from :eval) a1) + (:temporary (:sc unsigned-reg :offset (third *register-arg-offsets*) + :from :eval) a2) + + (:generator 6 + (trace-table-entry trace-table-fun-epilogue) + ;; Establish the values pointer and values count. + (move rbx rbp-tn) + (if (zerop nvals) + (inst xor rcx rcx) ; smaller + (inst mov rcx (fixnumize nvals))) + ;; Restore the frame pointer. + (move rbp-tn old-fp) + ;; Clear as much of the stack as possible, but not past the return + ;; address. + (inst lea rsp-tn (make-ea :qword :base rbx + :disp (- (* (max nvals 2) n-word-bytes)))) + ;; Pre-default any argument register that need it. + (when (< nvals register-arg-count) + (let* ((arg-tns (nthcdr nvals (list a0 a1 a2))) + (first (first arg-tns))) + (inst mov first nil-value) + (dolist (tn (cdr arg-tns)) + (inst mov tn first)))) + ;; And away we go. Except that return-pc is still on the + ;; stack and we've changed the stack pointer. So we have to + ;; tell it to index off of RBX instead of RBP. + (cond ((zerop nvals) + ;; Return popping the return address and the OCFP. + (inst ret n-word-bytes)) + ((= nvals 1) + ;; Return popping the return, leaving 1 slot. Can this + ;; happen, or is a single value return handled elsewhere? + (inst ret)) + (t + (inst jmp (make-ea :qword :base rbx + :disp (- (* (1+ (tn-offset return-pc)) + n-word-bytes)))))) + + (trace-table-entry trace-table-normal))) + +;;; Do unknown-values return of an arbitrary number of values (passed +;;; on the stack.) We check for the common case of a single return +;;; value, and do that inline using the normal single value return +;;; convention. Otherwise, we branch off to code that calls an +;;; assembly-routine. +;;; +;;; The assembly routine takes the following args: +;;; RAX -- the return-pc to finally jump to. +;;; RBX -- pointer to where to put the values. +;;; RCX -- number of values to find there. +;;; RSI -- pointer to where to find the values. +(define-vop (return-multiple) + (:args (old-fp :to (:eval 1) :target old-fp-temp) + (return-pc :target rax) + (vals :scs (any-reg) :target rsi) + (nvals :scs (any-reg) :target rcx)) + + (:temporary (:sc unsigned-reg :offset rax-offset :from (:argument 1)) rax) + (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 2)) rsi) + (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 3)) rcx) + (:temporary (:sc unsigned-reg :offset rbx-offset :from (:eval 0)) rbx) + (:temporary (:sc descriptor-reg :offset (first *register-arg-offsets*) + :from (:eval 0)) a0) + (:temporary (:sc unsigned-reg :from (:eval 1)) old-fp-temp) + (:node-var node) + + (:generator 13 + (trace-table-entry trace-table-fun-epilogue) + ;; Load the return-pc. + (move rax return-pc) + (unless (policy node (> space speed)) + ;; Check for the single case. + (let ((not-single (gen-label))) + (inst cmp nvals (fixnumize 1)) + (inst jmp :ne not-single) + + ;; Return with one value. + (loadw a0 vals -1) + ;; Clear the stack. We load old-fp into a register before clearing + ;; the stack. + (move old-fp-temp old-fp) + (move rsp-tn rbp-tn) + (move rbp-tn old-fp-temp) + ;; Fix the return-pc to point at the single-value entry point. + (inst add rax 3) ; skip "mov %rbx,%rsp" insn in caller + ;; Out of here. + (inst jmp rax) + + ;; Nope, not the single case. Jump to the assembly routine. + (emit-label not-single))) + (move rsi vals) + (move rcx nvals) + (move rbx rbp-tn) + (move rbp-tn old-fp) + (inst jmp (make-fixup 'return-multiple :assembly-routine)) + (trace-table-entry trace-table-normal))) + +;;;; XEP hackery + +;;; We don't need to do anything special for regular functions. +(define-vop (setup-environment) + (:info label) + (:ignore label) + (:generator 0 + ;; Don't bother doing anything. + nil)) + +;;; Get the lexical environment from its passing location. +(define-vop (setup-closure-environment) + (:results (closure :scs (descriptor-reg))) + (:info label) + (:ignore label) + (:generator 6 + ;; Get result. + (move closure rax-tn))) + +;;; Copy a &MORE arg from the argument area to the end of the current +;;; frame. FIXED is the number of non-&MORE arguments. +;;; +;;; The tricky part is doing this without trashing any of the calling +;;; convention registers that are still needed. This vop is emitted +;;; directly after the xep-allocate frame. That means the registers +;;; are in use as follows: +;;; +;;; RAX -- The lexenv. +;;; RBX -- Available. +;;; RCX -- The total number of arguments. +;;; RDX -- The first arg. +;;; RDI -- The second arg. +;;; RSI -- The third arg. +;;; +;;; So basically, we have one register available for our use: RBX. +;;; +;;; What we can do is push the other regs onto the stack, and then +;;; restore their values by looking directly below where we put the +;;; more-args. +(define-vop (copy-more-arg) + (:info fixed) + (:generator 20 + ;; Avoid the copy if there are no more args. + (cond ((zerop fixed) + (inst jecxz just-alloc-frame)) + (t + (inst cmp rcx-tn (fixnumize fixed)) + (inst jmp :be just-alloc-frame))) + + ;; Allocate the space on the stack. + ;; stack = rbp - (max 3 frame-size) - (nargs - fixed) + (inst lea rbx-tn + (make-ea :qword :base rbp-tn + :disp (- (fixnumize fixed) + (* n-word-bytes + (max 3 (sb-allocated-size 'stack)))))) + (inst sub rbx-tn rcx-tn) ; Got the new stack in rbx + (inst mov rsp-tn rbx-tn) + + ;; Now: nargs>=1 && nargs>fixed + + ;; Save the original count of args. + (inst mov rbx-tn rcx-tn) + + (cond ((< fixed register-arg-count) + ;; We must stop when we run out of stack args, not when we + ;; run out of more args. + ;; Number to copy = nargs-3 + (inst sub rcx-tn (fixnumize register-arg-count)) + ;; Everything of interest in registers. + (inst jmp :be do-regs)) + (t + ;; Number to copy = nargs-fixed + (inst sub rcx-tn (fixnumize fixed)))) + + ;; Save rdi and rsi register args. + (inst push rdi-tn) + (inst push rsi-tn) + ;; Okay, we have pushed the register args. We can trash them + ;; now. + + ;; Initialize dst to be end of stack; skiping the values pushed + ;; above. + (inst lea rdi-tn (make-ea :qword :base rsp-tn :disp 16)) + + ;; Initialize src to be end of args. + (inst mov rsi-tn rbp-tn) + (inst sub rsi-tn rbx-tn) + + (inst shr rcx-tn word-shift) ; make word count + ;; And copy the args. + (inst cld) ; auto-inc RSI and RDI. + (inst rep) + (inst movs :qword) + + ;; So now we need to restore RDI and RSI. + (inst pop rsi-tn) + (inst pop rdi-tn) + + DO-REGS + + ;; Restore RCX + (inst mov rcx-tn rbx-tn) + + ;; Here: nargs>=1 && nargs>fixed + (when (< fixed register-arg-count) + ;; Now we have to deposit any more args that showed up in + ;; registers. + (do ((i fixed)) + ( nil ) + ;; Store it relative to rbp + (inst mov (make-ea :qword :base rbp-tn + :disp (- (* n-word-bytes + (+ 1 (- i fixed) + (max 3 (sb-allocated-size 'stack)))))) + (nth i *register-arg-tns*)) + + (incf i) + (when (>= i register-arg-count) + (return)) + + ;; Don't deposit any more than there are. + (if (zerop i) + (inst test rcx-tn rcx-tn) + (inst cmp rcx-tn (fixnumize i))) + (inst jmp :eq done))) + + (inst jmp done) + + JUST-ALLOC-FRAME + (inst lea rsp-tn + (make-ea :qword :base rbp-tn + :disp (- (* n-word-bytes + (max 3 (sb-allocated-size 'stack)))))) + + DONE)) + +;;; &MORE args are stored contiguously on the stack, starting +;;; immediately at the context pointer. The context pointer is not +;;; typed, so the lowtag is 0. +(define-vop (more-arg) + (:translate %more-arg) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to :result) + (index :scs (any-reg) :target temp)) + (:arg-types * tagged-num) + (:temporary (:sc unsigned-reg :from (:argument 1) :to :result) temp) + (:results (value :scs (any-reg descriptor-reg))) + (:result-types *) + (:generator 5 + (move temp index) + (inst neg temp) + (inst mov value (make-ea :qword :base object :index temp)))) + +(define-vop (more-arg-c) + (:translate %more-arg) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types * (:constant (signed-byte 30))) + (:results (value :scs (any-reg descriptor-reg))) + (:result-types *) + (:generator 4 + (inst mov value + (make-ea :qword :base object :disp (- (* index n-word-bytes)))))) + + +;;; Turn more arg (context, count) into a list. +(define-vop (listify-rest-args) + (:translate %listify-rest-args) + (:policy :safe) + (:args (context :scs (descriptor-reg) :target src) + (count :scs (any-reg) :target rcx)) + (:arg-types * tagged-num) + (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 0)) src) + (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 1)) rcx) + (:temporary (:sc unsigned-reg :offset rax-offset) rax) + (:temporary (:sc unsigned-reg) dst) + (:results (result :scs (descriptor-reg))) + (:node-var node) + (:generator 20 + (let ((enter (gen-label)) + (loop (gen-label)) + (done (gen-label))) + (move src context) + (move rcx count) + ;; Check to see whether there are no args, and just return NIL if so. + (inst mov result nil-value) + (inst jecxz done) + (inst lea dst (make-ea :qword :index rcx :scale 2)) + (pseudo-atomic + (allocation dst dst node) + (inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag)) + ;; Convert the count into a raw value, so that we can use the + ;; LOOP instruction. + (inst shr rcx (1- n-word-bytes)) + ;; Set decrement mode (successive args at lower addresses) + (inst std) + ;; Set up the result. + (move result dst) + ;; Jump into the middle of the loop, 'cause that's where we want + ;; to start. + (inst jmp enter) + (emit-label loop) + ;; Compute a pointer to the next cons. + (inst add dst (* cons-size n-word-bytes)) + ;; Store a pointer to this cons in the CDR of the previous cons. + (storew dst dst -1 list-pointer-lowtag) + (emit-label enter) + ;; Grab one value and stash it in the car of this cons. + (inst lods rax) + (storew rax dst 0 list-pointer-lowtag) + ;; Go back for more. + (inst loop loop) + ;; NIL out the last cons. + (storew nil-value dst 1 list-pointer-lowtag)) + (emit-label done)))) + +;;; Return the location and size of the &MORE arg glob created by +;;; COPY-MORE-ARG. SUPPLIED is the total number of arguments supplied +;;; (originally passed in RCX). FIXED is the number of non-rest +;;; arguments. +;;; +;;; We must duplicate some of the work done by COPY-MORE-ARG, since at +;;; that time the environment is in a pretty brain-damaged state, +;;; preventing this info from being returned as values. What we do is +;;; compute supplied - fixed, and return a pointer that many words +;;; below the current stack top. +(define-vop (more-arg-context) + (:policy :fast-safe) + (:translate sb!c::%more-arg-context) + (:args (supplied :scs (any-reg) :target count)) + (:arg-types positive-fixnum (:constant fixnum)) + (:info fixed) + (:results (context :scs (descriptor-reg)) + (count :scs (any-reg))) + (:result-types t tagged-num) + (:note "more-arg-context") + (:generator 5 + (move count supplied) + ;; SP at this point points at the last arg pushed. + ;; Point to the first more-arg, not above it. + (inst lea context (make-ea :qword :base rsp-tn + :index count :scale 1 + :disp (- (+ (fixnumize fixed) n-word-bytes)))) + (unless (zerop fixed) + (inst sub count (fixnumize fixed))))) + +;;; Signal wrong argument count error if NARGS isn't equal to COUNT. +(define-vop (verify-arg-count) + (:policy :fast-safe) + (:translate sb!c::%verify-arg-count) + (:args (nargs :scs (any-reg))) + (:arg-types positive-fixnum (:constant t)) + (:info count) + (:vop-var vop) + (:save-p :compute-only) + (:generator 3 + (let ((err-lab + (generate-error-code vop invalid-arg-count-error nargs))) + (if (zerop count) + (inst test nargs nargs) ; smaller instruction + (inst cmp nargs (fixnumize count))) + (inst jmp :ne err-lab)))) + +;;; Various other error signallers. +(macrolet ((def (name error translate &rest args) + `(define-vop (,name) + ,@(when translate + `((:policy :fast-safe) + (:translate ,translate))) + (:args ,@(mapcar (lambda (arg) + `(,arg :scs (any-reg descriptor-reg))) + args)) + (:vop-var vop) + (:save-p :compute-only) + (:generator 1000 + (error-call vop ,error ,@args))))) + (def arg-count-error invalid-arg-count-error + sb!c::%arg-count-error nargs) + (def type-check-error object-not-type-error sb!c::%type-check-error + object type) + (def layout-invalid-error layout-invalid-error sb!c::%layout-invalid-error + object layout) + (def odd-key-args-error odd-key-args-error + sb!c::%odd-key-args-error) + (def unknown-key-arg-error unknown-key-arg-error + sb!c::%unknown-key-arg-error key) + (def nil-fun-returned-error nil-fun-returned-error nil fun)) diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp new file mode 100644 index 0000000..a6290c8 --- /dev/null +++ b/src/compiler/x86-64/cell.lisp @@ -0,0 +1,491 @@ +;;;; various primitive memory access VOPs for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; data object ref/set stuff + +(define-vop (slot) + (:args (object :scs (descriptor-reg))) + (:info name offset lowtag) + (:ignore name) + (:results (result :scs (descriptor-reg any-reg))) + (:generator 1 + (loadw result object offset lowtag))) + +(define-vop (set-slot) + (:args (object :scs (descriptor-reg)) + (value :scs (descriptor-reg any-reg immediate))) + (:info name offset lowtag) + (:ignore name) + (:results) + (:generator 1 + (if (sc-is value immediate) + (let ((val (tn-value value))) + (etypecase val + (integer + (inst mov + (make-ea :dword :base object + :disp (- (* offset n-word-bytes) lowtag)) + (fixnumize val))) + (symbol + (inst mov + (make-ea :dword :base object + :disp (- (* offset n-word-bytes) lowtag)) + (+ nil-value (static-symbol-offset val)))) + (character + (inst mov + (make-ea :dword :base object + :disp (- (* offset n-word-bytes) lowtag)) + (logior (ash (char-code val) n-widetag-bits) + base-char-widetag))))) + ;; Else, value not immediate. + (storew value object offset lowtag)))) + + + +;;;; symbol hacking VOPs + +;;; these next two cf the sparc version, by jrd. +;;; FIXME: Deref this ^ reference. + + +;;; The compiler likes to be able to directly SET symbols. +#!+sb-thread +(define-vop (set) + (:args (symbol :scs (descriptor-reg)) + (value :scs (descriptor-reg any-reg))) + (:temporary (:sc descriptor-reg) tls) + ;;(:policy :fast-safe) + (:generator 4 + (let ((global-val (gen-label)) + (done (gen-label))) + (loadw tls symbol symbol-tls-index-slot other-pointer-lowtag) + (inst or tls tls) + (inst jmp :z global-val) + (inst fs-segment-prefix) + (inst cmp (make-ea :dword :scale 1 :index tls) unbound-marker-widetag) + (inst jmp :z global-val) + (inst fs-segment-prefix) + (inst mov (make-ea :dword :scale 1 :index tls) value) + (inst jmp done) + (emit-label global-val) + (storew value symbol symbol-value-slot other-pointer-lowtag) + (emit-label done)))) + +;; unithreaded it's a lot simpler ... +#!-sb-thread +(define-vop (set cell-set) + (:variant symbol-value-slot other-pointer-lowtag)) + +;;; Do a cell ref with an error check for being unbound. +;;; XXX stil used? I can't see where -dan +(define-vop (checked-cell-ref) + (:args (object :scs (descriptor-reg) :target obj-temp)) + (:results (value :scs (descriptor-reg any-reg))) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:temporary (:sc descriptor-reg :from (:argument 0)) obj-temp)) + +;;; With Symbol-Value, we check that the value isn't the trap object. So +;;; Symbol-Value of NIL is NIL. +#!+sb-thread +(define-vop (symbol-value) + (:translate symbol-value) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:result 1))) + (:results (value :scs (descriptor-reg any-reg))) + (:vop-var vop) + (:save-p :compute-only) + (:generator 9 + (let* ((err-lab (generate-error-code vop unbound-symbol-error object)) + (ret-lab (gen-label))) + (loadw value object symbol-tls-index-slot other-pointer-lowtag) + (inst fs-segment-prefix) + (inst mov value (make-ea :dword :index value :scale 1)) + (inst cmp value unbound-marker-widetag) + (inst jmp :ne ret-lab) + (loadw value object symbol-value-slot other-pointer-lowtag) + (inst cmp value unbound-marker-widetag) + (inst jmp :e err-lab) + (emit-label ret-lab)))) + +#!+sb-thread +(define-vop (fast-symbol-value symbol-value) + ;; KLUDGE: not really fast, in fact, because we're going to have to + ;; do a full lookup of the thread-local area anyway. But half of + ;; the meaning of FAST-SYMBOL-VALUE is "do not signal an error if + ;; unbound", which is used in the implementation of COPY-SYMBOL. -- + ;; CSR, 2003-04-22 + (:policy :fast) + (:translate symbol-value) + (:generator 8 + (let ((ret-lab (gen-label))) + (loadw value object symbol-tls-index-slot other-pointer-lowtag) + (inst fs-segment-prefix) + (inst mov value (make-ea :dword :index value :scale 1)) + (inst cmp value unbound-marker-widetag) + (inst jmp :ne ret-lab) + (loadw value object symbol-value-slot other-pointer-lowtag) + (emit-label ret-lab)))) + +#!-sb-thread +(define-vop (symbol-value) + (:translate symbol-value) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:result 1))) + (:results (value :scs (descriptor-reg any-reg))) + (:vop-var vop) + (:save-p :compute-only) + (:generator 9 + (let ((err-lab (generate-error-code vop unbound-symbol-error object))) + (loadw value object symbol-value-slot other-pointer-lowtag) + (inst cmp value unbound-marker-widetag) + (inst jmp :e err-lab)))) + +#!-sb-thread +(define-vop (fast-symbol-value cell-ref) + (:variant symbol-value-slot other-pointer-lowtag) + (:policy :fast) + (:translate symbol-value)) + +(defknown locked-symbol-global-value-add (symbol fixnum) fixnum ()) + +(define-vop (locked-symbol-global-value-add) + (:args (object :scs (descriptor-reg) :to :result) + (value :scs (any-reg) :target result)) + (:arg-types * tagged-num) + (:results (result :scs (any-reg) :from (:argument 1))) + (:policy :fast) + (:translate locked-symbol-global-value-add) + (:result-types tagged-num) + (:policy :fast-safe) + (:generator 4 + (move result value) + (inst lock) + (inst add (make-ea :dword :base object + :disp (- (* symbol-value-slot n-word-bytes) + other-pointer-lowtag)) + value))) + +#!+sb-thread +(define-vop (boundp) + (:translate boundp) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:conditional) + (:info target not-p) + (:temporary (:sc descriptor-reg #+nil(:from (:argument 0))) value) + (:generator 9 + (if not-p + (let ((not-target (gen-label))) + (loadw value object symbol-value-slot other-pointer-lowtag) + (inst cmp value unbound-marker-widetag) + (inst jmp :ne not-target) + (loadw value object symbol-tls-index-slot other-pointer-lowtag) + (inst fs-segment-prefix) + (inst cmp (make-ea :dword :index value :scale 1) unbound-marker-widetag) + (inst jmp :e target) + (emit-label not-target)) + (progn + (loadw value object symbol-value-slot other-pointer-lowtag) + (inst cmp value unbound-marker-widetag) + (inst jmp :ne target) + (loadw value object symbol-tls-index-slot other-pointer-lowtag) + (inst fs-segment-prefix) + (inst cmp (make-ea :dword :index value :scale 1) unbound-marker-widetag) + (inst jmp :ne target))))) + +#!-sb-thread +(define-vop (boundp) + (:translate boundp) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:conditional) + (:info target not-p) + (:temporary (:sc descriptor-reg :from (:argument 0)) value) + (:generator 9 + (loadw value object symbol-value-slot other-pointer-lowtag) + (inst cmp value unbound-marker-widetag) + (inst jmp (if not-p :e :ne) target))) + + +(define-vop (symbol-hash) + (:policy :fast-safe) + (:translate symbol-hash) + (:args (symbol :scs (descriptor-reg))) + (:results (res :scs (any-reg))) + (:result-types positive-fixnum) + (:generator 2 + ;; The symbol-hash slot of NIL holds NIL because it is also the + ;; cdr slot, so we have to strip off the three low bits to make sure + ;; it is a fixnum. The lowtag selection magic that is required to + ;; ensure this is explained in the comment in objdef.lisp + (loadw res symbol symbol-hash-slot other-pointer-lowtag) + (inst and res (lognot #b111)))) + +;;;; fdefinition (FDEFN) objects + +(define-vop (fdefn-fun cell-ref) ; /pfw - alpha + (:variant fdefn-fun-slot other-pointer-lowtag)) + +(define-vop (safe-fdefn-fun) + (:args (object :scs (descriptor-reg) :to (:result 1))) + (:results (value :scs (descriptor-reg any-reg))) + (:vop-var vop) + (:save-p :compute-only) + (:generator 10 + (loadw value object fdefn-fun-slot other-pointer-lowtag) + (inst cmp value nil-value) + (let ((err-lab (generate-error-code vop undefined-fun-error object))) + (inst jmp :e err-lab)))) + +(define-vop (set-fdefn-fun) + (:policy :fast-safe) + (:translate (setf fdefn-fun)) + (:args (function :scs (descriptor-reg) :target result) + (fdefn :scs (descriptor-reg))) + (:temporary (:sc unsigned-reg) raw) + (:temporary (:sc byte-reg) type) + (:results (result :scs (descriptor-reg))) + (:generator 38 + (load-type type function (- fun-pointer-lowtag)) + (inst lea raw + (make-ea :byte :base function + :disp (- (* simple-fun-code-offset n-word-bytes) + fun-pointer-lowtag))) + (inst cmp type simple-fun-header-widetag) + (inst jmp :e normal-fn) + (inst lea raw (make-fixup (extern-alien-name "closure_tramp") :foreign)) + NORMAL-FN + (storew function fdefn fdefn-fun-slot other-pointer-lowtag) + (storew raw fdefn fdefn-raw-addr-slot other-pointer-lowtag) + (move result function))) + +(define-vop (fdefn-makunbound) + (:policy :fast-safe) + (:translate fdefn-makunbound) + (:args (fdefn :scs (descriptor-reg) :target result)) + (:results (result :scs (descriptor-reg))) + (:generator 38 + (storew nil-value fdefn fdefn-fun-slot other-pointer-lowtag) + (storew (make-fixup (extern-alien-name "undefined_tramp") :foreign) + fdefn fdefn-raw-addr-slot other-pointer-lowtag) + (move result fdefn))) + +;;;; binding and unbinding + +;;; BIND -- Establish VAL as a binding for SYMBOL. Save the old value and +;;; the symbol on the binding stack and stuff the new value into the +;;; symbol. + +#!+sb-thread +(define-vop (bind) + (:args (val :scs (any-reg descriptor-reg)) + (symbol :scs (descriptor-reg))) + (:temporary (:sc unsigned-reg) tls-index temp bsp) + (:generator 5 + (let ((tls-index-valid (gen-label))) + (load-tl-symbol-value bsp *binding-stack-pointer*) + (loadw tls-index symbol symbol-tls-index-slot other-pointer-lowtag) + (inst add bsp (* binding-size n-word-bytes)) + (store-tl-symbol-value bsp *binding-stack-pointer* temp) + + (inst or tls-index tls-index) + (inst jmp :ne tls-index-valid) + ;; allocate a new tls-index + (load-symbol-value tls-index *free-tls-index*) + (inst add tls-index 4) ;XXX surely we can do this more + (store-symbol-value tls-index *free-tls-index*) ;succintly + (inst sub tls-index 4) + (storew tls-index symbol symbol-tls-index-slot other-pointer-lowtag) + (emit-label tls-index-valid) + (inst fs-segment-prefix) + (inst mov temp (make-ea :dword :scale 1 :index tls-index)) + (storew temp bsp (- binding-value-slot binding-size)) + (storew symbol bsp (- binding-symbol-slot binding-size)) + (inst fs-segment-prefix) + (inst mov (make-ea :dword :scale 1 :index tls-index) val)))) + +#!-sb-thread +(define-vop (bind) + (:args (val :scs (any-reg descriptor-reg)) + (symbol :scs (descriptor-reg))) + (:temporary (:sc unsigned-reg) temp bsp) + (:generator 5 + (load-symbol-value bsp *binding-stack-pointer*) + (loadw temp symbol symbol-value-slot other-pointer-lowtag) + (inst add bsp (* binding-size n-word-bytes)) + (store-symbol-value bsp *binding-stack-pointer*) + (storew temp bsp (- binding-value-slot binding-size)) + (storew symbol bsp (- binding-symbol-slot binding-size)) + (storew val symbol symbol-value-slot other-pointer-lowtag))) + + +#!+sb-thread +(define-vop (unbind) + ;; four temporaries? + (:temporary (:sc unsigned-reg) symbol value bsp tls-index) + (:generator 0 + (load-tl-symbol-value bsp *binding-stack-pointer*) + (loadw symbol bsp (- binding-symbol-slot binding-size)) + (loadw value bsp (- binding-value-slot binding-size)) + + (loadw tls-index symbol symbol-tls-index-slot other-pointer-lowtag) + (inst fs-segment-prefix) + (inst mov (make-ea :dword :scale 1 :index tls-index) value) + + (storew 0 bsp (- binding-symbol-slot binding-size)) + (inst sub bsp (* binding-size n-word-bytes)) + ;; we're done with value, so we can use it as a temp here + (store-tl-symbol-value bsp *binding-stack-pointer* value))) + +#!-sb-thread +(define-vop (unbind) + (:temporary (:sc unsigned-reg) symbol value bsp) + (:generator 0 + (load-symbol-value bsp *binding-stack-pointer*) + (loadw symbol bsp (- binding-symbol-slot binding-size)) + (loadw value bsp (- binding-value-slot binding-size)) + (storew value symbol symbol-value-slot other-pointer-lowtag) + (storew 0 bsp (- binding-symbol-slot binding-size)) + (inst sub bsp (* binding-size n-word-bytes)) + (store-symbol-value bsp *binding-stack-pointer*))) + + +(define-vop (unbind-to-here) + (:args (where :scs (descriptor-reg any-reg))) + (:temporary (:sc unsigned-reg) symbol value bsp #!+sb-thread tls-index) + (:generator 0 + (load-tl-symbol-value bsp *binding-stack-pointer*) + (inst cmp where bsp) + (inst jmp :e done) + + LOOP + (loadw symbol bsp (- binding-symbol-slot binding-size)) + (inst or symbol symbol) + (inst jmp :z skip) + (loadw value bsp (- binding-value-slot binding-size)) + #!-sb-thread (storew value symbol symbol-value-slot other-pointer-lowtag) + + #!+sb-thread (loadw + tls-index symbol symbol-tls-index-slot other-pointer-lowtag) + #!+sb-thread (inst fs-segment-prefix) + #!+sb-thread (inst mov (make-ea :dword :scale 1 :index tls-index) value) + (storew 0 bsp (- binding-symbol-slot binding-size)) + + SKIP + (inst sub bsp (* binding-size n-word-bytes)) + (inst cmp where bsp) + (inst jmp :ne loop) + ;; we're done with value, so can use it as a temporary + (store-tl-symbol-value bsp *binding-stack-pointer* value) + + DONE)) + + + +;;;; closure indexing + +(define-full-reffer closure-index-ref * + closure-info-offset fun-pointer-lowtag + (any-reg descriptor-reg) * %closure-index-ref) + +(define-full-setter set-funcallable-instance-info * + funcallable-instance-info-offset fun-pointer-lowtag + (any-reg descriptor-reg) * %set-funcallable-instance-info) + +(define-full-reffer funcallable-instance-info * + funcallable-instance-info-offset fun-pointer-lowtag + (descriptor-reg any-reg) * %funcallable-instance-info) + +(define-vop (funcallable-instance-lexenv cell-ref) + (:variant funcallable-instance-lexenv-slot fun-pointer-lowtag)) + +(define-vop (closure-ref slot-ref) + (:variant closure-info-offset fun-pointer-lowtag)) + +(define-vop (closure-init slot-set) + (:variant closure-info-offset fun-pointer-lowtag)) + +;;;; value cell hackery + +(define-vop (value-cell-ref cell-ref) + (:variant value-cell-value-slot other-pointer-lowtag)) + +(define-vop (value-cell-set cell-set) + (:variant value-cell-value-slot other-pointer-lowtag)) + +;;;; structure hackery + +(define-vop (instance-length) + (:policy :fast-safe) + (:translate %instance-length) + (:args (struct :scs (descriptor-reg))) + (:results (res :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 4 + (loadw res struct 0 instance-pointer-lowtag) + (inst shr res n-widetag-bits))) + +(define-vop (instance-ref slot-ref) + (:variant instance-slots-offset instance-pointer-lowtag) + (:policy :fast-safe) + (:translate %instance-ref) + (:arg-types instance (:constant index))) + +(define-vop (instance-set slot-set) + (:policy :fast-safe) + (:translate %instance-set) + (:variant instance-slots-offset instance-pointer-lowtag) + (:arg-types instance (:constant index) *)) + +(define-full-reffer instance-index-ref * instance-slots-offset + instance-pointer-lowtag (any-reg descriptor-reg) * %instance-ref) + +(define-full-setter instance-index-set * instance-slots-offset + instance-pointer-lowtag (any-reg descriptor-reg) * %instance-set) + + +(defknown %instance-set-conditional (instance index t t) t + (unsafe)) + +(define-vop (instance-set-conditional) + (:translate %instance-set-conditional) + (:args (object :scs (descriptor-reg) :to :eval) + (slot :scs (any-reg) :to :result) + (old-value :scs (descriptor-reg any-reg) :target eax) + (new-value :scs (descriptor-reg any-reg))) + (:arg-types instance positive-fixnum * *) + (:temporary (:sc descriptor-reg :offset eax-offset + :from (:argument 2) :to :result :target result) eax) + (:results (result :scs (descriptor-reg any-reg))) + ;(:guard (backend-featurep :i486)) + (:policy :fast-safe) + (:generator 5 + (move eax old-value) + (inst lock) + (inst cmpxchg (make-ea :dword :base object :index slot :scale 1 + :disp (- (* instance-slots-offset n-word-bytes) + instance-pointer-lowtag)) + new-value) + (move result eax))) + + + +;;;; code object frobbing + +(define-full-reffer code-header-ref * 0 other-pointer-lowtag + (any-reg descriptor-reg) * code-header-ref) + +(define-full-setter code-header-set * 0 other-pointer-lowtag + (any-reg descriptor-reg) * code-header-set) diff --git a/src/compiler/x86-64/char.lisp b/src/compiler/x86-64/char.lisp new file mode 100644 index 0000000..684a88a --- /dev/null +++ b/src/compiler/x86-64/char.lisp @@ -0,0 +1,164 @@ +;;;; x86 definition of character operations + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; moves and coercions + +;;; Move a tagged char to an untagged representation. +(define-vop (move-to-base-char) + (:args (x :scs (any-reg control-stack) :target al)) + (:temporary (:sc byte-reg :offset al-offset + :from (:argument 0) :to (:eval 0)) al) + (:ignore al) + (:temporary (:sc byte-reg :offset ah-offset :target y + :from (:argument 0) :to (:result 0)) ah) + (:results (y :scs (base-char-reg base-char-stack))) + (:note "character untagging") + (:generator 1 + (move rax-tn x) + (move y ah))) +(define-move-vop move-to-base-char :move + (any-reg control-stack) (base-char-reg base-char-stack)) + +;;; Move an untagged char to a tagged representation. +(define-vop (move-from-base-char) + (:args (x :scs (base-char-reg base-char-stack) :target ah)) + (:temporary (:sc byte-reg :offset al-offset :target y + :from (:argument 0) :to (:result 0)) al) + (:temporary (:sc byte-reg :offset ah-offset + :from (:argument 0) :to (:result 0)) ah) + (:results (y :scs (any-reg descriptor-reg control-stack))) + (:note "character tagging") + (:generator 1 + (move ah x) ; Maybe move char byte. + (inst mov al base-char-widetag) ; x86 to type bits + (inst and rax-tn #xffff) ; Remove any junk bits. + (move y rax-tn))) +(define-move-vop move-from-base-char :move + (base-char-reg base-char-stack) (any-reg descriptor-reg control-stack)) + +;;; Move untagged base-char values. +(define-vop (base-char-move) + (:args (x :target y + :scs (base-char-reg) + :load-if (not (location= x y)))) + (:results (y :scs (base-char-reg base-char-stack) + :load-if (not (location= x y)))) + (:note "character move") + (:effects) + (:affected) + (:generator 0 + (move y x))) +(define-move-vop base-char-move :move + (base-char-reg) (base-char-reg base-char-stack)) + +;;; Move untagged base-char arguments/return-values. +(define-vop (move-base-char-arg) + (:args (x :target y + :scs (base-char-reg)) + (fp :scs (any-reg) + :load-if (not (sc-is y base-char-reg)))) + (:results (y)) + (:note "character arg move") + (:generator 0 + (sc-case y + (base-char-reg + (move y x)) + (base-char-stack + (inst mov + (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) 4))) + x))))) +(define-move-vop move-base-char-arg :move-arg + (any-reg base-char-reg) (base-char-reg)) + +;;; Use standard MOVE-ARG + coercion to move an untagged base-char +;;; to a descriptor passing location. +(define-move-vop move-arg :move-arg + (base-char-reg) (any-reg descriptor-reg)) + +;;;; other operations + +(define-vop (char-code) + (:translate char-code) + (:policy :fast-safe) + (:args (ch :scs (base-char-reg base-char-stack))) + (:arg-types base-char) + (:results (res :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 1 + (inst movzx res ch))) + +(define-vop (code-char) + (:translate code-char) + (:policy :fast-safe) + (:args (code :scs (unsigned-reg unsigned-stack) :target eax)) + (:arg-types positive-fixnum) + (:temporary (:sc unsigned-reg :offset rax-offset :target res + :from (:argument 0) :to (:result 0)) + eax) + (:results (res :scs (base-char-reg))) + (:result-types base-char) + (:generator 1 + (move eax code) + (move res al-tn))) + +;;; comparison of BASE-CHARs +(define-vop (base-char-compare) + (:args (x :scs (base-char-reg base-char-stack)) + (y :scs (base-char-reg) + :load-if (not (and (sc-is x base-char-reg) + (sc-is y base-char-stack))))) + (:arg-types base-char base-char) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline comparison") + (:variant-vars condition not-condition) + (:generator 3 + (inst cmp x y) + (inst jmp (if not-p not-condition condition) target))) + +(define-vop (fast-char=/base-char base-char-compare) + (:translate char=) + (:variant :e :ne)) + +(define-vop (fast-char/base-char base-char-compare) + (:translate char>) + (:variant :a :na)) + +(define-vop (base-char-compare/c) + (:args (x :scs (base-char-reg base-char-stack))) + (:arg-types base-char (:constant base-char)) + (:conditional) + (:info target not-p y) + (:policy :fast-safe) + (:note "inline constant comparison") + (:variant-vars condition not-condition) + (:generator 2 + (inst cmp x (sb!xc:char-code y)) + (inst jmp (if not-p not-condition condition) target))) + +(define-vop (fast-char=/base-char/c base-char-compare/c) + (:translate char=) + (:variant :e :ne)) + +(define-vop (fast-char/base-char/c base-char-compare/c) + (:translate char>) + (:variant :a :na)) diff --git a/src/compiler/x86-64/debug.lisp b/src/compiler/x86-64/debug.lisp new file mode 100644 index 0000000..74ebd63 --- /dev/null +++ b/src/compiler/x86-64/debug.lisp @@ -0,0 +1,154 @@ +;;;; x86 support for the debugger + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(define-vop (debug-cur-sp) + (:translate current-sp) + (:policy :fast-safe) + (:results (res :scs (sap-reg sap-stack))) + (:result-types system-area-pointer) + (:generator 1 + (move res rsp-tn))) + +(define-vop (debug-cur-fp) + (:translate current-fp) + (:policy :fast-safe) + (:results (res :scs (sap-reg sap-stack))) + (:result-types system-area-pointer) + (:generator 1 + (move res rbp-tn))) + +;;; Stack-ref and %set-stack-ref can be used to read and store +;;; descriptor objects on the control stack. Use the sap-ref +;;; functions to access other data types. +(define-vop (read-control-stack) + (:translate stack-ref) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to :eval) + (offset :scs (any-reg) :target temp)) + (:arg-types system-area-pointer positive-fixnum) + (:temporary (:sc unsigned-reg :from (:argument 1)) temp) + (:results (result :scs (descriptor-reg))) + (:result-types *) + (:generator 9 + (move temp offset) + (inst neg temp) + (inst mov result + (make-ea :qword :base sap :disp (- n-word-bytes) :index temp)))) + +(define-vop (read-control-stack-c) + (:translate stack-ref) + (:policy :fast-safe) + (:args (sap :scs (sap-reg))) + (:info index) + (:arg-types system-area-pointer (:constant (signed-byte 29))) + (:results (result :scs (descriptor-reg))) + (:result-types *) + (:generator 5 + (inst mov result (make-ea :qword :base sap + :disp (- (* (1+ index) n-word-bytes)))))) + +(define-vop (write-control-stack) + (:translate %set-stack-ref) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to :eval) + (offset :scs (any-reg) :target temp) + (value :scs (descriptor-reg) :to :result :target result)) + (:arg-types system-area-pointer positive-fixnum *) + (:temporary (:sc unsigned-reg :from (:argument 1) :to :result) temp) + (:results (result :scs (descriptor-reg))) + (:result-types *) + (:generator 9 + (move temp offset) + (inst neg temp) + (inst mov + (make-ea :qword :base sap :disp (- n-word-bytes) :index temp) value) + (move result value))) + +(define-vop (write-control-stack-c) + (:translate %set-stack-ref) + (:policy :fast-safe) + (:args (sap :scs (sap-reg)) + (value :scs (descriptor-reg) :target result)) + (:info index) + (:arg-types system-area-pointer (:constant (signed-byte 29)) *) + (:results (result :scs (descriptor-reg))) + (:result-types *) + (:generator 5 + (inst mov (make-ea :qword :base sap + :disp (- (* (1+ index) n-word-bytes))) + value) + (move result value))) + +(define-vop (code-from-mumble) + (:policy :fast-safe) + (:args (thing :scs (descriptor-reg))) + (:results (code :scs (descriptor-reg))) + (:temporary (:sc unsigned-reg) temp) + (:variant-vars lowtag) + (:generator 5 + (let ((bogus (gen-label)) + (done (gen-label))) + (loadw temp thing 0 lowtag) + (inst shr temp n-widetag-bits) + (inst jmp :z bogus) + (inst shl temp (1- (integer-length n-word-bytes))) + (unless (= lowtag other-pointer-lowtag) + (inst add temp (- lowtag other-pointer-lowtag))) + (move code thing) + (inst sub code temp) + (emit-label done) + (assemble (*elsewhere*) + (emit-label bogus) + (inst mov code nil-value) + (inst jmp done))))) + +(define-vop (code-from-lra code-from-mumble) + (:translate sb!di::lra-code-header) + (:variant other-pointer-lowtag)) + +(define-vop (code-from-function code-from-mumble) + (:translate sb!di::fun-code-header) + (:variant fun-pointer-lowtag)) + +(define-vop (make-lisp-obj) + (:policy :fast-safe) + (:translate sb!di::make-lisp-obj) + (:args (value :scs (unsigned-reg unsigned-stack) :target result)) + (:arg-types unsigned-num) + (:results (result :scs (descriptor-reg) + :load-if (not (sc-is value unsigned-reg)) + )) + (:generator 1 + (move result value))) + +(define-vop (get-lisp-obj-address) + (:policy :fast-safe) + (:translate sb!di::get-lisp-obj-address) + (:args (thing :scs (descriptor-reg control-stack) :target result)) + (:results (result :scs (unsigned-reg) + :load-if (not (and (sc-is thing descriptor-reg) + (sc-is result unsigned-stack))))) + (:result-types unsigned-num) + (:generator 1 + (move result thing))) + + +(define-vop (fun-word-offset) + (:policy :fast-safe) + (:translate sb!di::fun-word-offset) + (:args (fun :scs (descriptor-reg))) + (:results (res :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 5 + (loadw res fun 0 fun-pointer-lowtag) + (inst shr res n-widetag-bits))) diff --git a/src/compiler/x86-64/float.lisp b/src/compiler/x86-64/float.lisp new file mode 100644 index 0000000..6a15a52 --- /dev/null +++ b/src/compiler/x86-64/float.lisp @@ -0,0 +1,2859 @@ +;;;; floating point support for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(macrolet ((ea-for-xf-desc (tn slot) + `(make-ea + :dword :base ,tn + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) + (defun ea-for-sf-desc (tn) + (ea-for-xf-desc tn single-float-value-slot)) + (defun ea-for-df-desc (tn) + (ea-for-xf-desc tn double-float-value-slot)) + ;; complex floats + (defun ea-for-csf-real-desc (tn) + (ea-for-xf-desc tn complex-single-float-real-slot)) + (defun ea-for-csf-imag-desc (tn) + (ea-for-xf-desc tn complex-single-float-imag-slot)) + (defun ea-for-cdf-real-desc (tn) + (ea-for-xf-desc tn complex-double-float-real-slot)) + (defun ea-for-cdf-imag-desc (tn) + (ea-for-xf-desc tn complex-double-float-imag-slot))) + +(macrolet ((ea-for-xf-stack (tn kind) + `(make-ea + :dword :base rbp-tn + :disp (- (* (+ (tn-offset ,tn) + (ecase ,kind (:single 1) (:double 2) (:long 3))) + n-word-bytes))))) + (defun ea-for-sf-stack (tn) + (ea-for-xf-stack tn :single)) + (defun ea-for-df-stack (tn) + (ea-for-xf-stack tn :double))) + +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +(defun maybe-fp-wait (node &optional note-next-instruction) + (when (policy node (or (= debug 3) (> safety speed)))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait)) + +;;; complex float stack EAs +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base) + `(make-ea + :dword :base ,base + :disp (- (* (+ (tn-offset ,tn) + (* (ecase ,kind + (:single 1) + (:double 2) + (:long 3)) + (ecase ,slot (:real 1) (:imag 2)))) + n-word-bytes))))) + (defun ea-for-csf-real-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :single :real base)) + (defun ea-for-csf-imag-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :single :imag base)) + (defun ea-for-cdf-real-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :real base)) + (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn)) + (ea-for-cxf-stack tn :double :imag base))) + +;;; Abstract out the copying of a FP register to the FP stack top, and +;;; provide two alternatives for its implementation. Note: it's not +;;; necessary to distinguish between a single or double register move +;;; here. +;;; +;;; Using a Pop then load. +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fstp fr0-tn) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset reg))))) +;;; Using Fxch then Fst to restore the original reg contents. +#+nil +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fxch reg) + (inst fst reg)) + + +;;;; move functions + +;;; X is source, Y is destination. +(define-move-fun (load-single 2) (vop x y) + ((single-stack) (single-reg)) + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-stack x)))) + +(define-move-fun (store-single 2) (vop x y) + ((single-reg) (single-stack)) + (cond ((zerop (tn-offset x)) + (inst fst (ea-for-sf-stack y))) + (t + (inst fxch x) + (inst fst (ea-for-sf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +(define-move-fun (load-double 2) (vop x y) + ((double-stack) (double-reg)) + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-stack x)))) + +(define-move-fun (store-double 2) (vop x y) + ((double-reg) (double-stack)) + (cond ((zerop (tn-offset x)) + (inst fstd (ea-for-df-stack y))) + (t + (inst fxch x) + (inst fstd (ea-for-df-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + + + +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'double-float)) +(define-move-fun (load-fp-constant 2) (vop x y) + ((fp-constant) (single-reg double-reg)) + (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) + (with-empty-tn@fp-top(y) + (cond ((zerop value) + (inst fldz)) + ((= value 1e0) + (inst fld1)) + ((= value (coerce pi *read-default-float-format*)) + (inst fldpi)) + ((= value (log 10e0 2e0)) + (inst fldl2t)) + ((= value (log 2.718281828459045235360287471352662e0 2e0)) + (inst fldl2e)) + ((= value (log 2e0 10e0)) + (inst fldlg2)) + ((= value (log 2e0 2.718281828459045235360287471352662e0)) + (inst fldln2)) + (t (warn "ignoring bogus i387 constant ~A" value)))))) +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'single-float)) + +;;;; complex float move functions + +(defun complex-single-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (tn-offset x))) +(defun complex-single-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (1+ (tn-offset x)))) + +(defun complex-double-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (tn-offset x))) +(defun complex-double-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (1+ (tn-offset x)))) + +;;; X is source, Y is destination. +(define-move-fun (load-complex-single 2) (vop x y) + ((complex-single-stack) (complex-single-reg)) + (let ((real-tn (complex-single-reg-real-tn y))) + (with-empty-tn@fp-top (real-tn) + (inst fld (ea-for-csf-real-stack x)))) + (let ((imag-tn (complex-single-reg-imag-tn y))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (ea-for-csf-imag-stack x))))) + +(define-move-fun (store-complex-single 2) (vop x y) + ((complex-single-reg) (complex-single-stack)) + (let ((real-tn (complex-single-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fst (ea-for-csf-real-stack y))) + (t + (inst fxch real-tn) + (inst fst (ea-for-csf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fst (ea-for-csf-imag-stack y)) + (inst fxch imag-tn))) + +(define-move-fun (load-complex-double 2) (vop x y) + ((complex-double-stack) (complex-double-reg)) + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldd (ea-for-cdf-real-stack x)))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldd (ea-for-cdf-imag-stack x))))) + +(define-move-fun (store-complex-double 2) (vop x y) + ((complex-double-reg) (complex-double-stack)) + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fstd (ea-for-cdf-real-stack y))) + (t + (inst fxch real-tn) + (inst fstd (ea-for-cdf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fstd (ea-for-cdf-imag-stack y)) + (inst fxch imag-tn))) + + +;;;; move VOPs + +;;; float register to register moves +(define-vop (float-move) + (:args (x)) + (:results (y)) + (:note "float move") + (:generator 0 + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x)))))) + +(define-vop (single-move float-move) + (:args (x :scs (single-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (single-reg) :load-if (not (location= x y))))) +(define-move-vop single-move :move (single-reg) (single-reg)) + +(define-vop (double-move float-move) + (:args (x :scs (double-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (double-reg) :load-if (not (location= x y))))) +(define-move-vop double-move :move (double-reg) (double-reg)) + +;;; complex float register to register moves +(define-vop (complex-float-move) + (:args (x :target y :load-if (not (location= x y)))) + (:results (y :load-if (not (location= x y)))) + (:note "complex float move") + (:generator 0 + (unless (location= x y) + ;; Note the complex-float-regs are aligned to every second + ;; float register so there is not need to worry about overlap. + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag))))) + +(define-vop (complex-single-move complex-float-move) + (:args (x :scs (complex-single-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))) +(define-move-vop complex-single-move :move + (complex-single-reg) (complex-single-reg)) + +(define-vop (complex-double-move complex-float-move) + (:args (x :scs (complex-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))) +(define-move-vop complex-double-move :move + (complex-double-reg) (complex-double-reg)) + + +;;; Move from float to a descriptor reg. allocating a new float +;;; object in the process. +(define-vop (move-from-single) + (:args (x :scs (single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + single-float-widetag + single-float-size node) + (with-tn@fp-top(x) + (inst fst (ea-for-sf-desc y)))))) +(define-move-vop move-from-single :move + (single-reg) (descriptor-reg)) + +(define-vop (move-from-double) + (:args (x :scs (double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + double-float-widetag + double-float-size + node) + (with-tn@fp-top(x) + (inst fstd (ea-for-df-desc y)))))) +(define-move-vop move-from-double :move + (double-reg) (descriptor-reg)) + +(define-vop (move-from-fp-constant) + (:args (x :scs (fp-constant))) + (:results (y :scs (descriptor-reg))) + (:generator 2 + (ecase (sb!c::constant-value (sb!c::tn-leaf x)) + (0f0 (load-symbol-value y *fp-constant-0f0*)) + (1f0 (load-symbol-value y *fp-constant-1f0*)) + (0d0 (load-symbol-value y *fp-constant-0d0*)) + (1d0 (load-symbol-value y *fp-constant-1d0*))))) +(define-move-vop move-from-fp-constant :move + (fp-constant) (descriptor-reg)) + +;;; Move from a descriptor to a float register. +(define-vop (move-to-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (single-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-desc x))))) +(define-move-vop move-to-single :move (descriptor-reg) (single-reg)) + +(define-vop (move-to-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-desc x))))) +(define-move-vop move-to-double :move (descriptor-reg) (double-reg)) + + +;;; Move from complex float to a descriptor reg. allocating a new +;;; complex float object in the process. +(define-vop (move-from-complex-single) + (:args (x :scs (complex-single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-single-float-widetag + complex-single-float-size + node) + (let ((real-tn (complex-single-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fst (ea-for-csf-real-desc y)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fst (ea-for-csf-imag-desc y))))))) +(define-move-vop move-from-complex-single :move + (complex-single-reg) (descriptor-reg)) + +(define-vop (move-from-complex-double) + (:args (x :scs (complex-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-double-float-widetag + complex-double-float-size + node) + (let ((real-tn (complex-double-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fstd (ea-for-cdf-real-desc y)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fstd (ea-for-cdf-imag-desc y))))))) +(define-move-vop move-from-complex-double :move + (complex-double-reg) (descriptor-reg)) + +;;; Move from a descriptor to a complex float register. +(macrolet ((frob (name sc format) + `(progn + (define-vop (,name) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (,sc))) + (:note "pointer to complex float coercion") + (:generator 2 + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-real-desc x)))) + (:double '((inst fldd (ea-for-cdf-real-desc x))))))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-imag-desc x)))) + (:double '((inst fldd (ea-for-cdf-imag-desc x))))))))) + (define-move-vop ,name :move (descriptor-reg) (,sc))))) + (frob move-to-complex-single complex-single-reg :single) + (frob move-to-complex-double complex-double-reg :double)) + +;;;; the move argument vops +;;;; +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. + +;;; the general MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "float argument move") + (:generator ,(case format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x))))) + (,stack-sc + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :dword :base fp :disp offset))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea)))))) + (let ((ea (make-ea + :dword :base fp + :disp (- (* (+ (tn-offset y) + ,(case format + (:single 1) + (:double 2) + (:long 3))) + n-word-bytes))))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))))))))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-single-float-arg single-reg single-stack :single) + (frob move-double-float-arg double-reg double-stack :double)) + +;;;; complex float MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "complex float argument move") + (:generator ,(ecase format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag)))) + (,stack-sc + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))))) + (t + (inst fxch real-tn) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp))))) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + ,@(ecase format + (:single + '((inst fst (ea-for-csf-imag-stack y fp)))) + (:double + '((inst fstd (ea-for-cdf-imag-stack y fp))))) + (inst fxch imag-tn)))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-complex-single-float-arg + complex-single-reg complex-single-stack :single) + (frob move-complex-double-float-arg + complex-double-reg complex-double-stack :double)) + +(define-move-vop move-arg :move-arg + (single-reg double-reg + complex-single-reg complex-double-reg) + (descriptor-reg)) + + +;;;; arithmetic VOPs + +;;; dtc: the floating point arithmetic vops +;;; +;;; Note: Although these can accept x and y on the stack or pointed to +;;; from a descriptor register, they will work with register loading +;;; without these. Same deal with the result - it need only be a +;;; register. When load-tns are needed they will probably be in ST0 +;;; and the code below should be able to correctly handle all cases. +;;; +;;; However it seems to produce better code if all arg. and result +;;; options are used; on the P86 there is no extra cost in using a +;;; memory operand to the FP instructions - not so on the PPro. +;;; +;;; It may also be useful to handle constant args? +;;; +;;; 22-Jul-97: descriptor args lose in some simple cases when +;;; a function result computed in a loop. Then Python insists +;;; on consing the intermediate values! For example +#| +(defun test(a n) + (declare (type (simple-array double-float (*)) a) + (fixnum n)) + (let ((sum 0d0)) + (declare (type double-float sum)) + (dotimes (i n) + (incf sum (* (aref a i)(aref a i)))) + sum)) +|# +;;; So, disabling descriptor args until this can be fixed elsewhere. +(macrolet + ((frob (op fop-sti fopr-sti + fop fopr sname scost + fopd foprd dname dcost + lname lcost) + #!-long-float (declare (ignore lcost lname)) + `(progn + (define-vop (,sname) + (:translate ,op) + (:args (x :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval) + (y :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc single-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (single-reg single-stack))) + (:arg-types single-float single-float) + (:result-types single-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,scost + ;; Handle a few special cases + (cond + ;; x, y, and r are the same register. + ((and (sc-is x single-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch r) + (inst ,fop fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x single-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (single-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fop y)) + (single-stack + ;; ST(0) = ST(0) op Mem + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + (t + ;; y to ST0 + (sc-case y + (single-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y single-stack) + (inst fld (ea-for-sf-stack y)) + (inst fld (ea-for-sf-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y single-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (single-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,fopr x)) + (single-stack + ;; ST(0) = Mem op ST(0) + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x single-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + ;; y is in ST0 + ((and (sc-is y single-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (single-reg + (inst ,fopr x)) + (single-stack + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (copy-fp-reg-to-fr0 x)) + (single-stack + (inst fstp fr0) + (inst fld (ea-for-sf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fld (ea-for-sf-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (single-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (single-stack + (inst fst (ea-for-sf-stack r)))))))) + + (define-vop (,dname) + (:translate ,op) + (:args (x :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval) + (y :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc double-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (double-reg double-stack))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,dcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (sc-is x double-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x double-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (double-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (double-stack + ;; ST(0) = ST(0) op Mem + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + (t + ;; y to ST0 + (sc-case y + (double-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y double-stack) + (inst fldd (ea-for-df-stack y)) + (inst fldd (ea-for-df-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y double-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (double-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (double-stack + ;; ST(0) = Mem op ST(0) + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + ;; y is in ST0 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (double-reg + (inst ,foprd x)) + (double-stack + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (double-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (double-stack + (inst fstd (ea-for-df-stack r)))))))) + ))) + + (frob + fadd-sti fadd-sti + fadd fadd +/single-float 2 + faddd faddd +/double-float 2 + +/long-float 2) + (frob - fsub-sti fsubr-sti + fsub fsubr -/single-float 2 + fsubd fsubrd -/double-float 2 + -/long-float 2) + (frob * fmul-sti fmul-sti + fmul fmul */single-float 3 + fmuld fmuld */double-float 3 + */long-float 3) + (frob / fdiv-sti fdivr-sti + fdiv fdivr //single-float 12 + fdivd fdivrd //double-float 12 + //long-float 12)) + +(macrolet ((frob (name inst translate sc type) + `(define-vop (,name) + (:args (x :scs (,sc) :target fr0)) + (:results (y :scs (,sc))) + (:translate ,translate) + (:policy :fast-safe) + (:arg-types ,type) + (:result-types ,type) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. + (unless (zerop (tn-offset y)) + (inst fst y)))))) + + (frob abs/single-float fabs abs single-reg single-float) + (frob abs/double-float fabs abs double-reg double-float) + + (frob %negate/single-float fchs %negate single-reg single-float) + (frob %negate/double-float fchs %negate double-reg double-float)) + +;;;; comparison + +(define-vop (=/float) + (:args (x) (y)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0; y is in any reg. + ((zerop (tn-offset x)) + (inst fucom y)) + ;; y is in ST0; x is in another reg. + ((zerop (tn-offset y)) + (inst fucom x)) + ;; x and y are the same register, not ST0 + ((location= x y) + (inst fxch x) + (inst fucom fr0-tn) + (inst fxch x)) + ;; x and y are different registers, neither ST0. + (t + (inst fxch x) + (inst fucom y) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (inst cmp ah-tn #x40) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=/single-float =/float) + (:translate =) + (:args (x :scs (single-reg)) + (y :scs (single-reg))) + (:arg-types single-float single-float)) + +(define-vop (=/double-float =/float) + (:translate =) + (:args (x :scs (double-reg)) + (y :scs (double-reg))) + (:arg-types double-float double-float)) + +(define-vop (single-float) + (:translate >) + (:args (x :scs (single-reg single-stack descriptor-reg)) + (y :scs (single-reg single-stack descriptor-reg))) + (:arg-types single-float single-float) + (:temporary (:sc single-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y single-reg) (zerop (tn-offset y))) + (sc-case x + (single-reg + (inst fcom x)) + ((single-stack descriptor-reg) + (if (sc-is x single-stack) + (inst fcom (ea-for-sf-stack x)) + (inst fcom (ea-for-sf-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + (sc-case y + (single-reg + (inst fcom y)) + ((single-stack descriptor-reg) + (if (sc-is y single-stack) + (inst fcom (ea-for-sf-stack y)) + (inst fcom (ea-for-sf-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (>double-float) + (:translate >) + (:args (x :scs (double-reg double-stack descriptor-reg)) + (y :scs (double-reg double-stack descriptor-reg))) + (:arg-types double-float double-float) + (:temporary (:sc double-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (sc-case x + (double-reg + (inst fcomd x)) + ((double-stack descriptor-reg) + (if (sc-is x double-stack) + (inst fcomd (ea-for-df-stack x)) + (inst fcomd (ea-for-df-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (sc-case y + (double-reg + (inst fcomd y)) + ((double-stack descriptor-reg) + (if (sc-is y double-stack) + (inst fcomd (ea-for-df-stack y)) + (inst fcomd (ea-for-df-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +;;; Comparisons with 0 can use the FTST instruction. + +(define-vop (float-test) + (:args (x)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p y) + (:variant-vars code) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp y) + (:generator 2 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0 + ((zerop (tn-offset x)) + (inst ftst)) + ;; x not ST0 + (t + (inst fxch x) + (inst ftst) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (unless (zerop code) + (inst cmp ah-tn code)) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=0/single-float float-test) + (:translate =) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x40)) +(define-vop (=0/double-float float-test) + (:translate =) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x40)) + +(define-vop (<0/single-float float-test) + (:translate <) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x01)) +(define-vop (<0/double-float float-test) + (:translate <) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x01)) + +(define-vop (>0/single-float float-test) + (:translate >) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x00)) +(define-vop (>0/double-float float-test) + (:translate >) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x00)) + + +;;;; conversion + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc signed-stack) temp) + (:results (y :scs (,to-sc))) + (:arg-types signed-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case x + (signed-reg + (inst mov temp x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild temp))) + (signed-stack + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild x)))))))) + (frob %single-float/signed %single-float single-reg single-float) + (frob %double-float/signed %double-float double-reg double-float)) + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (unsigned-reg))) + (:results (y :scs (,to-sc))) + (:arg-types unsigned-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 6 + (inst push 0) + (inst push x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fildl (make-ea :dword :base rsp-tn))) + (inst add rsp-tn 16))))) + (frob %single-float/unsigned %single-float single-reg single-float) + (frob %double-float/unsigned %double-float double-reg double-float)) + +;;; These should be no-ops but the compiler might want to move some +;;; things around. +(macrolet ((frob (name translate from-sc from-type to-sc to-type) + `(define-vop (,name) + (:args (x :scs (,from-sc) :target y)) + (:results (y :scs (,to-sc))) + (:arg-types ,from-type) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 2 + (note-this-location vop :internal-error) + (unless (location= x y) + (cond + ((zerop (tn-offset x)) + ;; x is in ST0, y is in another reg. not ST0 + (inst fst y)) + ((zerop (tn-offset y)) + ;; y is in ST0, x is in another reg. not ST0 + (copy-fp-reg-to-fr0 x)) + (t + ;; Neither x or y are in ST0, and they are not in + ;; the same reg. + (inst fxch x) + (inst fst y) + (inst fxch x)))))))) + + (frob %single-float/double-float %single-float double-reg + double-float single-reg single-float) + + (frob %double-float/single-float %double-float single-reg single-float + double-reg double-float)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type)) + (:args (x :scs (,from-sc))) + (:temporary (:sc signed-stack) stack-temp) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (signed-reg))) + (:arg-types ,from-type) + (:result-types signed-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + (,(if round-p 'progn 'pseudo-atomic) + ;; Normal mode (for now) is "round to best". + (with-tn@fp-top (x) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (sc-case y + (signed-stack + (inst fist y)) + (signed-reg + (inst fist stack-temp) + (inst mov y stack-temp))) + ,@(unless round-p + '((inst fldcw scw))))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type "=>UNSIGNED")) + (:args (x :scs (,from-sc) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) stack-temp) + (:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (unsigned-reg))) + (:arg-types ,from-type) + (:result-types unsigned-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + ;; Normal mode (for now) is "round to best". + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (inst sub rsp-tn 8) + (inst fistpl (make-ea :dword :base rsp-tn)) + (inst pop y) + (inst fld fr0) ; copy fr0 to at least restore stack. + (inst add rsp-tn 8) + ,@(unless round-p + '((inst fldcw scw))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t)) + +(define-vop (make-single-float) + (:args (bits :scs (signed-reg) :target res + :load-if (not (or (and (sc-is bits signed-stack) + (sc-is res single-reg)) + (and (sc-is bits signed-stack) + (sc-is res single-stack) + (location= bits res)))))) + (:results (res :scs (single-reg single-stack))) + (:temporary (:sc signed-stack) stack-temp) + (:arg-types signed-num) + (:result-types single-float) + (:translate make-single-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case res + (single-stack + (sc-case bits + (signed-reg + (inst mov res bits)) + (signed-stack + (aver (location= bits res))))) + (single-reg + (sc-case bits + (signed-reg + ;; source must be in memory + (inst mov stack-temp bits) + (with-empty-tn@fp-top(res) + (inst fld stack-temp))) + (signed-stack + (with-empty-tn@fp-top(res) + (inst fld bits)))))))) + +(define-vop (make-double-float) + (:args (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (double-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types signed-num unsigned-num) + (:result-types double-float) + (:translate make-double-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 2 + (let ((offset (1+ (tn-offset temp)))) + (storew hi-bits rbp-tn (- offset)) + (storew lo-bits rbp-tn (- (1+ offset))) + (with-empty-tn@fp-top(res) + (inst fldd (make-ea :dword :base rbp-tn + :disp (- (* (1+ offset) n-word-bytes)))))))) + +(define-vop (single-float-bits) + (:args (float :scs (single-reg descriptor-reg) + :load-if (not (sc-is float single-stack)))) + (:results (bits :scs (signed-reg))) + (:temporary (:sc signed-stack :from :argument :to :result) stack-temp) + (:arg-types single-float) + (:result-types signed-num) + (:translate single-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case bits + (signed-reg + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst stack-temp) + (inst mov bits stack-temp))) + (single-stack + (inst mov bits float)) + (descriptor-reg + (loadw + bits float single-float-value-slot + other-pointer-lowtag)))) + (signed-stack + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst bits)))))))) + +(define-vop (double-float-high-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (hi-bits :scs (signed-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types signed-num) + (:translate double-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base rbp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw hi-bits rbp-tn (- (1+ (tn-offset temp))))) + (double-stack + (loadw hi-bits rbp-tn (- (1+ (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + other-pointer-lowtag))))) + +(define-vop (double-float-low-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types unsigned-num) + (:translate double-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base rbp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw lo-bits rbp-tn (- (+ 2 (tn-offset temp))))) + (double-stack + (loadw lo-bits rbp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw lo-bits float double-float-value-slot + other-pointer-lowtag))))) + + +;;;; float mode hackery + +(sb!xc:deftype float-modes () '(unsigned-byte 64)) ; really only 16 +(defknown floating-point-modes () float-modes (flushable)) +(defknown ((setf floating-point-modes)) (float-modes) + float-modes) + +(def!constant npx-env-size (* 7 n-word-bytes)) +(def!constant npx-cw-offset 0) +(def!constant npx-sw-offset 4) + +(define-vop (floating-point-modes) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset :target res + :to :result) eax) + (:generator 8 + (inst sub rsp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions + (inst fstenv (make-ea :dword :base rsp-tn)) ; masks all exceptions + (inst fldenv (make-ea :dword :base rsp-tn)) ; Restore previous state. + ;; Move current status to high word. + (inst movzxd eax (make-ea :dword :base rsp-tn :disp (- npx-sw-offset 2))) + ;; Move exception mask to low word. + (inst mov ax-tn (make-ea :word :base rsp-tn :disp npx-cw-offset)) + (inst add rsp-tn npx-env-size) ; Pop stack. + (inst xor eax #x3f) ; Flip exception mask to trap enable bits. + (move res eax))) + +;;; XXX BROKEN +(define-vop (set-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:results (res :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:result-types unsigned-num) + (:translate (setf floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:generator 3 + (inst sub rsp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions. + (inst fstenv (make-ea :dword :base rsp-tn)) + (inst mov eax new) + (inst xor eax #x3f) ; Turn trap enable bits into exception mask. + (inst mov (make-ea :word :base rsp-tn :disp npx-cw-offset) ax-tn) + (inst shr eax 16) ; position status word + (inst mov (make-ea :word :base rsp-tn :disp npx-sw-offset) ax-tn) + (inst fldenv (make-ea :dword :base rsp-tn)) + (inst add rsp-tn npx-env-size) ; Pop stack. + (move res new))) + + +(progn + +;;; Let's use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of fsin and fcos that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; These versions of fsin, fcos, and ftan try to use argument +;;; reduction but to do this accurately requires greater precision and +;;; it is hopelessly inaccurate. +#+nil +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:temporary (:sc unsigned-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr1) ; Load 2*PI + (inst fldpi) + (inst fadd fr0) + (inst fxch fr1) + LOOP + (inst fprem1) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :nz LOOP) + (inst ,op) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + + + +;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if +;;; the argument is out of range 2^63 and would thus be hopelessly +;;; inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fldz) ; Load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +#+nil +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (cond ((zerop (tn-offset x)) + ;; x is in fr0 + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1)) + (t + ;; x is in a FP reg, not fr0 + (inst fstp fr0) + (inst fldl2e) + (inst fmul x)))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fldl2e) + (if (sc-is x double-stack) + (inst fmuld (ea-for-df-stack x)) + (inst fmuld (ea-for-df-desc x))))) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; Modified exp that handles the following special cases: +;;; exp(+Inf) is +Inf; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (double-reg))) + (:arg-types double-float signed-num) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (double-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (double-reg) :to :result)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base rsp-tn)) + (inst fcompp) + (inst add rsp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p with limited x range function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 4 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr1) + (y :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y)) + (sc-is x double-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (zerop (tn-offset x))) + ;; copy x to fr1 + (inst fst fr1)) + ;; y in fr0; x not in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))) + ;; Load y to fr0 + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) +) ; PROGN #!-LONG-FLOAT + + +;;;; complex float VOPs + +(define-vop (make-complex-single-float) + (:translate complex) + (:args (real :scs (single-reg) :to :result :target r + :load-if (not (location= real r))) + (imag :scs (single-reg) :to :save)) + (:arg-types single-float single-float) + (:results (r :scs (complex-single-reg) :from (:argument 0) + :load-if (not (sc-is r complex-single-stack)))) + (:result-types complex-single-float) + (:note "inline complex single-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-single-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-single-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fst (ea-for-csf-real-stack r))) + (t + (inst fxch real) + (inst fst (ea-for-csf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fst (ea-for-csf-imag-stack r)) + (inst fxch imag))))) + +(define-vop (make-complex-double-float) + (:translate complex) + (:args (real :scs (double-reg) :target r + :load-if (not (location= real r))) + (imag :scs (double-reg) :to :save)) + (:arg-types double-float double-float) + (:results (r :scs (complex-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-stack)))) + (:result-types complex-double-float) + (:note "inline complex double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-double-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fstd (ea-for-cdf-real-stack r))) + (t + (inst fxch real) + (inst fstd (ea-for-cdf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fstd (ea-for-cdf-imag-stack r)) + (inst fxch imag))))) + +(define-vop (complex-float-value) + (:args (x :target r)) + (:results (r)) + (:variant-vars offset) + (:policy :fast-safe) + (:generator 3 + (cond ((sc-is x complex-single-reg complex-double-reg) + (let ((value-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (+ offset (tn-offset x))))) + (unless (location= value-tn r) + (cond ((zerop (tn-offset r)) + (copy-fp-reg-to-fr0 value-tn)) + ((zerop (tn-offset value-tn)) + (inst fstd r)) + (t + (inst fxch value-tn) + (inst fstd r) + (inst fxch value-tn)))))) + ((sc-is r single-reg) + (let ((ea (sc-case x + (complex-single-stack + (ecase offset + (0 (ea-for-csf-real-stack x)) + (1 (ea-for-csf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-csf-real-desc x)) + (1 (ea-for-csf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fld ea)))) + ((sc-is r double-reg) + (let ((ea (sc-case x + (complex-double-stack + (ecase offset + (0 (ea-for-cdf-real-stack x)) + (1 (ea-for-cdf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-cdf-real-desc x)) + (1 (ea-for-cdf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldd ea)))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) + +(define-vop (realpart/complex-single-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (realpart/complex-double-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (imagpart/complex-single-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float imagpart") + (:variant 1)) + +(define-vop (imagpart/complex-double-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float imagpart") + (:variant 1)) + + +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations +(defknown double-float-reg-bias (double-float) (values)) +(define-vop (double-float-reg-bias) + (:translate double-float-reg-bias) + (:args (x :scs (double-reg double-stack) :load-if nil)) + (:arg-types double-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) +(defknown single-float-reg-bias (single-float) (values)) +(define-vop (single-float-reg-bias) + (:translate single-float-reg-bias) + (:args (x :scs (single-reg single-stack) :load-if nil)) + (:arg-types single-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp new file mode 100644 index 0000000..0ae887b --- /dev/null +++ b/src/compiler/x86-64/insts.lisp @@ -0,0 +1,2863 @@ +;;;; that part of the description of the x86 instruction set (for +;;;; 80386 and above) which can live on the cross-compilation host + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") +;;; FIXME: SB!DISASSEM: prefixes are used so widely in this file that +;;; I wonder whether the separation of the disassembler from the +;;; virtual machine is valid or adds value. + +;;; Note: In CMU CL, this used to be a call to SET-DISASSEM-PARAMS. +(setf sb!disassem:*disassem-inst-alignment-bytes* 1) + +;;; this type is used mostly in disassembly and represents legacy +;;; registers only. r8-15 are handled separately +(deftype reg () '(unsigned-byte 3)) + +;;; default word size for the chip: if the operand size !=:dword +;;; we need to output #x66 (or REX) prefix +(def!constant +default-operand-size+ :dword) + +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) + +(defun offset-next (value dstate) + (declare (type integer value) + (type sb!disassem:disassem-state dstate)) + (+ (sb!disassem:dstate-next-addr dstate) value)) + +(defparameter *default-address-size* + ;; Again, this is the chip default, not the SBCL backend preference + ;; which must be set with prefixes if it's different. It's :dword; + ;; this is not negotiable + :dword) + +(defparameter *byte-reg-names* + #(al cl dl bl ah ch dh bh)) +(defparameter *word-reg-names* + #(ax cx dx bx sp bp si di)) +(defparameter *dword-reg-names* + #(eax ecx edx ebx esp ebp esi edi)) +(defparameter *qword-reg-names* + #(rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15)) + +(defun print-reg-with-width (value width stream dstate) + (declare (ignore dstate)) + (princ (aref (ecase width + (:byte *byte-reg-names*) + (:word *word-reg-names*) + (:dword *dword-reg-names*) + (:qword *qword-reg-names*)) + value) + stream) + ;; XXX plus should do some source-var notes + ) + +(defun print-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value + (sb!disassem:dstate-get-prop dstate 'width) + stream + dstate)) + +(defun print-word-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+) + stream + dstate)) + +(defun print-byte-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value :byte stream dstate)) + +(defun print-addr-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value *default-address-size* stream dstate)) + +(defun print-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-reg value stream dstate) + (print-mem-access value stream nil dstate))) + +;; Same as print-reg/mem, but prints an explicit size indicator for +;; memory references. +(defun print-sized-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-reg value stream dstate) + (print-mem-access value stream t dstate))) + +(defun print-byte-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-byte-reg value stream dstate) + (print-mem-access value stream t dstate))) + +(defun print-word-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-word-reg value stream dstate) + (print-mem-access value stream nil dstate))) + +(defun print-label (value stream dstate) + (declare (ignore dstate)) + (sb!disassem:princ16 value stream)) + +;;; Returns either an integer, meaning a register, or a list of +;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component +;;; may be missing or nil to indicate that it's not used or has the +;;; obvious default value (e.g., 1 for the index-scale). +(defun prefilter-reg/mem (value dstate) + (declare (type list value) + (type sb!disassem:disassem-state dstate)) + (let ((mod (car value)) + (r/m (cadr value))) + (declare (type (unsigned-byte 2) mod) + (type (unsigned-byte 3) r/m)) + (cond ((= mod #b11) + ;; registers + r/m) + ((= r/m #b100) + ;; sib byte + (let ((sib (sb!disassem:read-suffix 8 dstate))) + (declare (type (unsigned-byte 8) sib)) + (let ((base-reg (ldb (byte 3 0) sib)) + (index-reg (ldb (byte 3 3) sib)) + (index-scale (ldb (byte 2 6) sib))) + (declare (type (unsigned-byte 3) base-reg index-reg) + (type (unsigned-byte 2) index-scale)) + (let* ((offset + (case mod + (#b00 + (if (= base-reg #b101) + (sb!disassem:read-signed-suffix 32 dstate) + nil)) + (#b01 + (sb!disassem:read-signed-suffix 8 dstate)) + (#b10 + (sb!disassem:read-signed-suffix 32 dstate))))) + (list (if (and (= mod #b00) (= base-reg #b101)) nil base-reg) + offset + (if (= index-reg #b100) nil index-reg) + (ash 1 index-scale)))))) + ((and (= mod #b00) (= r/m #b101)) + (list nil (sb!disassem:read-signed-suffix 32 dstate)) ) + ((= mod #b00) + (list r/m)) + ((= mod #b01) + (list r/m (sb!disassem:read-signed-suffix 8 dstate))) + (t ; (= mod #b10) + (list r/m (sb!disassem:read-signed-suffix 32 dstate)))))) + + +;;; This is a sort of bogus prefilter that just stores the info globally for +;;; other people to use; it probably never gets printed. +(defun prefilter-width (value dstate) + (setf (sb!disassem:dstate-get-prop dstate 'width) + (if (zerop value) + :byte + (let ((word-width + ;; set by a prefix instruction + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (when (not (eql word-width +default-operand-size+)) + ;; Reset it. + (setf (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+)) + word-width)))) + +(defun read-address (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix (width-bits *default-address-size*) dstate)) + +(defun width-bits (width) + (ecase width + (:byte 8) + (:word 16) + (:dword 32) + (:float 32) + (:double 64))) + +) ; EVAL-WHEN + +;;;; disassembler argument types + +(sb!disassem:define-arg-type displacement + :sign-extend t + :use-label #'offset-next + :printer (lambda (value stream dstate) + (sb!disassem:maybe-note-assembler-routine value nil dstate) + (print-label value stream dstate))) + +(sb!disassem:define-arg-type accum + :printer (lambda (value stream dstate) + (declare (ignore value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg 0 stream dstate))) + +(sb!disassem:define-arg-type word-accum + :printer (lambda (value stream dstate) + (declare (ignore value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-word-reg 0 stream dstate))) + +(sb!disassem:define-arg-type reg + :printer #'print-reg) + +(sb!disassem:define-arg-type addr-reg + :printer #'print-addr-reg) + +(sb!disassem:define-arg-type word-reg + :printer #'print-word-reg) + +(sb!disassem:define-arg-type imm-addr + :prefilter #'read-address + :printer #'print-label) + +(sb!disassem:define-arg-type imm-data + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix + (width-bits (sb!disassem:dstate-get-prop dstate 'width)) + dstate))) + +(sb!disassem:define-arg-type signed-imm-data + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width (sb!disassem:dstate-get-prop dstate 'width))) + (sb!disassem:read-signed-suffix (width-bits width) dstate)))) + +(sb!disassem:define-arg-type signed-imm-byte + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 8 dstate))) + +(sb!disassem:define-arg-type signed-imm-dword + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate))) + +(sb!disassem:define-arg-type imm-word + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (sb!disassem:read-suffix (width-bits width) dstate)))) + +;;; needed for the ret imm16 instruction +(sb!disassem:define-arg-type imm-word-16 + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix 16 dstate))) + +(sb!disassem:define-arg-type reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-reg/mem) +(sb!disassem:define-arg-type sized-reg/mem + ;; Same as reg/mem, but prints an explicit size indicator for + ;; memory references. + :prefilter #'prefilter-reg/mem + :printer #'print-sized-reg/mem) +(sb!disassem:define-arg-type byte-reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-byte-reg/mem) +(sb!disassem:define-arg-type word-reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-word-reg/mem) + +;;; added by jrd +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) +(defun print-fp-reg (value stream dstate) + (declare (ignore dstate)) + (format stream "FR~D" value)) +(defun prefilter-fp-reg (value dstate) + ;; just return it + (declare (ignore dstate)) + value) +) ; EVAL-WHEN +(sb!disassem:define-arg-type fp-reg + :prefilter #'prefilter-fp-reg + :printer #'print-fp-reg) + +(sb!disassem:define-arg-type width + :prefilter #'prefilter-width + :printer (lambda (value stream dstate) + (if;; (zerop value) + (or (null value) + (and (numberp value) (zerop value))) ; zzz jrd + (princ 'b stream) + (let ((word-width + ;; set by a prefix instruction + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (princ (schar (symbol-name word-width) 0) stream))))) + +(eval-when (:compile-toplevel :load-toplevel :execute) +(defparameter *conditions* + '((:o . 0) + (:no . 1) + (:b . 2) (:nae . 2) (:c . 2) + (:nb . 3) (:ae . 3) (:nc . 3) + (:eq . 4) (:e . 4) (:z . 4) + (:ne . 5) (:nz . 5) + (:be . 6) (:na . 6) + (:nbe . 7) (:a . 7) + (:s . 8) + (:ns . 9) + (:p . 10) (:pe . 10) + (:np . 11) (:po . 11) + (:l . 12) (:nge . 12) + (:nl . 13) (:ge . 13) + (:le . 14) (:ng . 14) + (:nle . 15) (:g . 15))) +(defparameter *condition-name-vec* + (let ((vec (make-array 16 :initial-element nil))) + (dolist (cond *conditions*) + (when (null (aref vec (cdr cond))) + (setf (aref vec (cdr cond)) (car cond)))) + vec)) +) ; EVAL-WHEN + +;;; Set assembler parameters. (In CMU CL, this was done with +;;; a call to a macro DEF-ASSEMBLER-PARAMS.) +(eval-when (:compile-toplevel :load-toplevel :execute) + (setf sb!assem:*assem-scheduler-p* nil)) + +(sb!disassem:define-arg-type condition-code + :printer *condition-name-vec*) + +(defun conditional-opcode (condition) + (cdr (assoc condition *conditions* :test #'eq))) + +;;;; disassembler instruction formats + +(eval-when (:compile-toplevel :execute) + (defun swap-if (direction field1 separator field2) + `(:if (,direction :constant 0) + (,field1 ,separator ,field2) + (,field2 ,separator ,field1)))) + +(sb!disassem:define-instruction-format (byte 8 :default-printer '(:name)) + (op :field (byte 8 0)) + ;; optional fields + (accum :type 'accum) + (imm)) + +(sb!disassem:define-instruction-format (simple 8) + (op :field (byte 7 1)) + (width :field (byte 1 0) :type 'width) + ;; optional fields + (accum :type 'accum) + (imm)) + +;;; Same as simple, but with direction bit +(sb!disassem:define-instruction-format (simple-dir 8 :include 'simple) + (op :field (byte 6 2)) + (dir :field (byte 1 1))) + +;;; Same as simple, but with the immediate value occurring by default, +;;; and with an appropiate printer. +(sb!disassem:define-instruction-format (accum-imm 8 + :include 'simple + :default-printer '(:name + :tab accum ", " imm)) + (imm :type 'imm-data)) + +(sb!disassem:define-instruction-format (reg-no-width 8 + :default-printer '(:name :tab reg)) + (op :field (byte 5 3)) + (reg :field (byte 3 0) :type 'word-reg) + ;; optional fields + (accum :type 'word-accum) + (imm)) + +;;; adds a width field to reg-no-width +(sb!disassem:define-instruction-format (reg 8 + :default-printer '(:name :tab reg)) + (op :field (byte 4 4)) + (width :field (byte 1 3) :type 'width) + (reg :field (byte 3 0) :type 'reg) + ;; optional fields + (accum :type 'accum) + (imm) + ) + +;;; Same as reg, but with direction bit +(sb!disassem:define-instruction-format (reg-dir 8 :include 'reg) + (op :field (byte 3 5)) + (dir :field (byte 1 4))) + +(sb!disassem:define-instruction-format (two-bytes 16 + :default-printer '(:name)) + (op :fields (list (byte 8 0) (byte 8 8)))) + +(sb!disassem:define-instruction-format (reg-reg/mem 16 + :default-printer + `(:name :tab reg ", " reg/mem)) + (op :field (byte 7 1)) + (width :field (byte 1 0) :type 'width) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) + :type 'reg/mem) + (reg :field (byte 3 11) :type 'reg) + ;; optional fields + (imm)) + +;;; same as reg-reg/mem, but with direction bit +(sb!disassem:define-instruction-format (reg-reg/mem-dir 16 + :include 'reg-reg/mem + :default-printer + `(:name + :tab + ,(swap-if 'dir 'reg/mem ", " 'reg))) + (op :field (byte 6 2)) + (dir :field (byte 1 1))) + +;;; Same as reg-rem/mem, but uses the reg field as a second op code. +(sb!disassem:define-instruction-format (reg/mem 16 + :default-printer '(:name :tab reg/mem)) + (op :fields (list (byte 7 1) (byte 3 11))) + (width :field (byte 1 0) :type 'width) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) + :type 'sized-reg/mem) + ;; optional fields + (imm)) + +;;; Same as reg/mem, but with the immediate value occurring by default, +;;; and with an appropiate printer. +(sb!disassem:define-instruction-format (reg/mem-imm 16 + :include 'reg/mem + :default-printer + '(:name :tab reg/mem ", " imm)) + (reg/mem :type 'sized-reg/mem) + (imm :type 'imm-data)) + +;;; Same as reg/mem, but with using the accumulator in the default printer +(sb!disassem:define-instruction-format + (accum-reg/mem 16 + :include 'reg/mem :default-printer '(:name :tab accum ", " reg/mem)) + (reg/mem :type 'reg/mem) ; don't need a size + (accum :type 'accum)) + +;;; Same as reg-reg/mem, but with a prefix of #b00001111 +(sb!disassem:define-instruction-format (ext-reg-reg/mem 24 + :default-printer + `(:name :tab reg ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 7 9)) + (width :field (byte 1 8) :type 'width) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'reg/mem) + (reg :field (byte 3 19) :type 'reg) + ;; optional fields + (imm)) + +;;; reg-no-width with #x0f prefix +(sb!disassem:define-instruction-format (ext-reg-no-width 16 + :default-printer '(:name :tab reg)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 5 11)) + (reg :field (byte 3 8) :type 'word-reg)) + +;;; Same as reg/mem, but with a prefix of #b00001111 +(sb!disassem:define-instruction-format (ext-reg/mem 24 + :default-printer '(:name :tab reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :fields (list (byte 7 9) (byte 3 19))) + (width :field (byte 1 8) :type 'width) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'sized-reg/mem) + ;; optional fields + (imm)) + +(sb!disassem:define-instruction-format (ext-reg/mem-imm 24 + :include 'ext-reg/mem + :default-printer + '(:name :tab reg/mem ", " imm)) + (imm :type 'imm-data)) + +;;;; This section was added by jrd, for fp instructions. + +;;; regular fp inst to/from registers/memory +(sb!disassem:define-instruction-format (floating-point 16 + :default-printer + `(:name :tab reg/mem)) + (prefix :field (byte 5 3) :value #b11011) + (op :fields (list (byte 3 0) (byte 3 11))) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) :type 'reg/mem)) + +;;; fp insn to/from fp reg +(sb!disassem:define-instruction-format (floating-point-fp 16 + :default-printer `(:name :tab fp-reg)) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 3 0) (byte 3 11))) + (fp-reg :field (byte 3 8) :type 'fp-reg)) + +;;; fp insn to/from fp reg, with the reversed source/destination flag. +(sb!disassem:define-instruction-format + (floating-point-fp-d 16 + :default-printer `(:name :tab ,(swap-if 'd "ST0" ", " 'fp-reg))) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 2 0) (byte 3 11))) + (d :field (byte 1 2)) + (fp-reg :field (byte 3 8) :type 'fp-reg)) + + +;;; (added by (?) pfw) +;;; fp no operand isns +(sb!disassem:define-instruction-format (floating-point-no 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011001) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (floating-point-3 16 + :default-printer '(:name)) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 3 0) (byte 6 8)))) + +(sb!disassem:define-instruction-format (floating-point-5 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011011) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (floating-point-st 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011111) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (string-op 8 + :include 'simple + :default-printer '(:name width))) + +(sb!disassem:define-instruction-format (short-cond-jump 16) + (op :field (byte 4 4)) + (cc :field (byte 4 0) :type 'condition-code) + (label :field (byte 8 8) :type 'displacement)) + +(sb!disassem:define-instruction-format (short-jump 16 + :default-printer '(:name :tab label)) + (const :field (byte 4 4) :value #b1110) + (op :field (byte 4 0)) + (label :field (byte 8 8) :type 'displacement)) + +(sb!disassem:define-instruction-format (near-cond-jump 16) + (op :fields (list (byte 8 0) (byte 4 12)) :value '(#b00001111 #b1000)) + (cc :field (byte 4 8) :type 'condition-code) + ;; The disassembler currently doesn't let you have an instruction > 32 bits + ;; long, so we fake it by using a prefilter to read the offset. + (label :type 'displacement + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate)))) + +(sb!disassem:define-instruction-format (near-jump 8 + :default-printer '(:name :tab label)) + (op :field (byte 8 0)) + ;; The disassembler currently doesn't let you have an instruction > 32 bits + ;; long, so we fake it by using a prefilter to read the address. + (label :type 'displacement + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate)))) + + +(sb!disassem:define-instruction-format (cond-set 24 + :default-printer '('set cc :tab reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 4 12) :value #b1001) + (cc :field (byte 4 8) :type 'condition-code) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'byte-reg/mem) + (reg :field (byte 3 19) :value #b000)) + +(sb!disassem:define-instruction-format (cond-move 24 + :default-printer + '('cmov cc :tab reg ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 4 12) :value #b0100) + (cc :field (byte 4 8) :type 'condition-code) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'reg/mem) + (reg :field (byte 3 19) :type 'reg)) + +(sb!disassem:define-instruction-format (enter-format 32 + :default-printer '(:name + :tab disp + (:unless (:constant 0) + ", " level))) + (op :field (byte 8 0)) + (disp :field (byte 16 8)) + (level :field (byte 8 24))) + +;;; Single byte instruction with an immediate byte argument. +(sb!disassem:define-instruction-format (byte-imm 16 + :default-printer '(:name :tab code)) + (op :field (byte 8 0)) + (code :field (byte 8 8))) + +;;;; primitive emitters + +(define-bitfield-emitter emit-word 16 + (byte 16 0)) + +(define-bitfield-emitter emit-dword 32 + (byte 32 0)) + +(define-bitfield-emitter emit-qword 64 + (byte 64 0)) + +(define-bitfield-emitter emit-byte-with-reg 8 + (byte 5 3) (byte 3 0)) + +(define-bitfield-emitter emit-mod-reg-r/m-byte 8 + (byte 2 6) (byte 3 3) (byte 3 0)) + +(define-bitfield-emitter emit-sib-byte 8 + (byte 2 6) (byte 3 3) (byte 3 0)) + +(define-bitfield-emitter emit-rex-byte 8 + (byte 4 4) (byte 1 3) (byte 1 2) (byte 1 1) (byte 1 0)) + + + +;;;; fixup emitters + +(defun emit-absolute-fixup (segment fixup &optional quad-p) + (note-fixup segment (if quad-p :absolute64 :absolute) fixup) + (let ((offset (fixup-offset fixup))) + (if (label-p offset) + (emit-back-patch segment + (if quad-p 8 4) + (lambda (segment posn) + (declare (ignore posn)) + (let ((val (- (+ (component-header-length) + (or (label-position offset) + 0)) + other-pointer-lowtag))) + (if quad-p + (emit-qword segment val ) + (emit-dword segment val ))))) + (if quad-p + (emit-qword segment (or offset 0)) + (emit-dword segment (or offset 0)))))) + +(defun emit-relative-fixup (segment fixup) + (note-fixup segment :relative fixup) + (emit-dword segment (or (fixup-offset fixup) 0))) + +;;;; the effective-address (ea) structure + +(defun reg-tn-encoding (tn) + (declare (type tn tn)) + (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + ;; ea only has space for three bits of register number: regs r8 + ;; and up are selected by a REX prefix byte which caller is responsible + ;; for having emitted where necessary already + (let ((offset (mod (tn-offset tn) 16))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) + +(defstruct (ea (:constructor make-ea (size &key base index scale disp)) + (:copier nil)) + ;; note that we can represent an EA qith a QWORD size, but EMIT-EA + ;; can't actually emit it on its own: caller also needs to emit REX + ;; prefix + (size nil :type (member :byte :word :dword :qword)) + (base nil :type (or tn null)) + (index nil :type (or tn null)) + (scale 1 :type (member 1 2 4 8)) + (disp 0 :type (or (unsigned-byte 32) (signed-byte 32) fixup))) +(def!method print-object ((ea ea) stream) + (cond ((or *print-escape* *print-readably*) + (print-unreadable-object (ea stream :type t) + (format stream + "~S~@[ base=~S~]~@[ index=~S~]~@[ scale=~S~]~@[ disp=~S~]" + (ea-size ea) + (ea-base ea) + (ea-index ea) + (let ((scale (ea-scale ea))) + (if (= scale 1) nil scale)) + (ea-disp ea)))) + (t + (format stream "~A PTR [" (symbol-name (ea-size ea))) + (when (ea-base ea) + (write-string (sb!c::location-print-name (ea-base ea)) stream) + (when (ea-index ea) + (write-string "+" stream))) + (when (ea-index ea) + (write-string (sb!c::location-print-name (ea-index ea)) stream)) + (unless (= (ea-scale ea) 1) + (format stream "*~A" (ea-scale ea))) + (typecase (ea-disp ea) + (null) + (integer + (format stream "~@D" (ea-disp ea))) + (t + (format stream "+~A" (ea-disp ea)))) + (write-char #\] stream)))) + +(defun emit-ea (segment thing reg &optional allow-constants) + (etypecase thing + (tn + ;; this would be eleganter if we had a function that would create + ;; an ea given a tn + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off RBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) + (cond ((< -128 disp 127) + (emit-mod-reg-r/m-byte segment #b01 reg #b101) + (emit-byte segment disp)) + (t + (emit-mod-reg-r/m-byte segment #b10 reg #b101) + (emit-dword segment disp))))) + (constant + (unless allow-constants + (error + "Constant TNs can only be directly used in MOV, PUSH, and CMP.")) + (emit-mod-reg-r/m-byte segment #b00 reg #b100) + (emit-sib-byte segment 1 4 5) ;no base, no index + (emit-absolute-fixup segment + (make-fixup nil + :code-object + (- (* (tn-offset thing) n-word-bytes) + other-pointer-lowtag)))))) + (ea + (let* ((base (ea-base thing)) + (index (ea-index thing)) + (scale (ea-scale thing)) + (disp (ea-disp thing)) + (mod (cond ((or (null base) + (and (eql disp 0) + (not (= (reg-tn-encoding base) #b101)))) + #b00) + ((and (fixnump disp) (<= -128 disp 127)) + #b01) + (t + #b10))) + (r/m (cond (index #b100) + ((null base) #b101) + (t (reg-tn-encoding base))))) + (when (and (= mod 0) (= r/m #b101)) + ;; this is rip-relative in amd64, so we'll use a sib instead + (setf r/m #b100 scale 1)) + (emit-mod-reg-r/m-byte segment mod reg r/m) + (when (= r/m #b100) + (let ((ss (1- (integer-length scale))) + (index (if (null index) + #b100 + (let ((index (reg-tn-encoding index))) + (if (= index #b100) + (error "can't index off of ESP") + index)))) + (base (if (null base) + #b101 + (reg-tn-encoding base)))) + (emit-sib-byte segment ss index base))) + (cond ((= mod #b01) + (emit-byte segment disp)) + ((or (= mod #b10) (null base)) + (if (fixup-p disp) + (emit-absolute-fixup segment disp) + (emit-dword segment disp)))))) + (fixup + (emit-mod-reg-r/m-byte segment #b00 reg #b100) + (emit-sib-byte segment 0 #b100 #b101) + (emit-absolute-fixup segment thing)))) + +(defun fp-reg-tn-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'float-registers))) + +;;; like the above, but for fp-instructions--jrd +(defun emit-fp-op (segment thing op) + (if (fp-reg-tn-p thing) + (emit-byte segment (dpb op (byte 3 3) (dpb (tn-offset thing) + (byte 3 0) + #b11000000))) + (emit-ea segment thing op))) + +(defun byte-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *byte-sc-names*) + t)) + +(defun byte-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :byte)) + (tn + (and (member (sc-name (tn-sc thing)) *byte-sc-names*) t)) + (t nil))) + +(defun word-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *word-sc-names*) + t)) + +(defun word-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :word)) + (tn (and (member (sc-name (tn-sc thing)) *word-sc-names*) t)) + (t nil))) + +(defun dword-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *dword-sc-names*) + t)) + +(defun dword-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :dword)) + (tn + (and (member (sc-name (tn-sc thing)) *dword-sc-names*) t)) + (t nil))) + +(defun qword-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *qword-sc-names*) + t)) + +(defun qword-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :qword)) + (tn + (and (member (sc-name (tn-sc thing)) *qword-sc-names*) t)) + (t nil))) + + +(defun register-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + +(defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) + +;;;; utilities + +(def!constant +operand-size-prefix-byte+ #b01100110) + +(defun maybe-emit-operand-size-prefix (segment size) + (unless (or (eq size :byte) + (eq size :qword) ; REX prefix handles this + (eq size +default-operand-size+)) + (emit-byte segment +operand-size-prefix-byte+))) + +(defun maybe-emit-rex-prefix (segment operand-size r x b) + (labels ((if-hi (r) ;; offset of r8 is 16 + (if (and r (> (tn-offset r) 15)) 1 0))) + (let ((rex-w (if (eq operand-size :qword) 1 0)) + (rex-r (if-hi r)) + (rex-x (if-hi x)) + (rex-b (if-hi b))) + (when (not (zerop (logior rex-w rex-r rex-x rex-b))) + (emit-rex-byte segment #b0100 rex-w rex-r rex-x rex-b))))) + +(defun maybe-emit-rex-for-ea (segment ea reg) + (let ((ea-p (ea-p ea))) ;emit-ea can also be called with a tn + (maybe-emit-rex-prefix segment (operand-size ea) reg + (and ea-p (ea-index ea)) + (cond (ea-p (ea-base ea)) + ((and (tn-p ea) + (eql (sb-name (sc-sb (tn-sc ea))) + 'registers)) + ea) + (t nil))))) + +(defun operand-size (thing) + (typecase thing + (tn + ;; FIXME: might as well be COND instead of having to use #. readmacro + ;; to hack up the code + (case (sc-name (tn-sc thing)) + (#.*qword-sc-names* + :qword) + (#.*dword-sc-names* + :dword) + (#.*word-sc-names* + :word) + (#.*byte-sc-names* + :byte) + ;; added by jrd: float-registers is a separate size (?) + (#.*float-sc-names* + :float) + (#.*double-sc-names* + :double) + (t + (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) + (ea + (ea-size thing)) + (t + nil))) + +(defun matching-operand-size (dst src) + (let ((dst-size (operand-size dst)) + (src-size (operand-size src))) + (if dst-size + (if src-size + (if (eq dst-size src-size) + dst-size + (error "size mismatch: ~S is a ~S and ~S is a ~S." + dst dst-size src src-size)) + dst-size) + (if src-size + src-size + (error "can't tell the size of either ~S or ~S" dst src))))) + +(defun emit-sized-immediate (segment size value &optional quad-p) + (ecase size + (:byte + (emit-byte segment value)) + (:word + (emit-word segment value)) + ((:dword :qword) + ;; except in a very few cases (MOV instructions A1,A3,B8) we expect + ;; dword data bytes even when 64 bit work is being done. So, mostly + ;; we treat quad constants as dwords. + (if (and quad-p (eq size :qword)) + (emit-qword segment value) + (emit-dword segment value))))) + +;;;; general data transfer + +(define-instruction mov (segment dst src) + ;; immediate to register + (:printer reg ((op #b1011) (imm nil :type 'imm-data)) + '(:name :tab reg ", " imm)) + ;; absolute mem to/from accumulator + (:printer simple-dir ((op #b101000) (imm nil :type 'imm-addr)) + `(:name :tab ,(swap-if 'dir 'accum ", " '("[" imm "]")))) + ;; register to/from register/memory + (:printer reg-reg/mem-dir ((op #b100010))) + ;; immediate to register/memory + (:printer reg/mem-imm ((op '(#b1100011 #b000)))) + + (:emitter + (let ((size (matching-operand-size dst src))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p dst) + (cond ((integerp src) + (maybe-emit-rex-prefix segment size nil nil dst) + (emit-byte-with-reg segment + (if (eq size :byte) + #b10110 + #b10111) + (reg-tn-encoding dst)) + (emit-sized-immediate segment size src (eq size :qword))) + ((and (fixup-p src) (accumulator-p dst)) + (maybe-emit-rex-prefix segment (operand-size src) + nil nil nil) + (emit-byte segment + (if (eq size :byte) + #b10100000 + #b10100001)) + (emit-absolute-fixup segment src (eq size :qword))) + (t + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment + (if (eq size :byte) + #b10001010 + #b10001011)) + (emit-ea segment src (reg-tn-encoding dst) t)))) + ((and (fixup-p dst) (accumulator-p src)) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b10100010 #b10100011)) + (emit-absolute-fixup segment dst (eq size :qword))) + ((integerp src) + ;; C7 only deals with 32 bit immediates even if register is + ;; 64 bit: only b8-bf use 64 bit immediates + (maybe-emit-rex-for-ea segment dst nil) + (cond ((typep src '(or (signed-byte 32) (unsigned-byte 32))) + (emit-byte segment + (if (eq size :byte) #b11000110 #b11000111)) + (emit-ea segment dst #b000) + (emit-sized-immediate segment + (case size (:qword :dword) (t size)) + src)) + (t + (aver nil)))) + ((register-p src) + (maybe-emit-rex-for-ea segment dst src) + (emit-byte segment (if (eq size :byte) #b10001000 #b10001001)) + (emit-ea segment dst (reg-tn-encoding src))) + ((fixup-p src) + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment #b11000111) + (emit-ea segment dst #b000) + (emit-absolute-fixup segment src)) + (t + (error "bogus arguments to MOV: ~S ~S" dst src)))))) + +(defun emit-move-with-extension (segment dst src signed-p) + (aver (register-p dst)) + (let ((dst-size (operand-size dst)) + (src-size (operand-size src)) + (opcode (if signed-p #b10111110 #b10110110))) + (ecase dst-size + (:word + (aver (eq src-size :byte)) + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b00001111) + (emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))) + ((:dword :qword) + (ecase src-size + (:byte + (maybe-emit-operand-size-prefix segment :dword) + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment #b00001111) + (emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))) + (:word + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment #b00001111) + (emit-byte segment (logior opcode 1)) + (emit-ea segment src (reg-tn-encoding dst))) + (:dword + (aver (eq dst-size :qword)) + ;; dst is in reg, src is in modrm + (let ((ea-p (ea-p src))) + (maybe-emit-rex-prefix segment (if signed-p :qword :dword) dst + (and ea-p (ea-index src)) + (cond (ea-p (ea-base src)) + ((tn-p src) src) + (t nil))) + (emit-byte segment #x63) ;movsxd + ;;(emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))))))))) + +(define-instruction movsx (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011111) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src :signed))) + +(define-instruction movzx (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011011) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src nil))) + +(define-instruction movsxd (segment dst src) + (:printer reg-reg/mem ((op #x63) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src :signed))) + +;;; this is not a real amd64 instruction, of course +(define-instruction movzxd (segment dst src) + (:printer reg-reg/mem ((op #x63) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src nil))) + +(define-instruction push (segment src) + ;; register + (:printer reg-no-width ((op #b01010))) + ;; register/memory + (:printer reg/mem ((op '(#b1111111 #b110)) (width 1))) + ;; immediate + (:printer byte ((op #b01101010) (imm nil :type 'signed-imm-byte)) + '(:name :tab imm)) + (:printer byte ((op #b01101000) (imm nil :type 'imm-word)) + '(:name :tab imm)) + ;; ### segment registers? + + (:emitter + (cond ((integerp src) + (cond ((<= -128 src 127) + (emit-byte segment #b01101010) + (emit-byte segment src)) + (t + ;; AMD64 manual says no REX needed but is unclear + ;; whether it expects 32 or 64 bit immediate here + (emit-byte segment #b01101000) + (emit-dword segment src)))) + ((fixup-p src) + ;; Interpret the fixup as an immediate dword to push. + (emit-byte segment #b01101000) + (emit-absolute-fixup segment src)) + (t + (let ((size (operand-size src))) + (aver (not (eq size :byte))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src nil) + (cond ((register-p src) + (emit-byte-with-reg segment #b01010 (reg-tn-encoding src))) + (t + (emit-byte segment #b11111111) + (emit-ea segment src #b110 t)))))))) + +(define-instruction pusha (segment) + (:printer byte ((op #b01100000))) + (:emitter + (emit-byte segment #b01100000))) + +(define-instruction pop (segment dst) + (:printer reg-no-width ((op #b01011))) + (:printer reg/mem ((op '(#b1000111 #b000)) (width 1))) + (:emitter + (let ((size (operand-size dst))) + (aver (not (eq size :byte))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst nil) + (cond ((register-p dst) + (emit-byte-with-reg segment #b01011 (reg-tn-encoding dst))) + (t + (emit-byte segment #b10001111) + (emit-ea segment dst #b000)))))) + +(define-instruction popa (segment) + (:printer byte ((op #b01100001))) + (:emitter + (emit-byte segment #b01100001))) + +(define-instruction xchg (segment operand1 operand2) + ;; Register with accumulator. + (:printer reg-no-width ((op #b10010)) '(:name :tab accum ", " reg)) + ;; Register/Memory with Register. + (:printer reg-reg/mem ((op #b1000011))) + (:emitter + (let ((size (matching-operand-size operand1 operand2))) + (maybe-emit-operand-size-prefix segment size) + (labels ((xchg-acc-with-something (acc something) + (if (and (not (eq size :byte)) (register-p something)) + (emit-byte-with-reg segment + #b10010 + (reg-tn-encoding something)) + (xchg-reg-with-something acc something))) + (xchg-reg-with-something (reg something) + (emit-byte segment (if (eq size :byte) #b10000110 #b10000111)) + (emit-ea segment something (reg-tn-encoding reg)))) + (cond ((accumulator-p operand1) + (xchg-acc-with-something operand1 operand2)) + ((accumulator-p operand2) + (xchg-acc-with-something operand2 operand1)) + ((register-p operand1) + (xchg-reg-with-something operand1 operand2)) + ((register-p operand2) + (xchg-reg-with-something operand2 operand1)) + (t + (error "bogus args to XCHG: ~S ~S" operand1 operand2))))))) + +(define-instruction lea (segment dst src) + (:printer reg-reg/mem ((op #b1000110) (width 1))) + (:emitter + (aver (or (dword-reg-p dst) (qword-reg-p dst))) + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment #b10001101) + (emit-ea segment src (reg-tn-encoding dst)))) + +(define-instruction cmpxchg (segment dst src) + ;; Register/Memory with Register. + (:printer ext-reg-reg/mem ((op #b1011000)) '(:name :tab reg/mem ", " reg)) + (:emitter + (aver (register-p src)) + (let ((size (matching-operand-size src dst))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst src) + (emit-byte segment #b00001111) + (emit-byte segment (if (eq size :byte) #b10110000 #b10110001)) + (emit-ea segment dst (reg-tn-encoding src))))) + + + +(define-instruction fs-segment-prefix (segment) + (:emitter + (emit-byte segment #x64))) + +;;;; flag control instructions + +;;; CLC -- Clear Carry Flag. +(define-instruction clc (segment) + (:printer byte ((op #b11111000))) + (:emitter + (emit-byte segment #b11111000))) + +;;; CLD -- Clear Direction Flag. +(define-instruction cld (segment) + (:printer byte ((op #b11111100))) + (:emitter + (emit-byte segment #b11111100))) + +;;; CLI -- Clear Iterrupt Enable Flag. +(define-instruction cli (segment) + (:printer byte ((op #b11111010))) + (:emitter + (emit-byte segment #b11111010))) + +;;; CMC -- Complement Carry Flag. +(define-instruction cmc (segment) + (:printer byte ((op #b11110101))) + (:emitter + (emit-byte segment #b11110101))) + +;;; LAHF -- Load AH into flags. +(define-instruction lahf (segment) + (:printer byte ((op #b10011111))) + (:emitter + (emit-byte segment #b10011111))) + +;;; POPF -- Pop flags. +(define-instruction popf (segment) + (:printer byte ((op #b10011101))) + (:emitter + (emit-byte segment #b10011101))) + +;;; PUSHF -- push flags. +(define-instruction pushf (segment) + (:printer byte ((op #b10011100))) + (:emitter + (emit-byte segment #b10011100))) + +;;; SAHF -- Store AH into flags. +(define-instruction sahf (segment) + (:printer byte ((op #b10011110))) + (:emitter + (emit-byte segment #b10011110))) + +;;; STC -- Set Carry Flag. +(define-instruction stc (segment) + (:printer byte ((op #b11111001))) + (:emitter + (emit-byte segment #b11111001))) + +;;; STD -- Set Direction Flag. +(define-instruction std (segment) + (:printer byte ((op #b11111101))) + (:emitter + (emit-byte segment #b11111101))) + +;;; STI -- Set Interrupt Enable Flag. +(define-instruction sti (segment) + (:printer byte ((op #b11111011))) + (:emitter + (emit-byte segment #b11111011))) + +;;;; arithmetic + +(defun emit-random-arith-inst (name segment dst src opcode + &optional allow-constants) + (let ((size (matching-operand-size dst src))) + (maybe-emit-operand-size-prefix segment size) + (cond + ((integerp src) + (cond ((and (not (eq size :byte)) (<= -128 src 127)) + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment #b10000011) + (emit-ea segment dst opcode allow-constants) + (emit-byte segment src)) + ((accumulator-p dst) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) + #b00000100 + #b00000101))) + (emit-sized-immediate segment size src)) + (t + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment (if (eq size :byte) #b10000000 #b10000001)) + (emit-ea segment dst opcode allow-constants) + (emit-sized-immediate segment size src)))) + ((register-p src) + (maybe-emit-rex-for-ea segment dst src) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) #b00000000 #b00000001))) + (emit-ea segment dst (reg-tn-encoding src) allow-constants)) + ((register-p dst) + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) #b00000010 #b00000011))) + (emit-ea segment src (reg-tn-encoding dst) allow-constants)) + (t + (error "bogus operands to ~A" name))))) + +(eval-when (:compile-toplevel :execute) + (defun arith-inst-printer-list (subop) + `((accum-imm ((op ,(dpb subop (byte 3 2) #b0000010)))) + (reg/mem-imm ((op (#b1000000 ,subop)))) + (reg/mem-imm ((op (#b1000001 ,subop)) + (imm nil :type signed-imm-byte))) + (reg-reg/mem-dir ((op ,(dpb subop (byte 3 1) #b000000)))))) + ) + +(define-instruction add (segment dst src) + (:printer-list (arith-inst-printer-list #b000)) + (:emitter (emit-random-arith-inst "ADD" segment dst src #b000))) + +(define-instruction adc (segment dst src) + (:printer-list (arith-inst-printer-list #b010)) + (:emitter (emit-random-arith-inst "ADC" segment dst src #b010))) + +(define-instruction sub (segment dst src) + (:printer-list (arith-inst-printer-list #b101)) + (:emitter (emit-random-arith-inst "SUB" segment dst src #b101))) + +(define-instruction sbb (segment dst src) + (:printer-list (arith-inst-printer-list #b011)) + (:emitter (emit-random-arith-inst "SBB" segment dst src #b011))) + +(define-instruction cmp (segment dst src) + (:printer-list (arith-inst-printer-list #b111)) + (:emitter (emit-random-arith-inst "CMP" segment dst src #b111 t))) + +(define-instruction inc (segment dst) + ;; Register/Memory + (:printer reg/mem ((op '(#b1111111 #b000)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (cond #+nil ; these opcodes become REX prefixes in x86-64 + ((and (not (eq size :byte)) (register-p dst)) + (emit-byte-with-reg segment #b01000 (reg-tn-encoding dst))) + (t + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment (if (eq size :byte) #b11111110 #b11111111)) + (emit-ea segment dst #b000)))))) + +(define-instruction dec (segment dst) + ;; Register. + (:printer reg-no-width ((op #b01001))) + ;; Register/Memory + (:printer reg/mem ((op '(#b1111111 #b001)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (cond ((and (not (eq size :byte)) (register-p dst)) + (emit-byte-with-reg segment #b01001 (reg-tn-encoding dst))) + (t + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment (if (eq size :byte) #b11111110 #b11111111)) + (emit-ea segment dst #b001)))))) + +(define-instruction neg (segment dst) + (:printer reg/mem ((op '(#b1111011 #b011)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b011)))) + +(define-instruction aaa (segment) + (:printer byte ((op #b00110111))) + (:emitter + (emit-byte segment #b00110111))) + +(define-instruction aas (segment) + (:printer byte ((op #b00111111))) + (:emitter + (emit-byte segment #b00111111))) + +(define-instruction daa (segment) + (:printer byte ((op #b00100111))) + (:emitter + (emit-byte segment #b00100111))) + +(define-instruction das (segment) + (:printer byte ((op #b00101111))) + (:emitter + (emit-byte segment #b00101111))) + +(define-instruction mul (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b100)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src nil) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b100)))) + +(define-instruction imul (segment dst &optional src1 src2) + (:printer accum-reg/mem ((op '(#b1111011 #b101)))) + (:printer ext-reg-reg/mem ((op #b1010111))) + (:printer reg-reg/mem ((op #b0110100) (width 1) (imm nil :type 'imm-word)) + '(:name :tab reg ", " reg/mem ", " imm)) + (:printer reg-reg/mem ((op #b0110101) (width 1) + (imm nil :type 'signed-imm-byte)) + '(:name :tab reg ", " reg/mem ", " imm)) + (:emitter + (flet ((r/m-with-immed-to-reg (reg r/m immed) + (let* ((size (matching-operand-size reg r/m)) + (sx (and (not (eq size :byte)) (<= -128 immed 127)))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment r/m reg) + (emit-byte segment (if sx #b01101011 #b01101001)) + (emit-ea segment r/m (reg-tn-encoding reg)) + (if sx + (emit-byte segment immed) + (emit-sized-immediate segment size immed))))) + (cond (src2 + (r/m-with-immed-to-reg dst src1 src2)) + (src1 + (if (integerp src1) + (r/m-with-immed-to-reg dst dst src1) + (let ((size (matching-operand-size dst src1))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src1 dst) + (emit-byte segment #b00001111) + (emit-byte segment #b10101111) + (emit-ea segment src1 (reg-tn-encoding dst))))) + (t + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b101))))))) + +(define-instruction div (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b110)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src nil) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b110)))) + +(define-instruction idiv (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b111)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src nil) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b111)))) + +(define-instruction bswap (segment dst) + (:printer ext-reg-no-width ((op #b11001))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-rex-prefix segment size nil nil dst) + (emit-byte segment #x0f) + (emit-byte-with-reg segment #b11001 (reg-tn-encoding dst))))) + + +(define-instruction aad (segment) + (:printer two-bytes ((op '(#b11010101 #b00001010)))) + (:emitter + (emit-byte segment #b11010101) + (emit-byte segment #b00001010))) + +(define-instruction aam (segment) + (:printer two-bytes ((op '(#b11010100 #b00001010)))) + (:emitter + (emit-byte segment #b11010100) + (emit-byte segment #b00001010))) + +;;; CBW -- Convert Byte to Word. AX <- sign_xtnd(AL) +(define-instruction cbw (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b10011000))) + +;;; CWDE -- Convert Word To Double Word Extened. EAX <- sign_xtnd(AX) +(define-instruction cwde (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b10011000))) + +;;; CWD -- Convert Word to Double Word. DX:AX <- sign_xtnd(AX) +(define-instruction cwd (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b10011001))) + +;;; CDQ -- Convert Double Word to Quad Word. EDX:EAX <- sign_xtnd(EAX) +(define-instruction cdq (segment) + (:printer byte ((op #b10011001))) + (:emitter + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b10011001))) + +;;; CQO -- Convert Quad or Octaword. RDX:RAX <- sign_xtnd(RAX) +(define-instruction cqo (segment) + (:printer byte ((op #b10011001))) + (:emitter + (maybe-emit-rex-prefix segment :qword nil nil nil) + (emit-byte segment #b10011001))) + +(define-instruction xadd (segment dst src) + ;; Register/Memory with Register. + (:printer ext-reg-reg/mem ((op #b1100000)) '(:name :tab reg/mem ", " reg)) + (:emitter + (aver (register-p src)) + (let ((size (matching-operand-size src dst))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst src) + (emit-byte segment #b00001111) + (emit-byte segment (if (eq size :byte) #b11000000 #b11000001)) + (emit-ea segment dst (reg-tn-encoding src))))) + + +;;;; logic + +(defun emit-shift-inst (segment dst amount opcode) + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (multiple-value-bind (major-opcode immed) + (case amount + (:cl (values #b11010010 nil)) + (1 (values #b11010000 nil)) + (t (values #b11000000 t))) + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment + (if (eq size :byte) major-opcode (logior major-opcode 1))) + (emit-ea segment dst opcode) + (when immed + (emit-byte segment amount))))) + +(eval-when (:compile-toplevel :execute) + (defun shift-inst-printer-list (subop) + `((reg/mem ((op (#b1101000 ,subop))) + (:name :tab reg/mem ", 1")) + (reg/mem ((op (#b1101001 ,subop))) + (:name :tab reg/mem ", " 'cl)) + (reg/mem-imm ((op (#b1100000 ,subop)) + (imm nil :type signed-imm-byte)))))) + +(define-instruction rol (segment dst amount) + (:printer-list + (shift-inst-printer-list #b000)) + (:emitter + (emit-shift-inst segment dst amount #b000))) + +(define-instruction ror (segment dst amount) + (:printer-list + (shift-inst-printer-list #b001)) + (:emitter + (emit-shift-inst segment dst amount #b001))) + +(define-instruction rcl (segment dst amount) + (:printer-list + (shift-inst-printer-list #b010)) + (:emitter + (emit-shift-inst segment dst amount #b010))) + +(define-instruction rcr (segment dst amount) + (:printer-list + (shift-inst-printer-list #b011)) + (:emitter + (emit-shift-inst segment dst amount #b011))) + +(define-instruction shl (segment dst amount) + (:printer-list + (shift-inst-printer-list #b100)) + (:emitter + (emit-shift-inst segment dst amount #b100))) + +(define-instruction shr (segment dst amount) + (:printer-list + (shift-inst-printer-list #b101)) + (:emitter + (emit-shift-inst segment dst amount #b101))) + +(define-instruction sar (segment dst amount) + (:printer-list + (shift-inst-printer-list #b111)) + (:emitter + (emit-shift-inst segment dst amount #b111))) + +(defun emit-double-shift (segment opcode dst src amt) + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "Double shifts can only be used with words.")) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst src) + (emit-byte segment #b00001111) + (emit-byte segment (dpb opcode (byte 1 3) + (if (eq amt :cl) #b10100101 #b10100100))) + (emit-ea segment dst (reg-tn-encoding src)) + (unless (eq amt :cl) + (emit-byte segment amt)))) + +(eval-when (:compile-toplevel :execute) + (defun double-shift-inst-printer-list (op) + `(#+nil + (ext-reg-reg/mem-imm ((op ,(logior op #b100)) + (imm nil :type signed-imm-byte))) + (ext-reg-reg/mem ((op ,(logior op #b101))) + (:name :tab reg/mem ", " 'cl))))) + +(define-instruction shld (segment dst src amt) + (:declare (type (or (member :cl) (mod 32)) amt)) + (:printer-list (double-shift-inst-printer-list #b10100000)) + (:emitter + (emit-double-shift segment #b0 dst src amt))) + +(define-instruction shrd (segment dst src amt) + (:declare (type (or (member :cl) (mod 32)) amt)) + (:printer-list (double-shift-inst-printer-list #b10101000)) + (:emitter + (emit-double-shift segment #b1 dst src amt))) + +(define-instruction and (segment dst src) + (:printer-list + (arith-inst-printer-list #b100)) + (:emitter + (emit-random-arith-inst "AND" segment dst src #b100))) + +(define-instruction test (segment this that) + (:printer accum-imm ((op #b1010100))) + (:printer reg/mem-imm ((op '(#b1111011 #b000)))) + (:printer reg-reg/mem ((op #b1000010))) + (:emitter + (let ((size (matching-operand-size this that))) + (maybe-emit-operand-size-prefix segment size) + (flet ((test-immed-and-something (immed something) + (cond ((accumulator-p something) + (emit-byte segment + (if (eq size :byte) #b10101000 #b10101001)) + (emit-sized-immediate segment size immed)) + (t + (maybe-emit-rex-for-ea segment something nil) + (emit-byte segment + (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment something #b000) + (emit-sized-immediate segment size immed)))) + (test-reg-and-something (reg something) + (maybe-emit-rex-for-ea segment something reg) + (emit-byte segment (if (eq size :byte) #b10000100 #b10000101)) + (emit-ea segment something (reg-tn-encoding reg)))) + (cond ((integerp that) + (test-immed-and-something that this)) + ((integerp this) + (test-immed-and-something this that)) + ((register-p this) + (test-reg-and-something this that)) + ((register-p that) + (test-reg-and-something that this)) + (t + (error "bogus operands for TEST: ~S and ~S" this that))))))) + +(define-instruction or (segment dst src) + (:printer-list + (arith-inst-printer-list #b001)) + (:emitter + (emit-random-arith-inst "OR" segment dst src #b001))) + +(define-instruction xor (segment dst src) + (:printer-list + (arith-inst-printer-list #b110)) + (:emitter + (emit-random-arith-inst "XOR" segment dst src #b110))) + +(define-instruction not (segment dst) + (:printer reg/mem ((op '(#b1111011 #b010)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b010)))) + +;;;; string manipulation + +(define-instruction cmps (segment size) + (:printer string-op ((op #b1010011))) + (:emitter + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b10100110 #b10100111)))) + +(define-instruction ins (segment acc) + (:printer string-op ((op #b0110110))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b01101100 #b01101101))))) + +(define-instruction lods (segment acc) + (:printer string-op ((op #b1010110))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b10101100 #b10101101))))) + +(define-instruction movs (segment size) + (:printer string-op ((op #b1010010))) + (:emitter + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b10100100 #b10100101)))) + +(define-instruction outs (segment acc) + (:printer string-op ((op #b0110111))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b01101110 #b01101111))))) + +(define-instruction scas (segment acc) + (:printer string-op ((op #b1010111))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b10101110 #b10101111))))) + +(define-instruction stos (segment acc) + (:printer string-op ((op #b1010101))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-prefix segment size nil nil nil) + (emit-byte segment (if (eq size :byte) #b10101010 #b10101011))))) + +(define-instruction xlat (segment) + (:printer byte ((op #b11010111))) + (:emitter + (emit-byte segment #b11010111))) + +(define-instruction rep (segment) + (:emitter + (emit-byte segment #b11110010))) + +(define-instruction repe (segment) + (:printer byte ((op #b11110011))) + (:emitter + (emit-byte segment #b11110011))) + +(define-instruction repne (segment) + (:printer byte ((op #b11110010))) + (:emitter + (emit-byte segment #b11110010))) + + +;;;; bit manipulation + +(define-instruction bsf (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011110) (width 0))) + (:emitter + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment #b00001111) + (emit-byte segment #b10111100) + (emit-ea segment src (reg-tn-encoding dst))))) + +(define-instruction bsr (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011110) (width 1))) + (:emitter + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment #b00001111) + (emit-byte segment #b10111101) + (emit-ea segment src (reg-tn-encoding dst))))) + +(defun emit-bit-test-and-mumble (segment src index opcode) + (let ((size (operand-size src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (cond ((integerp index) + (maybe-emit-rex-for-ea segment src nil) + (emit-byte segment #b00001111) + (emit-byte segment #b10111010) + (emit-ea segment src opcode) + (emit-byte segment index)) + (t + (maybe-emit-rex-for-ea segment src index) + (emit-byte segment #b00001111) + (emit-byte segment (dpb opcode (byte 3 3) #b10000011)) + (emit-ea segment src (reg-tn-encoding index)))))) + +(eval-when (:compile-toplevel :execute) + (defun bit-test-inst-printer-list (subop) + `((ext-reg/mem-imm ((op (#b1011101 ,subop)) + (reg/mem nil :type word-reg/mem) + (imm nil :type imm-data) + (width 0))) + (ext-reg-reg/mem ((op ,(dpb subop (byte 3 2) #b1000001)) + (width 1)) + (:name :tab reg/mem ", " reg))))) + +(define-instruction bt (segment src index) + (:printer-list (bit-test-inst-printer-list #b100)) + (:emitter + (emit-bit-test-and-mumble segment src index #b100))) + +(define-instruction btc (segment src index) + (:printer-list (bit-test-inst-printer-list #b111)) + (:emitter + (emit-bit-test-and-mumble segment src index #b111))) + +(define-instruction btr (segment src index) + (:printer-list (bit-test-inst-printer-list #b110)) + (:emitter + (emit-bit-test-and-mumble segment src index #b110))) + +(define-instruction bts (segment src index) + (:printer-list (bit-test-inst-printer-list #b101)) + (:emitter + (emit-bit-test-and-mumble segment src index #b101))) + + +;;;; control transfer + +(define-instruction call (segment where) + (:printer near-jump ((op #b11101000))) + (:printer reg/mem ((op '(#b1111111 #b010)) (width 1))) + (:emitter + (typecase where + (label + (emit-byte segment #b11101000) ; 32 bit relative + (emit-back-patch segment + 4 + (lambda (segment posn) + (emit-dword segment + (- (label-position where) + (+ posn 4)))))) + (fixup + (emit-byte segment #b11101000) + (emit-relative-fixup segment where)) + (t + (emit-byte segment #b11111111) + (emit-ea segment where #b010))))) + +(defun emit-byte-displacement-backpatch (segment target) + (emit-back-patch segment + 1 + (lambda (segment posn) + (let ((disp (- (label-position target) (1+ posn)))) + (aver (<= -128 disp 127)) + (emit-byte segment disp))))) + +(define-instruction jmp (segment cond &optional where) + ;; conditional jumps + (:printer short-cond-jump ((op #b0111)) '('j cc :tab label)) + (:printer near-cond-jump () '('j cc :tab label)) + ;; unconditional jumps + (:printer short-jump ((op #b1011))) + (:printer near-jump ((op #b11101001)) ) + (:printer reg/mem ((op '(#b1111111 #b100)) (width 1))) + (:emitter + (cond (where + (emit-chooser + segment 6 2 + (lambda (segment posn delta-if-after) + (let ((disp (- (label-position where posn delta-if-after) + (+ posn 2)))) + (when (<= -128 disp 127) + (emit-byte segment + (dpb (conditional-opcode cond) + (byte 4 0) + #b01110000)) + (emit-byte-displacement-backpatch segment where) + t))) + (lambda (segment posn) + (let ((disp (- (label-position where) (+ posn 6)))) + (emit-byte segment #b00001111) + (emit-byte segment + (dpb (conditional-opcode cond) + (byte 4 0) + #b10000000)) + (emit-dword segment disp))))) + ((label-p (setq where cond)) + (emit-chooser + segment 5 0 + (lambda (segment posn delta-if-after) + (let ((disp (- (label-position where posn delta-if-after) + (+ posn 2)))) + (when (<= -128 disp 127) + (emit-byte segment #b11101011) + (emit-byte-displacement-backpatch segment where) + t))) + (lambda (segment posn) + (let ((disp (- (label-position where) (+ posn 5)))) + (emit-byte segment #b11101001) + (emit-dword segment disp))))) + ((fixup-p where) + (emit-byte segment #b11101001) + (emit-relative-fixup segment where)) + (t + (unless (or (ea-p where) (tn-p where)) + (error "don't know what to do with ~A" where)) + (emit-byte segment #b11111111) + (emit-ea segment where #b100))))) + +(define-instruction jmp-short (segment label) + (:emitter + (emit-byte segment #b11101011) + (emit-byte-displacement-backpatch segment label))) + +(define-instruction ret (segment &optional stack-delta) + (:printer byte ((op #b11000011))) + (:printer byte ((op #b11000010) (imm nil :type 'imm-word-16)) + '(:name :tab imm)) + (:emitter + (cond (stack-delta + (emit-byte segment #b11000010) + (emit-word segment stack-delta)) + (t + (emit-byte segment #b11000011))))) + +(define-instruction jecxz (segment target) + (:printer short-jump ((op #b0011))) + (:emitter + (emit-byte segment #b11100011) + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loop (segment target) + (:printer short-jump ((op #b0010))) + (:emitter + (emit-byte segment #b11100010) ; pfw this was 11100011, or jecxz!!!! + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loopz (segment target) + (:printer short-jump ((op #b0001))) + (:emitter + (emit-byte segment #b11100001) + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loopnz (segment target) + (:printer short-jump ((op #b0000))) + (:emitter + (emit-byte segment #b11100000) + (emit-byte-displacement-backpatch segment target))) + +;;;; conditional move +(define-instruction cmov (segment cond dst src) + (:printer cond-move ()) + (:emitter + (aver (register-p dst)) + (let ((size (matching-operand-size dst src))) + (aver (or (eq size :word) (eq size :dword) (eq size :qword) )) + (maybe-emit-operand-size-prefix segment size)) + (maybe-emit-rex-for-ea segment src dst) + (emit-byte segment #b00001111) + (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b01000000)) + (emit-ea segment src (reg-tn-encoding dst)))) + +;;;; conditional byte set + +(define-instruction set (segment dst cond) + (:printer cond-set ()) + (:emitter + (maybe-emit-rex-for-ea segment dst nil) + (emit-byte segment #b00001111) + (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b10010000)) + (emit-ea segment dst #b000))) + +;;;; enter/leave + +(define-instruction enter (segment disp &optional (level 0)) + (:declare (type (unsigned-byte 16) disp) + (type (unsigned-byte 8) level)) + (:printer enter-format ((op #b11001000))) + (:emitter + (emit-byte segment #b11001000) + (emit-word segment disp) + (emit-byte segment level))) + +(define-instruction leave (segment) + (:printer byte ((op #b11001001))) + (:emitter + (emit-byte segment #b11001001))) + +;;;; interrupt instructions + +(defun snarf-error-junk (sap offset &optional length-only) + (let* ((length (sb!sys:sap-ref-8 sap offset)) + (vector (make-array length :element-type '(unsigned-byte 8)))) + (declare (type sb!sys:system-area-pointer sap) + (type (unsigned-byte 8) length) + (type (simple-array (unsigned-byte 8) (*)) vector)) + (cond (length-only + (values 0 (1+ length) nil nil)) + (t + (sb!kernel:copy-from-system-area sap (* n-byte-bits (1+ offset)) + vector (* n-word-bits + vector-data-offset) + (* length n-byte-bits)) + (collect ((sc-offsets) + (lengths)) + (lengths 1) ; the length byte + (let* ((index 0) + (error-number (sb!c:read-var-integer vector index))) + (lengths index) + (loop + (when (>= index length) + (return)) + (let ((old-index index)) + (sc-offsets (sb!c:read-var-integer vector index)) + (lengths (- index old-index)))) + (values error-number + (1+ length) + (sc-offsets) + (lengths)))))))) + +#| +(defmacro break-cases (breaknum &body cases) + (let ((bn-temp (gensym))) + (collect ((clauses)) + (dolist (case cases) + (clauses `((= ,bn-temp ,(car case)) ,@(cdr case)))) + `(let ((,bn-temp ,breaknum)) + (cond ,@(clauses)))))) +|# + +(defun break-control (chunk inst stream dstate) + (declare (ignore inst)) + (flet ((nt (x) (if stream (sb!disassem:note x dstate)))) + ;; FIXME: Make sure that BYTE-IMM-CODE is defined. The genesis + ;; map has it undefined; and it should be easier to look in the target + ;; Lisp (with (DESCRIBE 'BYTE-IMM-CODE)) than to definitively deduce + ;; from first principles whether it's defined in some way that genesis + ;; can't grok. + (case (byte-imm-code chunk dstate) + (#.error-trap + (nt "error trap") + (sb!disassem:handle-break-args #'snarf-error-junk stream dstate)) + (#.cerror-trap + (nt "cerror trap") + (sb!disassem:handle-break-args #'snarf-error-junk stream dstate)) + (#.breakpoint-trap + (nt "breakpoint trap")) + (#.pending-interrupt-trap + (nt "pending interrupt trap")) + (#.halt-trap + (nt "halt trap")) + (#.fun-end-breakpoint-trap + (nt "function end breakpoint trap"))))) + +(define-instruction break (segment code) + (:declare (type (unsigned-byte 8) code)) + (:printer byte-imm ((op #b11001100)) '(:name :tab code) + :control #'break-control) + (:emitter + (emit-byte segment #b11001100) + (emit-byte segment code))) + +(define-instruction int (segment number) + (:declare (type (unsigned-byte 8) number)) + (:printer byte-imm ((op #b11001101))) + (:emitter + (etypecase number + ((member 3) + (emit-byte segment #b11001100)) + ((unsigned-byte 8) + (emit-byte segment #b11001101) + (emit-byte segment number))))) + +(define-instruction into (segment) + (:printer byte ((op #b11001110))) + (:emitter + (emit-byte segment #b11001110))) + +(define-instruction bound (segment reg bounds) + (:emitter + (let ((size (matching-operand-size reg bounds))) + (when (eq size :byte) + (error "can't bounds-test bytes: ~S" reg)) + (maybe-emit-operand-size-prefix segment size) + (maybe-emit-rex-for-ea segment bounds reg) + (emit-byte segment #b01100010) + (emit-ea segment bounds (reg-tn-encoding reg))))) + +(define-instruction iret (segment) + (:printer byte ((op #b11001111))) + (:emitter + (emit-byte segment #b11001111))) + +;;;; processor control + +(define-instruction hlt (segment) + (:printer byte ((op #b11110100))) + (:emitter + (emit-byte segment #b11110100))) + +(define-instruction nop (segment) + (:printer byte ((op #b10010000))) + (:emitter + (emit-byte segment #b10010000))) + +(define-instruction wait (segment) + (:printer byte ((op #b10011011))) + (:emitter + (emit-byte segment #b10011011))) + +(define-instruction lock (segment) + (:printer byte ((op #b11110000))) + (:emitter + (emit-byte segment #b11110000))) + +;;;; miscellaneous hackery + +(define-instruction byte (segment byte) + (:emitter + (emit-byte segment byte))) + +(define-instruction word (segment word) + (:emitter + (emit-word segment word))) + +(define-instruction dword (segment dword) + (:emitter + (emit-dword segment dword))) + +(defun emit-header-data (segment type) + (emit-back-patch segment + n-word-bytes + (lambda (segment posn) + (emit-qword segment + (logior type + (ash (+ posn + (component-header-length)) + (- n-widetag-bits + word-shift))))))) + +(define-instruction simple-fun-header-word (segment) + (:emitter + (emit-header-data segment simple-fun-header-widetag))) + +(define-instruction lra-header-word (segment) + (:emitter + (emit-header-data segment return-pc-header-widetag))) + +;;;; fp instructions +;;;; +;;;; Note: We treat the single-precision and double-precision variants +;;;; as separate instructions. + +;;; Load single to st(0). +(define-instruction fld (segment source) + (:printer floating-point ((op '(#b001 #b000)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011001) + (emit-fp-op segment source #b000))) + +;;; Load double to st(0). +(define-instruction fldd (segment source) + (:printer floating-point ((op '(#b101 #b000)))) + (:printer floating-point-fp ((op '(#b001 #b000)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011001) + (progn + (maybe-emit-rex-for-ea segment source nil) + (emit-byte segment #b11011101))) + (emit-fp-op segment source #b000))) + +;;; Load long to st(0). +(define-instruction fldl (segment source) + (:printer floating-point ((op '(#b011 #b101)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011011) + (emit-fp-op segment source #b101))) + +;;; Store single from st(0). +(define-instruction fst (segment dest) + (:printer floating-point ((op '(#b001 #b010)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010)) + (t + (maybe-emit-rex-for-ea segment dest nil) + (emit-byte segment #b11011001) + (emit-fp-op segment dest #b010))))) + +;;; Store double from st(0). +(define-instruction fstd (segment dest) + (:printer floating-point ((op '(#b101 #b010)))) + (:printer floating-point-fp ((op '(#b101 #b010)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010)) + (t + (maybe-emit-rex-for-ea segment dest nil) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010))))) + +;;; Arithmetic ops are all done with at least one operand at top of +;;; stack. The other operand is is another register or a 32/64 bit +;;; memory loc. + +;;; dtc: I've tried to follow the Intel ASM386 conventions, but note +;;; that these conflict with the Gdb conventions for binops. To reduce +;;; the confusion I've added comments showing the mathamatical +;;; operation and the two syntaxes. By the ASM386 convention the +;;; instruction syntax is: +;;; +;;; Fop Source +;;; or Fop Destination, Source +;;; +;;; If only one operand is given then it is the source and the +;;; destination is ST(0). There are reversed forms of the fsub and +;;; fdiv instructions inducated by an 'R' suffix. +;;; +;;; The mathematical operation for the non-reverse form is always: +;;; destination = destination op source +;;; +;;; For the reversed form it is: +;;; destination = source op destination +;;; +;;; The instructions below only accept one operand at present which is +;;; usually the source. I've hack in extra instructions to implement +;;; the fops with a ST(i) destination, these have a -sti suffix and +;;; the operand is the destination with the source being ST(0). + +;;; Add single: +;;; st(0) = st(0) + memory or st(i). +(define-instruction fadd (segment source) + (:printer floating-point ((op '(#b000 #b000)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment source #b000))) + +;;; Add double: +;;; st(0) = st(0) + memory or st(i). +(define-instruction faddd (segment source) + (:printer floating-point ((op '(#b100 #b000)))) + (:printer floating-point-fp ((op '(#b000 #b000)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b000))) + +;;; Add double destination st(i): +;;; st(i) = st(0) + st(i). +(define-instruction fadd-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b000)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b000))) +;;; with pop +(define-instruction faddp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b000)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b000))) + +;;; Subtract single: +;;; st(0) = st(0) - memory or st(i). +(define-instruction fsub (segment source) + (:printer floating-point ((op '(#b000 #b100)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment source #b100))) + +;;; Subtract single, reverse: +;;; st(0) = memory or st(i) - st(0). +(define-instruction fsubr (segment source) + (:printer floating-point ((op '(#b000 #b101)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment source #b101))) + +;;; Subtract double: +;;; st(0) = st(0) - memory or st(i). +(define-instruction fsubd (segment source) + (:printer floating-point ((op '(#b100 #b100)))) + (:printer floating-point-fp ((op '(#b000 #b100)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (progn + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011100))) + (emit-fp-op segment source #b100))) + +;;; Subtract double, reverse: +;;; st(0) = memory or st(i) - st(0). +(define-instruction fsubrd (segment source) + (:printer floating-point ((op '(#b100 #b101)))) + (:printer floating-point-fp ((op '(#b000 #b101)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (progn + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011100))) + (emit-fp-op segment source #b101))) + +;;; Subtract double, destination st(i): +;;; st(i) = st(i) - st(0). +;;; +;;; ASM386 syntax: FSUB ST(i), ST +;;; Gdb syntax: fsubr %st,%st(i) +(define-instruction fsub-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b101)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b101))) +;;; with a pop +(define-instruction fsubp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b101)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b101))) + +;;; Subtract double, reverse, destination st(i): +;;; st(i) = st(0) - st(i). +;;; +;;; ASM386 syntax: FSUBR ST(i), ST +;;; Gdb syntax: fsub %st,%st(i) +(define-instruction fsubr-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b100)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b100))) +;;; with a pop +(define-instruction fsubrp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b100)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b100))) + +;;; Multiply single: +;;; st(0) = st(0) * memory or st(i). +(define-instruction fmul (segment source) + (:printer floating-point ((op '(#b000 #b001)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment source #b001))) + +;;; Multiply double: +;;; st(0) = st(0) * memory or st(i). +(define-instruction fmuld (segment source) + (:printer floating-point ((op '(#b100 #b001)))) + (:printer floating-point-fp ((op '(#b000 #b001)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (progn + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011100))) + (emit-fp-op segment source #b001))) + +;;; Multiply double, destination st(i): +;;; st(i) = st(i) * st(0). +(define-instruction fmul-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b001)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b001))) + +;;; Divide single: +;;; st(0) = st(0) / memory or st(i). +(define-instruction fdiv (segment source) + (:printer floating-point ((op '(#b000 #b110)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment source #b110))) + +;;; Divide single, reverse: +;;; st(0) = memory or st(i) / st(0). +(define-instruction fdivr (segment source) + (:printer floating-point ((op '(#b000 #b111)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment source #b111))) + +;;; Divide double: +;;; st(0) = st(0) / memory or st(i). +(define-instruction fdivd (segment source) + (:printer floating-point ((op '(#b100 #b110)))) + (:printer floating-point-fp ((op '(#b000 #b110)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (progn + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011100))) + (emit-fp-op segment source #b110))) + +;;; Divide double, reverse: +;;; st(0) = memory or st(i) / st(0). +(define-instruction fdivrd (segment source) + (:printer floating-point ((op '(#b100 #b111)))) + (:printer floating-point-fp ((op '(#b000 #b111)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (progn + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011100))) + (emit-fp-op segment source #b111))) + +;;; Divide double, destination st(i): +;;; st(i) = st(i) / st(0). +;;; +;;; ASM386 syntax: FDIV ST(i), ST +;;; Gdb syntax: fdivr %st,%st(i) +(define-instruction fdiv-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b111)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b111))) + +;;; Divide double, reverse, destination st(i): +;;; st(i) = st(0) / st(i). +;;; +;;; ASM386 syntax: FDIVR ST(i), ST +;;; Gdb syntax: fdiv %st,%st(i) +(define-instruction fdivr-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b110)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b110))) + +;;; Exchange fr0 with fr(n). (There is no double precision variant.) +(define-instruction fxch (segment source) + (:printer floating-point-fp ((op '(#b001 #b001)))) + (:emitter + (unless (and (tn-p source) + (eq (sb-name (sc-sb (tn-sc source))) 'float-registers)) + (cl:break)) + (emit-byte segment #b11011001) + (emit-fp-op segment source #b001))) + +;;; Push 32-bit integer to st0. +(define-instruction fild (segment source) + (:printer floating-point ((op '(#b011 #b000)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011011) + (emit-fp-op segment source #b000))) + +;;; Push 64-bit integer to st0. +(define-instruction fildl (segment source) + (:printer floating-point ((op '(#b111 #b101)))) + (:emitter + (and (not (fp-reg-tn-p source)) + (maybe-emit-rex-for-ea segment source nil)) + (emit-byte segment #b11011111) + (emit-fp-op segment source #b101))) + +;;; Store 32-bit integer. +(define-instruction fist (segment dest) + (:printer floating-point ((op '(#b011 #b010)))) + (:emitter + (and (not (fp-reg-tn-p dest)) + (maybe-emit-rex-for-ea segment dest nil)) + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b010))) + +;;; Store and pop 32-bit integer. +(define-instruction fistp (segment dest) + (:printer floating-point ((op '(#b011 #b011)))) + (:emitter + (and (not (fp-reg-tn-p dest)) + (maybe-emit-rex-for-ea segment dest nil)) + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b011))) + +;;; Store and pop 64-bit integer. +(define-instruction fistpl (segment dest) + (:printer floating-point ((op '(#b111 #b111)))) + (:emitter + (and (not (fp-reg-tn-p dest)) + (maybe-emit-rex-for-ea segment dest nil)) + (emit-byte segment #b11011111) + (emit-fp-op segment dest #b111))) + +;;; Store single from st(0) and pop. +(define-instruction fstp (segment dest) + (:printer floating-point ((op '(#b001 #b011)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011)) + (t + (maybe-emit-rex-for-ea segment dest nil) + (emit-byte segment #b11011001) + (emit-fp-op segment dest #b011))))) + +;;; Store double from st(0) and pop. +(define-instruction fstpd (segment dest) + (:printer floating-point ((op '(#b101 #b011)))) + (:printer floating-point-fp ((op '(#b101 #b011)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011)) + (t + (maybe-emit-rex-for-ea segment dest nil) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011))))) + +;;; Store long from st(0) and pop. +(define-instruction fstpl (segment dest) + (:printer floating-point ((op '(#b011 #b111)))) + (:emitter + (and (not (fp-reg-tn-p dest)) + (maybe-emit-rex-for-ea segment dest nil)) + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b111))) + +;;; Decrement stack-top pointer. +(define-instruction fdecstp (segment) + (:printer floating-point-no ((op #b10110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110110))) + +;;; Increment stack-top pointer. +(define-instruction fincstp (segment) + (:printer floating-point-no ((op #b10111))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110111))) + +;;; Free fp register. +(define-instruction ffree (segment dest) + (:printer floating-point-fp ((op '(#b101 #b000)))) + (:emitter + (and (not (fp-reg-tn-p dest)) + (maybe-emit-rex-for-ea segment dest nil)) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b000))) + +(define-instruction fabs (segment) + (:printer floating-point-no ((op #b00001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100001))) + +(define-instruction fchs (segment) + (:printer floating-point-no ((op #b00000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100000))) + +(define-instruction frndint(segment) + (:printer floating-point-no ((op #b11100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111100))) + +;;; Initialize NPX. +(define-instruction fninit(segment) + (:printer floating-point-5 ((op #b00011))) + (:emitter + (emit-byte segment #b11011011) + (emit-byte segment #b11100011))) + +;;; Store Status Word to AX. +(define-instruction fnstsw(segment) + (:printer floating-point-st ((op #b00000))) + (:emitter + (emit-byte segment #b11011111) + (emit-byte segment #b11100000))) + +;;; Load Control Word. +;;; +;;; src must be a memory location +(define-instruction fldcw(segment src) + (:printer floating-point ((op '(#b001 #b101)))) + (:emitter + (and (not (fp-reg-tn-p src)) + (maybe-emit-rex-for-ea segment src nil)) + (emit-byte segment #b11011001) + (emit-fp-op segment src #b101))) + +;;; Store Control Word. +(define-instruction fnstcw(segment dst) + (:printer floating-point ((op '(#b001 #b111)))) + (:emitter + (and (not (fp-reg-tn-p dst)) + (maybe-emit-rex-for-ea segment dst nil)) + (emit-byte segment #b11011001) + (emit-fp-op segment dst #b111))) + +;;; Store FP Environment. +(define-instruction fstenv(segment dst) + (:printer floating-point ((op '(#b001 #b110)))) + (:emitter + (and (not (fp-reg-tn-p dst)) + (maybe-emit-rex-for-ea segment dst nil)) + (emit-byte segment #b11011001) + (emit-fp-op segment dst #b110))) + +;;; Restore FP Environment. +(define-instruction fldenv(segment src) + (:printer floating-point ((op '(#b001 #b100)))) + (:emitter + (and (not (fp-reg-tn-p src)) + (maybe-emit-rex-for-ea segment src nil)) + (emit-byte segment #b11011001) + (emit-fp-op segment src #b100))) + +;;; Save FP State. +(define-instruction fsave(segment dst) + (:printer floating-point ((op '(#b101 #b110)))) + (:emitter + (and (not (fp-reg-tn-p dst)) + (maybe-emit-rex-for-ea segment dst nil)) + (emit-byte segment #b11011101) + (emit-fp-op segment dst #b110))) + +;;; Restore FP State. +(define-instruction frstor(segment src) + (:printer floating-point ((op '(#b101 #b100)))) + (:emitter + (and (not (fp-reg-tn-p src)) + (maybe-emit-rex-for-ea segment src nil)) + (emit-byte segment #b11011101) + (emit-fp-op segment src #b100))) + +;;; Clear exceptions. +(define-instruction fnclex(segment) + (:printer floating-point-5 ((op #b00010))) + (:emitter + (emit-byte segment #b11011011) + (emit-byte segment #b11100010))) + +;;; comparison +(define-instruction fcom (segment src) + (:printer floating-point ((op '(#b000 #b010)))) + (:emitter + (and (not (fp-reg-tn-p src)) + (maybe-emit-rex-for-ea segment src nil)) + (emit-byte segment #b11011000) + (emit-fp-op segment src #b010))) + +(define-instruction fcomd (segment src) + (:printer floating-point ((op '(#b100 #b010)))) + (:printer floating-point-fp ((op '(#b000 #b010)))) + (:emitter + (if (fp-reg-tn-p src) + (emit-byte segment #b11011000) + (progn + (maybe-emit-rex-for-ea segment src nil) + (emit-byte segment #b11011100))) + (emit-fp-op segment src #b010))) + +;;; Compare ST1 to ST0, popping the stack twice. +(define-instruction fcompp (segment) + (:printer floating-point-3 ((op '(#b110 #b011001)))) + (:emitter + (emit-byte segment #b11011110) + (emit-byte segment #b11011001))) + +;;; unordered comparison +(define-instruction fucom (segment src) + (:printer floating-point-fp ((op '(#b101 #b100)))) + (:emitter + (aver (fp-reg-tn-p src)) + (emit-byte segment #b11011101) + (emit-fp-op segment src #b100))) + +(define-instruction ftst (segment) + (:printer floating-point-no ((op #b00100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100100))) + +;;;; 80387 specials + +(define-instruction fsqrt(segment) + (:printer floating-point-no ((op #b11010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111010))) + +(define-instruction fscale(segment) + (:printer floating-point-no ((op #b11101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111101))) + +(define-instruction fxtract(segment) + (:printer floating-point-no ((op #b10100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110100))) + +(define-instruction fsin(segment) + (:printer floating-point-no ((op #b11110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111110))) + +(define-instruction fcos(segment) + (:printer floating-point-no ((op #b11111))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111111))) + +(define-instruction fprem1(segment) + (:printer floating-point-no ((op #b10101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110101))) + +(define-instruction fprem(segment) + (:printer floating-point-no ((op #b11000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111000))) + +(define-instruction fxam (segment) + (:printer floating-point-no ((op #b00101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100101))) + +;;; These do push/pop to stack and need special handling +;;; in any VOPs that use them. See the book. + +;;; st0 <- st1*log2(st0) +(define-instruction fyl2x(segment) ; pops stack + (:printer floating-point-no ((op #b10001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110001))) + +(define-instruction fyl2xp1(segment) + (:printer floating-point-no ((op #b11001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111001))) + +(define-instruction f2xm1(segment) + (:printer floating-point-no ((op #b10000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110000))) + +(define-instruction fptan(segment) ; st(0) <- 1; st(1) <- tan + (:printer floating-point-no ((op #b10010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110010))) + +(define-instruction fpatan(segment) ; POPS STACK + (:printer floating-point-no ((op #b10011))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110011))) + +;;;; loading constants + +(define-instruction fldz(segment) + (:printer floating-point-no ((op #b01110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101110))) + +(define-instruction fld1(segment) + (:printer floating-point-no ((op #b01000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101000))) + +(define-instruction fldpi(segment) + (:printer floating-point-no ((op #b01011))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101011))) + +(define-instruction fldl2t(segment) + (:printer floating-point-no ((op #b01001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101001))) + +(define-instruction fldl2e(segment) + (:printer floating-point-no ((op #b01010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101010))) + +(define-instruction fldlg2(segment) + (:printer floating-point-no ((op #b01100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101100))) + +(define-instruction fldln2(segment) + (:printer floating-point-no ((op #b01101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101101))) + \ No newline at end of file diff --git a/src/compiler/x86-64/macros.lisp b/src/compiler/x86-64/macros.lisp new file mode 100644 index 0000000..aa4944d --- /dev/null +++ b/src/compiler/x86-64/macros.lisp @@ -0,0 +1,364 @@ +;;;; a bunch of handy macros for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; We can load/store into fp registers through the top of stack +;;; %st(0) (fr0 here). Loads imply a push to an empty register which +;;; then changes all the reg numbers. These macros help manage that. + +;;; Use this when we don't have to load anything. It preserves old tos +;;; value, but probably destroys tn with operation. +(defmacro with-tn@fp-top((tn) &body body) + `(progn + (unless (zerop (tn-offset ,tn)) + (inst fxch ,tn)) + ,@body + (unless (zerop (tn-offset ,tn)) + (inst fxch ,tn)))) + +;;; Use this to prepare for load of new value from memory. This +;;; changes the register numbering so the next instruction had better +;;; be a FP load from memory; a register load from another register +;;; will probably be loading the wrong register! +(defmacro with-empty-tn@fp-top((tn) &body body) + `(progn + (inst fstp ,tn) + ,@body + (unless (zerop (tn-offset ,tn)) + (inst fxch ,tn)))) ; save into new dest and restore st(0) + +;;;; instruction-like macros + +(defmacro move (dst src) + #!+sb-doc + "Move SRC into DST unless they are location=." + (once-only ((n-dst dst) + (n-src src)) + `(unless (location= ,n-dst ,n-src) + (inst mov ,n-dst ,n-src)))) + +(defmacro make-ea-for-object-slot (ptr slot lowtag) + `(make-ea :qword :base ,ptr :disp (- (* ,slot n-word-bytes) ,lowtag))) + +(defmacro loadw (value ptr &optional (slot 0) (lowtag 0)) + `(inst mov ,value (make-ea-for-object-slot ,ptr ,slot ,lowtag))) + +(defmacro storew (value ptr &optional (slot 0) (lowtag 0)) + (once-only ((value value)) + `(cond ((and (integerp ,value) + (not (typep ,value + '(or (signed-byte 32) (unsigned-byte 32))))) + (multiple-value-bind (lo hi) (dwords-for-quad ,value) + (inst mov (make-ea-for-object-slot ,ptr ,slot ,lowtag) lo) + (inst mov (make-ea-for-object-slot ,ptr (floor (+ ,slot 0.5)) + ,lowtag) hi))) + (t + (inst mov (make-ea-for-object-slot ,ptr ,slot ,lowtag) ,value))))) + +(defmacro pushw (ptr &optional (slot 0) (lowtag 0)) + `(inst push (make-ea-for-object-slot ,ptr ,slot ,lowtag))) + +(defmacro popw (ptr &optional (slot 0) (lowtag 0)) + `(inst pop (make-ea-for-object-slot ,ptr ,slot ,lowtag))) + +;;;; macros to generate useful values + +(defmacro load-symbol (reg symbol) + `(inst mov ,reg (+ nil-value (static-symbol-offset ,symbol)))) + +(defmacro load-symbol-value (reg symbol) + `(inst mov ,reg + (make-ea :qword + :disp (+ nil-value + (static-symbol-offset ',symbol) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))))) + +(defmacro store-symbol-value (reg symbol) + `(inst mov + (make-ea :qword + :disp (+ nil-value + (static-symbol-offset ',symbol) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + ,reg)) + +#!+sb-thread +(defmacro load-tl-symbol-value (reg symbol) + `(progn + (inst mov ,reg + (make-ea :qword + :disp (+ nil-value + (static-symbol-offset ',symbol) + (ash symbol-tls-index-slot word-shift) + (- other-pointer-lowtag)))) + (inst fs-segment-prefix) + (inst mov ,reg (make-ea :qword :scale 1 :index ,reg)))) +#!-sb-thread +(defmacro load-tl-symbol-value (reg symbol) `(load-symbol-value ,reg ,symbol)) + +#!+sb-thread +(defmacro store-tl-symbol-value (reg symbol temp) + `(progn + (inst mov ,temp + (make-ea :qword + :disp (+ nil-value + (static-symbol-offset ',symbol) + (ash symbol-tls-index-slot word-shift) + (- other-pointer-lowtag)))) + (inst fs-segment-prefix) + (inst mov (make-ea :qword :scale 1 :index ,temp) ,reg))) +#!-sb-thread +(defmacro store-tl-symbol-value (reg symbol temp) + (declare (ignore temp)) + `(store-symbol-value ,reg ,symbol)) + +(defmacro load-type (target source &optional (offset 0)) + #!+sb-doc + "Loads the type bits of a pointer into target independent of + byte-ordering issues." + (once-only ((n-target target) + (n-source source) + (n-offset offset)) + (ecase *backend-byte-order* + (:little-endian + `(inst mov ,n-target + (make-ea :byte :base ,n-source :disp ,n-offset))) + (:big-endian + `(inst mov ,n-target + (make-ea :byte :base ,n-source :disp (+ ,n-offset 4))))))) + +;;;; allocation helpers + +;;; All allocation is done by calls to assembler routines that +;;; eventually invoke the C alloc() function. + +;;; Emit code to allocate an object with a size in bytes given by +;;; Size. The size may be an integer of a TN. If Inline is a VOP +;;; node-var then it is used to make an appropriate speed vs size +;;; decision. + +;;; This macro should only be used inside a pseudo-atomic section, +;;; which should also cover subsequent initialization of the +;;; object. +(defun allocation (alloc-tn size &optional ignored) + (declare (ignore ignored)) + (inst push size) + (inst call (make-fixup (extern-alien-name "alloc_tramp") :foreign)) + (inst pop alloc-tn) + (values)) + +;;; Allocate an other-pointer object of fixed SIZE with a single word +;;; header having the specified WIDETAG value. The result is placed in +;;; RESULT-TN. +(defmacro with-fixed-allocation ((result-tn widetag size &optional inline) + &rest forms) + `(pseudo-atomic + (allocation ,result-tn (pad-data-block ,size) ,inline) + (storew (logior (ash (1- ,size) n-widetag-bits) ,widetag) + ,result-tn) + (inst lea ,result-tn + (make-ea :byte :base ,result-tn :disp other-pointer-lowtag)) + ,@forms)) + +;;;; error code +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) + (defun emit-error-break (vop kind code values) + (let ((vector (gensym))) + `((inst int 3) ; i386 breakpoint instruction + ;; The return PC points here; note the location for the debugger. + (let ((vop ,vop)) + (when vop + (note-this-location vop :internal-error))) + (inst byte ,kind) ; eg trap_Xyyy + (with-adjustable-vector (,vector) ; interr arguments + (write-var-integer (error-number-or-lose ',code) ,vector) + ,@(mapcar (lambda (tn) + `(let ((tn ,tn)) + ;; classic CMU CL comment: + ;; zzzzz jrd here. tn-offset is zero for constant + ;; tns. + (write-var-integer (make-sc-offset (sc-number + (tn-sc tn)) + (or (tn-offset tn) + 0)) + ,vector))) + values) + (inst byte (length ,vector)) + (dotimes (i (length ,vector)) + (inst byte (aref ,vector i)))))))) + +(defmacro error-call (vop error-code &rest values) + #!+sb-doc + "Cause an error. ERROR-CODE is the error to cause." + (cons 'progn + (emit-error-break vop error-trap error-code values))) + +(defmacro generate-error-code (vop error-code &rest values) + #!+sb-doc + "Generate-Error-Code Error-code Value* + Emit code for an error with the specified Error-Code and context Values." + `(assemble (*elsewhere*) + (let ((start-lab (gen-label))) + (emit-label start-lab) + (error-call ,vop ,error-code ,@values) + start-lab))) + + +;;;; PSEUDO-ATOMIC + +;;; This is used to wrap operations which leave untagged memory lying +;;; around. It's an operation which the AOP weenies would describe as +;;; having "cross-cutting concerns", meaning it appears all over the +;;; place and there's no logical single place to attach documentation. +;;; grep (mostly in src/runtime) is your friend + +;;; FIXME: *PSEUDO-ATOMIC-FOO* could be made into *PSEUDO-ATOMIC-BITS*, +;;; set with a single operation and cleared with SHR *PSEUDO-ATOMIC-BITS*,-2; +;;; the ATOMIC bit is bit 0, the INTERRUPTED bit is bit 1, and you check +;;; the C flag after the shift to see whether you were interrupted. + +(defmacro pseudo-atomic (&rest forms) + (with-unique-names (label) + `(let ((,label (gen-label))) + ;; FIXME: The MAKE-EA noise should become a MACROLET macro or + ;; something. (perhaps SVLB, for static variable low byte) + (inst mov (make-ea :byte :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-interrupted*) + (ash symbol-value-slot word-shift) + ;; FIXME: Use mask, not minus, to + ;; take out type bits. + (- other-pointer-lowtag))) + 0) + (inst mov (make-ea :byte :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-atomic*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + (fixnumize 1)) + ,@forms + (inst mov (make-ea :byte :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-atomic*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + 0) + ;; KLUDGE: Is there any requirement for interrupts to be + ;; handled in order? It seems as though an interrupt coming + ;; in at this point will be executed before any pending interrupts. + ;; Or do incoming interrupts check to see whether any interrupts + ;; are pending? I wish I could find the documentation for + ;; pseudo-atomics.. -- WHN 19991130 + (inst cmp (make-ea :byte + :disp (+ nil-value + (static-symbol-offset + '*pseudo-atomic-interrupted*) + (ash symbol-value-slot word-shift) + (- other-pointer-lowtag))) + 0) + (inst jmp :eq ,label) + ;; if PAI was set, interrupts were disabled at the same time + ;; using the process signal mask. + (inst break pending-interrupt-trap) + (emit-label ,label)))) + + + +;;;; indexed references + +(defmacro define-full-reffer (name type offset lowtag scs el-type &optional translate) + `(progn + (define-vop (,name) + ,@(when translate + `((:translate ,translate))) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types ,type tagged-num) + (:results (value :scs ,scs)) + (:result-types ,el-type) + (:generator 3 ; pw was 5 + (inst mov value (make-ea :qword :base object :index index + :disp (- (* ,offset n-word-bytes) + ,lowtag))))) + (define-vop (,(symbolicate name "-C")) + ,@(when translate + `((:translate ,translate))) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types ,type + (:constant (load/store-index ,n-word-bytes ,(eval lowtag) + ,(eval offset)))) + (:results (value :scs ,scs)) + (:result-types ,el-type) + (:generator 2 ; pw was 5 + (inst mov value (make-ea :qword :base object + :disp (- (* (+ ,offset index) n-word-bytes) + ,lowtag))))))) + +(defmacro define-full-setter (name type offset lowtag scs el-type &optional translate) + `(progn + (define-vop (,name) + ,@(when translate + `((:translate ,translate))) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs ,scs :target result)) + (:arg-types ,type tagged-num ,el-type) + (:results (result :scs ,scs)) + (:result-types ,el-type) + (:generator 4 ; was 5 + (inst mov (make-ea :qword :base object :index index + :disp (- (* ,offset n-word-bytes) ,lowtag)) + value) + (move result value))) + (define-vop (,(symbolicate name "-C")) + ,@(when translate + `((:translate ,translate))) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs ,scs :target result)) + (:info index) + (:arg-types ,type + (:constant (load/store-index ,n-word-bytes ,(eval lowtag) + ,(eval offset))) + ,el-type) + (:results (result :scs ,scs)) + (:result-types ,el-type) + (:generator 3 ; was 5 + (inst mov (make-ea :qword :base object + :disp (- (* (+ ,offset index) n-word-bytes) + ,lowtag)) + value) + (move result value))))) + +;;; helper for alien stuff. +(defmacro with-pinned-objects ((&rest objects) &body body) + "Arrange with the garbage collector that the pages occupied by +OBJECTS will not be moved in memory for the duration of BODY. +Useful for e.g. foreign calls where another thread may trigger +garbage collection" + `(multiple-value-prog1 + (progn + ,@(loop for p in objects + collect `(push-word-on-c-stack + (int-sap (sb!kernel:get-lisp-obj-address ,p)))) + ,@body) + ;; If the body returned normally, we should restore the stack pointer + ;; for the benefit of any following code in the same function. If + ;; there's a non-local exit in the body, sp is garbage anyway and + ;; will get set appropriately from {a, the} frame pointer before it's + ;; next needed + (pop-words-from-c-stack ,(length objects)))) diff --git a/src/compiler/x86-64/memory.lisp b/src/compiler/x86-64/memory.lisp new file mode 100644 index 0000000..ca8c2e2 --- /dev/null +++ b/src/compiler/x86-64/memory.lisp @@ -0,0 +1,153 @@ +;;;; the x86 definitions of some general purpose memory reference VOPs +;;;; inherited by basic memory reference operations + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; CELL-REF and CELL-SET are used to define VOPs like CAR, where the +;;; offset to be read or written is a property of the VOP used. +;;; CELL-SETF is similar to CELL-SET, but delivers the new value as +;;; the result. CELL-SETF-FUN takes its arguments as if it were a +;;; SETF function (new value first, as apposed to a SETF macro, which +;;; takes the new value last). +(define-vop (cell-ref) + (:args (object :scs (descriptor-reg))) + (:results (value :scs (descriptor-reg any-reg))) + (:variant-vars offset lowtag) + (:policy :fast-safe) + (:generator 4 + (loadw value object offset lowtag))) +(define-vop (cell-set) + (:args (object :scs (descriptor-reg)) + (value :scs (descriptor-reg any-reg))) + (:variant-vars offset lowtag) + (:policy :fast-safe) + (:generator 4 + (storew value object offset lowtag))) +(define-vop (cell-setf) + (:args (object :scs (descriptor-reg)) + (value :scs (descriptor-reg any-reg) :target result)) + (:results (result :scs (descriptor-reg any-reg))) + (:variant-vars offset lowtag) + (:policy :fast-safe) + (:generator 4 + (storew value object offset lowtag) + (move result value))) +(define-vop (cell-setf-fun) + (:args (value :scs (descriptor-reg any-reg) :target result) + (object :scs (descriptor-reg))) + (:results (result :scs (descriptor-reg any-reg))) + (:variant-vars offset lowtag) + (:policy :fast-safe) + (:generator 4 + (storew value object offset lowtag) + (move result value))) + +;;; Define accessor VOPs for some cells in an object. If the operation +;;; name is NIL, then that operation isn't defined. If the translate +;;; function is null, then we don't define a translation. +(defmacro define-cell-accessors (offset lowtag + ref-op ref-trans set-op set-trans) + `(progn + ,@(when ref-op + `((define-vop (,ref-op cell-ref) + (:variant ,offset ,lowtag) + ,@(when ref-trans + `((:translate ,ref-trans)))))) + ,@(when set-op + `((define-vop (,set-op cell-setf) + (:variant ,offset ,lowtag) + ,@(when set-trans + `((:translate ,set-trans)))))))) + +;;; X86 special +(define-vop (cell-xadd) + (:args (object :scs (descriptor-reg) :to :result) + (value :scs (any-reg) :target result)) + (:results (result :scs (any-reg) :from (:argument 1))) + (:result-types tagged-num) + (:variant-vars offset lowtag) + (:policy :fast-safe) + (:generator 4 + (move result value) + (inst xadd (make-ea :dword :base object + :disp (- (* offset n-word-bytes) lowtag)) + value))) + +;;; SLOT-REF and SLOT-SET are used to define VOPs like CLOSURE-REF, +;;; where the offset is constant at compile time, but varies for +;;; different uses. +(define-vop (slot-ref) + (:args (object :scs (descriptor-reg))) + (:results (value :scs (descriptor-reg any-reg))) + (:variant-vars base lowtag) + (:info offset) + (:generator 4 + (loadw value object (+ base offset) lowtag))) +(define-vop (slot-set) + (:args (object :scs (descriptor-reg)) + (value :scs (descriptor-reg any-reg immediate))) + (:variant-vars base lowtag) + (:info offset) + (:generator 4 + (if (sc-is value immediate) + (let ((val (tn-value value))) + (etypecase val + (integer + (inst mov + (make-ea :dword :base object + :disp (- (* (+ base offset) n-word-bytes) lowtag)) + (fixnumize val))) + (symbol + (inst mov + (make-ea :dword :base object + :disp (- (* (+ base offset) n-word-bytes) lowtag)) + (+ nil-value (static-symbol-offset val)))) + (character + (inst mov + (make-ea :dword :base object + :disp (- (* (+ base offset) n-word-bytes) lowtag)) + (logior (ash (char-code val) n-widetag-bits) + base-char-widetag))))) + ;; Else, value not immediate. + (storew value object (+ base offset) lowtag)))) + +(define-vop (slot-set-conditional) + (:args (object :scs (descriptor-reg) :to :eval) + (old-value :scs (descriptor-reg any-reg) :target eax) + (new-value :scs (descriptor-reg any-reg) :target temp)) + (:temporary (:sc descriptor-reg :offset eax-offset + :from (:argument 1) :to :result :target result) eax) + (:temporary (:sc descriptor-reg :from (:argument 2) :to :result) temp) + (:variant-vars base lowtag) + (:results (result :scs (descriptor-reg))) + (:info offset) + (:generator 4 + (move eax old-value) + (move temp new-value) + (inst cmpxchg (make-ea :dword :base object + :disp (- (* (+ base offset) n-word-bytes) lowtag)) + temp) + (move result eax))) + +;;; X86 special +(define-vop (slot-xadd) + (:args (object :scs (descriptor-reg) :to :result) + (value :scs (any-reg) :target result)) + (:results (result :scs (any-reg) :from (:argument 1))) + (:result-types tagged-num) + (:variant-vars base lowtag) + (:info offset) + (:generator 4 + (move result value) + (inst xadd (make-ea :dword :base object + :disp (- (* (+ base offset) n-word-bytes) lowtag)) + value))) diff --git a/src/compiler/x86-64/move.lisp b/src/compiler/x86-64/move.lisp new file mode 100644 index 0000000..856c7fe --- /dev/null +++ b/src/compiler/x86-64/move.lisp @@ -0,0 +1,402 @@ +;;;; the x86 VM definition of operand loading/saving and the MOVE vop + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(define-move-fun (load-immediate 1) (vop x y) + ((immediate) + (any-reg descriptor-reg)) + (let ((val (tn-value x))) + (etypecase val + (integer + (if (zerop val) + (inst xor y y) + (inst mov y (fixnumize val)))) + (symbol + (load-symbol y val)) + (character + (inst mov y (logior (ash (char-code val) n-widetag-bits) + base-char-widetag)))))) + +(define-move-fun (load-number 1) (vop x y) + ((immediate) (signed-reg unsigned-reg)) + (inst mov y (tn-value x))) + +(define-move-fun (load-base-char 1) (vop x y) + ((immediate) (base-char-reg)) + (inst mov y (char-code (tn-value x)))) + +(define-move-fun (load-system-area-pointer 1) (vop x y) + ((immediate) (sap-reg)) + (inst mov y (sap-int (tn-value x)))) + +(define-move-fun (load-constant 5) (vop x y) + ((constant) (descriptor-reg any-reg)) + (inst mov y x)) + +(define-move-fun (load-stack 5) (vop x y) + ((control-stack) (any-reg descriptor-reg) + (base-char-stack) (base-char-reg) + (sap-stack) (sap-reg) + (signed-stack) (signed-reg) + (unsigned-stack) (unsigned-reg)) + (inst mov y x)) + +(define-move-fun (store-stack 5) (vop x y) + ((any-reg descriptor-reg) (control-stack) + (base-char-reg) (base-char-stack) + (sap-reg) (sap-stack) + (signed-reg) (signed-stack) + (unsigned-reg) (unsigned-stack)) + (inst mov y x)) + +;;;; the MOVE VOP +(define-vop (move) + (:args (x :scs (any-reg descriptor-reg immediate) :target y + :load-if (not (location= x y)))) + (:results (y :scs (any-reg descriptor-reg) + :load-if + (not (or (location= x y) + (and (sc-is x any-reg descriptor-reg immediate) + (sc-is y control-stack)))))) + (:effects) + (:affected) + (:generator 0 + (if (and (sc-is x immediate) + (sc-is y any-reg descriptor-reg control-stack)) + (let ((val (tn-value x))) + (etypecase val + (integer + (if (and (zerop val) (sc-is y any-reg descriptor-reg)) + (inst xor y y) + (multiple-value-bind (lo hi) (dwords-for-quad (fixnumize val)) + (cond ((zerop hi) + (inst mov y lo)) + (t + (inst mov y hi) + (inst shl y 32) + (inst or y lo)))))) + (symbol + (inst mov y (+ nil-value (static-symbol-offset val)))) + (character + (inst mov y (logior (ash (char-code val) n-widetag-bits) + base-char-widetag))))) + (move y x)))) + +(define-move-vop move :move + (any-reg descriptor-reg immediate) + (any-reg descriptor-reg)) + +;;; Make MOVE the check VOP for T so that type check generation +;;; doesn't think it is a hairy type. This also allows checking of a +;;; few of the values in a continuation to fall out. +(primitive-type-vop move (:check) t) + +;;; The MOVE-ARG VOP is used for moving descriptor values into +;;; another frame for argument or known value passing. +;;; +;;; Note: It is not going to be possible to move a constant directly +;;; to another frame, except if the destination is a register and in +;;; this case the loading works out. +(define-vop (move-arg) + (:args (x :scs (any-reg descriptor-reg immediate) :target y + :load-if (not (and (sc-is y any-reg descriptor-reg) + (sc-is x control-stack)))) + (fp :scs (any-reg) + :load-if (not (sc-is y any-reg descriptor-reg)))) + (:results (y)) + (:generator 0 + (sc-case y + ((any-reg descriptor-reg) + (if (sc-is x immediate) + (let ((val (tn-value x))) + (etypecase val + ((integer 0 0) + (inst xor y y)) + ((or (signed-byte 29) (unsigned-byte 29)) + (inst mov y (fixnumize val))) + (integer + (multiple-value-bind (lo hi) + (dwords-for-quad (fixnumize val)) + (inst mov y hi) + (inst shl y 32) + (inst or y lo))) + (symbol + (load-symbol y val)) + (character + (inst mov y (logior (ash (char-code val) n-widetag-bits) + base-char-widetag))))) + (move y x))) + ((control-stack) + (if (sc-is x immediate) + (let ((val (tn-value x))) + (if (= (tn-offset fp) esp-offset) + ;; C-call + (etypecase val + (integer + (storew (fixnumize val) fp (tn-offset y))) + (symbol + (storew (+ nil-value (static-symbol-offset val)) + fp (tn-offset y))) + (character + (storew (logior (ash (char-code val) n-widetag-bits) + base-char-widetag) + fp (tn-offset y)))) + ;; Lisp stack + (etypecase val + (integer + (storew (fixnumize val) fp (- (1+ (tn-offset y))))) + (symbol + (storew (+ nil-value (static-symbol-offset val)) + fp (- (1+ (tn-offset y))))) + (character + (storew (logior (ash (char-code val) n-widetag-bits) + base-char-widetag) + fp (- (1+ (tn-offset y)))))))) + (if (= (tn-offset fp) esp-offset) + ;; C-call + (storew x fp (tn-offset y)) + ;; Lisp stack + (storew x fp (- (1+ (tn-offset y)))))))))) + +(define-move-vop move-arg :move-arg + (any-reg descriptor-reg) + (any-reg descriptor-reg)) + +;;;; ILLEGAL-MOVE + +;;; This VOP exists just to begin the lifetime of a TN that couldn't +;;; be written legally due to a type error. An error is signalled +;;; before this VOP is so we don't need to do anything (not that there +;;; would be anything sensible to do anyway.) +(define-vop (illegal-move) + (:args (x) (type)) + (:results (y)) + (:ignore y) + (:vop-var vop) + (:save-p :compute-only) + (:generator 666 + (error-call vop object-not-type-error x type))) + +;;;; moves and coercions + +;;; These MOVE-TO-WORD VOPs move a tagged integer to a raw full-word +;;; representation. Similarly, the MOVE-FROM-WORD VOPs converts a raw +;;; integer to a tagged bignum or fixnum. + +;;; Arg is a fixnum, so just shift it. We need a type restriction +;;; because some possible arg SCs (control-stack) overlap with +;;; possible bignum arg SCs. +(define-vop (move-to-word/fixnum) + (:args (x :scs (any-reg descriptor-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (signed-reg unsigned-reg) + :load-if (not (location= x y)))) + (:arg-types tagged-num) + (:note "fixnum untagging") + (:generator 1 + (move y x) + (inst sar y (1- n-lowtag-bits)))) +(define-move-vop move-to-word/fixnum :move + (any-reg descriptor-reg) (signed-reg unsigned-reg)) + +;;; Arg is a non-immediate constant, load it. +(define-vop (move-to-word-c) + (:args (x :scs (constant))) + (:results (y :scs (signed-reg unsigned-reg))) + (:note "constant load") + (:generator 1 + (inst mov y (tn-value x)))) +(define-move-vop move-to-word-c :move + (constant) (signed-reg unsigned-reg)) + + +;;; Arg is a fixnum or bignum, figure out which and load if necessary. +(define-vop (move-to-word/integer) + (:args (x :scs (descriptor-reg) :target eax)) + (:results (y :scs (signed-reg unsigned-reg))) + (:note "integer to untagged word coercion") + (:temporary (:sc unsigned-reg :offset eax-offset + :from (:argument 0) :to (:result 0) :target y) eax) + (:generator 4 + (move eax x) + (inst test al-tn 7) ; a symbolic constant for this + (inst jmp :z fixnum) ; would be nice + (loadw y eax bignum-digits-offset other-pointer-lowtag) + (inst jmp done) + FIXNUM + (inst sar eax (1- n-lowtag-bits)) + (move y eax) + DONE)) +(define-move-vop move-to-word/integer :move + (descriptor-reg) (signed-reg unsigned-reg)) + + +;;; Result is a fixnum, so we can just shift. We need the result type +;;; restriction because of the control-stack ambiguity noted above. +(define-vop (move-from-word/fixnum) + (:args (x :scs (signed-reg unsigned-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (any-reg descriptor-reg) + :load-if (not (location= x y)))) + (:result-types tagged-num) + (:note "fixnum tagging") + (:generator 1 + (cond ((and (sc-is x signed-reg unsigned-reg) + (not (location= x y))) + ;; Uses 7 bytes, but faster on the Pentium + (inst lea y (make-ea :qword :index x :scale 8))) + (t + ;; Uses: If x is a reg 2 + 3; if x = y uses only 3 bytes + (move y x) + (inst shl y (1- n-lowtag-bits)))))) +(define-move-vop move-from-word/fixnum :move + (signed-reg unsigned-reg) (any-reg descriptor-reg)) + +;;; Result may be a bignum, so we have to check. Use a worst-case cost +;;; to make sure people know they may be number consing. +;;; +;;; KLUDGE: I assume this is suppressed in favor of the "faster inline +;;; version" below. (See also mysterious comment "we don't want a VOP +;;; on this one" on DEFINE-ASSEMBLY-ROUTINE (MOVE-FROM-SIGNED) in +;;; "src/assembly/x86/alloc.lisp".) -- WHN 19990916 +#+nil +(define-vop (move-from-signed) + (:args (x :scs (signed-reg unsigned-reg) :target eax)) + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)) eax) + (:temporary (:sc unsigned-reg :offset ebx-offset :to (:result 0) :target y) + ebx) + (:temporary (:sc unsigned-reg :offset ecx-offset + :from (:argument 0) :to (:result 0)) ecx) + (:ignore ecx) + (:results (y :scs (any-reg descriptor-reg))) + (:note "signed word to integer coercion") + (:generator 20 + (move eax x) + (inst call (make-fixup 'move-from-signed :assembly-routine)) + (move y ebx))) +;;; Faster inline version, +;;; KLUDGE: Do we really want the faster inline version? It's sorta big. +;;; It is nice that it doesn't use any temporaries, though. -- WHN 19990916 +(define-vop (move-from-signed) + (:args (x :scs (signed-reg unsigned-reg) :to :result)) + (:results (y :scs (any-reg descriptor-reg) :from :argument)) + (:note "signed word to integer coercion") + (:node-var node) + (:generator 20 + (aver (not (location= x y))) + (let ((bignum (gen-label)) + (done (gen-label))) + (inst mov y x) + (inst shl y 1) + (inst jmp :o bignum) + (inst shl y 1) + (inst jmp :o bignum) + (inst shl y 1) + (inst jmp :o bignum) + (emit-label done) + + (assemble (*elsewhere*) + (emit-label bignum) + (with-fixed-allocation + (y bignum-widetag (+ bignum-digits-offset 1) node) + (storew x y bignum-digits-offset other-pointer-lowtag)) + (inst jmp done))))) +(define-move-vop move-from-signed :move + (signed-reg) (descriptor-reg)) + +;;; Check for fixnum, and possibly allocate one or two word bignum +;;; result. Use a worst-case cost to make sure people know they may be +;;; number consing. + +(define-vop (move-from-unsigned) + (:args (x :scs (signed-reg unsigned-reg) :to :save)) + (:temporary (:sc unsigned-reg) alloc) + (:results (y :scs (any-reg descriptor-reg))) + (:node-var node) + (:note "unsigned word to integer coercion") + (:generator 20 + (aver (not (location= x y))) + (aver (not (location= x alloc))) + (aver (not (location= y alloc))) + (let ((bignum (gen-label)) + (done (gen-label)) + (one-word-bignum (gen-label)) + (L1 (gen-label))) + (inst bsr y x) ;find msb + (inst cmov :z y x) + (inst cmp y 60) + (inst jmp :ae bignum) + (inst lea y (make-ea :qword :index x :scale 8)) + (emit-label done) + (assemble (*elsewhere*) + (emit-label bignum) + ;; Note: As on the mips port, space for a two word bignum is + ;; always allocated and the header size is set to either one + ;; or two words as appropriate. + (inst jmp :ns one-word-bignum) + ;; two word bignum + (inst mov y (logior (ash (1- (+ bignum-digits-offset 2)) + n-widetag-bits) + bignum-widetag)) + (inst jmp L1) + (emit-label one-word-bignum) + (inst mov y (logior (ash (1- (+ bignum-digits-offset 1)) + n-widetag-bits) + bignum-widetag)) + (emit-label L1) + (pseudo-atomic + (allocation alloc (pad-data-block (+ bignum-digits-offset 2)) node) + (storew y alloc) + (inst lea y (make-ea :byte :base alloc :disp other-pointer-lowtag)) + (storew x y bignum-digits-offset other-pointer-lowtag)) + (inst jmp done))))) +(define-move-vop move-from-unsigned :move + (unsigned-reg) (descriptor-reg)) + +;;; Move untagged numbers. +(define-vop (word-move) + (:args (x :scs (signed-reg unsigned-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (signed-reg unsigned-reg) + :load-if + (not (or (location= x y) + (and (sc-is x signed-reg unsigned-reg) + (sc-is y signed-stack unsigned-stack)))))) + (:effects) + (:affected) + (:note "word integer move") + (:generator 0 + (move y x))) +(define-move-vop word-move :move + (signed-reg unsigned-reg) (signed-reg unsigned-reg)) + +;;; Move untagged number arguments/return-values. +(define-vop (move-word-arg) + (:args (x :scs (signed-reg unsigned-reg) :target y) + (fp :scs (any-reg) :load-if (not (sc-is y sap-reg)))) + (:results (y)) + (:note "word integer argument move") + (:generator 0 + (sc-case y + ((signed-reg unsigned-reg) + (move y x)) + ((signed-stack unsigned-stack) + (if (= (tn-offset fp) esp-offset) + (storew x fp (tn-offset y)) ; c-call + (storew x fp (- (1+ (tn-offset y))))))))) +(define-move-vop move-word-arg :move-arg + (descriptor-reg any-reg signed-reg unsigned-reg) (signed-reg unsigned-reg)) + +;;; Use standard MOVE-ARG and coercion to move an untagged number +;;; to a descriptor passing location. +(define-move-vop move-arg :move-arg + (signed-reg unsigned-reg) (any-reg descriptor-reg)) diff --git a/src/compiler/x86-64/nlx.lisp b/src/compiler/x86-64/nlx.lisp new file mode 100644 index 0000000..57e7748 --- /dev/null +++ b/src/compiler/x86-64/nlx.lisp @@ -0,0 +1,230 @@ +;;;; the definition of non-local exit for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; Make an environment-live stack TN for saving the SP for NLX entry. +(!def-vm-support-routine make-nlx-sp-tn (env) + (physenv-live-tn + (make-representation-tn *fixnum-primitive-type* any-reg-sc-number) + env)) + +;;; Make a TN for the argument count passing location for a non-local entry. +(!def-vm-support-routine make-nlx-entry-arg-start-location () + (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rbx-offset)) + +(defun catch-block-ea (tn) + (aver (sc-is tn catch-block)) + (make-ea :qword :base rbp-tn + :disp (- (* (+ (tn-offset tn) catch-block-size) n-word-bytes)))) + + +;;;; Save and restore dynamic environment. +;;;; +;;;; These VOPs are used in the reentered function to restore the +;;;; appropriate dynamic environment. Currently we only save the +;;;; Current-Catch and the alien stack pointer. (Before sbcl-0.7.0, +;;;; when there were IR1 and byte interpreters, we had to save +;;;; the interpreter "eval stack" too.) +;;;; +;;;; We don't need to save/restore the current UNWIND-PROTECT, since +;;;; UNWIND-PROTECTs are implicitly processed during unwinding. +;;;; +;;;; We don't need to save the BSP, because that is handled automatically. + +(define-vop (save-dynamic-state) + (:results (catch :scs (descriptor-reg)) + (alien-stack :scs (descriptor-reg))) + (:generator 13 + (load-tl-symbol-value catch *current-catch-block*) + (load-tl-symbol-value alien-stack *alien-stack*))) + +(define-vop (restore-dynamic-state) + (:args (catch :scs (descriptor-reg)) + (alien-stack :scs (descriptor-reg))) + #!+sb-thread (:temporary (:sc unsigned-reg) temp) + (:generator 10 + (store-tl-symbol-value catch *current-catch-block* temp) + (store-tl-symbol-value alien-stack *alien-stack* temp))) + +(define-vop (current-stack-pointer) + (:results (res :scs (any-reg control-stack))) + (:generator 1 + (move res rsp-tn))) + +(define-vop (current-binding-pointer) + (:results (res :scs (any-reg descriptor-reg))) + (:generator 1 + (load-tl-symbol-value res *binding-stack-pointer*))) + +;;;; unwind block hackery + +;;; Compute the address of the catch block from its TN, then store into the +;;; block the current Fp, Env, Unwind-Protect, and the entry PC. +(define-vop (make-unwind-block) + (:args (tn)) + (:info entry-label) + (:temporary (:sc unsigned-reg) temp) + (:results (block :scs (any-reg))) + (:generator 22 + (inst lea block (catch-block-ea tn)) + (load-tl-symbol-value temp *current-unwind-protect-block*) + (storew temp block unwind-block-current-uwp-slot) + (storew rbp-tn block unwind-block-current-cont-slot) + (storew (make-fixup nil :code-object entry-label) + block catch-block-entry-pc-slot))) + +;;; like MAKE-UNWIND-BLOCK, except that we also store in the specified +;;; tag, and link the block into the CURRENT-CATCH list +(define-vop (make-catch-block) + (:args (tn) + (tag :scs (any-reg descriptor-reg) :to (:result 1))) + (:info entry-label) + (:results (block :scs (any-reg))) + (:temporary (:sc descriptor-reg) temp) + (:generator 44 + (inst lea block (catch-block-ea tn)) + (load-tl-symbol-value temp *current-unwind-protect-block*) + (storew temp block unwind-block-current-uwp-slot) + (storew rbp-tn block unwind-block-current-cont-slot) + (storew (make-fixup nil :code-object entry-label) + block catch-block-entry-pc-slot) + (storew tag block catch-block-tag-slot) + (load-tl-symbol-value temp *current-catch-block*) + (storew temp block catch-block-previous-catch-slot) + (store-tl-symbol-value block *current-catch-block* temp))) + +;;; Just set the current unwind-protect to TN's address. This instantiates an +;;; unwind block as an unwind-protect. +(define-vop (set-unwind-protect) + (:args (tn)) + (:temporary (:sc unsigned-reg) new-uwp #!+sb-thread tls) + (:generator 7 + (inst lea new-uwp (catch-block-ea tn)) + (store-tl-symbol-value new-uwp *current-unwind-protect-block* tls))) + +(define-vop (unlink-catch-block) + (:temporary (:sc unsigned-reg) #!+sb-thread tls block) + (:policy :fast-safe) + (:translate %catch-breakup) + (:generator 17 + (load-tl-symbol-value block *current-catch-block*) + (loadw block block catch-block-previous-catch-slot) + (store-tl-symbol-value block *current-catch-block* tls))) + +(define-vop (unlink-unwind-protect) + (:temporary (:sc unsigned-reg) block #!+sb-thread tls) + (:policy :fast-safe) + (:translate %unwind-protect-breakup) + (:generator 17 + (load-tl-symbol-value block *current-unwind-protect-block*) + (loadw block block unwind-block-current-uwp-slot) + (store-tl-symbol-value block *current-unwind-protect-block* tls))) + +;;;; NLX entry VOPs +(define-vop (nlx-entry) + ;; Note: we can't list an sc-restriction, 'cause any load vops would + ;; be inserted before the return-pc label. + (:args (sp) + (start) + (count)) + (:results (values :more t)) + (:temporary (:sc descriptor-reg) move-temp) + (:info label nvals) + (:save-p :force-to-stack) + (:vop-var vop) + (:generator 30 + (emit-label label) + (note-this-location vop :non-local-entry) + (cond ((zerop nvals)) + ((= nvals 1) + (let ((no-values (gen-label))) + (inst mov (tn-ref-tn values) nil-value) + (inst jecxz no-values) + (loadw (tn-ref-tn values) start -1) + (emit-label no-values))) + (t + (collect ((defaults)) + (do ((i 0 (1+ i)) + (tn-ref values (tn-ref-across tn-ref))) + ((null tn-ref)) + (let ((default-lab (gen-label)) + (tn (tn-ref-tn tn-ref))) + (defaults (cons default-lab tn)) + + (inst cmp count (fixnumize i)) + (inst jmp :le default-lab) + (sc-case tn + ((descriptor-reg any-reg) + (loadw tn start (- (1+ i)))) + ((control-stack) + (loadw move-temp start (- (1+ i))) + (inst mov tn move-temp))))) + (let ((defaulting-done (gen-label))) + (emit-label defaulting-done) + (assemble (*elsewhere*) + (dolist (def (defaults)) + (emit-label (car def)) + (inst mov (cdr def) nil-value)) + (inst jmp defaulting-done)))))) + (inst mov rsp-tn sp))) + +(define-vop (nlx-entry-multiple) + (:args (top) + (source) + (count :target rcx)) + ;; Again, no SC restrictions for the args, 'cause the loading would + ;; happen before the entry label. + (:info label) + (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 2)) rcx) + (:temporary (:sc unsigned-reg :offset rsi-offset) rsi) + (:temporary (:sc unsigned-reg :offset rdi-offset) rdi) + (:results (result :scs (any-reg) :from (:argument 0)) + (num :scs (any-reg control-stack))) + (:save-p :force-to-stack) + (:vop-var vop) + (:generator 30 + (emit-label label) + (note-this-location vop :non-local-entry) + + (inst lea rsi (make-ea :qword :base source :disp (- n-word-bytes))) + ;; The 'top' arg contains the %esp value saved at the time the + ;; catch block was created and points to where the thrown values + ;; should sit. + (move rdi top) + (move result rdi) + + (inst sub rdi n-word-bytes) + (move rcx count) ; fixnum words == bytes + (move num rcx) + (inst shr rcx word-shift) ; word count for + ;; If we got zero, we be done. + (inst jecxz done) + ;; Copy them down. + (inst std) + (inst rep) + (inst movs :dword) + + DONE + ;; Reset the CSP at last moved arg. + (inst lea rsp-tn (make-ea :qword :base rdi :disp n-word-bytes)))) + + +;;; This VOP is just to force the TNs used in the cleanup onto the stack. +(define-vop (uwp-entry) + (:info label) + (:save-p :force-to-stack) + (:results (block) (start) (count)) + (:ignore block start count) + (:vop-var vop) + (:generator 0 + (emit-label label) + (note-this-location vop :non-local-entry))) diff --git a/src/compiler/x86-64/parms.lisp b/src/compiler/x86-64/parms.lisp new file mode 100644 index 0000000..702b334 --- /dev/null +++ b/src/compiler/x86-64/parms.lisp @@ -0,0 +1,255 @@ +;;;; This file contains some parameterizations of various VM +;;;; attributes for the x86. This file is separate from other stuff so +;;;; that it can be compiled and loaded earlier. + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; ### Note: we simultaneously use ``word'' to mean a 32 bit quantity +;;; and a 16 bit quantity depending on context. This is because Intel +;;; insists on calling 16 bit things words and 32 bit things +;;; double-words (or dwords). Therefore, in the instruction definition +;;; and register specs, we use the Intel convention. But whenever we +;;; are talking about stuff the rest of the lisp system might be +;;; interested in, we use ``word'' to mean the size of a descriptor +;;; object, which is 32 bits. + +;;;; machine architecture parameters + +;;; the number of bits per word, where a word holds one lisp descriptor +(def!constant n-word-bits 64) + +;;; the natural width of a machine word (as seen in e.g. register width, +;;; address space) +(def!constant n-machine-word-bits 64) + +;;; the number of bits per byte, where a byte is the smallest +;;; addressable object +(def!constant n-byte-bits 8) + +;;; the number of bits to shift between word addresses and byte addresses +(def!constant word-shift (1- (integer-length (/ n-word-bits n-byte-bits)))) + +;;; the number of bytes in a word +(def!constant n-word-bytes (/ n-word-bits n-byte-bits)) + +(def!constant float-sign-shift 31) + +;;; comment from CMU CL: +;;; These values were taken from the alpha code. The values for +;;; bias and exponent min/max are not the same as shown in the 486 book. +;;; They may be correct for how Python uses them. +(def!constant single-float-bias 126) ; Intel says 127. +(defconstant-eqx single-float-exponent-byte (byte 8 23) #'equalp) +(defconstant-eqx single-float-significand-byte (byte 23 0) #'equalp) +;;; comment from CMU CL: +;;; The 486 book shows the exponent range -126 to +127. The Lisp +;;; code that uses these values seems to want already biased numbers. +(def!constant single-float-normal-exponent-min 1) +(def!constant single-float-normal-exponent-max 254) +(def!constant single-float-hidden-bit (ash 1 23)) +(def!constant single-float-trapping-nan-bit (ash 1 22)) + +(def!constant double-float-bias 1022) +(defconstant-eqx double-float-exponent-byte (byte 11 20) #'equalp) +(defconstant-eqx double-float-significand-byte (byte 20 0) #'equalp) +(def!constant double-float-normal-exponent-min 1) +(def!constant double-float-normal-exponent-max #x7FE) +(def!constant double-float-hidden-bit (ash 1 20)) +(def!constant double-float-trapping-nan-bit (ash 1 19)) + +(def!constant long-float-bias 16382) +(defconstant-eqx long-float-exponent-byte (byte 15 0) #'equalp) +(defconstant-eqx long-float-significand-byte (byte 31 0) #'equalp) +(def!constant long-float-normal-exponent-min 1) +(def!constant long-float-normal-exponent-max #x7FFE) +(def!constant long-float-hidden-bit (ash 1 31)) ; actually not hidden +(def!constant long-float-trapping-nan-bit (ash 1 30)) + +(def!constant single-float-digits + (+ (byte-size single-float-significand-byte) 1)) + +(def!constant double-float-digits + (+ (byte-size double-float-significand-byte) 32 1)) + +(def!constant long-float-digits + (+ (byte-size long-float-significand-byte) 32 1)) + +;;; pfw -- from i486 microprocessor programmer's reference manual +(def!constant float-invalid-trap-bit (ash 1 0)) +(def!constant float-denormal-trap-bit (ash 1 1)) +(def!constant float-divide-by-zero-trap-bit (ash 1 2)) +(def!constant float-overflow-trap-bit (ash 1 3)) +(def!constant float-underflow-trap-bit (ash 1 4)) +(def!constant float-inexact-trap-bit (ash 1 5)) + +(def!constant float-round-to-nearest 0) +(def!constant float-round-to-negative 1) +(def!constant float-round-to-positive 2) +(def!constant float-round-to-zero 3) + +(defconstant-eqx float-rounding-mode (byte 2 10) #'equalp) +(defconstant-eqx float-sticky-bits (byte 6 16) #'equalp) +(defconstant-eqx float-traps-byte (byte 6 0) #'equalp) +(defconstant-eqx float-exceptions-byte (byte 6 16) #'equalp) +(defconstant-eqx float-precision-control (byte 2 8) #'equalp) +(def!constant float-fast-bit 0) ; no fast mode on x86 + +;;;; description of the target address space + +;;; where to put the different spaces. untested (copied from x86, in fact) + + +(def!constant read-only-space-start #x01000000) +(def!constant read-only-space-end #x037ff000) + +(def!constant static-space-start #x05000000) +(def!constant static-space-end #x07fff000) + +(def!constant dynamic-space-start #x09000000) +(def!constant dynamic-space-end #x29000000) + + +;;;; other miscellaneous constants + +(defenum (:suffix -trap :start 8) + halt + pending-interrupt + error + cerror + breakpoint + fun-end-breakpoint + single-step-breakpoint) +;;; FIXME: It'd be nice to replace all the DEFENUMs with something like +;;; (WITH-DEF-ENUM (:START 8) +;;; (DEF-ENUM HALT-TRAP) +;;; (DEF-ENUM PENDING-INTERRUPT-TRAP) +;;; ..) +;;; for the benefit of anyone doing a lexical search for definitions +;;; of these symbols. + +(defenum (:prefix object-not- :suffix -trap :start 16) + list + instance) + +(defenum (:prefix trace-table-) + normal + call-site + fun-prologue + fun-epilogue) + +;;;; static symbols + +;;; These symbols are loaded into static space directly after NIL so +;;; that the system can compute their address by adding a constant +;;; amount to NIL. +;;; +;;; The fdefn objects for the static functions are loaded into static +;;; space directly after the static symbols. That way, the raw-addr +;;; can be loaded directly out of them by indirecting relative to NIL. +;;; +;;; we could profitably keep these in registers on x86-64 now we have +;;; r8-r15 as well +;;; Note these spaces grow from low to high addresses. +(defvar *allocation-pointer*) +(defvar *binding-stack-pointer*) + +;;; FIXME: !COLD-INIT probably doesn't need +;;; to be in the static symbols table any more. +(defparameter *static-symbols* + '(t + + ;; The C startup code must fill these in. + *posix-argv* + + ;; functions that the C code needs to call. When adding to this list, + ;; also add a `frob' form in genesis.lisp finish-symbols. + sub-gc + sb!kernel::internal-error + sb!kernel::control-stack-exhausted-error + sb!di::handle-breakpoint + fdefinition-object + #!+sb-thread sb!thread::handle-thread-exit + + ;; free pointers + ;; + ;; Note that these are FIXNUM word counts, not (as one might + ;; expect) byte counts or SAPs. The reason seems to be that by + ;; representing them this way, we can avoid consing bignums. + ;; -- WHN 2000-10-02 + *read-only-space-free-pointer* + *static-space-free-pointer* + *initial-dynamic-space-free-pointer* + + ;; things needed for non-local exit + *current-catch-block* + *current-unwind-protect-block* + *alien-stack* + + ;; interrupt handling + *pseudo-atomic-atomic* + *pseudo-atomic-interrupted* + sb!unix::*interrupts-enabled* + sb!unix::*interrupt-pending* + *free-interrupt-context-index* + + *free-tls-index* + + *allocation-pointer* + *binding-stack-pointer* + *binding-stack-start* + *control-stack-start* + *control-stack-end* + + ;; the floating point constants + *fp-constant-0d0* + *fp-constant-1d0* + *fp-constant-0f0* + *fp-constant-1f0* + ;; The following are all long-floats. + *fp-constant-0l0* + *fp-constant-1l0* + *fp-constant-pi* + *fp-constant-l2t* + *fp-constant-l2e* + *fp-constant-lg2* + *fp-constant-ln2* + + ;; The ..SLOT-UNBOUND.. symbol is static in order to optimise the + ;; common slot unbound check. + ;; + ;; FIXME: In SBCL, the CLOS code has become sufficiently tightly + ;; integrated into the system that it'd probably make sense to use + ;; the ordinary unbound marker for this. + sb!pcl::..slot-unbound..)) + +(defparameter *static-funs* + '(length + sb!kernel:two-arg-+ + sb!kernel:two-arg-- + sb!kernel:two-arg-* + sb!kernel:two-arg-/ + sb!kernel:two-arg-< + sb!kernel:two-arg-> + sb!kernel:two-arg-= + eql + sb!kernel:%negate + sb!kernel:two-arg-and + sb!kernel:two-arg-ior + sb!kernel:two-arg-xor + sb!kernel:two-arg-gcd + sb!kernel:two-arg-lcm)) + +;;;; stuff added by jrd + +;;; FIXME: Is this used? Delete it or document it. +;;; cf the sparc PARMS.LISP +(defparameter *assembly-unit-length* 8) diff --git a/src/compiler/x86-64/pred.lisp b/src/compiler/x86-64/pred.lisp new file mode 100644 index 0000000..6babdd8 --- /dev/null +++ b/src/compiler/x86-64/pred.lisp @@ -0,0 +1,70 @@ +;;;; predicate VOPs for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; the branch VOP + +;;; The unconditional branch, emitted when we can't drop through to the desired +;;; destination. Dest is the continuation we transfer control to. +(define-vop (branch) + (:info dest) + (:generator 5 + (inst jmp dest))) + + +;;;; conditional VOPs + +;;; Note: a constant-tn is allowed in CMP; it uses an EA displacement, +;;; not immediate data. +(define-vop (if-eq) + (:args (x :scs (any-reg descriptor-reg control-stack constant) + :load-if (not (and (sc-is x immediate) + (sc-is y any-reg descriptor-reg + control-stack constant)))) + (y :scs (any-reg descriptor-reg immediate) + :load-if (not (and (sc-is x any-reg descriptor-reg immediate) + (sc-is y control-stack constant))))) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:translate eq) + (:generator 3 + (cond + ((sc-is y immediate) + (let ((val (tn-value y))) + (etypecase val + (integer + (if (and (zerop val) (sc-is x any-reg descriptor-reg)) + (inst test x x) ; smaller + (inst cmp x (fixnumize val)))) + (symbol + (inst cmp x (+ nil-value (static-symbol-offset val)))) + (character + (inst cmp x (logior (ash (char-code val) n-widetag-bits) + base-char-widetag)))))) + ((sc-is x immediate) ; and y not immediate + ;; Swap the order to fit the compare instruction. + (let ((val (tn-value x))) + (etypecase val + (integer + (if (and (zerop val) (sc-is y any-reg descriptor-reg)) + (inst test y y) ; smaller + (inst cmp y (fixnumize val)))) + (symbol + (inst cmp y (+ nil-value (static-symbol-offset val)))) + (character + (inst cmp y (logior (ash (char-code val) n-widetag-bits) + base-char-widetag)))))) + (t + (inst cmp x y))) + + (inst jmp (if not-p :ne :e) target))) diff --git a/src/compiler/x86-64/sanctify.lisp b/src/compiler/x86-64/sanctify.lisp new file mode 100644 index 0000000..87e5d5e --- /dev/null +++ b/src/compiler/x86-64/sanctify.lisp @@ -0,0 +1,20 @@ +;;;; Do whatever is necessary to make the given code component +;;;; executable. + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; While most of SBCL is derived from the CMU CL system, the test +;;;; files (like this one) were written from scratch after the fork +;;;; from CMU CL. +;;;; +;;;; This software is in the public domain and is provided with +;;;; absolutely no warranty. See the COPYING and CREDITS files for +;;;; more information. + +(in-package :sb!vm) + +(defun sanctify-for-execution (component) + (declare (ignore component)) + nil) + diff --git a/src/compiler/x86-64/sap.lisp b/src/compiler/x86-64/sap.lisp new file mode 100644 index 0000000..2189b1e --- /dev/null +++ b/src/compiler/x86-64/sap.lisp @@ -0,0 +1,478 @@ +;;;; SAP operations for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; moves and coercions + +;;; Move a tagged SAP to an untagged representation. +(define-vop (move-to-sap) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (sap-reg))) + (:note "pointer to SAP coercion") + (:generator 1 + (loadw y x sap-pointer-slot other-pointer-lowtag))) +(define-move-vop move-to-sap :move + (descriptor-reg) (sap-reg)) + +;;; Move an untagged SAP to a tagged representation. +(define-vop (move-from-sap) + (:args (sap :scs (sap-reg) :to :result)) + (:results (res :scs (descriptor-reg) :from :argument)) + (:note "SAP to pointer coercion") + (:node-var node) + (:generator 20 + (with-fixed-allocation (res sap-widetag sap-size node) + (storew sap res sap-pointer-slot other-pointer-lowtag)))) +(define-move-vop move-from-sap :move + (sap-reg) (descriptor-reg)) + +;;; Move untagged sap values. +(define-vop (sap-move) + (:args (x :target y + :scs (sap-reg) + :load-if (not (location= x y)))) + (:results (y :scs (sap-reg) + :load-if (not (location= x y)))) + (:note "SAP move") + (:effects) + (:affected) + (:generator 0 + (move y x))) +(define-move-vop sap-move :move + (sap-reg) (sap-reg)) + +;;; Move untagged sap arguments/return-values. +(define-vop (move-sap-arg) + (:args (x :target y + :scs (sap-reg)) + (fp :scs (any-reg) + :load-if (not (sc-is y sap-reg)))) + (:results (y)) + (:note "SAP argument move") + (:generator 0 + (sc-case y + (sap-reg + (move y x)) + (sap-stack + (if (= (tn-offset fp) esp-offset) + (storew x fp (tn-offset y)) ; c-call + (storew x fp (- (1+ (tn-offset y))))))))) +(define-move-vop move-sap-arg :move-arg + (descriptor-reg sap-reg) (sap-reg)) + +;;; Use standard MOVE-ARG + coercion to move an untagged sap to a +;;; descriptor passing location. +(define-move-vop move-arg :move-arg + (sap-reg) (descriptor-reg)) + +;;;; SAP-INT and INT-SAP + +;;; The function SAP-INT is used to generate an integer corresponding +;;; to the system area pointer, suitable for passing to the kernel +;;; interfaces (which want all addresses specified as integers). The +;;; function INT-SAP is used to do the opposite conversion. The +;;; integer representation of a SAP is the byte offset of the SAP from +;;; the start of the address space. +(define-vop (sap-int) + (:args (sap :scs (sap-reg) :target int)) + (:arg-types system-area-pointer) + (:results (int :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate sap-int) + (:policy :fast-safe) + (:generator 1 + (move int sap))) +(define-vop (int-sap) + (:args (int :scs (unsigned-reg) :target sap)) + (:arg-types unsigned-num) + (:results (sap :scs (sap-reg))) + (:result-types system-area-pointer) + (:translate int-sap) + (:policy :fast-safe) + (:generator 1 + (move sap int))) + +;;;; POINTER+ and POINTER- + +(define-vop (pointer+) + (:translate sap+) + (:args (ptr :scs (sap-reg) :target res + :load-if (not (location= ptr res))) + (offset :scs (signed-reg immediate))) + (:arg-types system-area-pointer signed-num) + (:results (res :scs (sap-reg) :from (:argument 0) + :load-if (not (location= ptr res)))) + (:result-types system-area-pointer) + (:policy :fast-safe) + (:generator 1 + (cond ((and (sc-is ptr sap-reg) (sc-is res sap-reg) + (not (location= ptr res))) + (sc-case offset + (signed-reg + (inst lea res (make-ea :qword :base ptr :index offset :scale 1))) + (immediate + (inst lea res (make-ea :qword :base ptr + :disp (tn-value offset)))))) + (t + (move res ptr) + (sc-case offset + (signed-reg + (inst add res offset)) + (immediate + (inst add res (tn-value offset)))))))) + +(define-vop (pointer-) + (:translate sap-) + (:args (ptr1 :scs (sap-reg) :target res) + (ptr2 :scs (sap-reg))) + (:arg-types system-area-pointer system-area-pointer) + (:policy :fast-safe) + (:results (res :scs (signed-reg) :from (:argument 0))) + (:result-types signed-num) + (:generator 1 + (move res ptr1) + (inst sub res ptr2))) + +;;;; mumble-SYSTEM-REF and mumble-SYSTEM-SET + +(macrolet ((def-system-ref-and-set (ref-name + set-name + sc + type + size + &optional signed) + (let ((ref-name-c (symbolicate ref-name "-C")) + (set-name-c (symbolicate set-name "-C")) + (temp-sc (symbolicate size "-REG"))) + `(progn + (define-vop (,ref-name) + (:translate ,ref-name) + (:policy :fast-safe) + (:args (sap :scs (sap-reg)) + (offset :scs (signed-reg))) + (:arg-types system-area-pointer signed-num) + ,@(unless (eq size :qword) + `((:temporary (:sc ,temp-sc + :from (:eval 0) + :to (:eval 1)) + temp))) + (:results (result :scs (,sc))) + (:result-types ,type) + (:generator 5 + (inst mov ,(if (eq size :qword) 'result 'temp) + (make-ea ,size :base sap :index offset)) + ,@(unless (eq size :qword) + `((inst ,(if signed 'movsx 'movzx) + result temp))))) + (define-vop (,ref-name-c) + (:translate ,ref-name) + (:policy :fast-safe) + (:args (sap :scs (sap-reg))) + (:arg-types system-area-pointer + (:constant (signed-byte 64))) + (:info offset) + ,@(unless (eq size :qword) + `((:temporary (:sc ,temp-sc + :from (:eval 0) + :to (:eval 1)) + temp))) + (:results (result :scs (,sc))) + (:result-types ,type) + (:generator 4 + (inst mov ,(if (eq size :qword) 'result 'temp) + (make-ea ,size :base sap :disp offset)) + ,@(unless (eq size :qword) + `((inst ,(if signed 'movsx 'movzx) + result temp))))) + (define-vop (,set-name) + (:translate ,set-name) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to (:eval 0)) + (offset :scs (signed-reg) :to (:eval 0)) + (value :scs (,sc) + :target ,(if (eq size :qword) + 'result + 'temp))) + (:arg-types system-area-pointer signed-num ,type) + ,@(unless (eq size :qword) + `((:temporary (:sc ,temp-sc :offset rax-offset + :from (:argument 2) :to (:result 0) + :target result) + temp))) + (:results (result :scs (,sc))) + (:result-types ,type) + (:generator 5 + ,@(unless (eq size :qword) + `((move rax-tn value))) + (inst mov (make-ea ,size + :base sap + :index offset) + ,(if (eq size :qword) 'value 'temp)) + (move result + ,(if (eq size :qword) 'value 'rax-tn)))) + (define-vop (,set-name-c) + (:translate ,set-name) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to (:eval 0)) + (value :scs (,sc) + :target ,(if (eq size :qword) + 'result + 'temp))) + (:arg-types system-area-pointer + (:constant (signed-byte 64)) ,type) + (:info offset) + ,@(unless (eq size :qword) + `((:temporary (:sc ,temp-sc :offset rax-offset + :from (:argument 2) :to (:result 0) + :target result) + temp))) + (:results (result :scs (,sc))) + (:result-types ,type) + (:generator 4 + ,@(unless (eq size :qword) + `((move rax-tn value))) + (inst mov + (make-ea ,size :base sap :disp offset) + ,(if (eq size :qword) 'value 'temp)) + (move result ,(if (eq size :qword) + 'value + 'rax-tn)))))))) + + (def-system-ref-and-set sap-ref-8 %set-sap-ref-8 + unsigned-reg positive-fixnum :byte nil) + (def-system-ref-and-set signed-sap-ref-8 %set-signed-sap-ref-8 + signed-reg tagged-num :byte t) + (def-system-ref-and-set sap-ref-16 %set-sap-ref-16 + unsigned-reg positive-fixnum :word nil) + (def-system-ref-and-set signed-sap-ref-16 %set-signed-sap-ref-16 + signed-reg tagged-num :word t) + (def-system-ref-and-set sap-ref-32 %set-sap-ref-32 + unsigned-reg unsigned-num :dword nil) + (def-system-ref-and-set signed-sap-ref-32 %set-signed-sap-ref-32 + signed-reg signed-num :dword t) + (def-system-ref-and-set sap-ref-64 %set-sap-ref-64 + unsigned-reg unsigned-num :qword nil) + (def-system-ref-and-set signed-sap-ref-64 %set-signed-sap-ref-64 + signed-reg signed-num :qword t) + (def-system-ref-and-set sap-ref-sap %set-sap-ref-sap + sap-reg system-area-pointer :qword)) + +;;;; SAP-REF-DOUBLE + +(define-vop (sap-ref-double) + (:translate sap-ref-double) + (:policy :fast-safe) + (:args (sap :scs (sap-reg)) + (offset :scs (signed-reg))) + (:arg-types system-area-pointer signed-num) + (:results (result :scs (double-reg))) + (:result-types double-float) + (:generator 5 + (with-empty-tn@fp-top(result) + (inst fldd (make-ea :dword :base sap :index offset))))) + +(define-vop (sap-ref-double-c) + (:translate sap-ref-double) + (:policy :fast-safe) + (:args (sap :scs (sap-reg))) + (:arg-types system-area-pointer (:constant (signed-byte 64))) + (:info offset) + (:results (result :scs (double-reg))) + (:result-types double-float) + (:generator 4 + (with-empty-tn@fp-top(result) + (inst fldd (make-ea :dword :base sap :disp offset))))) + +(define-vop (%set-sap-ref-double) + (:translate %set-sap-ref-double) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to (:eval 0)) + (offset :scs (signed-reg) :to (:eval 0)) + (value :scs (double-reg))) + (:arg-types system-area-pointer signed-num double-float) + (:results (result :scs (double-reg))) + (:result-types double-float) + (:generator 5 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0. + (inst fstd (make-ea :dword :base sap :index offset)) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fstd result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fstd (make-ea :dword :base sap :index offset)) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fstd value)) + (t + ;; Neither value or result are in ST0. + (unless (location= value result) + (inst fstd result)) + (inst fxch value))))))) + +(define-vop (%set-sap-ref-double-c) + (:translate %set-sap-ref-double) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to (:eval 0)) + (value :scs (double-reg))) + (:arg-types system-area-pointer (:constant (signed-byte 64)) double-float) + (:info offset) + (:results (result :scs (double-reg))) + (:result-types double-float) + (:generator 4 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0. + (inst fstd (make-ea :qword :base sap :disp offset)) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fstd result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fstd (make-ea :qword :base sap :disp offset)) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fstd value)) + (t + ;; Neither value or result are in ST0. + (unless (location= value result) + (inst fstd result)) + (inst fxch value))))))) + +;;;; SAP-REF-SINGLE + +(define-vop (sap-ref-single) + (:translate sap-ref-single) + (:policy :fast-safe) + (:args (sap :scs (sap-reg)) + (offset :scs (signed-reg))) + (:arg-types system-area-pointer signed-num) + (:results (result :scs (single-reg))) + (:result-types single-float) + (:generator 5 + (with-empty-tn@fp-top(result) + (inst fld (make-ea :dword :base sap :index offset))))) + +(define-vop (sap-ref-single-c) + (:translate sap-ref-single) + (:policy :fast-safe) + (:args (sap :scs (sap-reg))) + (:arg-types system-area-pointer (:constant (signed-byte 32))) + (:info offset) + (:results (result :scs (single-reg))) + (:result-types single-float) + (:generator 4 + (with-empty-tn@fp-top(result) + (inst fld (make-ea :dword :base sap :disp offset))))) + +(define-vop (%set-sap-ref-single) + (:translate %set-sap-ref-single) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to (:eval 0)) + (offset :scs (signed-reg) :to (:eval 0)) + (value :scs (single-reg))) + (:arg-types system-area-pointer signed-num single-float) + (:results (result :scs (single-reg))) + (:result-types single-float) + (:generator 5 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0 + (inst fst (make-ea :dword :base sap :index offset)) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fst result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fst (make-ea :dword :base sap :index offset)) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fst value)) + (t + ;; Neither value or result are in ST0 + (unless (location= value result) + (inst fst result)) + (inst fxch value))))))) + +(define-vop (%set-sap-ref-single-c) + (:translate %set-sap-ref-single) + (:policy :fast-safe) + (:args (sap :scs (sap-reg) :to (:eval 0)) + (value :scs (single-reg))) + (:arg-types system-area-pointer (:constant (signed-byte 32)) single-float) + (:info offset) + (:results (result :scs (single-reg))) + (:result-types single-float) + (:generator 4 + (cond ((zerop (tn-offset value)) + ;; Value is in ST0 + (inst fst (make-ea :dword :base sap :disp offset)) + (unless (zerop (tn-offset result)) + ;; Value is in ST0 but not result. + (inst fst result))) + (t + ;; Value is not in ST0. + (inst fxch value) + (inst fst (make-ea :dword :base sap :disp offset)) + (cond ((zerop (tn-offset result)) + ;; The result is in ST0. + (inst fst value)) + (t + ;; Neither value or result are in ST0 + (unless (location= value result) + (inst fst result)) + (inst fxch value))))))) + +;;;; SAP-REF-LONG +#+nil +(define-vop (sap-ref-long) + (:translate sap-ref-long) + (:policy :fast-safe) + (:args (sap :scs (sap-reg)) + (offset :scs (signed-reg))) + (:arg-types system-area-pointer signed-num) + (:results (result :scs (#!+long-float long-reg #!-long-float double-reg))) + (:result-types #!+long-float long-float #!-long-float double-float) + (:generator 5 + (with-empty-tn@fp-top(result) + (inst fldl (make-ea :qword :base sap :index offset))))) +#+nil +(define-vop (sap-ref-long-c) + (:translate sap-ref-long) + (:policy :fast-safe) + (:args (sap :scs (sap-reg))) + (:arg-types system-area-pointer (:constant (signed-byte 64))) + (:info offset) + (:results (result :scs (#!+long-float long-reg #!-long-float double-reg))) + (:result-types #!+long-float long-float #!-long-float double-float) + (:generator 4 + (with-empty-tn@fp-top(result) + (inst fldl (make-ea :qword :base sap :disp offset))))) + + +;;; noise to convert normal lisp data objects into SAPs + +(define-vop (vector-sap) + (:translate vector-sap) + (:policy :fast-safe) + (:args (vector :scs (descriptor-reg) :target sap)) + (:results (sap :scs (sap-reg))) + (:result-types system-area-pointer) + (:generator 2 + (move sap vector) + (inst add + sap + (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)))) + + diff --git a/src/compiler/x86-64/show.lisp b/src/compiler/x86-64/show.lisp new file mode 100644 index 0000000..bc475e4 --- /dev/null +++ b/src/compiler/x86-64/show.lisp @@ -0,0 +1,32 @@ +;;;; VOPs which are useful for following the progress of the system +;;;; early in boot + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; FIXME: should probably become conditional on #!+SB-SHOW +;;; FIXME: should be called DEBUG-PRINT or COLD-PRINT +(define-vop (print) + (:args (object :scs (descriptor-reg any-reg))) + (:temporary (:sc unsigned-reg + :offset rax-offset + :target result + :from :eval + :to (:result 0)) + rax) + (:results (result :scs (descriptor-reg))) + (:save-p t) + (:generator 100 + (inst push object) + (inst lea rax (make-fixup (extern-alien-name "debug_print") :foreign)) + (inst call (make-fixup (extern-alien-name "call_into_c") :foreign)) + (inst add rsp-tn n-word-bytes) + (move result rax))) diff --git a/src/compiler/x86-64/static-fn.lisp b/src/compiler/x86-64/static-fn.lisp new file mode 100644 index 0000000..1842dff --- /dev/null +++ b/src/compiler/x86-64/static-fn.lisp @@ -0,0 +1,160 @@ +;;;; the VOPs and macro magic required to call static functions + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(define-vop (static-fun-template) + (:save-p t) + (:policy :safe) + (:variant-vars function) + (:vop-var vop) + (:node-var node) + (:temporary (:sc unsigned-reg :offset ebx-offset + :from (:eval 0) :to (:eval 2)) ebx) + (:temporary (:sc unsigned-reg :offset ecx-offset + :from (:eval 0) :to (:eval 2)) ecx)) + +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) + +(defun static-fun-template-name (num-args num-results) + (intern (format nil "~:@(~R-arg-~R-result-static-fun~)" + num-args num-results))) + +(defun moves (dst src) + (collect ((moves)) + (do ((dst dst (cdr dst)) + (src src (cdr src))) + ((or (null dst) (null src))) + (moves `(move ,(car dst) ,(car src)))) + (moves))) + +(defun static-fun-template-vop (num-args num-results) + (unless (and (<= num-args register-arg-count) + (<= num-results register-arg-count)) + (error "either too many args (~W) or too many results (~W); max = ~W" + num-args num-results register-arg-count)) + (let ((num-temps (max num-args num-results))) + (collect ((temp-names) (temps) (arg-names) (args) (result-names) (results)) + (dotimes (i num-results) + (let ((result-name (intern (format nil "RESULT-~D" i)))) + (result-names result-name) + (results `(,result-name :scs (any-reg descriptor-reg))))) + (dotimes (i num-temps) + (let ((temp-name (intern (format nil "TEMP-~D" i)))) + (temp-names temp-name) + (temps `(:temporary (:sc descriptor-reg + :offset ,(nth i *register-arg-offsets*) + :from ,(if (< i num-args) + `(:argument ,i) + '(:eval 1)) + :to ,(if (< i num-results) + `(:result ,i) + '(:eval 1)) + ,@(when (< i num-results) + `(:target ,(nth i (result-names))))) + ,temp-name)))) + (dotimes (i num-args) + (let ((arg-name (intern (format nil "ARG-~D" i)))) + (arg-names arg-name) + (args `(,arg-name + :scs (any-reg descriptor-reg) + :target ,(nth i (temp-names)))))) + `(define-vop (,(static-fun-template-name num-args num-results) + static-fun-template) + (:args ,@(args)) + ,@(temps) + (:results ,@(results)) + (:generator ,(+ 50 num-args num-results) + ,@(moves (temp-names) (arg-names)) + + ;; If speed not more important than size, duplicate the + ;; effect of the ENTER with discrete instructions. Takes + ;; 2+1+3+2=8 bytes as opposed to 4+3=7 bytes. + (cond ((policy node (>= speed space)) + (inst mov ebx rsp-tn) + ;; Save the old-fp + (inst push rbp-tn) + ;; Ensure that at least three slots are available; one + ;; above, two more needed. + (inst sub rsp-tn (fixnumize 2)) + (inst mov rbp-tn ebx)) + (t + (inst enter (fixnumize 2)) + ;; The enter instruction pushes EBP and then copies + ;; ESP into EBP. We want the new EBP to be the + ;; original ESP, so we fix it up afterwards. + (inst add rbp-tn (fixnumize 1)))) + + ,(if (zerop num-args) + '(inst xor ecx ecx) + `(inst mov ecx (fixnumize ,num-args))) + + (note-this-location vop :call-site) + ;; Old CMU CL comment: + ;; STATIC-FUN-OFFSET gives the offset from the start of + ;; the NIL object to the static function FDEFN and has the + ;; low tag of 1 added. When the NIL symbol value with its + ;; low tag of 3 is added the resulting value points to the + ;; raw address slot of the fdefn (at +4). + ;; FIXME: Since the fork from CMU CL, we've swapped + ;; FUN-POINTER-LOWTAG and INSTANCE-POINTER-LOWTAG, so the + ;; text above is no longer right. Mysteriously, things still + ;; work. It would be good to explain why. (Is this code no + ;; longer executed? Does it not depend on the + ;; 1+3=4=fdefn_raw_address_offset relationship above? + ;; Is something else going on?) + (inst call (make-ea :qword + :disp (+ nil-value + (static-fun-offset function)))) + ,(collect ((bindings) (links)) + (do ((temp (temp-names) (cdr temp)) + (name 'values (gensym)) + (prev nil name) + (i 0 (1+ i))) + ((= i num-results)) + (bindings `(,name + (make-tn-ref ,(car temp) nil))) + (when prev + (links `(setf (tn-ref-across ,prev) ,name)))) + `(let ,(bindings) + ,@(links) + (default-unknown-values + vop + ,(if (zerop num-results) nil 'values) + ,num-results))) + ,@(moves (result-names) (temp-names))))))) + +) ; EVAL-WHEN + +(macrolet ((frob (num-args num-res) + (static-fun-template-vop (eval num-args) (eval num-res)))) + (frob 0 1) + (frob 1 1) + (frob 2 1) + (frob 3 1)) + +(defmacro define-static-fun (name args &key (results '(x)) translate + policy cost arg-types result-types) + `(define-vop (,name + ,(static-fun-template-name (length args) + (length results))) + (:variant ',name) + (:note ,(format nil "static-fun ~@(~S~)" name)) + ,@(when translate + `((:translate ,translate))) + ,@(when policy + `((:policy ,policy))) + ,@(when cost + `((:generator-cost ,cost))) + ,@(when arg-types + `((:arg-types ,@arg-types))) + ,@(when result-types + `((:result-types ,@result-types))))) diff --git a/src/compiler/x86-64/subprim.lisp b/src/compiler/x86-64/subprim.lisp new file mode 100644 index 0000000..1e9e532 --- /dev/null +++ b/src/compiler/x86-64/subprim.lisp @@ -0,0 +1,82 @@ +;;;; linkage information for standard static functions, and +;;;; miscellaneous VOPs + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; LENGTH + +(define-vop (length/list) + (:translate length) + (:args (object :scs (descriptor-reg control-stack) :target ptr)) + (:arg-types list) + (:temporary (:sc unsigned-reg :offset eax-offset) eax) + (:temporary (:sc descriptor-reg :from (:argument 0)) ptr) + (:results (count :scs (any-reg))) + (:result-types positive-fixnum) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:generator 40 + ;; Move OBJECT into a temp we can bash on, and initialize the count. + (move ptr object) + (inst xor count count) + ;; If we are starting with NIL, then it's really easy. + (inst cmp ptr nil-value) + (inst jmp :e done) + ;; Note: we don't have to test to see whether the original argument is a + ;; list, because this is a :fast-safe vop. + LOOP + ;; Get the CDR and boost the count. + (loadw ptr ptr cons-cdr-slot list-pointer-lowtag) + (inst add count (fixnumize 1)) + ;; If we hit NIL, then we are done. + (inst cmp ptr nil-value) + (inst jmp :e done) + ;; Otherwise, check to see whether we hit the end of a dotted list. If + ;; not, loop back for more. + (move eax ptr) + (inst and al-tn lowtag-mask) + (inst cmp al-tn list-pointer-lowtag) + (inst jmp :e loop) + ;; It's dotted all right. Flame out. + (error-call vop object-not-list-error ptr) + ;; We be done. + DONE)) + +(define-vop (fast-length/list) + (:translate length) + (:args (object :scs (descriptor-reg control-stack) :target ptr)) + (:arg-types list) + (:temporary (:sc descriptor-reg :from (:argument 0)) ptr) + (:results (count :scs (any-reg))) + (:result-types positive-fixnum) + (:policy :fast) + (:vop-var vop) + (:save-p :compute-only) + (:generator 30 + ;; Get a copy of OBJECT in a register we can bash on, and + ;; initialize COUNT. + (move ptr object) + (inst xor count count) + ;; If we are starting with NIL, we be done. + (inst cmp ptr nil-value) + (inst jmp :e done) + ;; Indirect the next cons cell, and boost the count. + LOOP + (loadw ptr ptr cons-cdr-slot list-pointer-lowtag) + (inst add count (fixnumize 1)) + ;; If we aren't done, go back for more. + (inst cmp ptr nil-value) + (inst jmp :ne loop) + DONE)) + +(define-static-fun length (object) :translate length) diff --git a/src/compiler/x86-64/system.lisp b/src/compiler/x86-64/system.lisp new file mode 100644 index 0000000..c9f111d --- /dev/null +++ b/src/compiler/x86-64/system.lisp @@ -0,0 +1,318 @@ +;;;; x86 VM definitions of various system hacking operations + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; type frobbing VOPs + +(define-vop (lowtag-of) + (:translate lowtag-of) + (:policy :fast-safe) + (:args (object :scs (any-reg descriptor-reg control-stack) + :target result)) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 1 + (move result object) + (inst and result lowtag-mask))) + +(define-vop (widetag-of) + (:translate widetag-of) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:temporary (:sc unsigned-reg :offset eax-offset :to (:result 0)) rax) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 6 + (inst mov rax object) + (inst and al-tn lowtag-mask) + (inst cmp al-tn other-pointer-lowtag) + (inst jmp :e other-ptr) + (inst cmp al-tn fun-pointer-lowtag) + (inst jmp :e function-ptr) + + ;; Pick off structures and list pointers. + (inst test al-tn 1) + (inst jmp :ne done) + + ;; Pick off fixnums. + (inst and al-tn 7) + (inst jmp :e done) + + ;; must be an other immediate + (inst mov rax object) + (inst jmp done) + + FUNCTION-PTR + (load-type al-tn object (- fun-pointer-lowtag)) + (inst jmp done) + + OTHER-PTR + (load-type al-tn object (- other-pointer-lowtag)) + + DONE + (inst movzx result al-tn))) + +(define-vop (fun-subtype) + (:translate fun-subtype) + (:policy :fast-safe) + (:args (function :scs (descriptor-reg))) + (:temporary (:sc byte-reg :from (:eval 0) :to (:eval 1)) temp) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 6 + (load-type temp function (- fun-pointer-lowtag)) + (inst movzx result temp))) + +(define-vop (set-fun-subtype) + (:translate (setf fun-subtype)) + (:policy :fast-safe) + (:args (type :scs (unsigned-reg) :target eax) + (function :scs (descriptor-reg))) + (:arg-types positive-fixnum *) + (:temporary (:sc unsigned-reg :offset rax-offset :from (:argument 0) + :to (:result 0) :target result) + eax) + (:results (result :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 6 + (move eax type) + (inst mov + (make-ea :byte :base function :disp (- fun-pointer-lowtag)) + al-tn) + (move result eax))) + +(define-vop (get-header-data) + (:translate get-header-data) + (:policy :fast-safe) + (:args (x :scs (descriptor-reg))) + (:results (res :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 6 + (loadw res x 0 other-pointer-lowtag) + (inst shr res n-widetag-bits))) + +(define-vop (get-closure-length) + (:translate get-closure-length) + (:policy :fast-safe) + (:args (x :scs (descriptor-reg))) + (:results (res :scs (unsigned-reg))) + (:result-types positive-fixnum) + (:generator 6 + (loadw res x 0 fun-pointer-lowtag) + (inst shr res n-widetag-bits))) + +(define-vop (set-header-data) + (:translate set-header-data) + (:policy :fast-safe) + (:args (x :scs (descriptor-reg) :target res :to (:result 0)) + (data :scs (any-reg) :target eax)) + (:arg-types * positive-fixnum) + (:results (res :scs (descriptor-reg))) + (:temporary (:sc unsigned-reg :offset eax-offset + :from (:argument 1) :to (:result 0)) eax) + (:generator 6 + (move eax data) + (inst shl eax (- n-widetag-bits 2)) + (inst mov al-tn (make-ea :byte :base x :disp (- other-pointer-lowtag))) + (storew eax x 0 other-pointer-lowtag) + (move res x))) + +(define-vop (make-fixnum) + (:args (ptr :scs (any-reg descriptor-reg) :target res)) + (:results (res :scs (any-reg descriptor-reg))) + (:generator 1 + ;; Some code (the hash table code) depends on this returning a + ;; positive number so make sure it does. + (move res ptr) + (inst shl res 4) + (inst shr res 1))) + +(define-vop (make-other-immediate-type) + (:args (val :scs (any-reg descriptor-reg) :target res) + (type :scs (unsigned-reg immediate))) + (:results (res :scs (any-reg descriptor-reg) :from (:argument 0))) + (:generator 2 + (move res val) + (inst shl res (- n-widetag-bits 2)) + (inst or res (sc-case type + (unsigned-reg type) + (immediate (tn-value type)))))) + +;;;; allocation + +(define-vop (dynamic-space-free-pointer) + (:results (int :scs (sap-reg))) + (:result-types system-area-pointer) + (:translate dynamic-space-free-pointer) + (:policy :fast-safe) + (:generator 1 + (load-symbol-value int *allocation-pointer*))) + +(define-vop (binding-stack-pointer-sap) + (:results (int :scs (sap-reg))) + (:result-types system-area-pointer) + (:translate binding-stack-pointer-sap) + (:policy :fast-safe) + (:generator 1 + (load-tl-symbol-value int *binding-stack-pointer*))) + +(defknown (setf binding-stack-pointer-sap) + (system-area-pointer) system-area-pointer ()) + +(define-vop (set-binding-stack-pointer-sap) + (:args (new-value :scs (sap-reg) :target int)) + (:arg-types system-area-pointer) + (:results (int :scs (sap-reg))) + (:result-types system-area-pointer) + #!+sb-thread (:temporary (:sc any-reg) temp) + (:translate (setf binding-stack-pointer-sap)) + (:policy :fast-safe) + (:generator 1 + (store-tl-symbol-value new-value *binding-stack-pointer* temp) + (move int new-value))) + +(define-vop (control-stack-pointer-sap) + (:results (int :scs (sap-reg))) + (:result-types system-area-pointer) + (:translate control-stack-pointer-sap) + (:policy :fast-safe) + (:generator 1 + (move int rsp-tn))) + +;;;; code object frobbing + +(define-vop (code-instructions) + (:translate code-instructions) + (:policy :fast-safe) + (:args (code :scs (descriptor-reg) :to (:result 0))) + (:results (sap :scs (sap-reg) :from (:argument 0))) + (:result-types system-area-pointer) + (:generator 10 + (loadw sap code 0 other-pointer-lowtag) + (inst shr sap n-widetag-bits) + (inst lea sap (make-ea :byte :base code :index sap + :scale n-word-bytes + :disp (- other-pointer-lowtag))))) + +(define-vop (compute-fun) + (:args (code :scs (descriptor-reg) :to (:result 0)) + (offset :scs (signed-reg unsigned-reg) :to (:result 0))) + (:arg-types * positive-fixnum) + (:results (func :scs (descriptor-reg) :from (:argument 0))) + (:generator 10 + (loadw func code 0 other-pointer-lowtag) + (inst shr func n-widetag-bits) + (inst lea func + (make-ea :byte :base offset :index func + :scale n-word-bytes + :disp (- fun-pointer-lowtag other-pointer-lowtag))) + (inst add func code))) + +(define-vop (%simple-fun-self) + (:policy :fast-safe) + (:translate %simple-fun-self) + (:args (function :scs (descriptor-reg))) + (:results (result :scs (descriptor-reg))) + (:generator 3 + (loadw result function simple-fun-self-slot fun-pointer-lowtag) + (inst lea result + (make-ea :byte :base result + :disp (- fun-pointer-lowtag + (* simple-fun-code-offset n-word-bytes)))))) + +;;; The closure function slot is a pointer to raw code on X86 instead +;;; of a pointer to the code function object itself. This VOP is used +;;; to reference the function object given the closure object. +(define-source-transform %closure-fun (closure) + `(%simple-fun-self ,closure)) + +(define-source-transform %funcallable-instance-fun (fin) + `(%simple-fun-self ,fin)) + +(define-vop (%set-fun-self) + (:policy :fast-safe) + (:translate (setf %simple-fun-self)) + (:args (new-self :scs (descriptor-reg) :target result :to :result) + (function :scs (descriptor-reg) :to :result)) + (:temporary (:sc any-reg :from (:argument 0) :to :result) temp) + (:results (result :scs (descriptor-reg))) + (:generator 3 + (inst lea temp + (make-ea :byte :base new-self + :disp (- (ash simple-fun-code-offset word-shift) + fun-pointer-lowtag))) + (storew temp function simple-fun-self-slot fun-pointer-lowtag) + (move result new-self))) + +;;; KLUDGE: This seems to be some kind of weird override of the way +;;; that the objdef.lisp code would ordinarily set up the slot +;;; accessor. It's inherited from CMU CL, and it works, and naively +;;; deleting it seemed to cause problems, but it's not obvious why +;;; it's done this way. Any ideas? -- WHN 2001-08-02 +(defknown ((setf %funcallable-instance-fun)) (function function) function + (unsafe)) +;;; CMU CL comment: +;;; We would have really liked to use a source-transform for this, but +;;; they don't work with SETF functions. +;;; FIXME: Can't we just use DEFSETF or something? +(deftransform (setf %funcallable-instance-fun) ((value fin)) + '(setf (%simple-fun-self fin) value)) + +;;;; other miscellaneous VOPs + +(defknown sb!unix::receive-pending-interrupt () (values)) +(define-vop (sb!unix::receive-pending-interrupt) + (:policy :fast-safe) + (:translate sb!unix::receive-pending-interrupt) + (:generator 1 + (inst break pending-interrupt-trap))) + +#!+sb-thread +(defknown current-thread-offset-sap ((unsigned-byte 32)) + system-area-pointer (flushable)) + +#!+sb-thread +(define-vop (current-thread-offset-sap) + (:results (sap :scs (sap-reg))) + (:result-types system-area-pointer) + (:translate current-thread-offset-sap) + (:args (n :scs (unsigned-reg) :target sap)) + (:arg-types unsigned-num) + (:policy :fast-safe) + (:generator 2 + (inst fs-segment-prefix) + (inst mov sap (make-ea :dword :disp 0 :index n :scale 4)))) + +(define-vop (halt) + (:generator 1 + (inst break halt-trap))) + +(defknown float-wait () (values)) +(define-vop (float-wait) + (:policy :fast-safe) + (:translate float-wait) + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-next-instruction vop :internal-error) + (inst wait))) + +;;;; dynamic vop count collection support + +#!+sb-dyncount +(define-vop (count-me) + (:args (count-vector :scs (descriptor-reg))) + (:info index) + (:generator 0 + (inst inc (make-ea :qword :base count-vector + :disp (- (* (+ vector-data-offset index) n-word-bytes) + other-pointer-lowtag))))) diff --git a/src/compiler/x86-64/target-insts.lisp b/src/compiler/x86-64/target-insts.lisp new file mode 100644 index 0000000..c021af1 --- /dev/null +++ b/src/compiler/x86-64/target-insts.lisp @@ -0,0 +1,59 @@ +;;;; target-only stuff from CMU CL's src/compiler/x86/insts.lisp +;;;; +;;;; i.e. stuff which was in CMU CL's insts.lisp file, but which in +;;;; the SBCL build process can't be compiled into code for the +;;;; cross-compilation host + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(defun print-mem-access (value stream print-size-p dstate) + (declare (type list value) + (type stream stream) + (type (member t nil) print-size-p) + (type sb!disassem:disassem-state dstate)) + (when print-size-p + (princ (sb!disassem:dstate-get-prop dstate 'width) stream) + (princ '| PTR | stream)) + (write-char #\[ stream) + (let ((firstp t)) + (macrolet ((pel ((var val) &body body) + ;; Print an element of the address, maybe with + ;; a leading separator. + `(let ((,var ,val)) + (when ,var + (unless firstp + (write-char #\+ stream)) + ,@body + (setq firstp nil))))) + (pel (base-reg (first value)) + (print-addr-reg base-reg stream dstate)) + (pel (index-reg (third value)) + (print-addr-reg index-reg stream dstate) + (let ((index-scale (fourth value))) + (when (and index-scale (not (= index-scale 1))) + (write-char #\* stream) + (princ index-scale stream)))) + (let ((offset (second value))) + (when (and offset (or firstp (not (zerop offset)))) + (unless (or firstp (minusp offset)) + (write-char #\+ stream)) + (if firstp + (progn + (sb!disassem:princ16 offset stream) + (or (minusp offset) + (nth-value 1 + (sb!disassem::note-code-constant-absolute offset dstate)) + (sb!disassem:maybe-note-assembler-routine offset + nil + dstate))) + (princ offset stream)))))) + (write-char #\] stream)) diff --git a/src/compiler/x86-64/type-vops.lisp b/src/compiler/x86-64/type-vops.lisp new file mode 100644 index 0000000..dfd41ca --- /dev/null +++ b/src/compiler/x86-64/type-vops.lisp @@ -0,0 +1,262 @@ +;;;; type testing and checking VOPs for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;;; test generation utilities + +;;; Emit the most compact form of the test immediate instruction, +;;; using an 8 bit test when the immediate is only 8 bits and the +;;; value is one of the four low registers (eax, ebx, ecx, edx) or the +;;; control stack. +(defun generate-fixnum-test (value) + "zero flag set if VALUE is fixnum" + (let ((offset (tn-offset value))) + (cond ((and (sc-is value any-reg descriptor-reg) + (or (= offset eax-offset) (= offset ebx-offset) + (= offset ecx-offset) (= offset edx-offset))) + (inst test (make-random-tn :kind :normal + :sc (sc-or-lose 'byte-reg) + :offset offset) + 7)) + ((sc-is value control-stack) + (inst test (make-ea :byte :base rbp-tn + :disp (- (* (1+ offset) n-word-bytes))) + 7)) + (t + (inst test value 7))))) + +(defun %test-fixnum (value target not-p) + (generate-fixnum-test value) + (inst jmp (if not-p :nz :z) target)) + +(defun %test-fixnum-and-headers (value target not-p headers) + (let ((drop-through (gen-label))) + (generate-fixnum-test value) + (inst jmp :z (if not-p drop-through target)) + (%test-headers value target not-p nil headers drop-through))) + +(defun %test-immediate (value target not-p immediate) + ;; Code a single instruction byte test if possible. + (let ((offset (tn-offset value))) + (cond ((and (sc-is value any-reg descriptor-reg) + (or (= offset rax-offset) (= offset rbx-offset) + (= offset rcx-offset) (= offset rdx-offset))) + (inst cmp (make-random-tn :kind :normal + :sc (sc-or-lose 'byte-reg) + :offset offset) + immediate)) + (t + (move rax-tn value) + (inst cmp al-tn immediate)))) + (inst jmp (if not-p :ne :e) target)) + +(defun %test-lowtag (value target not-p lowtag &optional al-loaded) + (unless al-loaded + (move rax-tn value) + (inst and al-tn lowtag-mask)) + (inst cmp al-tn lowtag) + (inst jmp (if not-p :ne :e) target)) + +(defun %test-headers (value target not-p function-p headers + &optional (drop-through (gen-label)) al-loaded) + (let ((lowtag (if function-p fun-pointer-lowtag other-pointer-lowtag))) + (multiple-value-bind (equal less-or-equal when-true when-false) + ;; EQUAL and LESS-OR-EQUAL are the conditions for branching to TARGET. + ;; WHEN-TRUE and WHEN-FALSE are the labels to branch to when we know + ;; it's true and when we know it's false respectively. + (if not-p + (values :ne :a drop-through target) + (values :e :na target drop-through)) + (%test-lowtag value when-false t lowtag al-loaded) + (inst mov al-tn (make-ea :byte :base value :disp (- lowtag))) + (do ((remaining headers (cdr remaining))) + ((null remaining)) + (let ((header (car remaining)) + (last (null (cdr remaining)))) + (cond + ((atom header) + (inst cmp al-tn header) + (if last + (inst jmp equal target) + (inst jmp :e when-true))) + (t + (let ((start (car header)) + (end (cdr header))) + (unless (= start bignum-widetag) + (inst cmp al-tn start) + (inst jmp :b when-false)) ; was :l + (inst cmp al-tn end) + (if last + (inst jmp less-or-equal target) + (inst jmp :be when-true))))))) ; was :le + (emit-label drop-through)))) + + +;;;; type checking and testing + +(define-vop (check-type) + (:args (value :target result :scs (any-reg descriptor-reg))) + (:results (result :scs (any-reg descriptor-reg))) + (:temporary (:sc unsigned-reg :offset eax-offset :to (:result 0)) eax) + (:ignore eax) + (:vop-var vop) + (:save-p :compute-only)) + +(define-vop (type-predicate) + (:args (value :scs (any-reg descriptor-reg))) + (:temporary (:sc unsigned-reg :offset eax-offset) eax) + (:ignore eax) + (:conditional) + (:info target not-p) + (:policy :fast-safe)) + +;;; simpler VOP that don't need a temporary register +(define-vop (simple-check-type) + (:args (value :target result :scs (any-reg descriptor-reg))) + (:results (result :scs (any-reg descriptor-reg) + :load-if (not (and (sc-is value any-reg descriptor-reg) + (sc-is result control-stack))))) + (:vop-var vop) + (:save-p :compute-only)) + +(define-vop (simple-type-predicate) + (:args (value :scs (any-reg descriptor-reg control-stack))) + (:conditional) + (:info target not-p) + (:policy :fast-safe)) + +(defun cost-to-test-types (type-codes) + (+ (* 2 (length type-codes)) + (if (> (apply #'max type-codes) lowtag-limit) 7 2))) + +(defmacro !define-type-vops (pred-name check-name ptype error-code + (&rest type-codes) + &key (variant nil variant-p) &allow-other-keys) + ;; KLUDGE: UGH. Why do we need this eval? Can't we put this in the + ;; expansion? + (let* ((cost (cost-to-test-types (mapcar #'eval type-codes))) + (prefix (if variant-p + (concatenate 'string (string variant) "-") + ""))) + `(progn + ,@(when pred-name + `((define-vop (,pred-name ,(intern (concatenate 'string prefix "TYPE-PREDICATE"))) + (:translate ,pred-name) + (:generator ,cost + (test-type value target not-p (,@type-codes)))))) + ,@(when check-name + `((define-vop (,check-name ,(intern (concatenate 'string prefix "CHECK-TYPE"))) + (:generator ,cost + (let ((err-lab + (generate-error-code vop ,error-code value))) + (test-type value err-lab t (,@type-codes)) + (move result value)))))) + ,@(when ptype + `((primitive-type-vop ,check-name (:check) ,ptype)))))) + +;;;; other integer ranges + +(define-vop (fixnump/unsigned-byte-64 simple-type-predicate) + (:args (value :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:translate fixnump) + (:temporary (:sc unsigned-reg) tmp) + (:generator 5 + (inst mov tmp value) + (inst shr tmp 61) + (inst jmp (if not-p :nz :z) target))) + +(define-vop (signed-byte-32-p type-predicate) + (:translate signed-byte-32-p) + (:generator 45 + ;; (and (fixnum) (no bits set >32)) + (move rax-tn value) + (inst test rax-tn 7) + (inst jmp :ne (if not-p target not-target)) + (inst sar rax-tn (+ 32 3)) + (inst jmp (if not-p :nz :z) target) + NOT-TARGET)) + +(define-vop (check-signed-byte-32 check-type) + (:generator 45 + (let ((nope (generate-error-code vop + object-not-signed-byte-32-error + value))) + (move rax-tn value) + (inst test rax-tn 7) + (inst jmp :ne nope) + (inst sar rax-tn (+ 32 3)) + (inst jmp :nz nope) + (move result value)))) + + +(define-vop (unsigned-byte-32-p type-predicate) + (:translate unsigned-byte-32-p) + (:generator 45 + ;; (and (fixnum) (no bits set >31)) + (move rax-tn value) + (inst test rax-tn 7) + (inst jmp :ne (if not-p target not-target)) + (inst sar rax-tn (+ 32 3 -1)) + (inst jmp (if not-p :nz :z) target) + NOT-TARGET)) + +(define-vop (check-unsigned-byte-32 check-type) + (:generator 45 + (let ((nope + (generate-error-code vop object-not-unsigned-byte-32-error value))) + (move rax-tn value) + (inst test rax-tn 7) + (inst jmp :ne nope) + (inst sar rax-tn (+ 32 3 -1)) + (inst jmp :nz nope) + (move result value)))) + +;;;; list/symbol types +;;; +;;; symbolp (or symbol (eq nil)) +;;; consp (and list (not (eq nil))) + +(define-vop (symbolp type-predicate) + (:translate symbolp) + (:generator 12 + (let ((is-symbol-label (if not-p drop-thru target))) + (inst cmp value nil-value) + (inst jmp :e is-symbol-label) + (test-type value target not-p (symbol-header-widetag))) + DROP-THRU)) + +(define-vop (check-symbol check-type) + (:generator 12 + (let ((error (generate-error-code vop object-not-symbol-error value))) + (inst cmp value nil-value) + (inst jmp :e drop-thru) + (test-type value error t (symbol-header-widetag))) + DROP-THRU + (move result value))) + +(define-vop (consp type-predicate) + (:translate consp) + (:generator 8 + (let ((is-not-cons-label (if not-p target drop-thru))) + (inst cmp value nil-value) + (inst jmp :e is-not-cons-label) + (test-type value target not-p (list-pointer-lowtag))) + DROP-THRU)) + +(define-vop (check-cons check-type) + (:generator 8 + (let ((error (generate-error-code vop object-not-cons-error value))) + (inst cmp value nil-value) + (inst jmp :e error) + (test-type value error t (list-pointer-lowtag)) + (move result value)))) diff --git a/src/compiler/x86-64/values.lisp b/src/compiler/x86-64/values.lisp new file mode 100644 index 0000000..e833d7b --- /dev/null +++ b/src/compiler/x86-64/values.lisp @@ -0,0 +1,122 @@ +;;;; unknown-values VOPs for the x86 VM + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(define-vop (reset-stack-pointer) + (:args (ptr :scs (any-reg))) + (:generator 1 + (move rsp-tn ptr))) + +;;; Push some values onto the stack, returning the start and number of values +;;; pushed as results. It is assumed that the Vals are wired to the standard +;;; argument locations. Nvals is the number of values to push. +;;; +;;; The generator cost is pseudo-random. We could get it right by defining a +;;; bogus SC that reflects the costs of the memory-to-memory moves for each +;;; operand, but this seems unworthwhile. +(define-vop (push-values) + (:args (vals :more t)) + (:temporary (:sc unsigned-reg :to (:result 0) :target start) temp) + (:results (start) (count)) + (:info nvals) + (:generator 20 + (move temp rsp-tn) ; WARN pointing 1 below + (do ((val vals (tn-ref-across val))) + ((null val)) + (inst push (tn-ref-tn val))) + (move start temp) + (inst mov count (fixnumize nvals)))) + +;;; Push a list of values on the stack, returning Start and Count as used in +;;; unknown values continuations. +(define-vop (values-list) + (:args (arg :scs (descriptor-reg) :target list)) + (:arg-types list) + (:policy :fast-safe) + (:results (start :scs (any-reg)) + (count :scs (any-reg))) + (:temporary (:sc descriptor-reg :from (:argument 0) :to (:result 1)) list) + (:temporary (:sc descriptor-reg :to (:result 1)) nil-temp) + (:temporary (:sc unsigned-reg :offset rax-offset :to (:result 1)) rax) + (:vop-var vop) + (:save-p :compute-only) + (:generator 0 + (move list arg) + (move start rsp-tn) ; WARN pointing 1 below + (inst mov nil-temp nil-value) + + LOOP + (inst cmp list nil-temp) + (inst jmp :e done) + (pushw list cons-car-slot list-pointer-lowtag) + (loadw list list cons-cdr-slot list-pointer-lowtag) + (inst mov rax list) + (inst and al-tn lowtag-mask) + (inst cmp al-tn list-pointer-lowtag) + (inst jmp :e loop) + (error-call vop bogus-arg-to-values-list-error list) + + DONE + (inst mov count start) ; start is high address + (inst sub count rsp-tn))) ; stackp is low address + +;;; Copy the more arg block to the top of the stack so we can use them +;;; as function arguments. +;;; +;;; Accepts a context as produced by more-arg-context; points to the first +;;; value on the stack, not 4 bytes above as in other contexts. +;;; +;;; Return a context that is 4 bytes above the first value, suitable for +;;; defining a new stack frame. +(define-vop (%more-arg-values) + (:args (context :scs (descriptor-reg any-reg) :target src) + (skip :scs (any-reg immediate)) + (num :scs (any-reg) :target count)) + (:arg-types * positive-fixnum positive-fixnum) + (:temporary (:sc any-reg :offset rsi-offset :from (:argument 0)) src) + (:temporary (:sc descriptor-reg :offset rax-offset) temp) + (:temporary (:sc unsigned-reg :offset rcx-offset) temp1) + (:results (start :scs (any-reg)) + (count :scs (any-reg))) + (:generator 20 + (sc-case skip + (immediate + (cond ((zerop (tn-value skip)) + (move src context) + (move count num)) + (t + (inst lea src (make-ea :dword :base context + :disp (- (* (tn-value skip) + n-word-bytes)))) + (move count num) + (inst sub count (* (tn-value skip) n-word-bytes))))) + + (any-reg + (move src context) + (inst sub src skip) + (move count num) + (inst sub count skip))) + + (move temp1 count) + (inst mov start rsp-tn) + (inst jecxz done) ; check for 0 count? + + (inst shr temp1 word-shift) ; convert the fixnum to a count. + + (inst std) ; move down the stack as more value are copied to the bottom. + LOOP + (inst lods temp) + (inst push temp) + (inst loop loop) + + DONE)) + diff --git a/src/compiler/x86-64/vm.lisp b/src/compiler/x86-64/vm.lisp new file mode 100644 index 0000000..25c736d --- /dev/null +++ b/src/compiler/x86-64/vm.lisp @@ -0,0 +1,466 @@ +;;;; miscellaneous VM definition noise for the x86-64 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; the size of an INTEGER representation of a SYSTEM-AREA-POINTER, i.e. +;;; size of a native memory address +(deftype sap-int () '(unsigned-byte 64)) + +;;;; register specs + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) + (defvar *qword-register-names* (make-array 32 :initial-element nil)) + (defvar *float-register-names* (make-array 8 :initial-element nil))) + +(macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) + (names-vector (symbolicate "*" size "-REGISTER-NAMES*"))) + `(progn + (eval-when (:compile-toplevel :load-toplevel :execute) + ;; EVAL-WHEN is necessary because stuff like #.EAX-OFFSET + ;; (in the same file) depends on compile-time evaluation + ;; of the DEFCONSTANT. -- AL 20010224 + (def!constant ,offset-sym ,offset)) + (setf (svref ,names-vector ,offset-sym) + ,(symbol-name name))))) + ;; FIXME: It looks to me as though DEFREGSET should also + ;; define the related *FOO-REGISTER-NAMES* variable. + (defregset (name &rest regs) + `(eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter ,name + (list ,@(mapcar (lambda (name) + (symbolicate name "-OFFSET")) + regs)))))) + + ;; byte registers + ;; + ;; Note: the encoding here is different than that used by the chip. + ;; We use this encoding so that the compiler thinks that AX (and + ;; EAX) overlap AL and AH instead of AL and CL. + (defreg al 0 :byte) + (defreg ah 1 :byte) + (defreg cl 2 :byte) + (defreg ch 3 :byte) + (defreg dl 4 :byte) + (defreg dh 5 :byte) + (defreg bl 6 :byte) + (defreg bh 7 :byte) + (defregset *byte-regs* al ah cl ch dl dh bl bh) + + ;; word registers + (defreg ax 0 :word) + (defreg cx 2 :word) + (defreg dx 4 :word) + (defreg bx 6 :word) + (defreg sp 8 :word) + (defreg bp 10 :word) + (defreg si 12 :word) + (defreg di 14 :word) + (defregset *word-regs* ax cx dx bx si di) + + ;; double word registers + (defreg eax 0 :dword) + (defreg ecx 2 :dword) + (defreg edx 4 :dword) + (defreg ebx 6 :dword) + (defreg esp 8 :dword) + (defreg ebp 10 :dword) + (defreg esi 12 :dword) + (defreg edi 14 :dword) + (defregset *dword-regs* eax ecx edx ebx esi edi) + + ;; quadword registers + (defreg rax 0 :qword) + (defreg rcx 2 :qword) + (defreg rdx 4 :qword) + (defreg rbx 6 :qword) + (defreg rsp 8 :qword) + (defreg rbp 10 :qword) + (defreg rsi 12 :qword) + (defreg rdi 14 :qword) + (defreg r8 16 :qword) + (defreg r9 18 :qword) + (defreg r10 20 :qword) + (defreg r11 22 :qword) + (defreg r12 24 :qword) + (defreg r13 26 :qword) + (defreg r14 28 :qword) + (defreg r15 30 :qword) + (defregset *qword-regs* rax rcx rdx rbx rsi rdi + r8 r9 r10 r11 #+nil r12 #+nil r13 r14 r15) + + ;; floating point registers + (defreg fr0 0 :float) + (defreg fr1 1 :float) + (defreg fr2 2 :float) + (defreg fr3 3 :float) + (defreg fr4 4 :float) + (defreg fr5 5 :float) + (defreg fr6 6 :float) + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers + (def!constant register-arg-count 3) + ;; names and offsets for registers used to pass arguments + (eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter *register-arg-names* '(rdx rdi rsi))) + (defregset *register-arg-offsets* rdx rdi rsi)) + +;;;; SB definitions + +;;; There are 16 registers really, but we consider them 32 in order to +;;; describe the overlap of byte registers. The only thing we need to +;;; represent is what registers overlap. Therefore, we consider bytes +;;; to take one unit, and [dq]?words to take two. We don't need to +;;; tell the difference between [dq]?words, because you can't put two +;;; words in a dword register. +(define-storage-base registers :finite :size 32) + +;;; I suspect we should do fp with SSE instead of the old x86 stuff, +;;; but for the time being - +(define-storage-base float-registers :finite :size 8) + +(define-storage-base stack :unbounded :size 8) +(define-storage-base constant :non-packed) +(define-storage-base immediate-constant :non-packed) +(define-storage-base noise :unbounded :size 2) + +;;;; SC definitions + +;;; a handy macro so we don't have to keep changing all the numbers whenever +;;; we insert a new storage class +;;; +(defmacro !define-storage-classes (&rest classes) + (collect ((forms)) + (let ((index 0)) + (dolist (class classes) + (let* ((sc-name (car class)) + (constant-name (symbolicate sc-name "-SC-NUMBER"))) + (forms `(define-storage-class ,sc-name ,index + ,@(cdr class))) + (forms `(def!constant ,constant-name ,index)) + (incf index)))) + `(progn + ,@(forms)))) + +;;; The DEFINE-STORAGE-CLASS call for CATCH-BLOCK refers to the size +;;; of CATCH-BLOCK. The size of CATCH-BLOCK isn't calculated until +;;; later in the build process, and the calculation is entangled with +;;; code which has lots of predependencies, including dependencies on +;;; the prior call of DEFINE-STORAGE-CLASS. The proper way to +;;; unscramble this would be to untangle the code, so that the code +;;; which calculates the size of CATCH-BLOCK can be separated from the +;;; other lots-of-dependencies code, so that the code which calculates +;;; the size of CATCH-BLOCK can be executed early, so that this value +;;; is known properly at this point in compilation. However, that +;;; would be a lot of editing of code that I (WHN 19990131) can't test +;;; until the project is complete. So instead, I set the correct value +;;; by hand here (a sort of nondeterministic guess of the right +;;; answer:-) and add an assertion later, after the value is +;;; calculated, that the original guess was correct. +;;; +;;; (What a KLUDGE! Anyone who wants to come in and clean up this mess +;;; has my gratitude.) (FIXME: Maybe this should be me..) +(eval-when (:compile-toplevel :load-toplevel :execute) + (def!constant kludge-nondeterministic-catch-block-size 6)) + +(!define-storage-classes + + ;; non-immediate constants in the constant pool + (constant constant) + + ;; some FP constants can be generated in the i387 silicon + (fp-constant immediate-constant) + + (immediate immediate-constant) + + ;; + ;; the stacks + ;; + + ;; the control stack + (control-stack stack) ; may be pointers, scanned by GC + + ;; the non-descriptor stacks + ;; XXX alpha backend has :element-size 2 :alignment 2 in these entries + (signed-stack stack) ; (signed-byte 32) + (unsigned-stack stack) ; (unsigned-byte 32) + (base-char-stack stack) ; non-descriptor characters. + (sap-stack stack) ; System area pointers. + (single-stack stack) ; single-floats + (double-stack stack :element-size 2) ; double-floats. + (complex-single-stack stack :element-size 2) ; complex-single-floats + (complex-double-stack stack :element-size 4) ; complex-double-floats + + + ;; + ;; magic SCs + ;; + + (ignore-me noise) + + ;; + ;; things that can go in the integer registers + ;; + + ;; On the X86, we don't have to distinguish between descriptor and + ;; non-descriptor registers, because of the conservative GC. + ;; Therefore, we use different scs only to distinguish between + ;; descriptor and non-descriptor values and to specify size. + + ;; immediate descriptor objects. Don't have to be seen by GC, but nothing + ;; bad will happen if they are. (fixnums, characters, header values, etc). + (any-reg registers + :locations #.*qword-regs* + :element-size 2 ; I think this is for the al/ah overlap thing + :constant-scs (immediate) + :save-p t + :alternate-scs (control-stack)) + + ;; pointer descriptor objects -- must be seen by GC + (descriptor-reg registers + :locations #.*qword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (constant immediate) + :save-p t + :alternate-scs (control-stack)) + + ;; non-descriptor characters + (base-char-reg registers + :locations #.*byte-regs* + :reserve-locations (#.ah-offset #.al-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (base-char-stack)) + + ;; non-descriptor SAPs (arbitrary pointers into address space) + (sap-reg registers + :locations #.*qword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (sap-stack)) + + ;; non-descriptor (signed or unsigned) numbers + (signed-reg registers + :locations #.*qword-regs* + :element-size 2 + :constant-scs (immediate) + :save-p t + :alternate-scs (signed-stack)) + (unsigned-reg registers + :locations #.*qword-regs* + :element-size 2 + :constant-scs (immediate) + :save-p t + :alternate-scs (unsigned-stack)) + + ;; miscellaneous objects that must not be seen by GC. Used only as + ;; temporaries. + (word-reg registers + :locations #.*word-regs* + :element-size 2 + ) + (dword-reg registers + :locations #.*dword-regs* + :element-size 2 + ) + (byte-reg registers + :locations #.*byte-regs* + ) + + ;; that can go in the floating point registers + + ;; non-descriptor SINGLE-FLOATs + (single-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (single-stack)) + + ;; non-descriptor DOUBLE-FLOATs + (double-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (double-stack)) + + (complex-single-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-single-stack)) + + (complex-double-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-double-stack)) + + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +(eval-when (:compile-toplevel :load-toplevel :execute) +(defparameter *byte-sc-names* '(base-char-reg byte-reg base-char-stack)) +(defparameter *word-sc-names* '(word-reg)) +(defparameter *dword-sc-names* '(dword-reg)) +(defparameter *qword-sc-names* + '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack + signed-stack unsigned-stack sap-stack single-stack constant)) +;;; added by jrd. I guess the right thing to do is to treat floats +;;; as a separate size... +;;; +;;; These are used to (at least) determine operand size. +(defparameter *float-sc-names* '(single-reg)) +(defparameter *double-sc-names* '(double-reg double-stack)) +) ; EVAL-WHEN + +;;;; miscellaneous TNs for the various registers + +(macrolet ((def-misc-reg-tns (sc-name &rest reg-names) + (collect ((forms)) + (dolist (reg-name reg-names) + (let ((tn-name (symbolicate reg-name "-TN")) + (offset-name (symbolicate reg-name "-OFFSET"))) + ;; FIXME: It'd be good to have the special + ;; variables here be named with the *FOO* + ;; convention. + (forms `(defparameter ,tn-name + (make-random-tn :kind :normal + :sc (sc-or-lose ',sc-name) + :offset + ,offset-name))))) + `(progn ,@(forms))))) + + (def-misc-reg-tns unsigned-reg rax rbx rcx rdx rbp rsp rdi rsi + r8 r9 r10 r11 r12 r13 r14 r15) + (def-misc-reg-tns dword-reg eax ebx ecx edx ebp esp edi esi) + (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) + (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) + (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) + +;;; TNs for registers used to pass arguments +(defparameter *register-arg-tns* + (mapcar (lambda (register-arg-name) + (symbol-value (symbolicate register-arg-name "-TN"))) + *register-arg-names*)) + +;;; FIXME: doesn't seem to be used in SBCL +#| +;;; added by pw +(defparameter fp-constant-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'fp-constant) + :offset 31)) ; Offset doesn't get used. +|# + +;;; If value can be represented as an immediate constant, then return +;;; the appropriate SC number, otherwise return NIL. +(!def-vm-support-routine immediate-constant-sc (value) + (typecase value + ((or (integer #.sb!xc:most-negative-fixnum #.sb!xc:most-positive-fixnum) + #-sb-xc-host system-area-pointer character) + (sc-number-or-lose 'immediate)) + (symbol + (when (static-symbol-p value) + (sc-number-or-lose 'immediate))) + (single-float + (when (or (eql value 0f0) (eql value 1f0)) + (sc-number-or-lose 'fp-constant))) + (double-float + (when (or (eql value 0d0) (eql value 1d0)) + (sc-number-or-lose 'fp-constant))) + #!+long-float + (long-float + (when (or (eql value 0l0) (eql value 1l0) + (eql value pi) + (eql value (log 10l0 2l0)) + (eql value (log 2.718281828459045235360287471352662L0 2l0)) + (eql value (log 2l0 10l0)) + (eql value (log 2l0 2.718281828459045235360287471352662L0))) + (sc-number-or-lose 'fp-constant))))) + +;;;; miscellaneous function call parameters + +;;; offsets of special stack frame locations +(def!constant ocfp-save-offset 0) +(def!constant return-pc-save-offset 1) +(def!constant code-save-offset 2) + +;;; FIXME: This is a bad comment (changed since when?) and there are others +;;; like it in this file. It'd be nice to clarify them. Failing that deleting +;;; them or flagging them with KLUDGE might be better than nothing. +;;; +;;; names of these things seem to have changed. these aliases by jrd +(def!constant lra-save-offset return-pc-save-offset) + +#+nil +(def!constant cfp-offset ebp-offset) ; pfw - needed by stuff in /code + ; related to signal context stuff + +;;; This is used by the debugger. +(def!constant single-value-return-byte-offset 2) + +;;; This function is called by debug output routines that want a pretty name +;;; for a TN's location. It returns a thing that can be printed with PRINC. +(!def-vm-support-routine location-print-name (tn) + (declare (type tn tn)) + (let* ((sc (tn-sc tn)) + (sb (sb-name (sc-sb sc))) + (offset (tn-offset tn))) + (ecase sb + (registers + (let* ((sc-name (sc-name sc)) + (name-vec (cond ((member sc-name *byte-sc-names*) + *byte-register-names*) + ((member sc-name *word-sc-names*) + *word-register-names*) + ((member sc-name *dword-sc-names*) + *dword-register-names*) + ((member sc-name *qword-sc-names*) + *qword-register-names*)))) + (or (and name-vec + (< -1 offset (length name-vec)) + (svref name-vec offset)) + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") + (noise (symbol-name (sc-name sc)))))) +;;; FIXME: Could this, and everything that uses it, be made #!+SB-SHOW? + + +;;; The loader uses this to convert alien names to the form they need in +;;; the symbol table (for example, prepending an underscore). +(defun extern-alien-name (name) + (declare (type simple-base-string name)) + ;; OpenBSD is non-ELF, and needs a _ prefix + #!+openbsd (concatenate 'string "_" name) + ;; The other (ELF) ports currently don't need any prefix + #!-openbsd name) + +(defun dwords-for-quad (value) + (let* ((lo (logand value (1- (ash 1 32)))) + (hi (ash (- value lo) -32))) + (values lo hi)))