X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86-64%2Fcall.lisp;h=da925b36c1ad66fc2dd45218fd45467c80e31482;hb=670d28c10c178142146f6916c5fa0967732f3a8f;hp=384155b2ae695eb74a6bcf3a67caea34ff24409a;hpb=0d871fd7a98fc4af92a8b942a1154761466ad8c9;p=sbcl.git diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp index 384155b..da925b3 100644 --- a/src/compiler/x86-64/call.lisp +++ b/src/compiler/x86-64/call.lisp @@ -200,12 +200,11 @@ (cond ((<= nvals 1) (note-this-location vop :single-value-return) - (inst mov rsp-tn rbx-tn)) + (inst cmov :c rsp-tn rbx-tn)) ((<= nvals register-arg-count) (let ((regs-defaulted (gen-label))) (note-this-location vop :unknown-return) - (inst nop) - (inst jmp-short regs-defaulted) + (inst jmp :c regs-defaulted) ;; Default the unsupplied registers. (let* ((2nd-tn-ref (tn-ref-across values)) (2nd-tn (tn-ref-tn 2nd-tn-ref))) @@ -229,8 +228,7 @@ (default-stack-slots (gen-label))) (note-this-location vop :unknown-return) ;; Branch off to the MV case. - (inst nop) - (inst jmp-short regs-defaulted) + (inst jmp :c regs-defaulted) ;; Do the single value case. ;; Default the register args (inst mov rax-tn nil-value) @@ -385,8 +383,7 @@ (declare (type tn args nargs start count)) (let ((variable-values (gen-label)) (done (gen-label))) - (inst nop) - (inst jmp-short variable-values) + (inst jmp :c variable-values) (cond ((location= start (first *register-arg-tns*)) (inst push (first *register-arg-tns*)) @@ -728,7 +725,8 @@ (:info ,@(unless (or variable (eq return :tail)) '(arg-locs)) ,@(unless variable '(nargs)) - ,@(when (eq return :fixed) '(nvals))) + ,@(when (eq return :fixed) '(nvals)) + step-instrumenting) (:ignore ,@(unless (or variable (eq return :tail)) '(arg-locs)) @@ -862,6 +860,12 @@ (move rbp-tn new-fp) ; NB - now on new stack frame. ))) + (when step-instrumenting + (emit-single-step-test) + (inst jmp :eq DONE) + (inst break single-step-around-trap)) + DONE + (note-this-location vop :call-site) (inst ,(if (eq return :tail) 'jmp 'call) @@ -939,21 +943,60 @@ (:args (old-fp) (return-pc) (value)) - (:temporary (:sc unsigned-reg) ofp) - (:temporary (:sc unsigned-reg) ret) (:ignore value) (:generator 6 (trace-table-entry trace-table-fun-epilogue) - (move ret return-pc) - ;; Clear the control stack - (move ofp old-fp) - ;; Adjust the return address for the single value return. - (inst add ret 3) - ;; Restore the frame pointer. - (move rsp-tn rbp-tn) - (move rbp-tn ofp) - ;; Out of here. - (inst jmp ret))) + ;; Code structure lifted from known-return. + (sc-case return-pc + ((sap-reg) + ;; return PC in register for some reason (local call?) + ;; we jmp to the return pc after fixing the stack and frame. + (sc-case old-fp + ((control-stack) + ;; ofp on stack must be in slot 0 (the traditional storage place). + ;; Drop the stack above it and pop it off. + (cond ((zerop (tn-offset old-fp)) + (inst lea rsp-tn (make-ea :dword :base rbp-tn + :disp (- (* (1+ ocfp-save-offset) + n-word-bytes)))) + (inst pop rbp-tn)) + (t + ;; Should this ever happen, we do the same as above, but + ;; using (tn-offset old-fp) instead of ocfp-save-offset + ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and + ;; then lea rsp again against itself with a displacement + ;; of (* (tn-offset old-fp) n-word-bytes) to clear the + ;; rest of the stack. + (cerror "Continue anyway" + "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp))))) + ((any-reg descriptor-reg) + ;; ofp in reg, drop the stack and load the real fp. + (move rsp-tn rbp-tn) + (move rbp-tn old-fp))) + + ;; Set single-value-return flag + (inst clc) + ;; And return + (inst jmp return-pc)) + + ((sap-stack) + ;; Note that this will only work right if, when old-fp is on + ;; the stack, it has a lower tn-offset than return-pc. One of + ;; the comments in known-return indicate that this is the case + ;; (in that it will be in its save location), but we may wish + ;; to assert that (in either the weaker or stronger forms). + ;; Should this ever not be the case, we should load old-fp + ;; into a temp reg while we fix the stack. + ;; Drop stack above return-pc + (inst lea rsp-tn (make-ea :dword :base rbp-tn + :disp (- (* (1+ (tn-offset return-pc)) + n-word-bytes)))) + ;; Set single-value return flag + (inst clc) + ;; Restore the old frame pointer + (move rbp-tn old-fp) + ;; And return, dropping the rest of the stack as we go. + (inst ret (* (tn-offset return-pc) n-word-bytes)))))) ;;; Do unknown-values return of a fixed (other than 1) number of ;;; values. The VALUES are required to be set up in the standard @@ -1004,6 +1047,8 @@ (inst mov first nil-value) (dolist (tn (cdr arg-tns)) (inst mov tn first)))) + ;; Set the multiple value return flag. + (inst stc) ;; And away we go. Except that return-pc is still on the ;; stack and we've changed the stack pointer. So we have to ;; tell it to index off of RBX instead of RBP. @@ -1065,8 +1110,8 @@ (move old-fp-temp old-fp) (move rsp-tn rbp-tn) (move rbp-tn old-fp-temp) - ;; Fix the return-pc to point at the single-value entry point. - (inst add rax 3) ; skip "mov %rbx,%rsp" insn in caller + ;; clear the multiple-value return flag + (inst clc) ;; Out of here. (inst jmp rax) @@ -1103,25 +1148,10 @@ ;;; Copy a &MORE arg from the argument area to the end of the current ;;; frame. FIXED is the number of non-&MORE arguments. -;;; -;;; The tricky part is doing this without trashing any of the calling -;;; convention registers that are still needed. This vop is emitted -;;; directly after the xep-allocate frame. That means the registers -;;; are in use as follows: -;;; -;;; RAX -- The lexenv. -;;; RBX -- Available. -;;; RCX -- The total number of arguments. -;;; RDX -- The first arg. -;;; RDI -- The second arg. -;;; RSI -- The third arg. -;;; -;;; So basically, we have one register available for our use: RBX. -;;; -;;; What we can do is push the other regs onto the stack, and then -;;; restore their values by looking directly below where we put the -;;; more-args. (define-vop (copy-more-arg) + (:temporary (:sc any-reg :offset r8-offset) copy-index) + (:temporary (:sc any-reg :offset r9-offset) source) + (:temporary (:sc descriptor-reg :offset r10-offset) temp) (:info fixed) (:generator 20 ;; Avoid the copy if there are no more args. @@ -1157,29 +1187,23 @@ ;; Number to copy = nargs-fixed (inst sub rcx-tn (fixnumize fixed)))) - ;; Save rdi and rsi register args. - (inst push rdi-tn) - (inst push rsi-tn) - ;; Okay, we have pushed the register args. We can trash them - ;; now. - - ;; Initialize dst to be end of stack; skiping the values pushed - ;; above. - (inst lea rdi-tn (make-ea :qword :base rsp-tn :disp 16)) - - ;; Initialize src to be end of args. - (inst mov rsi-tn rbp-tn) - (inst sub rsi-tn rbx-tn) + ;; Initialize R8 to be the end of args. + (inst mov source rbp-tn) + (inst sub source rbx-tn) - (inst shr rcx-tn word-shift) ; make word count - ;; And copy the args. - (inst cld) ; auto-inc RSI and RDI. - (inst rep) - (inst movs :qword) + ;; We need to copy from downwards up to avoid overwriting some of + ;; the yet uncopied args. So we need to use R9 as the copy index + ;; and RCX as the loop counter, rather than using RCX for both. + (inst xor copy-index copy-index) - ;; So now we need to restore RDI and RSI. - (inst pop rsi-tn) - (inst pop rdi-tn) + ;; We used to use REP MOVS here, but on modern x86 it performs + ;; much worse than an explicit loop for small blocks. + COPY-LOOP + (inst mov temp (make-ea :qword :base source :index copy-index)) + (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp) + (inst add copy-index n-word-bytes) + (inst sub rcx-tn n-word-bytes) + (inst jmp :nz COPY-LOOP) DO-REGS @@ -1188,26 +1212,26 @@ ;; Here: nargs>=1 && nargs>fixed (when (< fixed register-arg-count) - ;; Now we have to deposit any more args that showed up in - ;; registers. - (do ((i fixed)) - ( nil ) - ;; Store it relative to rbp - (inst mov (make-ea :qword :base rbp-tn - :disp (- (* n-word-bytes - (+ 1 (- i fixed) - (max 3 (sb-allocated-size 'stack)))))) - (nth i *register-arg-tns*)) - - (incf i) - (when (>= i register-arg-count) - (return)) - - ;; Don't deposit any more than there are. - (if (zerop i) - (inst test rcx-tn rcx-tn) - (inst cmp rcx-tn (fixnumize i))) - (inst jmp :eq DONE))) + ;; Now we have to deposit any more args that showed up in + ;; registers. + (do ((i fixed)) + ( nil ) + ;; Store it relative to rbp + (inst mov (make-ea :qword :base rbp-tn + :disp (- (* n-word-bytes + (+ 1 (- i fixed) + (max 3 (sb-allocated-size 'stack)))))) + (nth i *register-arg-tns*)) + + (incf i) + (when (>= i register-arg-count) + (return)) + + ;; Don't deposit any more than there are. + (if (zerop i) + (inst test rcx-tn rcx-tn) + (inst cmp rcx-tn (fixnumize i))) + (inst jmp :eq DONE))) (inst jmp DONE) @@ -1219,34 +1243,32 @@ DONE)) -;;; &MORE args are stored contiguously on the stack, starting -;;; immediately at the context pointer. The context pointer is not -;;; typed, so the lowtag is 0. -(define-vop (more-arg) - (:translate %more-arg) +(define-vop (more-kw-arg) + (:translate sb!c::%more-kw-arg) (:policy :fast-safe) - (:args (object :scs (descriptor-reg) :to :result) - (index :scs (any-reg) :target temp)) + (:args (object :scs (descriptor-reg) :to (:result 1)) + (index :scs (any-reg) :to (:result 1) :target keyword)) (:arg-types * tagged-num) - (:temporary (:sc unsigned-reg :from (:argument 1) :to :result) temp) - (:results (value :scs (any-reg descriptor-reg))) - (:result-types *) - (:generator 5 - (move temp index) - (inst neg temp) - (inst mov value (make-ea :qword :base object :index temp)))) + (:results (value :scs (descriptor-reg any-reg)) + (keyword :scs (descriptor-reg any-reg))) + (:result-types * *) + (:generator 4 + (inst mov value (make-ea :qword :base object :index index)) + (inst mov keyword (make-ea :qword :base object :index index + :disp n-word-bytes)))) -(define-vop (more-arg-c) - (:translate %more-arg) +(define-vop (more-arg) + (:translate sb!c::%more-arg) (:policy :fast-safe) - (:args (object :scs (descriptor-reg))) - (:info index) - (:arg-types * (:constant (signed-byte 30))) - (:results (value :scs (any-reg descriptor-reg))) + (:args (object :scs (descriptor-reg) :to (:result 1)) + (index :scs (any-reg) :to (:result 1) :target value)) + (:arg-types * tagged-num) + (:results (value :scs (descriptor-reg any-reg))) (:result-types *) (:generator 4 - (inst mov value - (make-ea :qword :base object :disp (- (* index n-word-bytes)))))) + (move value index) + (inst neg value) + (inst mov value (make-ea :qword :base object :index value)))) ;;; Turn more arg (context, count) into a list. (defoptimizer (%listify-rest-args stack-allocate-result) ((&rest args)) @@ -1278,8 +1300,6 @@ (maybe-pseudo-atomic stack-allocate-p (allocation dst dst node stack-allocate-p) (inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag)) - ;; Convert the count into a raw value, so that we can use the - ;; LOOP instruction. (inst shr rcx (1- n-lowtag-bits)) ;; Set decrement mode (successive args at lower addresses) (inst std) @@ -1298,7 +1318,8 @@ (inst lods rax) (storew rax dst 0 list-pointer-lowtag) ;; Go back for more. - (inst loop loop) + (inst sub rcx 1) + (inst jmp :nz loop) ;; NIL out the last cons. (storew nil-value dst 1 list-pointer-lowtag)) (emit-label done)))) @@ -1374,3 +1395,37 @@ (def unknown-key-arg-error unknown-key-arg-error sb!c::%unknown-key-arg-error key) (def nil-fun-returned-error nil-fun-returned-error nil fun)) + +;;; Single-stepping + +(defun emit-single-step-test () + ;; We use different ways of representing whether stepping is on on + ;; +SB-THREAD / -SB-THREAD: on +SB-THREAD, we use a slot in the + ;; thread structure. On -SB-THREAD we use the value of a static + ;; symbol. Things are done this way, since reading a thread-local + ;; slot from a symbol would require an extra register on +SB-THREAD, + ;; and reading a slot from a thread structure would require an extra + ;; register on -SB-THREAD. While this isn't critical for x86-64, + ;; it's more serious for x86. + #!+sb-thread + (inst cmp (make-ea :qword + :base thread-base-tn + :disp (* thread-stepping-slot n-word-bytes)) + nil-value) + #!-sb-thread + (inst cmp (make-ea :qword + :disp (+ nil-value (static-symbol-offset + 'sb!impl::*stepping*) + (* symbol-value-slot n-word-bytes) + (- other-pointer-lowtag))) + nil-value)) + +(define-vop (step-instrument-before-vop) + (:policy :fast-safe) + (:vop-var vop) + (:generator 3 + (emit-single-step-test) + (inst jmp :eq DONE) + (inst break single-step-before-trap) + DONE + (note-this-location vop :step-before-vop)))