X-Git-Url: http://repo.macrolet.net/gitweb/?a=blobdiff_plain;f=src%2Fcompiler%2Fx86%2Fcall.lisp;h=1b7d900a3e9154444c5e53dc1e27bb78afa17a12;hb=a6b91f356da1b5ae2987f79db9bd137970512959;hp=10feb3458f86c9661095b9b075cb4a4de1efbf81;hpb=0d871fd7a98fc4af92a8b942a1154761466ad8c9;p=sbcl.git diff --git a/src/compiler/x86/call.lisp b/src/compiler/x86/call.lisp index 10feb34..1b7d900 100644 --- a/src/compiler/x86/call.lisp +++ b/src/compiler/x86/call.lisp @@ -133,7 +133,7 @@ ;; The start of the actual code. ;; Save the return-pc. - (popw ebp-tn (- (1+ return-pc-save-offset))) + (popw ebp-tn (frame-word-offset return-pc-save-offset)) ;; If copy-more-arg follows it will allocate the correct stack ;; size. The stack is not allocated first here as this may expose @@ -200,11 +200,18 @@ (cond ((<= nvals 1) (note-this-location vop :single-value-return) - (inst mov esp-tn ebx-tn)) + (let ((single-value (gen-label))) + (cond + ((member :cmov *backend-subfeatures*) + (inst cmov :c esp-tn ebx-tn)) + (t + (inst jmp :nc single-value) + (inst mov esp-tn ebx-tn) + (emit-label single-value))))) ((<= nvals register-arg-count) (let ((regs-defaulted (gen-label))) (note-this-location vop :unknown-return) - (inst jmp-short regs-defaulted) + (inst jmp :c regs-defaulted) ;; Default the unsuppled registers. (let* ((2nd-tn-ref (tn-ref-across values)) (2nd-tn (tn-ref-tn 2nd-tn-ref))) @@ -228,7 +235,7 @@ (default-stack-slots (gen-label))) (note-this-location vop :unknown-return) ;; Branch off to the MV case. - (inst jmp-short regs-defaulted) + (inst jmp :c regs-defaulted) ;; Do the single value case. ;; Default the register args (inst mov eax-tn nil-value) @@ -260,7 +267,7 @@ (inst cmp ecx-tn (fixnumize i)) (inst jmp :be default-lab) - (loadw edx-tn ebx-tn (- (1+ i))) + (loadw edx-tn ebx-tn (frame-word-offset i)) (inst mov tn edx-tn))) (emit-label defaulting-done) @@ -286,7 +293,7 @@ (count-okay (gen-label))) (note-this-location vop :unknown-return) ;; Branch off to the MV case. - (inst jmp-short regs-defaulted) + (inst jmp :c regs-defaulted) ;; Default the register args, and set up the stack as if we ;; entered the MV return point. @@ -299,7 +306,7 @@ (emit-label no-stack-args) (inst lea edi-tn (make-ea :dword :base ebp-tn - :disp (* (- (1+ register-arg-count)) n-word-bytes))) + :disp (frame-byte-offset register-arg-count))) ;; Load EAX with NIL so we can quickly store it, and set up ;; stuff for the loop. (inst mov eax-tn nil-value) @@ -312,7 +319,7 @@ ;; and then default the remaining stack arguments. (emit-label regs-defaulted) ;; Save EDI. - (storew edi-tn ebx-tn (- (1+ 1))) + (storew edi-tn ebx-tn (frame-word-offset 1)) ;; Compute the number of stack arguments, and if it's zero or ;; less, don't copy any stack arguments. (inst sub ecx-tn (fixnumize register-arg-count)) @@ -328,19 +335,19 @@ ;; Compute a pointer to where the stack args go. (inst lea edi-tn (make-ea :dword :base ebp-tn - :disp (* (- (1+ register-arg-count)) n-word-bytes))) + :disp (frame-byte-offset register-arg-count))) ;; Save ESI, and compute a pointer to where the args come from. - (storew esi-tn ebx-tn (- (1+ 2))) + (storew esi-tn ebx-tn (frame-word-offset 2)) (inst lea esi-tn (make-ea :dword :base ebx-tn - :disp (* (- (1+ register-arg-count)) n-word-bytes))) + :disp (frame-byte-offset register-arg-count))) ;; Do the copy. (inst shr ecx-tn word-shift) ; make word count (inst std) (inst rep) (inst movs :dword) ;; Restore ESI. - (loadw esi-tn ebx-tn (- (1+ 2))) + (loadw esi-tn ebx-tn (frame-word-offset 2)) ;; Now we have to default the remaining args. Find out how many. (inst sub eax-tn (fixnumize (- nvals register-arg-count))) (inst neg eax-tn) @@ -356,8 +363,9 @@ (inst stos eax-tn) ;; Restore EDI, and reset the stack. (emit-label restore-edi) - (loadw edi-tn ebx-tn (- (1+ 1))) - (inst mov esp-tn ebx-tn)))) + (loadw edi-tn ebx-tn (frame-word-offset 1)) + (inst mov esp-tn ebx-tn) + (inst cld)))) (values)) ;;;; unknown values receiving @@ -383,7 +391,7 @@ (declare (type tn args nargs start count)) (let ((variable-values (gen-label)) (done (gen-label))) - (inst jmp-short variable-values) + (inst jmp :c variable-values) (cond ((location= start (first *register-arg-tns*)) (inst push (first *register-arg-tns*)) @@ -466,7 +474,7 @@ #+nil (format t "*call-local: ret-tn on stack; offset=~S~%" (tn-offset ret-tn)) (storew (make-fixup nil :code-object return) - ebp-tn (- (1+ (tn-offset ret-tn))))) + ebp-tn (frame-word-offset (tn-offset ret-tn)))) ((sap-reg) (inst lea ret-tn (make-fixup nil :code-object return))))) @@ -505,7 +513,7 @@ (tn-offset ret-tn)) ;; Stack (storew (make-fixup nil :code-object return) - ebp-tn (- (1+ (tn-offset ret-tn))))) + ebp-tn (frame-word-offset (tn-offset ret-tn)))) ((sap-reg) ;; Register (inst lea ret-tn (make-fixup nil :code-object return))))) @@ -553,7 +561,7 @@ (tn-offset ret-tn)) ;; Stack (storew (make-fixup nil :code-object return) - ebp-tn (- (1+ (tn-offset ret-tn))))) + ebp-tn (frame-word-offset (tn-offset ret-tn)))) ((sap-reg) ;; Register (inst lea ret-tn (make-fixup nil :code-object return))))) @@ -638,8 +646,7 @@ (cond ((zerop (tn-offset old-fp)) ;; Zot all of the stack except for the old-fp. (inst lea esp-tn (make-ea :dword :base ebp-tn - :disp (- (* (1+ ocfp-save-offset) - n-word-bytes)))) + :disp (frame-byte-offset ocfp-save-offset))) ;; Restore the old fp from its save location on the stack, ;; and zot the stack. (inst pop ebp-tn)) @@ -667,7 +674,7 @@ ;; Zot all of the stack except for the old-fp and return-pc. (inst lea esp-tn (make-ea :dword :base ebp-tn - :disp (- (* (1+ (tn-offset return-pc)) n-word-bytes)))) + :disp (frame-byte-offset (tn-offset return-pc)))) ;; Restore the old fp. old-fp may be either on the stack in its ;; save location or in a register, in either case this restores it. (move ebp-tn old-fp) @@ -746,7 +753,8 @@ (:info ,@(unless (or variable (eq return :tail)) '(arg-locs)) ,@(unless variable '(nargs)) - ,@(when (eq return :fixed) '(nvals))) + ,@(when (eq return :fixed) '(nvals)) + step-instrumenting) (:ignore ,@(unless (or variable (eq return :tail)) '(arg-locs)) @@ -844,12 +852,12 @@ (move old-fp-tmp old-fp) (storew old-fp-tmp ebp-tn - (- (1+ ocfp-save-offset))))) + (frame-word-offset ocfp-save-offset)))) ((any-reg descriptor-reg) (format t "** tail-call old-fp in reg not S0~%") (storew old-fp ebp-tn - (- (1+ ocfp-save-offset))))) + (frame-word-offset ocfp-save-offset)))) ;; For tail call, we have to push the ;; return-pc so that it looks like we CALLed @@ -875,21 +883,25 @@ '(inst sub esp-tn (fixnumize 3))) ;; Save the fp - (storew ebp-tn new-fp (- (1+ ocfp-save-offset))) + (storew ebp-tn new-fp (frame-word-offset ocfp-save-offset)) (move ebp-tn new-fp) ; NB - now on new stack frame. ))) + (when step-instrumenting + (emit-single-step-test) + (inst jmp :eq DONE) + (inst break single-step-around-trap)) + DONE + (note-this-location vop :call-site) (inst ,(if (eq return :tail) 'jmp 'call) - (make-ea :dword :base eax - :disp ,(if named - '(- (* fdefn-raw-addr-slot - n-word-bytes) - other-pointer-lowtag) - '(- (* closure-fun-slot n-word-bytes) - fun-pointer-lowtag)))) + ,(if named + '(make-ea-for-object-slot eax fdefn-raw-addr-slot + other-pointer-lowtag) + '(make-ea-for-object-slot eax closure-fun-slot + fun-pointer-lowtag))) ,@(ecase return (:fixed '((default-unknown-values vop values nvals))) @@ -949,25 +961,67 @@ ;;; ;;; pfw--get wired-tn conflicts sometimes if register sc specd for args ;;; having problems targeting args to regs -- using temps instead. +;;; +;;; First off, modifying the return-pc defeats the branch-prediction +;;; optimizations on modern CPUs quite handily. Second, we can do all +;;; this without needing a temp register. Fixed the latter, at least. +;;; -- AB 2006/Feb/04 (define-vop (return-single) (:args (old-fp) (return-pc) (value)) - (:temporary (:sc unsigned-reg) ofp) - (:temporary (:sc unsigned-reg) ret) (:ignore value) (:generator 6 (trace-table-entry trace-table-fun-epilogue) - (move ret return-pc) - ;; Clear the control stack - (move ofp old-fp) - ;; Adjust the return address for the single value return. - (inst add ret 2) - ;; Restore the frame pointer. - (move esp-tn ebp-tn) - (move ebp-tn ofp) - ;; Out of here. - (inst jmp ret))) + ;; Code structure lifted from known-return. + (sc-case return-pc + ((sap-reg) + ;; return PC in register for some reason (local call?) + ;; we jmp to the return pc after fixing the stack and frame. + (sc-case old-fp + ((control-stack) + ;; ofp on stack must be in slot 0 (the traditional storage place). + ;; Drop the stack above it and pop it off. + (cond ((zerop (tn-offset old-fp)) + (inst lea esp-tn (make-ea :dword :base ebp-tn + :disp (frame-byte-offset ocfp-save-offset))) + (inst pop ebp-tn)) + (t + ;; Should this ever happen, we do the same as above, but + ;; using (tn-offset old-fp) instead of ocfp-save-offset + ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and + ;; then lea esp again against itself with a displacement + ;; of (* (tn-offset old-fp) n-word-bytes) to clear the + ;; rest of the stack. + (cerror "Continue anyway" + "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp))))) + ((any-reg descriptor-reg) + ;; ofp in reg, drop the stack and load the real fp. + (move esp-tn ebp-tn) + (move ebp-tn old-fp))) + + ;; Set single-value-return flag + (inst clc) + ;; And return + (inst jmp return-pc)) + + ((sap-stack) + ;; Note that this will only work right if, when old-fp is on + ;; the stack, it has a lower tn-offset than return-pc. One of + ;; the comments in known-return indicate that this is the case + ;; (in that it will be in its save location), but we may wish + ;; to assert that (in either the weaker or stronger forms). + ;; Should this ever not be the case, we should load old-fp + ;; into a temp reg while we fix the stack. + ;; Drop stack above return-pc + (inst lea esp-tn (make-ea :dword :base ebp-tn + :disp (frame-byte-offset (tn-offset return-pc)))) + ;; Set single-value return flag + (inst clc) + ;; Restore the old frame pointer + (move ebp-tn old-fp) + ;; And return, dropping the rest of the stack as we go. + (inst ret (* (tn-offset return-pc) n-word-bytes)))))) ;;; Do unknown-values return of a fixed (other than 1) number of ;;; values. The VALUES are required to be set up in the standard @@ -1018,6 +1072,8 @@ (inst mov first nil-value) (dolist (tn (cdr arg-tns)) (inst mov tn first)))) + ;; Set multi-value return flag. + (inst stc) ;; And away we go. Except that return-pc is still on the ;; stack and we've changed the stack pointer. So we have to ;; tell it to index off of EBX instead of EBP. @@ -1030,8 +1086,7 @@ (inst ret)) (t (inst jmp (make-ea :dword :base ebx - :disp (- (* (1+ (tn-offset return-pc)) - n-word-bytes)))))) + :disp (frame-byte-offset (tn-offset return-pc)))))) (trace-table-entry trace-table-normal))) @@ -1078,8 +1133,8 @@ (move old-fp-temp old-fp) (move esp-tn ebp-tn) (move ebp-tn old-fp-temp) - ;; Fix the return-pc to point at the single-value entry point. - (inst add eax 2) + ;; Set the single-value return flag. + (inst clc) ;; Out of here. (inst jmp eax) @@ -1170,24 +1225,33 @@ ;; Save edi and esi register args. (inst push edi-tn) (inst push esi-tn) + (inst push ebx-tn) ;; Okay, we have pushed the register args. We can trash them ;; now. - ;; Initialize dst to be end of stack; skiping the values pushed - ;; above. - (inst lea edi-tn (make-ea :dword :base esp-tn :disp 8)) - ;; Initialize src to be end of args. (inst mov esi-tn ebp-tn) (inst sub esi-tn ebx-tn) - (inst shr ecx-tn word-shift) ; make word count - ;; And copy the args. - (inst cld) ; auto-inc ESI and EDI. - (inst rep) - (inst movs :dword) + ;; We need to copy from downwards up to avoid overwriting some of + ;; the yet uncopied args. So we need to use EBX as the copy index + ;; and ECX as the loop counter, rather than using ECX for both. + (inst xor ebx-tn ebx-tn) + + ;; We used to use REP MOVS here, but on modern x86 it performs + ;; much worse than an explicit loop for small blocks. + COPY-LOOP + (inst mov edi-tn (make-ea :dword :base esi-tn :index ebx-tn)) + ;; The :DISP is to account for the registers saved on the stack + (inst mov (make-ea :dword :base esp-tn :disp (* 3 n-word-bytes) + :index ebx-tn) + edi-tn) + (inst add ebx-tn n-word-bytes) + (inst sub ecx-tn n-word-bytes) + (inst jmp :nz COPY-LOOP) ;; So now we need to restore EDI and ESI. + (inst pop ebx-tn) (inst pop esi-tn) (inst pop edi-tn) @@ -1229,35 +1293,38 @@ DONE)) -;;; &MORE args are stored contiguously on the stack, starting -;;; immediately at the context pointer. The context pointer is not -;;; typed, so the lowtag is 0. -(define-vop (more-arg) - (:translate %more-arg) +(define-vop (more-kw-arg) + (:translate sb!c::%more-kw-arg) (:policy :fast-safe) - (:args (object :scs (descriptor-reg) :to :result) - (index :scs (any-reg) :target temp)) + (:args (object :scs (descriptor-reg) :to (:result 1)) + (index :scs (any-reg immediate) :to (:result 1) :target keyword)) (:arg-types * tagged-num) - (:temporary (:sc unsigned-reg :from (:argument 1) :to :result) temp) - (:results (value :scs (any-reg descriptor-reg))) - (:result-types *) - (:generator 5 - (move temp index) - (inst neg temp) - (inst mov value (make-ea :dword :base object :index temp)))) + (:results (value :scs (descriptor-reg any-reg)) + (keyword :scs (descriptor-reg any-reg))) + (:result-types * *) + (:generator 4 + (sc-case index + (immediate + (inst mov value (make-ea :dword :base object :disp (tn-value index))) + (inst mov keyword (make-ea :dword :base object + :disp (+ (tn-value index) n-word-bytes)))) + (t + (inst mov value (make-ea :dword :base object :index index)) + (inst mov keyword (make-ea :dword :base object :index index + :disp n-word-bytes)))))) -(define-vop (more-arg-c) - (:translate %more-arg) +(define-vop (more-arg) + (:translate sb!c::%more-arg) (:policy :fast-safe) - (:args (object :scs (descriptor-reg))) - (:info index) - (:arg-types * (:constant (signed-byte 30))) - (:results (value :scs (any-reg descriptor-reg))) + (:args (object :scs (descriptor-reg) :to (:result 1)) + (index :scs (any-reg) :to (:result 1) :target value)) + (:arg-types * tagged-num) + (:results (value :scs (descriptor-reg any-reg))) (:result-types *) (:generator 4 - (inst mov value - (make-ea :dword :base object :disp (- (* index n-word-bytes)))))) - + (move value index) + (inst neg value) + (inst mov value (make-ea :dword :base object :index value)))) ;;; Turn more arg (context, count) into a list. (defoptimizer (%listify-rest-args stack-allocate-result) ((&rest args)) @@ -1285,12 +1352,10 @@ ;; Check to see whether there are no args, and just return NIL if so. (inst mov result nil-value) (inst jecxz done) - (inst lea dst (make-ea :dword :index ecx :scale 2)) + (inst lea dst (make-ea :dword :base ecx :index ecx)) (maybe-pseudo-atomic stack-allocate-p (allocation dst dst node stack-allocate-p) (inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag)) - ;; Convert the count into a raw value, so that we can use the - ;; LOOP instruction. (inst shr ecx 2) ;; Set decrement mode (successive args at lower addresses) (inst std) @@ -1309,9 +1374,11 @@ (inst lods eax) (storew eax dst 0 list-pointer-lowtag) ;; Go back for more. - (inst loop loop) + (inst sub ecx 1) + (inst jmp :nz loop) ;; NIL out the last cons. - (storew nil-value dst 1 list-pointer-lowtag)) + (storew nil-value dst 1 list-pointer-lowtag) + (inst cld)) (emit-label done)))) ;;; Return the location and size of the &MORE arg glob created by @@ -1355,7 +1422,7 @@ (:save-p :compute-only) (:generator 3 (let ((err-lab - (generate-error-code vop invalid-arg-count-error nargs))) + (generate-error-code vop 'invalid-arg-count-error nargs))) (if (zerop count) (inst test nargs nargs) ; smaller instruction (inst cmp nargs (fixnumize count))) @@ -1373,7 +1440,7 @@ (:vop-var vop) (:save-p :compute-only) (:generator 1000 - (error-call vop ,error ,@args))))) + (error-call vop ',error ,@args))))) (def arg-count-error invalid-arg-count-error sb!c::%arg-count-error nargs) (def type-check-error object-not-type-error sb!c::%type-check-error @@ -1385,3 +1452,33 @@ (def unknown-key-arg-error unknown-key-arg-error sb!c::%unknown-key-arg-error key) (def nil-fun-returned-error nil-fun-returned-error nil fun)) + +;;; Single-stepping + +(defun emit-single-step-test () + ;; We use different ways of representing whether stepping is on on + ;; +SB-THREAD / -SB-THREAD: on +SB-THREAD, we use a slot in the + ;; thread structure. On -SB-THREAD we use the value of a static + ;; symbol. Things are done this way, since reading a thread-local + ;; slot from a symbol would require an extra register on +SB-THREAD, + ;; and reading a slot from a thread structure would require an extra + ;; register on -SB-THREAD. + #!+sb-thread + (progn + (inst fs-segment-prefix) + (inst cmp (make-ea :dword + :disp (* thread-stepping-slot n-word-bytes)) + nil-value)) + #!-sb-thread + (inst cmp (make-ea-for-symbol-value sb!impl::*stepping*) + nil-value)) + +(define-vop (step-instrument-before-vop) + (:policy :fast-safe) + (:vop-var vop) + (:generator 3 + (emit-single-step-test) + (inst jmp :eq DONE) + (inst break single-step-before-trap) + DONE + (note-this-location vop :step-before-vop)))