(cond
((<= nvals 1)
(note-this-location vop :single-value-return)
- (inst mov rsp-tn rbx-tn))
+ (inst cmov :c rsp-tn rbx-tn))
((<= nvals register-arg-count)
(let ((regs-defaulted (gen-label)))
(note-this-location vop :unknown-return)
- (inst nop)
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Default the unsupplied registers.
(let* ((2nd-tn-ref (tn-ref-across values))
(2nd-tn (tn-ref-tn 2nd-tn-ref)))
(default-stack-slots (gen-label)))
(note-this-location vop :unknown-return)
;; Branch off to the MV case.
- (inst nop)
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Do the single value case.
;; Default the register args
(inst mov rax-tn nil-value)
(count-okay (gen-label)))
(note-this-location vop :unknown-return)
;; Branch off to the MV case.
- (inst nop)
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Default the register args, and set up the stack as if we
;; entered the MV return point.
;; Restore EDI, and reset the stack.
(emit-label restore-edi)
(loadw rdi-tn rbx-tn (- (1+ 1)))
- (inst mov rsp-tn rbx-tn))))
+ (inst mov rsp-tn rbx-tn)
+ (inst cld))))
(values))
\f
;;;; unknown values receiving
(declare (type tn args nargs start count))
(let ((variable-values (gen-label))
(done (gen-label)))
- (inst nop)
- (inst jmp-short variable-values)
+ (inst jmp :c variable-values)
(cond ((location= start (first *register-arg-tns*))
(inst push (first *register-arg-tns*))
(:info
,@(unless (or variable (eq return :tail)) '(arg-locs))
,@(unless variable '(nargs))
- ,@(when (eq return :fixed) '(nvals)))
+ ,@(when (eq return :fixed) '(nvals))
+ step-instrumenting)
(:ignore
,@(unless (or variable (eq return :tail)) '(arg-locs))
do (noise `(loadw ,name new-fp ,index)))
(noise))
'((if (zerop nargs)
- (inst xor rcx rcx)
+ (zeroize rcx)
(inst mov rcx (fixnumize nargs)))))
,@(cond ((eq return :tail)
'(;; Python has figured out what frame we should
(move rbp-tn new-fp) ; NB - now on new stack frame.
)))
+ (when step-instrumenting
+ (emit-single-step-test)
+ (inst jmp :eq DONE)
+ (inst break single-step-around-trap))
+ DONE
+
(note-this-location vop :call-site)
(inst ,(if (eq return :tail) 'jmp 'call)
(:args (old-fp)
(return-pc)
(value))
- (:temporary (:sc unsigned-reg) ofp)
- (:temporary (:sc unsigned-reg) ret)
(:ignore value)
(:generator 6
(trace-table-entry trace-table-fun-epilogue)
- (move ret return-pc)
- ;; Clear the control stack
- (move ofp old-fp)
- ;; Adjust the return address for the single value return.
- (inst add ret 3)
- ;; Restore the frame pointer.
- (move rsp-tn rbp-tn)
- (move rbp-tn ofp)
- ;; Out of here.
- (inst jmp ret)))
+ ;; Code structure lifted from known-return.
+ (sc-case return-pc
+ ((sap-reg)
+ ;; return PC in register for some reason (local call?)
+ ;; we jmp to the return pc after fixing the stack and frame.
+ (sc-case old-fp
+ ((control-stack)
+ ;; ofp on stack must be in slot 0 (the traditional storage place).
+ ;; Drop the stack above it and pop it off.
+ (cond ((zerop (tn-offset old-fp))
+ (inst lea rsp-tn (make-ea :dword :base rbp-tn
+ :disp (- (* (1+ ocfp-save-offset)
+ n-word-bytes))))
+ (inst pop rbp-tn))
+ (t
+ ;; Should this ever happen, we do the same as above, but
+ ;; using (tn-offset old-fp) instead of ocfp-save-offset
+ ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and
+ ;; then lea rsp again against itself with a displacement
+ ;; of (* (tn-offset old-fp) n-word-bytes) to clear the
+ ;; rest of the stack.
+ (cerror "Continue anyway"
+ "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp)))))
+ ((any-reg descriptor-reg)
+ ;; ofp in reg, drop the stack and load the real fp.
+ (move rsp-tn rbp-tn)
+ (move rbp-tn old-fp)))
+
+ ;; Set single-value-return flag
+ (inst clc)
+ ;; And return
+ (inst jmp return-pc))
+
+ ((sap-stack)
+ ;; Note that this will only work right if, when old-fp is on
+ ;; the stack, it has a lower tn-offset than return-pc. One of
+ ;; the comments in known-return indicate that this is the case
+ ;; (in that it will be in its save location), but we may wish
+ ;; to assert that (in either the weaker or stronger forms).
+ ;; Should this ever not be the case, we should load old-fp
+ ;; into a temp reg while we fix the stack.
+ ;; Drop stack above return-pc
+ (inst lea rsp-tn (make-ea :dword :base rbp-tn
+ :disp (- (* (1+ (tn-offset return-pc))
+ n-word-bytes))))
+ ;; Set single-value return flag
+ (inst clc)
+ ;; Restore the old frame pointer
+ (move rbp-tn old-fp)
+ ;; And return, dropping the rest of the stack as we go.
+ (inst ret (* (tn-offset return-pc) n-word-bytes))))))
;;; Do unknown-values return of a fixed (other than 1) number of
;;; values. The VALUES are required to be set up in the standard
;; Establish the values pointer and values count.
(move rbx rbp-tn)
(if (zerop nvals)
- (inst xor rcx rcx) ; smaller
+ (zeroize rcx) ; smaller
(inst mov rcx (fixnumize nvals)))
;; Restore the frame pointer.
(move rbp-tn old-fp)
(inst mov first nil-value)
(dolist (tn (cdr arg-tns))
(inst mov tn first))))
+ ;; Set the multiple value return flag.
+ (inst stc)
;; And away we go. Except that return-pc is still on the
;; stack and we've changed the stack pointer. So we have to
;; tell it to index off of RBX instead of RBP.
(move old-fp-temp old-fp)
(move rsp-tn rbp-tn)
(move rbp-tn old-fp-temp)
- ;; Fix the return-pc to point at the single-value entry point.
- (inst add rax 3) ; skip "mov %rbx,%rsp" insn in caller
+ ;; clear the multiple-value return flag
+ (inst clc)
;; Out of here.
(inst jmp rax)
;;; Copy a &MORE arg from the argument area to the end of the current
;;; frame. FIXED is the number of non-&MORE arguments.
-;;;
-;;; The tricky part is doing this without trashing any of the calling
-;;; convention registers that are still needed. This vop is emitted
-;;; directly after the xep-allocate frame. That means the registers
-;;; are in use as follows:
-;;;
-;;; RAX -- The lexenv.
-;;; RBX -- Available.
-;;; RCX -- The total number of arguments.
-;;; RDX -- The first arg.
-;;; RDI -- The second arg.
-;;; RSI -- The third arg.
-;;;
-;;; So basically, we have one register available for our use: RBX.
-;;;
-;;; What we can do is push the other regs onto the stack, and then
-;;; restore their values by looking directly below where we put the
-;;; more-args.
(define-vop (copy-more-arg)
+ (:temporary (:sc any-reg :offset r8-offset) copy-index)
+ (:temporary (:sc any-reg :offset r9-offset) source)
+ (:temporary (:sc descriptor-reg :offset r10-offset) temp)
(:info fixed)
(:generator 20
;; Avoid the copy if there are no more args.
(cond ((zerop fixed)
- (inst jecxz JUST-ALLOC-FRAME))
+ (inst jrcxz JUST-ALLOC-FRAME))
(t
(inst cmp rcx-tn (fixnumize fixed))
(inst jmp :be JUST-ALLOC-FRAME)))
;; Number to copy = nargs-fixed
(inst sub rcx-tn (fixnumize fixed))))
- ;; Save rdi and rsi register args.
- (inst push rdi-tn)
- (inst push rsi-tn)
- ;; Okay, we have pushed the register args. We can trash them
- ;; now.
-
- ;; Initialize dst to be end of stack; skiping the values pushed
- ;; above.
- (inst lea rdi-tn (make-ea :qword :base rsp-tn :disp 16))
+ ;; Initialize R8 to be the end of args.
+ (inst mov source rbp-tn)
+ (inst sub source rbx-tn)
- ;; Initialize src to be end of args.
- (inst mov rsi-tn rbp-tn)
- (inst sub rsi-tn rbx-tn)
+ ;; We need to copy from downwards up to avoid overwriting some of
+ ;; the yet uncopied args. So we need to use R9 as the copy index
+ ;; and RCX as the loop counter, rather than using RCX for both.
+ (zeroize copy-index)
- (inst shr rcx-tn word-shift) ; make word count
- ;; And copy the args.
- (inst cld) ; auto-inc RSI and RDI.
- (inst rep)
- (inst movs :qword)
-
- ;; So now we need to restore RDI and RSI.
- (inst pop rsi-tn)
- (inst pop rdi-tn)
+ ;; We used to use REP MOVS here, but on modern x86 it performs
+ ;; much worse than an explicit loop for small blocks.
+ COPY-LOOP
+ (inst mov temp (make-ea :qword :base source :index copy-index))
+ (inst mov (make-ea :qword :base rsp-tn :index copy-index) temp)
+ (inst add copy-index n-word-bytes)
+ (inst sub rcx-tn n-word-bytes)
+ (inst jmp :nz COPY-LOOP)
DO-REGS
;; Here: nargs>=1 && nargs>fixed
(when (< fixed register-arg-count)
- ;; Now we have to deposit any more args that showed up in
- ;; registers.
- (do ((i fixed))
- ( nil )
- ;; Store it relative to rbp
- (inst mov (make-ea :qword :base rbp-tn
- :disp (- (* n-word-bytes
- (+ 1 (- i fixed)
- (max 3 (sb-allocated-size 'stack))))))
- (nth i *register-arg-tns*))
-
- (incf i)
- (when (>= i register-arg-count)
- (return))
-
- ;; Don't deposit any more than there are.
- (if (zerop i)
- (inst test rcx-tn rcx-tn)
- (inst cmp rcx-tn (fixnumize i)))
- (inst jmp :eq DONE)))
+ ;; Now we have to deposit any more args that showed up in
+ ;; registers.
+ (do ((i fixed))
+ ( nil )
+ ;; Store it relative to rbp
+ (inst mov (make-ea :qword :base rbp-tn
+ :disp (- (* n-word-bytes
+ (+ 1 (- i fixed)
+ (max 3 (sb-allocated-size 'stack))))))
+ (nth i *register-arg-tns*))
+
+ (incf i)
+ (when (>= i register-arg-count)
+ (return))
+
+ ;; Don't deposit any more than there are.
+ (if (zerop i)
+ (inst test rcx-tn rcx-tn)
+ (inst cmp rcx-tn (fixnumize i)))
+ (inst jmp :eq DONE)))
(inst jmp DONE)
DONE))
-;;; &MORE args are stored contiguously on the stack, starting
-;;; immediately at the context pointer. The context pointer is not
-;;; typed, so the lowtag is 0.
-(define-vop (more-arg)
- (:translate %more-arg)
+(define-vop (more-kw-arg)
+ (:translate sb!c::%more-kw-arg)
(:policy :fast-safe)
- (:args (object :scs (descriptor-reg) :to :result)
- (index :scs (any-reg) :target temp))
+ (:args (object :scs (descriptor-reg) :to (:result 1))
+ (index :scs (any-reg) :to (:result 1) :target keyword))
(:arg-types * tagged-num)
- (:temporary (:sc unsigned-reg :from (:argument 1) :to :result) temp)
- (:results (value :scs (any-reg descriptor-reg)))
- (:result-types *)
- (:generator 5
- (move temp index)
- (inst neg temp)
- (inst mov value (make-ea :qword :base object :index temp))))
+ (:results (value :scs (descriptor-reg any-reg))
+ (keyword :scs (descriptor-reg any-reg)))
+ (:result-types * *)
+ (:generator 4
+ (inst mov value (make-ea :qword :base object :index index))
+ (inst mov keyword (make-ea :qword :base object :index index
+ :disp n-word-bytes))))
-(define-vop (more-arg-c)
- (:translate %more-arg)
+(define-vop (more-arg)
+ (:translate sb!c::%more-arg)
(:policy :fast-safe)
- (:args (object :scs (descriptor-reg)))
- (:info index)
- (:arg-types * (:constant (signed-byte 30)))
- (:results (value :scs (any-reg descriptor-reg)))
+ (:args (object :scs (descriptor-reg) :to (:result 1))
+ (index :scs (any-reg) :to (:result 1) :target value))
+ (:arg-types * tagged-num)
+ (:results (value :scs (descriptor-reg any-reg)))
(:result-types *)
(:generator 4
- (inst mov value
- (make-ea :qword :base object :disp (- (* index n-word-bytes))))))
+ (move value index)
+ (inst neg value)
+ (inst mov value (make-ea :qword :base object :index value))))
;;; Turn more arg (context, count) into a list.
(defoptimizer (%listify-rest-args stack-allocate-result) ((&rest args))
(move rcx count)
;; Check to see whether there are no args, and just return NIL if so.
(inst mov result nil-value)
- (inst jecxz done)
- (inst lea dst (make-ea :qword :index rcx :scale 2))
+ (inst jrcxz done)
+ (inst lea dst (make-ea :qword :base rcx :index rcx))
(maybe-pseudo-atomic stack-allocate-p
(allocation dst dst node stack-allocate-p)
(inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag))
- ;; Convert the count into a raw value, so that we can use the
- ;; LOOP instruction.
(inst shr rcx (1- n-lowtag-bits))
;; Set decrement mode (successive args at lower addresses)
(inst std)
(inst lods rax)
(storew rax dst 0 list-pointer-lowtag)
;; Go back for more.
- (inst loop loop)
+ (inst sub rcx 1)
+ (inst jmp :nz loop)
;; NIL out the last cons.
- (storew nil-value dst 1 list-pointer-lowtag))
+ (storew nil-value dst 1 list-pointer-lowtag)
+ (inst cld))
(emit-label done))))
;;; Return the location and size of the &MORE arg glob created by
(:save-p :compute-only)
(:generator 3
(let ((err-lab
- (generate-error-code vop invalid-arg-count-error nargs)))
+ (generate-error-code vop 'invalid-arg-count-error nargs)))
(if (zerop count)
(inst test nargs nargs) ; smaller instruction
(inst cmp nargs (fixnumize count)))
(:vop-var vop)
(:save-p :compute-only)
(:generator 1000
- (error-call vop ,error ,@args)))))
+ (error-call vop ',error ,@args)))))
(def arg-count-error invalid-arg-count-error
sb!c::%arg-count-error nargs)
(def type-check-error object-not-type-error sb!c::%type-check-error
(def unknown-key-arg-error unknown-key-arg-error
sb!c::%unknown-key-arg-error key)
(def nil-fun-returned-error nil-fun-returned-error nil fun))
+
+;;; Single-stepping
+
+(defun emit-single-step-test ()
+ ;; We use different ways of representing whether stepping is on on
+ ;; +SB-THREAD / -SB-THREAD: on +SB-THREAD, we use a slot in the
+ ;; thread structure. On -SB-THREAD we use the value of a static
+ ;; symbol. Things are done this way, since reading a thread-local
+ ;; slot from a symbol would require an extra register on +SB-THREAD,
+ ;; and reading a slot from a thread structure would require an extra
+ ;; register on -SB-THREAD. While this isn't critical for x86-64,
+ ;; it's more serious for x86.
+ #!+sb-thread
+ (inst cmp (make-ea :qword
+ :base thread-base-tn
+ :disp (* thread-stepping-slot n-word-bytes))
+ nil-value)
+ #!-sb-thread
+ (inst cmp (make-ea :qword
+ :disp (+ nil-value (static-symbol-offset
+ 'sb!impl::*stepping*)
+ (* symbol-value-slot n-word-bytes)
+ (- other-pointer-lowtag)))
+ nil-value))
+
+(define-vop (step-instrument-before-vop)
+ (:policy :fast-safe)
+ (:vop-var vop)
+ (:generator 3
+ (emit-single-step-test)
+ (inst jmp :eq DONE)
+ (inst break single-step-before-trap)
+ DONE
+ (note-this-location vop :step-before-vop)))