* optimization: calling functions via constant symbols -- as in
(FUNCALL 'FOO) -- is now roughly as efficient as calling them
via the function object as in (FUNCALL #'FOO).
+ * optimization: on x86 and x86-64, the calling convention for the
+ case of dealing with an unknown number of values has been altered
+ to be friendlier to the prediction heuristics implemented,
+ particularly with reference to CALL and RET pairing. (thanks to
+ Alastair Bridgewater)
* enhancement: CONSTANTP is now able to determine constantness of
more complex forms, including calls to constant-foldable standardized
functions and some special forms beyond QUOTE.
INLINE-FIXNUM-COMPARE
(inst cmp x y)
- (inst jmp ,test RETURN-TRUE)
(inst mov res nil-value)
- ;; FIXME: A note explaining this return convention, or a
- ;; symbolic name for it, would be nice. (It looks as though we
- ;; should be hand-crafting the same return sequence as would be
- ;; produced by GENERATE-RETURN-SEQUENCE, but in that case it's
- ;; not clear why we don't just jump to the end of this function
- ;; to share the return sequence there.
- (inst pop eax)
- (inst add eax 3)
- (inst jmp eax)
-
+ (inst jmp ,test RETURN-FALSE)
RETURN-TRUE
- (load-symbol res t))))
+ (load-symbol res t)
+ RETURN-FALSE
+ DONE)))
- (define-cond-assem-rtn generic-< < two-arg-< :l)
- (define-cond-assem-rtn generic-> > two-arg-> :g))
+ (define-cond-assem-rtn generic-< < two-arg-< :ge)
+ (define-cond-assem-rtn generic-> > two-arg-> :le))
(define-assembly-routine (generic-eql
(:cost 10)
RETURN-NIL
(inst mov res nil-value)
- (inst pop eax)
- (inst add eax 3)
- (inst jmp eax)
+ (inst jmp DONE)
DO-STATIC-FN
(inst pop eax)
RETURN-T
(load-symbol res t)
- ;; FIXME: I don't understand how we return from here..
- )
+ DONE)
(define-assembly-routine (generic-=
(:cost 10)
(inst jmp :e RETURN-T) ; ok
(inst mov res nil-value)
- (inst pop eax)
- (inst add eax 3)
- (inst jmp eax)
+ (inst jmp DONE)
DO-STATIC-FN
(inst pop eax)
:disp (+ nil-value (static-fun-offset 'two-arg-=))))
RETURN-T
- (load-symbol res t))
+ (load-symbol res t)
+ DONE)
(loadw esi ebx -3)
;; And back we go.
+ (inst stc)
(inst jmp eax)
;; Handle the register arg cases.
(inst mov edx nil-value)
(inst mov edi edx)
(inst mov esi edx)
+ (inst stc)
(inst jmp eax)
ONE-VALUE ; Note: we can get this, because the return-multiple vop
; doesn't check for this case when size > speed.
(loadw edx esi -1)
(inst mov rsp-tn ebx)
- (inst add eax 3)
+ (inst clc)
(inst jmp eax)
TWO-VALUES
(loadw edi esi -2)
(inst mov esi nil-value)
(inst lea rsp-tn (make-ea :qword :base ebx :disp (* -2 n-word-bytes)))
+ (inst stc)
(inst jmp eax)
THREE-VALUES
(loadw edi esi -2)
(loadw esi esi -3)
(inst lea rsp-tn (make-ea :qword :base ebx :disp (* -3 n-word-bytes)))
+ (inst stc)
(inst jmp eax))
\f
;;;; TAIL-CALL-VARIABLE
(make-ea :qword :disp (make-fixup ',name :assembly-routine)))
(inst call temp-reg-tn)
(note-this-location ,vop :single-value-return)
- (move rsp-tn rbx-tn))
+ (inst jmp :nc single-value)
+ (move rsp-tn rbx-tn)
+ single-value)
'((:save-p :compute-only))))
(:none
(values
(:raw
`(inst ret))
(:full-call
- `(
- (inst pop rax-tn)
-
- (inst add rax-tn 3)
- (inst jmp rax-tn)))
+ `((inst clc)
+ (inst ret)))
(:none)))
INLINE-FIXNUM-COMPARE
(inst cmp x y)
- (inst jmp ,test RETURN-TRUE)
(inst mov res nil-value)
- ;; FIXME: A note explaining this return convention, or a
- ;; symbolic name for it, would be nice. (It looks as though we
- ;; should be hand-crafting the same return sequence as would be
- ;; produced by GENERATE-RETURN-SEQUENCE, but in that case it's
- ;; not clear why we don't just jump to the end of this function
- ;; to share the return sequence there.
- (inst pop eax)
- (inst add eax 2)
- (inst jmp eax)
+ (inst jmp ,test RETURN-FALSE)
+
+ (load-symbol res t)
- RETURN-TRUE
- (load-symbol res t))))
+ RETURN-FALSE
+ DONE)))
- (define-cond-assem-rtn generic-< < two-arg-< :l)
- (define-cond-assem-rtn generic-> > two-arg-> :g))
+ (define-cond-assem-rtn generic-< < two-arg-< :ge)
+ (define-cond-assem-rtn generic-> > two-arg-> :le))
(define-assembly-routine (generic-eql
(:cost 10)
RETURN-NIL
(inst mov res nil-value)
- (inst pop eax)
- (inst add eax 2)
- (inst jmp eax)
+ (inst jmp DONE)
DO-STATIC-FN
(inst pop eax)
RETURN-T
(load-symbol res t)
- ;; FIXME: I don't understand how we return from here..
- )
+
+ DONE)
(define-assembly-routine (generic-=
(:cost 10)
(inst jmp :e RETURN-T) ; ok
(inst mov res nil-value)
- (inst pop eax)
- (inst add eax 2)
- (inst jmp eax)
+ (inst jmp DONE)
DO-STATIC-FN
(inst pop eax)
:disp (+ nil-value (static-fun-offset 'two-arg-=))))
RETURN-T
- (load-symbol res t))
+ (load-symbol res t)
+
+ DONE)
\f
;;; Support for the Mersenne Twister, MT19937, random number generator
(loadw esi ebx -3)
;; And back we go.
+ (inst stc)
(inst jmp eax)
;; Handle the register arg cases.
(inst mov edx nil-value)
(inst mov edi edx)
(inst mov esi edx)
+ (inst stc)
(inst jmp eax)
ONE-VALUE ; Note: we can get this, because the return-multiple vop
; doesn't check for this case when size > speed.
(loadw edx esi -1)
(inst mov esp-tn ebx)
- (inst add eax 2)
+ (inst clc)
(inst jmp eax)
TWO-VALUES
(loadw edi esi -2)
(inst mov esi nil-value)
(inst lea esp-tn (make-ea :dword :base ebx :disp (* -2 n-word-bytes)))
+ (inst stc)
(inst jmp eax)
THREE-VALUES
(loadw edi esi -2)
(loadw esi esi -3)
(inst lea esp-tn (make-ea :dword :base ebx :disp (* -3 n-word-bytes)))
+ (inst stc)
(inst jmp eax))
\f
;;;; TAIL-CALL-VARIABLE
(in-package "SB!VM")
+;;; The :full-call assembly-routines must use the same full-call
+;;; unknown-values return convention as a normal call, as some
+;;; of the routines will tail-chain to a static-function. The
+;;; routines themselves, however, take all of their arguments
+;;; in registers (this will typically be one or two arguments,
+;;; and is one of the lower bounds on the number of argument-
+;;; passing registers), and thus don't need a call frame, which
+;;; simplifies things for the normal call/return case. When it
+;;; is neccessary for one of the assembly-functions to call a
+;;; static-function it will construct the required call frame.
+;;; Also, none of the assembly-routines return other than one
+;;; value, which again simplifies the return path.
+;;; -- AB, 2006/Feb/05.
+
(!def-vm-support-routine generate-call-sequence (name style vop)
(ecase style
((:raw :none)
`((note-this-location ,vop :call-site)
(inst call (make-fixup ',name :assembly-routine))
(note-this-location ,vop :single-value-return)
- (move esp-tn ebx-tn))
+ (inst jmp :nc single-value)
+ (move esp-tn ebx-tn)
+ single-value)
'((:save-p :compute-only))))))
(!def-vm-support-routine generate-return-sequence (style)
(:raw
`(inst ret))
(:full-call
- `(
- (inst pop eax-tn)
-
- (inst add eax-tn 2)
- (inst jmp eax-tn)))
+ `((inst clc)
+ (inst ret)))
(:none)))
(macrolet ((define-fasl-format-features ()
(let (;; master value for *F-P-A-F-F*
- (fpaff '(:sb-thread :sb-package-locks :sb-unicode)))
+ (fpaff '(:sb-thread :sb-package-locks :sb-unicode :gencgc)))
`(progn
;; a list of *(SHEBANG-)FEATURES* flags which affect
;; binary compatibility, i.e. which must be the same
;;; versions which break binary compatibility. But it certainly should
;;; be incremented for release versions which break binary
;;; compatibility.
-(def!constant +fasl-file-version+ 63)
+(def!constant +fasl-file-version+ 64)
;;; (record of versions before 2003 deleted in 2003-04-26/0.pre8.107 or so)
;;; 38: (2003-01-05) changed names of internal SORT machinery
;;; 39: (2003-02-20) in 0.7.12.1 a slot was added to
;;; a 32-bit value also on 64-bit platforms.
;;; 63: (2006-01-27) Shuffle storage classes around to reduce the error
;;; trap information size on RISCy platforms.
+;;; 64: (2006-03-24) New calling convention for unknown-values on x86 and
+;;; x86-64. Also (belatedly) PPC/gencgc, including :gencgc on FPAFF.
;;; the conventional file extension for our fasl files
(declaim (type simple-string *fasl-file-type*))
(cond
((<= nvals 1)
(note-this-location vop :single-value-return)
- (inst mov rsp-tn rbx-tn))
+ (let ((single-value (gen-label)))
+ (inst jmp :nc single-value)
+ (inst mov rsp-tn rbx-tn)
+ (emit-label single-value)))
((<= nvals register-arg-count)
(let ((regs-defaulted (gen-label)))
(note-this-location vop :unknown-return)
- (inst nop)
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Default the unsupplied registers.
(let* ((2nd-tn-ref (tn-ref-across values))
(2nd-tn (tn-ref-tn 2nd-tn-ref)))
(default-stack-slots (gen-label)))
(note-this-location vop :unknown-return)
;; Branch off to the MV case.
- (inst nop)
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Do the single value case.
;; Default the register args
(inst mov rax-tn nil-value)
(declare (type tn args nargs start count))
(let ((variable-values (gen-label))
(done (gen-label)))
- (inst nop)
- (inst jmp-short variable-values)
+ (inst jmp :c variable-values)
(cond ((location= start (first *register-arg-tns*))
(inst push (first *register-arg-tns*))
(:args (old-fp)
(return-pc)
(value))
- (:temporary (:sc unsigned-reg) ofp)
- (:temporary (:sc unsigned-reg) ret)
(:ignore value)
(:generator 6
(trace-table-entry trace-table-fun-epilogue)
- (move ret return-pc)
- ;; Clear the control stack
- (move ofp old-fp)
- ;; Adjust the return address for the single value return.
- (inst add ret 3)
- ;; Restore the frame pointer.
- (move rsp-tn rbp-tn)
- (move rbp-tn ofp)
- ;; Out of here.
- (inst jmp ret)))
+ ;; Code structure lifted from known-return.
+ (sc-case return-pc
+ ((sap-reg)
+ ;; return PC in register for some reason (local call?)
+ ;; we jmp to the return pc after fixing the stack and frame.
+ (sc-case old-fp
+ ((control-stack)
+ ;; ofp on stack must be in slot 0 (the traditional storage place).
+ ;; Drop the stack above it and pop it off.
+ (cond ((zerop (tn-offset old-fp))
+ (inst lea rsp-tn (make-ea :dword :base rbp-tn
+ :disp (- (* (1+ ocfp-save-offset)
+ n-word-bytes))))
+ (inst pop rbp-tn))
+ (t
+ ;; Should this ever happen, we do the same as above, but
+ ;; using (tn-offset old-fp) instead of ocfp-save-offset
+ ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and
+ ;; then lea rsp again against itself with a displacement
+ ;; of (* (tn-offset old-fp) n-word-bytes) to clear the
+ ;; rest of the stack.
+ (cerror "Continue anyway"
+ "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp)))))
+ ((any-reg descriptor-reg)
+ ;; ofp in reg, drop the stack and load the real fp.
+ (move rsp-tn rbp-tn)
+ (move rbp-tn old-fp)))
+
+ ;; Set single-value-return flag
+ (inst clc)
+ ;; And return
+ (inst jmp return-pc))
+
+ ((sap-stack)
+ ;; Note that this will only work right if, when old-fp is on
+ ;; the stack, it has a lower tn-offset than return-pc. One of
+ ;; the comments in known-return indicate that this is the case
+ ;; (in that it will be in its save location), but we may wish
+ ;; to assert that (in either the weaker or stronger forms).
+ ;; Should this ever not be the case, we should load old-fp
+ ;; into a temp reg while we fix the stack.
+ ;; Drop stack above return-pc
+ (inst lea rsp-tn (make-ea :dword :base rbp-tn
+ :disp (- (* (1+ (tn-offset return-pc))
+ n-word-bytes))))
+ ;; Set single-value return flag
+ (inst clc)
+ ;; Restore the old frame pointer
+ (move rbp-tn old-fp)
+ ;; And return, dropping the rest of the stack as we go.
+ (inst ret (* (tn-offset return-pc) n-word-bytes))))))
;;; Do unknown-values return of a fixed (other than 1) number of
;;; values. The VALUES are required to be set up in the standard
(inst mov first nil-value)
(dolist (tn (cdr arg-tns))
(inst mov tn first))))
+ ;; Set the multiple value return flag.
+ (inst stc)
;; And away we go. Except that return-pc is still on the
;; stack and we've changed the stack pointer. So we have to
;; tell it to index off of RBX instead of RBP.
(move old-fp-temp old-fp)
(move rsp-tn rbp-tn)
(move rbp-tn old-fp-temp)
- ;; Fix the return-pc to point at the single-value entry point.
- (inst add rax 3) ; skip "mov %rbx,%rsp" insn in caller
+ ;; clear the multiple-value return flag
+ (inst clc)
;; Out of here.
(inst jmp rax)
(cond
((<= nvals 1)
(note-this-location vop :single-value-return)
- (inst mov esp-tn ebx-tn))
+ (let ((single-value (gen-label)))
+ (inst jmp :nc single-value)
+ (inst mov esp-tn ebx-tn)
+ (emit-label single-value)))
((<= nvals register-arg-count)
(let ((regs-defaulted (gen-label)))
(note-this-location vop :unknown-return)
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Default the unsuppled registers.
(let* ((2nd-tn-ref (tn-ref-across values))
(2nd-tn (tn-ref-tn 2nd-tn-ref)))
(default-stack-slots (gen-label)))
(note-this-location vop :unknown-return)
;; Branch off to the MV case.
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Do the single value case.
;; Default the register args
(inst mov eax-tn nil-value)
(count-okay (gen-label)))
(note-this-location vop :unknown-return)
;; Branch off to the MV case.
- (inst jmp-short regs-defaulted)
+ (inst jmp :c regs-defaulted)
;; Default the register args, and set up the stack as if we
;; entered the MV return point.
(declare (type tn args nargs start count))
(let ((variable-values (gen-label))
(done (gen-label)))
- (inst jmp-short variable-values)
+ (inst jmp :c variable-values)
(cond ((location= start (first *register-arg-tns*))
(inst push (first *register-arg-tns*))
;;;
;;; pfw--get wired-tn conflicts sometimes if register sc specd for args
;;; having problems targeting args to regs -- using temps instead.
+;;;
+;;; First off, modifying the return-pc defeats the branch-prediction
+;;; optimizations on modern CPUs quite handily. Second, we can do all
+;;; this without needing a temp register. Fixed the latter, at least.
+;;; -- AB 2006/Feb/04
(define-vop (return-single)
(:args (old-fp)
(return-pc)
(value))
- (:temporary (:sc unsigned-reg) ofp)
- (:temporary (:sc unsigned-reg) ret)
(:ignore value)
(:generator 6
(trace-table-entry trace-table-fun-epilogue)
- (move ret return-pc)
- ;; Clear the control stack
- (move ofp old-fp)
- ;; Adjust the return address for the single value return.
- (inst add ret 2)
- ;; Restore the frame pointer.
- (move esp-tn ebp-tn)
- (move ebp-tn ofp)
- ;; Out of here.
- (inst jmp ret)))
+ ;; Code structure lifted from known-return.
+ (sc-case return-pc
+ ((sap-reg)
+ ;; return PC in register for some reason (local call?)
+ ;; we jmp to the return pc after fixing the stack and frame.
+ (sc-case old-fp
+ ((control-stack)
+ ;; ofp on stack must be in slot 0 (the traditional storage place).
+ ;; Drop the stack above it and pop it off.
+ (cond ((zerop (tn-offset old-fp))
+ (inst lea esp-tn (make-ea :dword :base ebp-tn
+ :disp (- (* (1+ ocfp-save-offset)
+ n-word-bytes))))
+ (inst pop ebp-tn))
+ (t
+ ;; Should this ever happen, we do the same as above, but
+ ;; using (tn-offset old-fp) instead of ocfp-save-offset
+ ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and
+ ;; then lea esp again against itself with a displacement
+ ;; of (* (tn-offset old-fp) n-word-bytes) to clear the
+ ;; rest of the stack.
+ (cerror "Continue anyway"
+ "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp)))))
+ ((any-reg descriptor-reg)
+ ;; ofp in reg, drop the stack and load the real fp.
+ (move esp-tn ebp-tn)
+ (move ebp-tn old-fp)))
+
+ ;; Set single-value-return flag
+ (inst clc)
+ ;; And return
+ (inst jmp return-pc))
+
+ ((sap-stack)
+ ;; Note that this will only work right if, when old-fp is on
+ ;; the stack, it has a lower tn-offset than return-pc. One of
+ ;; the comments in known-return indicate that this is the case
+ ;; (in that it will be in its save location), but we may wish
+ ;; to assert that (in either the weaker or stronger forms).
+ ;; Should this ever not be the case, we should load old-fp
+ ;; into a temp reg while we fix the stack.
+ ;; Drop stack above return-pc
+ (inst lea esp-tn (make-ea :dword :base ebp-tn
+ :disp (- (* (1+ (tn-offset return-pc))
+ n-word-bytes))))
+ ;; Set single-value return flag
+ (inst clc)
+ ;; Restore the old frame pointer
+ (move ebp-tn old-fp)
+ ;; And return, dropping the rest of the stack as we go.
+ (inst ret (* (tn-offset return-pc) n-word-bytes))))))
;;; Do unknown-values return of a fixed (other than 1) number of
;;; values. The VALUES are required to be set up in the standard
(inst mov first nil-value)
(dolist (tn (cdr arg-tns))
(inst mov tn first))))
+ ;; Set multi-value return flag.
+ (inst stc)
;; And away we go. Except that return-pc is still on the
;; stack and we've changed the stack pointer. So we have to
;; tell it to index off of EBX instead of EBP.
(move old-fp-temp old-fp)
(move esp-tn ebp-tn)
(move ebp-tn old-fp-temp)
- ;; Fix the return-pc to point at the single-value entry point.
- (inst add eax 2)
+ ;; Set the single-value return flag.
+ (inst clc)
;; Out of here.
(inst jmp eax)
/* If the function returned multiple values, it will return to
this point. Lose them */
+ jnc LsingleValue
mov %rbx, %rsp
- /* A singled value function returns here */
+LsingleValue:
/* Restore the stack, in case there was a stack change. */
pop %rsp # c-sp
.align align_8byte
GNAME(fun_end_breakpoint_guts):
/* Multiple Value return */
- jmp multiple_value_return
- /* the above jmp is only 2 bytes long, we need to add a nop for
- * padding since the single value return convention jumps to original
- * return address + 3 bytes */
- nop
+ jc multiple_value_return
/* Single value return: The eventual return will now use the
multiple values return convention but with a return values
count of one. */
/* If the function returned multiple values, it will return to
this point. Lose them */
+ jnc LsingleValue
mov %ebx, %esp
+LsingleValue:
/* A singled value function returns here */
/* Restore the stack, in case there was a stack change. */
.align align_4byte
GNAME(fun_end_breakpoint_guts):
/* Multiple Value return */
- jmp multiple_value_return
+ jc multiple_value_return
/* Single value return: The eventual return will now use the
multiple values return convention but with a return values
count of one. */
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"0.9.10.10"
+"0.9.10.11"