From 2973941cf214a5ea274cd0381a651ce0e1a7fab2 Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Tue, 21 Apr 2009 10:24:15 +0000 Subject: [PATCH] 1.0.27.11: swap ocfp and return-pc slots in x86oid call frames Forward port of Alastair Bridgewater's patch. Also, port it to x86-64. Bring x86 and x86-64 sources closer in the process. Plus cleanups, indentation, remove dead code, comments, more checks. --- doc/internals/calling-convention.texinfo | 213 +++++++++++++++--------------- src/assembly/x86-64/arith.lisp | 28 ++-- src/assembly/x86-64/assem-rtns.lisp | 24 ++-- src/assembly/x86/arith.lisp | 29 ++-- src/assembly/x86/assem-rtns.lisp | 38 +++--- src/compiler/x86-64/call.lisp | 194 +++++++++++---------------- src/compiler/x86-64/insts.lisp | 2 +- src/compiler/x86-64/nlx.lisp | 2 +- src/compiler/x86-64/static-fn.lisp | 10 +- src/compiler/x86-64/vm.lisp | 12 +- src/compiler/x86/call.lisp | 200 +++++++++++----------------- src/compiler/x86/insts.lisp | 4 +- src/compiler/x86/nlx.lisp | 2 +- src/compiler/x86/static-fn.lisp | 10 +- src/compiler/x86/vm.lisp | 4 +- src/runtime/backtrace.c | 4 +- src/runtime/x86-64-assem.S | 3 +- src/runtime/x86-assem.S | 5 +- version.lisp-expr | 2 +- 19 files changed, 360 insertions(+), 426 deletions(-) diff --git a/doc/internals/calling-convention.texinfo b/doc/internals/calling-convention.texinfo index 3e403f0..bf5b2f3 100644 --- a/doc/internals/calling-convention.texinfo +++ b/doc/internals/calling-convention.texinfo @@ -11,31 +11,27 @@ * Additional Notes:: @end menu -The calling convention used within Lisp code on SBCL/x86 was, -for the longest time, really bad. If it weren't for the fact -that it predates modern x86 CPUs, one might almost believe it to -have been designed explicitly to defeat the branch-prediction -hardware therein. This chapter is somewhat of a brain-dump of -information that might be useful when attempting to improve the -situation further, mostly written immediately after having made -a dent in the problem. - -Assumptions about the calling convention are embedded throughout -the system. The runtime knows how to call in to Lisp and receive -a value from Lisp, the assembly-routines have intimate knowledge -of what registers are involved in a call situation, +The calling convention used within Lisp code on SBCL/x86 was, for the +longest time, really bad. If it weren't for the fact that it predates +modern x86 CPUs, one might almost believe it to have been designed +explicitly to defeat the branch-prediction hardware therein. This +chapter is somewhat of a brain-dump of information that might be +useful when attempting to improve the situation further, mostly +written immediately after having made a dent in the problem. + +Assumptions about the calling convention are embedded throughout the +system. The runtime knows how to call in to Lisp and receive a value +from Lisp, the assembly-routines have intimate knowledge of what +registers are involved in a call situation, @file{src/compiler/target/call.lisp} contains the VOPs involved in -implementing function call/return, and @file{src/compiler/ir2tran.lisp} has -assumptions about frame allocation and argument/return-value -passing locations. - -The current round of changes has been limited to VOPs, assembly-routines, -related support functions, and the required support in the runtime. +implementing function call/return, and +@file{src/compiler/ir2tran.lisp} has assumptions about frame +allocation and argument/return-value passing locations. Note that most of this documentation also applies to other CPUs, -modulo the actual registers involved, the displacement used -in the single-value return convention, and the fact that they -use the ``old'' convention anywhere it is mentioned. +modulo the actual registers involved, the displacement used in the +single-value return convention, and the fact that they use the ``old'' +convention anywhere it is mentioned. @node Assembly Routines @@ -58,13 +54,13 @@ use the ``old'' convention anywhere it is mentioned. ;;; -- AB, 2006/Feb/05. @end example -There are a couple of assembly-routines that implement parts of -the process of returning or tail-calling with a variable number -of values. These are @code{return-multiple} and @code{tail-call-variable} in +There are a couple of assembly-routines that implement parts of the +process of returning or tail-calling with a variable number of values. +These are @code{return-multiple} and @code{tail-call-variable} in @file{src/assembly/x86/assem-rtns.lisp}. They have their own calling -convention for invocation from a VOP, but implement various -block-move operations on the stack contents followed by a return -or tail-call operation. +convention for invocation from a VOP, but implement various block-move +operations on the stack contents followed by a return or tail-call +operation. That's about all I have to say about the assembly-routines. @@ -73,22 +69,22 @@ That's about all I have to say about the assembly-routines. @comment node-name, next, previous, up @section Local Calls -Calls within a block, whatever a block is, can use a local -calling convention in which the compiler knows where all of the -values are to be stored, and thus can elide the check for number -of return values, stack-pointer restoration, etc. Alternately, -they can use the full unknown-values return convention while -trying to short-circuit the call convention. There is probably -some low-hanging fruit here in terms of CPU branch-prediction. +Calls within a block, whatever a block is, can use a local calling +convention in which the compiler knows where all of the values are to +be stored, and thus can elide the check for number of return values, +stack-pointer restoration, etc. Alternately, they can use the full +unknown-values return convention while trying to short-circuit the +call convention. There is probably some low-hanging fruit here in +terms of CPU branch-prediction. -The local (known-values) calling convention is implemented by -the @code{known-call-local} and @code{known-return} VOPs. +The local (known-values) calling convention is implemented by the +@code{known-call-local} and @code{known-return} VOPs. Local unknown-values calls are handled at the call site by the -@code{call-local} and @code{mutiple-call-local} VOPs. The main difference -between the full call and local call protocols here is that -local calls use a different frame setup protocol, and will tend -to not use the normal frame layout for the old frame-pointer and +@code{call-local} and @code{mutiple-call-local} VOPs. The main +difference between the full call and local call protocols here is that +local calls use a different frame setup protocol, and will tend to not +use the normal frame layout for the old frame-pointer and return-address. @@ -108,96 +104,97 @@ return-address. @end example Basically, we use caller-allocated frames, pass an fdefinition, -function, or closure in @code{EAX}, -argcount in @code{ECX}, and first three args in @code{EDX}, @code{EDI}, -and @code{ESI}. @code{EBP} points to just past the start of the frame -(the first frame slot is at @code{[EBP-4]}, not the traditional @code{[EBP]}, -due in part to how the frame allocation works). The caller stores the -link for the old frame at @code{[EBP-4]} and reserved space for a -return address at @code{[EBP-8]}. @code{[EBP-12]} appears to be an -empty slot available to the compiler within a function, it -may-or-may-not be used by some of the call/return junk. The first stack -argument is at @code{[EBP-16]}. The callee then reallocates the +function, or closure in @code{EAX}, argcount in @code{ECX}, and first +three args in @code{EDX}, @code{EDI}, and @code{ESI}. @code{EBP} +points to just past the start of the frame (the first frame slot is at +@code{[EBP-4]}, not the traditional @code{[EBP]}, due in part to how +the frame allocation works). The caller stores the link for the old +frame at @code{[EBP-4]} and reserved space for a return address at +@code{[EBP-8]}. @code{[EBP-12]} appears to be an empty slot that +conveniently makes just enough space for the first three multiple +return values (returned in the argument passing registers) to be +written over the beginning of the frame by the receiver. The first +stack argument is at @code{[EBP-16]}. The callee then reallocates the frame to include sufficient space for its local variables, after possibly converting any @code{&rest} arguments to a proper list. +The above scheme was changed in 1.0.27 on x86 and x86-64 by swapping +the old frame pointer and the return address. @node Unknown-Values Returns @comment node-name, next, previous, up @section Unknown-Values Returns -The unknown-values return convention consists of two parts. The -first part is that of returning a single value. The second is -that of returning a different number of values. We also changed -the convention here recently, so we should describe both the old -and new versions. The three interesting VOPs here are @code{return-single}, -@code{return}, and @code{return-multiple}. +The unknown-values return convention consists of two parts. The first +part is that of returning a single value. The second is that of +returning a different number of values. We also changed the convention +in 0.9.10, so we should describe both the old and new versions. The +three interesting VOPs here are @code{return-single}, @code{return}, +and @code{return-multiple}. For a single-value return, we load the return value in the first -argument-passing register (@code{A0}, or @code{EDI}), reload the old frame -pointer, burn the stack frame, and return. The old convention -was to increment the return address by two before returning, -typically via a @code{JMP}, which was guaranteed to screw up branch- -prediction hardware. The new convention is to return with the -carry flag clear. - -For a multiple-value return, we pass the first three values in -the argument-passing registers, and the remainder on the stack. -@code{ECX} contains the total number of values as a fixnum, @code{EBX} points -to where the callee frame was, @code{EBP} has been restored to point to -the caller frame, and the first of the values on the stack (the -fourth overall) is at @code{[EBP-16]}. The old convention was just to -jump to the return address at this point. The newer one has us -setting the carry flag first. - -The code at the call site for accepting some number of unknown- -values is fairly well boilerplated. If we are expecting zero or -one values, then we need to reset the stack pointer if we are in -a multiple-value return. In the old convention we just encoded a -@code{MOV ESP, EBX} instruction, which neatly fit in the two byte gap -that was skipped by a single-value return. In the new convention -we have to explicitly check the carry flag with a conditional -jump around the @code{MOV ESP, EBX} instruction. When expecting more -than one value, we need to arrange to set up default values when -a single-value return happens, so we encode a jump around a -stub of code which fakes up the register use convention of a -multiple-value return. Again, in the old convention this was a -two-byte unconditionl jump, and in the new convention this is -a conditional jump based on the carry flag. +argument-passing register (@code{A0}, or @code{EDI}), reload the old +frame pointer, burn the stack frame, and return. The old convention +was to increment the return address by two before returning, typically +via a @code{JMP}, which was guaranteed to screw up branch- prediction +hardware. The new convention is to return with the carry flag clear. + +For a multiple-value return, we pass the first three values in the +argument-passing registers, and the remainder on the stack. @code{ECX} +contains the total number of values as a fixnum, @code{EBX} points to +where the callee frame was, @code{EBP} has been restored to point to +the caller frame, and the first of the values on the stack (the fourth +overall) is at @code{[EBP-16]}. The old convention was just to jump to +the return address at this point. The newer one has us setting the +carry flag first. + +The code at the call site for accepting some number of unknown- values +is fairly well boilerplated. If we are expecting zero or one values, +then we need to reset the stack pointer if we are in a multiple-value +return. In the old convention we just encoded a @code{MOV ESP, EBX} +instruction, which neatly fit in the two byte gap that was skipped by +a single-value return. In the new convention we have to explicitly +check the carry flag with a conditional jump around the @code{MOV ESP, +EBX} instruction. When expecting more than one value, we need to +arrange to set up default values when a single-value return happens, +so we encode a jump around a stub of code which fakes up the register +use convention of a multiple-value return. Again, in the old +convention this was a two-byte unconditionl jump, and in the new +convention this is a conditional jump based on the carry flag. @node IR2 Conversion @comment node-name, next, previous, up @section IR2 Conversion -The actual selection of VOPs for implementing call/return for a -given function is handled in ir2tran.lisp. Returns are handled -by @code{ir2-convert-return}, calls are handled by @code{ir2-convert-local-call}, -@code{ir2-convert-full-call}, and @code{ir2-convert-mv-call}, and -function prologues are handled by @code{ir2-convert-bind} (which calls -@code{init-xep-environment} for the case of an entry point for a full -call). +The actual selection of VOPs for implementing call/return for a given +function is handled in ir2tran.lisp. Returns are handled by +@code{ir2-convert-return}, calls are handled by +@code{ir2-convert-local-call}, @code{ir2-convert-full-call}, and +@code{ir2-convert-mv-call}, and function prologues are handled by +@code{ir2-convert-bind} (which calls @code{init-xep-environment} for +the case of an entry point for a full call). @node Additional Notes @comment node-name, next, previous, up @section Additional Notes -The low-hanging fruit here is going to be changing every call -and return to use @code{CALL} and @code{RETURN} instructions -instead of @code{JMP} instructions. +The low-hanging fruit here is going to be changing every call and +return to use @code{CALL} and @code{RETURN} instructions instead of +@code{JMP} instructions. A more involved change would be to reduce the number of argument -passing registers from three to two, which may be beneficial in -terms of our quest to free up a GPR for use on Win32 boxes for a -thread structure. +passing registers from three to two, which may be beneficial in terms +of our quest to free up a GPR for use on Win32 boxes for a thread +structure. Another possible win could be to store multiple return-values -somewhere other than the stack, such as a dedicated area of the -thread structure. The main concern here in terms of clobbering -would be to make sure that interrupts (and presumably the -internal-error machinery) know to save the area and that the -compiler knows that the area cannot be live across a function -call. Actually implementing this would involve hacking the IR2 -conversion, since as it stands now the same argument conventions -are used for both call and return value storage (same TNs). +somewhere other than the stack, such as a dedicated area of the thread +structure. The main concern here in terms of clobbering would be to +make sure that interrupts (and presumably the internal-error +machinery) know to save the area and that the compiler knows that the +area cannot be live across a function call. Actually implementing this +would involve hacking the IR2 conversion, since as it stands now the +same argument conventions are used for both call and return value +storage (same TNs). diff --git a/src/assembly/x86-64/arith.lisp b/src/assembly/x86-64/arith.lisp index d47b720..b6a1934 100644 --- a/src/assembly/x86-64/arith.lisp +++ b/src/assembly/x86-64/arith.lisp @@ -36,17 +36,16 @@ (inst jmp :nz DO-STATIC-FUN) ; no - do generic ,@body - (inst clc) + (inst clc) ; single-value return (inst ret) DO-STATIC-FUN - (inst pop rax) (inst push rbp-tn) - (inst lea - rbp-tn - (make-ea :qword :base rsp-tn :disp n-word-bytes)) - (inst sub rsp-tn (fixnumize 2)) - (inst push rax) ; callers return addr + (inst lea rbp-tn (make-ea :qword + :base rsp-tn + :disp (* 2 n-word-bytes))) + (inst sub rsp-tn (fixnumize 1)) + (inst push (make-ea :qword :base rbp-tn :disp (- n-word-bytes))) (inst mov rcx (fixnumize 2)) ; arg count (inst jmp (make-ea :qword @@ -127,11 +126,10 @@ (inst test x fixnum-tag-mask) (inst jmp :z FIXNUM) - (inst pop rax) (inst push rbp-tn) - (inst lea rbp-tn (make-ea :qword :base rsp-tn :disp n-word-bytes)) - (inst sub rsp-tn (fixnumize 2)) - (inst push rax) + (inst lea rbp-tn (make-ea :qword :base rsp-tn :disp (* 2 n-word-bytes))) + (inst sub rsp-tn (fixnumize 1)) + (inst push (make-ea :qword :base rbp-tn :disp (- n-word-bytes))) (inst mov rcx (fixnumize 1)) ; arg count (inst jmp (make-ea :qword :disp (+ nil-value (static-fun-offset '%negate)))) @@ -163,7 +161,7 @@ (inst mov rcx x) (inst or rcx y) (inst test rcx fixnum-tag-mask) - (inst jmp :nz DO-STATIC-FUN) + (inst jmp :nz DO-STATIC-FUN) ; are both fixnums? (inst cmp x y) (inst ret) @@ -173,7 +171,7 @@ (inst sub rsp-tn (fixnumize 3)) (inst mov (make-ea :qword :base rcx - :disp (fixnumize -1)) + :disp (frame-byte-offset ocfp-save-offset)) rbp-tn) (move rbp-tn rcx) (inst mov rcx (fixnumize 2)) @@ -238,7 +236,7 @@ (inst sub rsp-tn (fixnumize 3)) (inst mov (make-ea :qword :base rcx - :disp (fixnumize -1)) + :disp (frame-byte-offset ocfp-save-offset)) rbp-tn) (move rbp-tn rcx) (inst mov rcx (fixnumize 2)) @@ -295,7 +293,7 @@ (inst sub rsp-tn (fixnumize 3)) (inst mov (make-ea :qword :base rcx - :disp (fixnumize -1)) + :disp (frame-byte-offset ocfp-save-offset)) rbp-tn) (move rbp-tn rcx) (inst mov rcx (fixnumize 2)) diff --git a/src/assembly/x86-64/assem-rtns.lisp b/src/assembly/x86-64/assem-rtns.lisp index dea6cbb..7270802 100644 --- a/src/assembly/x86-64/assem-rtns.lisp +++ b/src/assembly/x86-64/assem-rtns.lisp @@ -135,11 +135,11 @@ (inst cmp ecx (fixnumize 3)) (inst jmp :le REGISTER-ARGS) - ;; Save the OLD-FP and RETURN-PC because the blit it going to trash - ;; those stack locations. Save the ECX, because the loop is going - ;; to trash it. - (pushw rbp-tn -1) - (loadw ebx rbp-tn -2) + ;; Save the OLD-FP and RETURN-PC because the blit is going to trash + ;; those stack locations. Save the ECX, because the loop is going to + ;; trash it. + (pushw rbp-tn (frame-word-offset ocfp-save-offset)) + (loadw ebx rbp-tn (frame-word-offset return-pc-save-offset)) (inst push ecx) ;; Do the blit. Because we are coping from smaller addresses to @@ -147,25 +147,27 @@ ;; our way down. (inst shr ecx 3) ; fixnum to raw words (inst std) ; count down - (inst lea edi (make-ea :qword :base rbp-tn :disp (- n-word-bytes))) + (inst lea edi (make-ea :qword :base rbp-tn :disp (frame-byte-offset 0))) (inst sub esi (fixnumize 1)) (inst rep) (inst movs :qword) (inst cld) ;; Load the register arguments carefully. - (loadw edx rbp-tn -1) + (loadw edx rbp-tn (frame-word-offset ocfp-save-offset)) ;; Restore OLD-FP and ECX. (inst pop ecx) - (popw rbp-tn -1) ; overwrites a0 + ;; Overwrites a1 + (popw rbp-tn (frame-word-offset ocfp-save-offset)) ;; Blow off the stack above the arguments. (inst lea rsp-tn (make-ea :qword :base edi :disp n-word-bytes)) ;; remaining register args - (loadw edi rbp-tn -2) - (loadw esi rbp-tn -3) + (inst mov edi edx) + (loadw edx rbp-tn (frame-word-offset 0)) + (loadw esi rbp-tn (frame-word-offset 2)) ;; Push the (saved) return-pc so it looks like we just called. (inst push ebx) @@ -187,7 +189,7 @@ (make-ea :qword :base rbp-tn :disp (* -3 n-word-bytes))) ;; Push the return-pc so it looks like we just called. - (pushw rbp-tn -2) ; XXX dan ? + (pushw rbp-tn (frame-word-offset return-pc-save-offset)) ;; And away we go. (inst jmp (make-ea :byte :base eax diff --git a/src/assembly/x86/arith.lisp b/src/assembly/x86/arith.lisp index f3081f8..28d510f 100644 --- a/src/assembly/x86/arith.lisp +++ b/src/assembly/x86/arith.lisp @@ -40,13 +40,12 @@ (inst ret) DO-STATIC-FUN - (inst pop eax) (inst push ebp-tn) - (inst lea - ebp-tn - (make-ea :dword :base esp-tn :disp n-word-bytes)) - (inst sub esp-tn (fixnumize 2)) - (inst push eax) ; callers return addr + (inst lea ebp-tn (make-ea :dword + :base esp-tn + :disp (* 2 n-word-bytes))) + (inst sub esp-tn (fixnumize 1)) + (inst push (make-ea :dword :base ebp-tn :disp (- n-word-bytes))) (inst mov ecx (fixnumize 2)) ; arg count (inst jmp (make-ea :dword @@ -86,7 +85,7 @@ (move eax x) ; must use eax for 64-bit result (inst sar eax n-fixnum-tag-bits) ; remove *4 fixnum bias (inst imul y) ; result in edx:eax - (inst jmp :no okay) ; still fixnum + (inst jmp :no OKAY) ; still fixnum ;; zzz jrd changed edx to ebx in here, as edx isn't listed as a temp, above ;; pfw says that loses big -- edx is target for arg x and result res @@ -130,11 +129,10 @@ (inst test x fixnum-tag-mask) (inst jmp :z FIXNUM) - (inst pop eax) (inst push ebp-tn) - (inst lea ebp-tn (make-ea :dword :base esp-tn :disp n-word-bytes)) - (inst sub esp-tn (fixnumize 2)) - (inst push eax) + (inst lea ebp-tn (make-ea :dword :base esp-tn :disp (* 2 n-word-bytes))) + (inst sub esp-tn (fixnumize 1)) + (inst push (make-ea :dword :base ebp-tn :disp (- n-word-bytes))) (inst mov ecx (fixnumize 1)) ; arg count (inst jmp (make-ea :dword :disp (+ nil-value (static-fun-offset '%negate)))) @@ -174,7 +172,8 @@ (move ecx esp-tn) (inst sub esp-tn (fixnumize 3)) (inst mov (make-ea :dword - :base ecx :disp (fixnumize -1)) + :base ecx + :disp (frame-byte-offset ocfp-save-offset)) ebp-tn) (move ebp-tn ecx) (inst mov ecx (fixnumize 2)) @@ -189,7 +188,7 @@ (:g `((inst cmp x (1+ nil-value))))) (inst ret)) #-sb-assembling - `(define-vop (,name) + `(define-vop (,name) (:translate ,translate) (:policy :safe) (:save-p t) @@ -244,7 +243,7 @@ (inst sub esp-tn (fixnumize 3)) (inst mov (make-ea :dword :base ecx - :disp (fixnumize -1)) + :disp (frame-byte-offset ocfp-save-offset)) ebp-tn) (move ebp-tn ecx) (inst mov ecx (fixnumize 2)) @@ -301,7 +300,7 @@ (inst sub esp-tn (fixnumize 3)) (inst mov (make-ea :dword :base ecx - :disp (fixnumize -1)) + :disp (frame-byte-offset ocfp-save-offset)) ebp-tn) (move ebp-tn ecx) (inst mov ecx (fixnumize 2)) diff --git a/src/assembly/x86/assem-rtns.lisp b/src/assembly/x86/assem-rtns.lisp index 930c2e7..36152cd 100644 --- a/src/assembly/x86/assem-rtns.lisp +++ b/src/assembly/x86/assem-rtns.lisp @@ -31,13 +31,13 @@ (:temp edi unsigned-reg edi-offset)) ;; Pick off the cases where everything fits in register args. - (inst jecxz zero-values) + (inst jecxz ZERO-VALUES) (inst cmp ecx (fixnumize 1)) - (inst jmp :e one-value) + (inst jmp :e ONE-VALUE) (inst cmp ecx (fixnumize 2)) - (inst jmp :e two-values) + (inst jmp :e TWO-VALUES) (inst cmp ecx (fixnumize 3)) - (inst jmp :e three-values) + (inst jmp :e THREE-VALUES) ;; Save the count, because the loop is going to destroy it. (inst mov edx ecx) @@ -135,11 +135,11 @@ (inst cmp ecx (fixnumize 3)) (inst jmp :le REGISTER-ARGS) - ;; Save the OLD-FP and RETURN-PC because the blit it going to trash - ;; those stack locations. Save the ECX, because the loop is going - ;; to trash it. - (pushw ebp-tn -1) - (loadw ebx ebp-tn -2) + ;; Save the OLD-FP and RETURN-PC because the blit is going to trash + ;; those stack locations. Save the ECX, because the loop is going to + ;; trash it. + (pushw ebp-tn (frame-word-offset ocfp-save-offset)) + (loadw ebx ebp-tn (frame-word-offset return-pc-save-offset)) (inst push ecx) ;; Do the blit. Because we are coping from smaller addresses to @@ -147,25 +147,27 @@ ;; our way down. (inst shr ecx 2) ; fixnum to raw words (inst std) ; count down - (inst lea edi (make-ea :dword :base ebp-tn :disp (- n-word-bytes))) + (inst lea edi (make-ea :dword :base ebp-tn :disp (frame-byte-offset 0))) (inst sub esi (fixnumize 1)) (inst rep) (inst movs :dword) (inst cld) ;; Load the register arguments carefully. - (loadw edx ebp-tn -1) + (loadw edx ebp-tn (frame-word-offset ocfp-save-offset)) ;; Restore OLD-FP and ECX. (inst pop ecx) - (popw ebp-tn -1) ; overwrites a0 + ;; Overwrites a1 + (popw ebp-tn (frame-word-offset ocfp-save-offset)) ;; Blow off the stack above the arguments. (inst lea esp-tn (make-ea :dword :base edi :disp n-word-bytes)) ;; remaining register args - (loadw edi ebp-tn -2) - (loadw esi ebp-tn -3) + (inst mov edi edx) + (loadw edx ebp-tn (frame-word-offset 0)) + (loadw esi ebp-tn (frame-word-offset 2)) ;; Push the (saved) return-pc so it looks like we just called. (inst push ebx) @@ -184,7 +186,7 @@ (make-ea :dword :base ebp-tn :disp (* -3 n-word-bytes))) ;; Push the return-pc so it looks like we just called. - (pushw ebp-tn -2) + (pushw ebp-tn (frame-word-offset return-pc-save-offset)) ;; And away we go. (inst jmp (make-ea-for-object-slot eax closure-fun-slot fun-pointer-lowtag))) @@ -207,10 +209,10 @@ (inst jmp :z error)) (inst cmp target (make-ea-for-object-slot catch catch-block-tag-slot 0)) - (inst jmp :e exit) + (inst jmp :e EXIT) (loadw catch catch catch-block-previous-catch-slot) - (inst jmp loop) + (inst jmp LOOP) EXIT @@ -241,7 +243,7 @@ (inst cmp uwp (make-ea-for-object-slot block unwind-block-current-uwp-slot 0)) ;; If a match, return to context in arg block. - (inst jmp :e do-exit) + (inst jmp :e DO-EXIT) ;; Not a match - return to *CURRENT-UNWIND-PROTECT-BLOCK* context. ;; Important! Must save (and return) the arg 'block' for later use!! diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp index a0ae3a4..73ff2ac 100644 --- a/src/compiler/x86-64/call.lisp +++ b/src/compiler/x86-64/call.lisp @@ -133,7 +133,7 @@ ;; The start of the actual code. ;; Save the return-pc. - (popw rbp-tn (- (1+ return-pc-save-offset))) + (popw rbp-tn (frame-word-offset return-pc-save-offset)) ;; If copy-more-arg follows it will allocate the correct stack ;; size. The stack is not allocated first here as this may expose @@ -260,7 +260,7 @@ (inst cmp rcx-tn (fixnumize i)) (inst jmp :be default-lab) - (loadw rdx-tn rbx-tn (- (1+ i))) + (loadw rdx-tn rbx-tn (frame-word-offset i)) (inst mov tn rdx-tn))) (emit-label defaulting-done) @@ -298,7 +298,7 @@ (emit-label no-stack-args) (inst lea rdi-tn (make-ea :qword :base rbp-tn - :disp (* (- (1+ register-arg-count)) n-word-bytes))) + :disp (frame-byte-offset register-arg-count))) ;; Load RAX with NIL so we can quickly store it, and set up ;; stuff for the loop. (inst mov rax-tn nil-value) @@ -311,7 +311,7 @@ ;; and then default the remaining stack arguments. (emit-label regs-defaulted) ;; Save EDI. - (storew rdi-tn rbx-tn (- (1+ 1))) + (storew rdi-tn rbx-tn (frame-word-offset 1)) ;; Compute the number of stack arguments, and if it's zero or ;; less, don't copy any stack arguments. (inst sub rcx-tn (fixnumize register-arg-count)) @@ -327,19 +327,19 @@ ;; Compute a pointer to where the stack args go. (inst lea rdi-tn (make-ea :qword :base rbp-tn - :disp (* (- (1+ register-arg-count)) n-word-bytes))) + :disp (frame-byte-offset register-arg-count))) ;; Save ESI, and compute a pointer to where the args come from. - (storew rsi-tn rbx-tn (- (1+ 2))) + (storew rsi-tn rbx-tn (frame-word-offset 2)) (inst lea rsi-tn (make-ea :qword :base rbx-tn - :disp (* (- (1+ register-arg-count)) n-word-bytes))) + :disp (frame-byte-offset register-arg-count))) ;; Do the copy. (inst shr rcx-tn word-shift) ; make word count (inst std) (inst rep) (inst movs :qword) ;; Restore RSI. - (loadw rsi-tn rbx-tn (- (1+ 2))) + (loadw rsi-tn rbx-tn (frame-word-offset 2)) ;; Now we have to default the remaining args. Find out how many. (inst sub rax-tn (fixnumize (- nvals register-arg-count))) (inst neg rax-tn) @@ -355,7 +355,7 @@ (inst stos rax-tn) ;; Restore EDI, and reset the stack. (emit-label restore-edi) - (loadw rdi-tn rbx-tn (- (1+ 1))) + (loadw rdi-tn rbx-tn (frame-word-offset 1)) (inst mov rsp-tn rbx-tn) (inst cld)))) (values)) @@ -387,7 +387,7 @@ (cond ((location= start (first *register-arg-tns*)) (inst push (first *register-arg-tns*)) - (inst lea start (make-ea :qword :base rsp-tn :disp 8))) + (inst lea start (make-ea :qword :base rsp-tn :disp n-word-bytes))) (t (inst mov start rsp-tn) (inst push (first *register-arg-tns*)))) (inst mov count (fixnumize 1)) @@ -464,12 +464,14 @@ ;; Is the return-pc on the stack or in a register? (sc-case ret-tn ((sap-stack) + (unless (= (tn-offset ret-tn) return-pc-save-offset) + (error "ret-tn ~A in wrong stack slot" ret-tn)) #+nil (format t "*call-local: ret-tn on stack; offset=~S~%" (tn-offset ret-tn)) (inst lea return-label (make-fixup nil :code-object RETURN)) - (storew return-label rbp-tn (- (1+ (tn-offset ret-tn))))) - ((sap-reg) - (inst lea ret-tn (make-fixup nil :code-object RETURN))))) + (storew return-label rbp-tn (frame-word-offset (tn-offset ret-tn)))) + (t + (error "ret-tn ~A in sap-reg" ret-tn)))) (note-this-location vop :call-site) (inst jmp target) @@ -507,10 +509,9 @@ (tn-offset ret-tn)) ;; Stack (inst lea return-label (make-fixup nil :code-object RETURN)) - (storew return-label rbp-tn (- (1+ (tn-offset ret-tn))))) - ((sap-reg) - ;; Register - (inst lea ret-tn (make-fixup nil :code-object RETURN))))) + (storew return-label rbp-tn (frame-word-offset (tn-offset ret-tn)))) + (t + (error "multiple-call-local: return-pc not on stack.")))) (note-this-location vop :call-site) (inst jmp target) @@ -556,10 +557,9 @@ (tn-offset ret-tn)) ;; Stack (inst lea return-label (make-fixup nil :code-object RETURN)) - (storew return-label rbp-tn (- (1+ (tn-offset ret-tn))))) - ((sap-reg) - ;; Register - (inst lea ret-tn (make-fixup nil :code-object RETURN))))) + (storew return-label rbp-tn (frame-word-offset (tn-offset ret-tn)))) + (t + (error "known-call-local: return-pc not on stack.")))) (note-this-location vop :call-site) (inst jmp target) @@ -621,39 +621,23 @@ (trace-table-entry trace-table-fun-epilogue) ;; return-pc may be either in a register or on the stack. (sc-case return-pc - ((sap-reg) - (sc-case old-fp - ((control-stack) - (cond ((zerop (tn-offset old-fp)) - ;; Zot all of the stack except for the old-fp. - (inst lea rsp-tn (make-ea :qword :base rbp-tn - :disp (- (* (1+ ocfp-save-offset) - n-word-bytes)))) - ;; Restore the old fp from its save location on the stack, - ;; and zot the stack. - (inst pop rbp-tn)) - - (t - (cerror "Continue anyway" - "VOP return-local doesn't work if old-fp (in slot ~ - ~S) is not in slot 0" - (tn-offset old-fp))))) - - ((any-reg descriptor-reg) - ;; Zot all the stack. - (move rsp-tn rbp-tn) - ;; Restore the old-fp. - (move rbp-tn old-fp))) - - ;; Return; return-pc is in a register. - (inst jmp return-pc)) - ((sap-stack) + (unless (and (sc-is old-fp control-stack) + (= (tn-offset old-fp) ocfp-save-offset)) + (error "known-return: ocfp not on stack in standard save location?")) + (unless (and (sc-is return-pc sap-stack) + (= (tn-offset return-pc) return-pc-save-offset)) + (error + "known-return: return-pc not on stack in standard save location?")) + + ;; Zot all of the stack except for the old-fp and return-pc. (inst lea rsp-tn (make-ea :qword :base rbp-tn - :disp (- (* (1+ (tn-offset return-pc)) n-word-bytes)))) - (move rbp-tn old-fp) - (inst ret (* (tn-offset return-pc) n-word-bytes)))) + :disp (frame-byte-offset ocfp-save-offset))) + (inst pop rbp-tn) + (inst ret (* (tn-offset return-pc) n-word-bytes))) + (t + (error "known-return, return-pc not on stack"))) (trace-table-entry trace-table-normal))) @@ -820,20 +804,20 @@ ;; FIXME: FORMAT T for stale ;; diagnostic output (several of ;; them around here), ick - (format t "** tail-call old-fp not S0~%") + (error "** tail-call old-fp not S0~%") (move old-fp-tmp old-fp) (storew old-fp-tmp rbp-tn - (- (1+ ocfp-save-offset))))) + (frame-word-offset ocfp-save-offset)))) ((any-reg descriptor-reg) - (format t "** tail-call old-fp in reg not S0~%") + (error "** tail-call old-fp in reg not S0~%") (storew old-fp rbp-tn - (- (1+ ocfp-save-offset))))) + (frame-word-offset ocfp-save-offset)))) ;; For tail call, we have to push the ;; return-pc so that it looks like we CALLed - ;; drspite the fact that we are going to JMP. + ;; despite the fact that we are going to JMP. (inst push return-pc) )) (t @@ -855,7 +839,8 @@ '(inst sub rsp-tn (fixnumize 3))) ;; Save the fp - (storew rbp-tn new-fp (- (1+ ocfp-save-offset))) + (storew rbp-tn new-fp + (frame-word-offset ocfp-save-offset)) (move rbp-tn new-fp) ; NB - now on new stack frame. ))) @@ -886,7 +871,7 @@ (trace-table-entry trace-table-normal))))) (define-full-call call nil :fixed nil) - (define-full-call call-named t :fixed nil) + (define-full-call call-named t :fixed nil) (define-full-call multiple-call nil :unknown nil) (define-full-call multiple-call-named t :unknown nil) (define-full-call tail-call nil :tail nil) @@ -930,15 +915,15 @@ ;;;; unknown values return -;;; Return a single-value using the Unknown-Values convention. Specifically, -;;; we jump to clear the stack and jump to return-pc+3. -;;; -;;; We require old-fp to be in a register, because we want to reset RSP before -;;; restoring RBP. If old-fp were still on the stack, it could get clobbered -;;; by a signal. +;;; Return a single-value using the Unknown-Values convention. ;;; ;;; pfw--get wired-tn conflicts sometimes if register sc specd for args ;;; having problems targeting args to regs -- using temps instead. +;;; +;;; First off, modifying the return-pc defeats the branch-prediction +;;; optimizations on modern CPUs quite handily. Second, we can do all +;;; this without needing a temp register. Fixed the latter, at least. +;;; -- AB 2006/Feb/04 (define-vop (return-single) (:args (old-fp) (return-pc) @@ -948,55 +933,31 @@ (trace-table-entry trace-table-fun-epilogue) ;; Code structure lifted from known-return. (sc-case return-pc - ((sap-reg) - ;; return PC in register for some reason (local call?) - ;; we jmp to the return pc after fixing the stack and frame. - (sc-case old-fp - ((control-stack) - ;; ofp on stack must be in slot 0 (the traditional storage place). - ;; Drop the stack above it and pop it off. - (cond ((zerop (tn-offset old-fp)) - (inst lea rsp-tn (make-ea :dword :base rbp-tn - :disp (- (* (1+ ocfp-save-offset) - n-word-bytes)))) - (inst pop rbp-tn)) - (t - ;; Should this ever happen, we do the same as above, but - ;; using (tn-offset old-fp) instead of ocfp-save-offset - ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and - ;; then lea rsp again against itself with a displacement - ;; of (* (tn-offset old-fp) n-word-bytes) to clear the - ;; rest of the stack. - (cerror "Continue anyway" - "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp))))) - ((any-reg descriptor-reg) - ;; ofp in reg, drop the stack and load the real fp. - (move rsp-tn rbp-tn) - (move rbp-tn old-fp))) - - ;; Set single-value-return flag - (inst clc) - ;; And return - (inst jmp return-pc)) - ((sap-stack) ;; Note that this will only work right if, when old-fp is on ;; the stack, it has a lower tn-offset than return-pc. One of ;; the comments in known-return indicate that this is the case ;; (in that it will be in its save location), but we may wish ;; to assert that (in either the weaker or stronger forms). - ;; Should this ever not be the case, we should load old-fp - ;; into a temp reg while we fix the stack. - ;; Drop stack above return-pc - (inst lea rsp-tn (make-ea :dword :base rbp-tn - :disp (- (* (1+ (tn-offset return-pc)) - n-word-bytes)))) + ;; Should this ever not be the case, we should load old-fp into + ;; a temp reg while we fix the stack. + (unless (and (sc-is old-fp control-stack) + (= (tn-offset old-fp) ocfp-save-offset)) + (error "ocfp not on stack in standard save location?")) + (unless (and (sc-is return-pc sap-stack) + (= (tn-offset return-pc) return-pc-save-offset)) + (error "return-pc not on stack in standard save location?")) + ;; Drop stack above old-fp + (inst lea rsp-tn (make-ea :qword :base rbp-tn + :disp (frame-byte-offset (tn-offset old-fp)))) ;; Set single-value return flag (inst clc) ;; Restore the old frame pointer - (move rbp-tn old-fp) + (inst pop rbp-tn) ;; And return, dropping the rest of the stack as we go. - (inst ret (* (tn-offset return-pc) n-word-bytes)))))) + (inst ret (* (tn-offset return-pc) n-word-bytes))) + (t + (error "return pc not on stack"))))) ;;; Do unknown-values return of a fixed (other than 1) number of ;;; values. The VALUES are required to be set up in the standard @@ -1038,8 +999,10 @@ (move rbp-tn old-fp) ;; Clear as much of the stack as possible, but not past the return ;; address. - (inst lea rsp-tn (make-ea :qword :base rbx - :disp (- (* (max nvals 2) n-word-bytes)))) + (inst lea rsp-tn + (make-ea :qword :base rbx + :disp (frame-byte-offset (max (1- nvals) + return-pc-save-offset)))) ;; Pre-default any argument register that need it. (when (< nvals register-arg-count) (let* ((arg-tns (nthcdr nvals (list a0 a1 a2))) @@ -1053,16 +1016,17 @@ ;; stack and we've changed the stack pointer. So we have to ;; tell it to index off of RBX instead of RBP. (cond ((zerop nvals) - ;; Return popping the return address and the OCFP. - (inst ret n-word-bytes)) + ;; Return popping the return address and what's earlier in + ;; the frame. + (inst ret (* return-pc-save-offset n-word-bytes))) ((= nvals 1) - ;; Return popping the return, leaving 1 slot. Can this - ;; happen, or is a single value return handled elsewhere? - (inst ret)) + ;; This is handled in RETURN-SINGLE. + (error "nvalues is 1")) (t - (inst jmp (make-ea :qword :base rbx - :disp (- (* (1+ (tn-offset return-pc)) - n-word-bytes)))))) + ;; Thou shalt not JMP unto thy return address. + (inst push (make-ea :qword :base rbx + :disp (frame-byte-offset (tn-offset return-pc)))) + (inst ret))) (trace-table-entry trace-table-normal))) @@ -1113,8 +1077,8 @@ ;; clear the multiple-value return flag (inst clc) ;; Out of here. - (inst jmp rax) - + (inst push rax) + (inst ret) ;; Nope, not the single case. Jump to the assembly routine. (emit-label not-single))) (move rsi vals) diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp index 7b800e4..fdf8ad7 100644 --- a/src/compiler/x86-64/insts.lisp +++ b/src/compiler/x86-64/insts.lisp @@ -2583,7 +2583,7 @@ (:printer byte ((op #b11000010) (imm nil :type 'imm-word-16)) '(:name :tab imm)) (:emitter - (cond (stack-delta + (cond ((and stack-delta (not (zerop stack-delta))) (emit-byte segment #b11000010) (emit-word segment stack-delta)) (t diff --git a/src/compiler/x86-64/nlx.lisp b/src/compiler/x86-64/nlx.lisp index 6208d5c..a277afa 100644 --- a/src/compiler/x86-64/nlx.lisp +++ b/src/compiler/x86-64/nlx.lisp @@ -274,7 +274,7 @@ (make-ea :qword :base rbp-tn :disp (* -3 n-word-bytes))) ;; Push the return-pc so it looks like we just called. - (pushw rbp-tn -2) + (pushw rbp-tn (frame-word-offset return-pc-save-offset)) ;; Call it (inst jmp (make-ea :qword :base block diff --git a/src/compiler/x86-64/static-fn.lisp b/src/compiler/x86-64/static-fn.lisp index ae94061..ee2baea 100644 --- a/src/compiler/x86-64/static-fn.lisp +++ b/src/compiler/x86-64/static-fn.lisp @@ -16,7 +16,7 @@ (:policy :safe) (:variant-vars function) (:vop-var vop) - (:node-var node) + ;;(:node-var node) (:temporary (:sc unsigned-reg :offset ebx-offset :from (:eval 0) :to (:eval 2)) ebx) (:temporary (:sc unsigned-reg :offset ecx-offset @@ -79,15 +79,17 @@ ;; If speed not more important than size, duplicate the ;; effect of the ENTER with discrete instructions. Takes ;; 2+1+3+2=8 bytes as opposed to 4+3=7 bytes. - (cond ((policy node (>= speed space)) + (cond (t ;(policy node (>= speed space)) (inst mov ebx rsp-tn) + ;; Dummy for return address + (inst push rbp-tn) ;; Save the old-fp (inst push rbp-tn) ;; Ensure that at least three slots are available; one ;; above, two more needed. - (inst sub rsp-tn (fixnumize 2)) + (inst sub rsp-tn (fixnumize 1)) (inst mov rbp-tn ebx)) - (t + #+(or) (t (inst enter (fixnumize 2)) ;; The enter instruction pushes EBP and then copies ;; ESP into EBP. We want the new EBP to be the diff --git a/src/compiler/x86-64/vm.lisp b/src/compiler/x86-64/vm.lisp index 7803220..405ca67 100644 --- a/src/compiler/x86-64/vm.lisp +++ b/src/compiler/x86-64/vm.lisp @@ -446,10 +446,18 @@ ;;;; miscellaneous function call parameters ;;; offsets of special stack frame locations -(def!constant ocfp-save-offset 0) -(def!constant return-pc-save-offset 1) +(def!constant ocfp-save-offset 1) +(def!constant return-pc-save-offset 0) (def!constant code-save-offset 2) +(declaim (inline frame-word-offset)) +(defun frame-word-offset (index) + (- (1+ index))) + +(declaim (inline frame-byte-offset)) +(defun frame-byte-offset (index) + (* (frame-word-offset index) n-word-bytes)) + (def!constant lra-save-offset return-pc-save-offset) ; ? ;;; This is used by the debugger. diff --git a/src/compiler/x86/call.lisp b/src/compiler/x86/call.lisp index bdd1d54..f4f0c29 100644 --- a/src/compiler/x86/call.lisp +++ b/src/compiler/x86/call.lisp @@ -212,7 +212,7 @@ (let ((regs-defaulted (gen-label))) (note-this-location vop :unknown-return) (inst jmp :c regs-defaulted) - ;; Default the unsuppled registers. + ;; Default the unsupplied registers. (let* ((2nd-tn-ref (tn-ref-across values)) (2nd-tn (tn-ref-tn 2nd-tn-ref))) (inst mov 2nd-tn nil-value) @@ -395,7 +395,7 @@ (cond ((location= start (first *register-arg-tns*)) (inst push (first *register-arg-tns*)) - (inst lea start (make-ea :dword :base esp-tn :disp 4))) + (inst lea start (make-ea :dword :base esp-tn :disp n-word-bytes))) (t (inst mov start esp-tn) (inst push (first *register-arg-tns*)))) (inst mov count (fixnumize 1)) @@ -471,12 +471,14 @@ ;; Is the return-pc on the stack or in a register? (sc-case ret-tn ((sap-stack) + (unless (= (tn-offset ret-tn) return-pc-save-offset) + (error "ret-tn ~A in wrong stack slot" ret-tn)) #+nil (format t "*call-local: ret-tn on stack; offset=~S~%" (tn-offset ret-tn)) - (storew (make-fixup nil :code-object return) + (storew (make-fixup nil :code-object RETURN) ebp-tn (frame-word-offset (tn-offset ret-tn)))) - ((sap-reg) - (inst lea ret-tn (make-fixup nil :code-object return))))) + (t + (error "ret-tn ~A in sap-reg" ret-tn)))) (note-this-location vop :call-site) (inst jmp target) @@ -512,11 +514,10 @@ #+nil (format t "*multiple-call-local: ret-tn on stack; offset=~S~%" (tn-offset ret-tn)) ;; Stack - (storew (make-fixup nil :code-object return) + (storew (make-fixup nil :code-object RETURN) ebp-tn (frame-word-offset (tn-offset ret-tn)))) - ((sap-reg) - ;; Register - (inst lea ret-tn (make-fixup nil :code-object return))))) + (t + (error "multiple-call-local: return-pc not on stack.")))) (note-this-location vop :call-site) (inst jmp target) @@ -560,11 +561,10 @@ #+nil (format t "*known-call-local: ret-tn on stack; offset=~S~%" (tn-offset ret-tn)) ;; Stack - (storew (make-fixup nil :code-object return) + (storew (make-fixup nil :code-object RETURN) ebp-tn (frame-word-offset (tn-offset ret-tn)))) - ((sap-reg) - ;; Register - (inst lea ret-tn (make-fixup nil :code-object return))))) + (t + (error "known-call-local: return-pc not on stack.")))) (note-this-location vop :call-site) (inst jmp target) @@ -636,52 +636,25 @@ ;; return-pc may be either in a register or on the stack. (sc-case return-pc - ((sap-reg) - (sc-case old-fp - ((control-stack) - - #+nil (format t "*known-return: old-fp ~S on stack; offset=~S~%" - old-fp (tn-offset old-fp)) - - (cond ((zerop (tn-offset old-fp)) - ;; Zot all of the stack except for the old-fp. - (inst lea esp-tn (make-ea :dword :base ebp-tn - :disp (frame-byte-offset ocfp-save-offset))) - ;; Restore the old fp from its save location on the stack, - ;; and zot the stack. - (inst pop ebp-tn)) - - (t - (cerror "Continue anyway" - "VOP return-local doesn't work if old-fp (in slot ~ - ~S) is not in slot 0" - (tn-offset old-fp))))) - - ((any-reg descriptor-reg) - ;; Zot all the stack. - (move esp-tn ebp-tn) - ;; Restore the old-fp. - (move ebp-tn old-fp))) - - ;; Return; return-pc is in a register. - (inst jmp return-pc)) - ((sap-stack) - #+nil (format t "*known-return: return-pc ~S on stack; offset=~S~%" return-pc (tn-offset return-pc)) + (unless (and (sc-is old-fp control-stack) + (= (tn-offset old-fp) ocfp-save-offset)) + (error "known-return: ocfp not on stack in standard save location?")) + (unless (and (sc-is return-pc sap-stack) + (= (tn-offset return-pc) return-pc-save-offset)) + (error + "known-return: return-pc not on stack in standard save location?")) ;; Zot all of the stack except for the old-fp and return-pc. (inst lea esp-tn (make-ea :dword :base ebp-tn - :disp (frame-byte-offset (tn-offset return-pc)))) - ;; Restore the old fp. old-fp may be either on the stack in its - ;; save location or in a register, in either case this restores it. - (move ebp-tn old-fp) - ;; The return pops the return address (4 bytes), then we need - ;; to pop all the slots before the return-pc which includes the - ;; 4 bytes for the old-fp. - (inst ret (* (tn-offset return-pc) n-word-bytes)))) + :disp (frame-byte-offset ocfp-save-offset))) + (inst pop ebp-tn) + (inst ret (* (tn-offset return-pc) n-word-bytes))) + (t + (error "known-return, return-pc not on stack"))) (trace-table-entry trace-table-normal))) @@ -848,13 +821,13 @@ ;; FIXME: FORMAT T for stale ;; diagnostic output (several of ;; them around here), ick - (format t "** tail-call old-fp not S0~%") + (error "** tail-call old-fp not S0~%") (move old-fp-tmp old-fp) (storew old-fp-tmp ebp-tn (frame-word-offset ocfp-save-offset)))) ((any-reg descriptor-reg) - (format t "** tail-call old-fp in reg not S0~%") + (error "** tail-call old-fp in reg not S0~%") (storew old-fp ebp-tn (frame-word-offset ocfp-save-offset)))) @@ -883,7 +856,8 @@ '(inst sub esp-tn (fixnumize 3))) ;; Save the fp - (storew ebp-tn new-fp (frame-word-offset ocfp-save-offset)) + (storew ebp-tn new-fp + (frame-word-offset ocfp-save-offset)) (move ebp-tn new-fp) ; NB - now on new stack frame. ))) @@ -952,12 +926,7 @@ ;;;; unknown values return -;;; Return a single-value using the Unknown-Values convention. Specifically, -;;; we jump to clear the stack and jump to return-pc+2. -;;; -;;; We require old-fp to be in a register, because we want to reset ESP before -;;; restoring EBP. If old-fp were still on the stack, it could get clobbered -;;; by a signal. +;;; Return a single-value using the Unknown-Values convention. ;;; ;;; pfw--get wired-tn conflicts sometimes if register sc specd for args ;;; having problems targeting args to regs -- using temps instead. @@ -975,53 +944,31 @@ (trace-table-entry trace-table-fun-epilogue) ;; Code structure lifted from known-return. (sc-case return-pc - ((sap-reg) - ;; return PC in register for some reason (local call?) - ;; we jmp to the return pc after fixing the stack and frame. - (sc-case old-fp - ((control-stack) - ;; ofp on stack must be in slot 0 (the traditional storage place). - ;; Drop the stack above it and pop it off. - (cond ((zerop (tn-offset old-fp)) - (inst lea esp-tn (make-ea :dword :base ebp-tn - :disp (frame-byte-offset ocfp-save-offset))) - (inst pop ebp-tn)) - (t - ;; Should this ever happen, we do the same as above, but - ;; using (tn-offset old-fp) instead of ocfp-save-offset - ;; (which is 0 anyway, see src/compiler/x86/vm.lisp) and - ;; then lea esp again against itself with a displacement - ;; of (* (tn-offset old-fp) n-word-bytes) to clear the - ;; rest of the stack. - (cerror "Continue anyway" - "VOP return-single doesn't work if old-fp (in slot ~S) is not in slot 0" (tn-offset old-fp))))) - ((any-reg descriptor-reg) - ;; ofp in reg, drop the stack and load the real fp. - (move esp-tn ebp-tn) - (move ebp-tn old-fp))) - - ;; Set single-value-return flag - (inst clc) - ;; And return - (inst jmp return-pc)) - ((sap-stack) ;; Note that this will only work right if, when old-fp is on ;; the stack, it has a lower tn-offset than return-pc. One of ;; the comments in known-return indicate that this is the case ;; (in that it will be in its save location), but we may wish ;; to assert that (in either the weaker or stronger forms). - ;; Should this ever not be the case, we should load old-fp - ;; into a temp reg while we fix the stack. - ;; Drop stack above return-pc + ;; Should this ever not be the case, we should load old-fp into + ;; a temp reg while we fix the stack. + (unless (and (sc-is old-fp control-stack) + (= (tn-offset old-fp) ocfp-save-offset)) + (error "ocfp not on stack in standard save location?")) + (unless (and (sc-is return-pc sap-stack) + (= (tn-offset return-pc) return-pc-save-offset)) + (error "return-pc not on stack in standard save location?")) + ;; Drop stack above old-fp (inst lea esp-tn (make-ea :dword :base ebp-tn - :disp (frame-byte-offset (tn-offset return-pc)))) + :disp (frame-byte-offset (tn-offset old-fp)))) ;; Set single-value return flag (inst clc) ;; Restore the old frame pointer - (move ebp-tn old-fp) + (inst pop ebp-tn) ;; And return, dropping the rest of the stack as we go. - (inst ret (* (tn-offset return-pc) n-word-bytes)))))) + (inst ret (* (tn-offset return-pc) n-word-bytes))) + (t + (error "return pc not on stack"))))) ;;; Do unknown-values return of a fixed (other than 1) number of ;;; values. The VALUES are required to be set up in the standard @@ -1053,18 +1000,27 @@ :from :eval) a2) (:generator 6 + (unless (and (sc-is old-fp control-stack) + (= (tn-offset old-fp) ocfp-save-offset)) + (error "ocfp not on stack in standard save location?")) + (unless (and (sc-is return-pc sap-stack) + (= (tn-offset return-pc) return-pc-save-offset)) + (error "return-pc not on stack in standard save location?")) + (trace-table-entry trace-table-fun-epilogue) ;; Establish the values pointer and values count. (move ebx ebp-tn) (if (zerop nvals) - (inst xor ecx ecx) ; smaller - (inst mov ecx (fixnumize nvals))) + (inst xor ecx ecx) ; smaller + (inst mov ecx (fixnumize nvals))) ;; Restore the frame pointer. (move ebp-tn old-fp) ;; Clear as much of the stack as possible, but not past the return ;; address. - (inst lea esp-tn (make-ea :dword :base ebx - :disp (- (* (max nvals 2) n-word-bytes)))) + (inst lea esp-tn + (make-ea :dword :base ebx + :disp (frame-byte-offset (max (1- nvals) + return-pc-save-offset)))) ;; Pre-default any argument register that need it. (when (< nvals register-arg-count) (let* ((arg-tns (nthcdr nvals (list a0 a1 a2))) @@ -1072,21 +1028,23 @@ (inst mov first nil-value) (dolist (tn (cdr arg-tns)) (inst mov tn first)))) - ;; Set multi-value return flag. + ;; Set the multiple value return flag. (inst stc) ;; And away we go. Except that return-pc is still on the ;; stack and we've changed the stack pointer. So we have to ;; tell it to index off of EBX instead of EBP. (cond ((zerop nvals) - ;; Return popping the return address and the OCFP. - (inst ret n-word-bytes)) + ;; Return popping the return address and what's earlier in + ;; the frame. + (inst ret (* return-pc-save-offset n-word-bytes))) ((= nvals 1) - ;; Return popping the return, leaving 1 slot. Can this - ;; happen, or is a single value return handled elsewhere? - (inst ret)) + ;; This is handled in RETURN-SINGLE. + (error "nvalues is 1")) (t - (inst jmp (make-ea :dword :base ebx - :disp (frame-byte-offset (tn-offset return-pc)))))) + ;; Thou shalt not JMP unto thy return address. + (inst push (make-ea :dword :base ebx + :disp (frame-byte-offset (tn-offset return-pc)))) + (inst ret))) (trace-table-entry trace-table-normal))) @@ -1133,11 +1091,11 @@ (move old-fp-temp old-fp) (move esp-tn ebp-tn) (move ebp-tn old-fp-temp) - ;; Set the single-value return flag. + ;; clear the multiple-value return flag (inst clc) ;; Out of here. - (inst jmp eax) - + (inst push eax) + (inst ret) ;; Nope, not the single case. Jump to the assembly routine. (emit-label not-single))) (move esi vals) @@ -1191,10 +1149,10 @@ (:generator 20 ;; Avoid the copy if there are no more args. (cond ((zerop fixed) - (inst jecxz just-alloc-frame)) + (inst jecxz JUST-ALLOC-FRAME)) (t (inst cmp ecx-tn (fixnumize fixed)) - (inst jmp :be just-alloc-frame))) + (inst jmp :be JUST-ALLOC-FRAME))) ;; Allocate the space on the stack. ;; stack = ebp - (max 3 frame-size) - (nargs - fixed) @@ -1217,7 +1175,7 @@ ;; Number to copy = nargs-3 (inst sub ecx-tn (fixnumize register-arg-count)) ;; Everything of interest in registers. - (inst jmp :be do-regs)) + (inst jmp :be DO-REGS)) (t ;; Number to copy = nargs-fixed (inst sub ecx-tn (fixnumize fixed)))) @@ -1268,7 +1226,7 @@ ( nil ) ;; Store it relative to ebp (inst mov (make-ea :dword :base ebp-tn - :disp (- (* 4 + :disp (- (* n-word-bytes (+ 1 (- i fixed) (max 3 (sb-allocated-size 'stack)))))) (nth i *register-arg-tns*)) @@ -1281,9 +1239,9 @@ (if (zerop i) (inst test ecx-tn ecx-tn) (inst cmp ecx-tn (fixnumize i))) - (inst jmp :eq done))) + (inst jmp :eq DONE))) - (inst jmp done) + (inst jmp DONE) JUST-ALLOC-FRAME (inst lea esp-tn @@ -1352,12 +1310,12 @@ (inst lea dst (make-ea :dword :base ecx :index ecx)) (maybe-pseudo-atomic stack-allocate-p (allocation dst dst node stack-allocate-p list-pointer-lowtag) - (inst shr ecx 2) + (inst shr ecx (1- n-lowtag-bits)) ;; Set decrement mode (successive args at lower addresses) (inst std) ;; Set up the result. (move result dst) - ;; Jump into the middle of the loop, 'cause that's were we want + ;; Jump into the middle of the loop, 'cause that's where we want ;; to start. (inst jmp enter) (emit-label loop) @@ -1403,7 +1361,7 @@ ;; Point to the first more-arg, not above it. (inst lea context (make-ea :dword :base esp-tn :index count :scale 1 - :disp (- (+ (fixnumize fixed) 4)))) + :disp (- (+ (fixnumize fixed) n-word-bytes)))) (unless (zerop fixed) (inst sub count (fixnumize fixed))))) diff --git a/src/compiler/x86/insts.lisp b/src/compiler/x86/insts.lisp index 39a7d09..794c90d 100644 --- a/src/compiler/x86/insts.lisp +++ b/src/compiler/x86/insts.lisp @@ -1796,7 +1796,7 @@ y)) ((sc-is x control-stack) (inst test (make-ea :byte :base ebp-tn - :disp (- (* (1+ offset) n-word-bytes))) + :disp (frame-byte-offset offset)) y)) (t (inst test x y))))) @@ -2078,7 +2078,7 @@ (:printer byte ((op #b11000010) (imm nil :type 'imm-word-16)) '(:name :tab imm)) (:emitter - (cond (stack-delta + (cond ((and stack-delta (not (zerop stack-delta))) (emit-byte segment #b11000010) (emit-word segment stack-delta)) (t diff --git a/src/compiler/x86/nlx.lisp b/src/compiler/x86/nlx.lisp index 37b752a..7a910a1 100644 --- a/src/compiler/x86/nlx.lisp +++ b/src/compiler/x86/nlx.lisp @@ -293,7 +293,7 @@ (make-ea :dword :base ebp-tn :disp (* -3 n-word-bytes))) ;; Push the return-pc so it looks like we just called. - (pushw ebp-tn -2) + (pushw ebp-tn (frame-word-offset return-pc-save-offset)) ;; Call it (inst jmp (make-ea :dword :base block diff --git a/src/compiler/x86/static-fn.lisp b/src/compiler/x86/static-fn.lisp index cf8fdc5..675d204 100644 --- a/src/compiler/x86/static-fn.lisp +++ b/src/compiler/x86/static-fn.lisp @@ -16,7 +16,7 @@ (:policy :safe) (:variant-vars function) (:vop-var vop) - (:node-var node) + ;;(:node-var node) (:temporary (:sc unsigned-reg :offset ebx-offset :from (:eval 0) :to (:eval 2)) ebx) (:temporary (:sc unsigned-reg :offset ecx-offset @@ -78,15 +78,17 @@ ;; If speed not more important than size, duplicate the ;; effect of the ENTER with discrete instructions. Takes ;; 2+1+3+2=8 bytes as opposed to 4+3=7 bytes. - (cond ((policy node (>= speed space)) + (cond (t ;(policy node (>= speed space)) (inst mov ebx esp-tn) + ;; Dummy for return address + (inst push ebp-tn) ;; Save the old-fp (inst push ebp-tn) ;; Ensure that at least three slots are available; one ;; above, two more needed. - (inst sub esp-tn (fixnumize 2)) + (inst sub esp-tn (fixnumize 1)) (inst mov ebp-tn ebx)) - (t + #+(or) (t (inst enter (fixnumize 2)) ;; The enter instruction pushes EBP and then copies ;; ESP into EBP. We want the new EBP to be the diff --git a/src/compiler/x86/vm.lisp b/src/compiler/x86/vm.lisp index d0023c6..7b1f486 100644 --- a/src/compiler/x86/vm.lisp +++ b/src/compiler/x86/vm.lisp @@ -422,8 +422,8 @@ ;;;; miscellaneous function call parameters ;;; offsets of special stack frame locations -(def!constant ocfp-save-offset 0) -(def!constant return-pc-save-offset 1) +(def!constant ocfp-save-offset 1) +(def!constant return-pc-save-offset 0) (declaim (inline frame-word-offset)) (defun frame-word-offset (index) diff --git a/src/runtime/backtrace.c b/src/runtime/backtrace.c index b6dfc44..ea53069 100644 --- a/src/runtime/backtrace.c +++ b/src/runtime/backtrace.c @@ -335,8 +335,8 @@ x86_call_context (void *fp, void **ra, void **ocfp) c_ocfp = *((void **) fp); c_ra = *((void **) fp + 1); - lisp_ocfp = *((void **) fp - 1); - lisp_ra = *((void **) fp - 2); + lisp_ocfp = *((void **) fp - 2); + lisp_ra = *((void **) fp - 1); lisp_valid_p = (lisp_ocfp > fp && stack_pointer_p(lisp_ocfp) diff --git a/src/runtime/x86-64-assem.S b/src/runtime/x86-64-assem.S index fb12a06..64f04ee 100644 --- a/src/runtime/x86-64-assem.S +++ b/src/runtime/x86-64-assem.S @@ -210,8 +210,9 @@ Ldone: /* Alloc new frame. */ mov %rsp,%rbx # The current sp marks start of new frame. + push %rbp # dummy for return address push %rbp # fp in save location S0 - sub $16,%rsp # Ensure 3 slots are allocated, one above. + sub $8,%rsp # Ensure 3 slots are allocated, two above. mov %rbx,%rbp # Switch to new frame. Lcall: diff --git a/src/runtime/x86-assem.S b/src/runtime/x86-assem.S index 8101da2..04c6031 100644 --- a/src/runtime/x86-assem.S +++ b/src/runtime/x86-assem.S @@ -273,8 +273,9 @@ Ldone: /* Alloc new frame. */ mov %esp,%ebx # The current sp marks start of new frame. - push %ebp # fp in save location S0 - sub $8,%esp # Ensure 3 slots are allocated, one above. + push %ebp # dummy for return address + push %ebp # fp in save location S1 + sub $4,%esp # Ensure 3 slots are allocated, two above. mov %ebx,%ebp # Switch to new frame. call *CLOSURE_FUN_OFFSET(%eax) diff --git a/version.lisp-expr b/version.lisp-expr index 19cf88c..091d37f 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.27.10" +"1.0.27.11" -- 1.7.10.4