From: Nikodemus Siivola Date: Fri, 14 Mar 2008 20:10:17 +0000 (+0000) Subject: 1.0.15.33: implement SB-VM::WITH-CYCLE-COUNTER for x86 and x86-64 X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=f866d5d986cc920a8823549df5045b8182e7d92d;p=sbcl.git 1.0.15.33: implement SB-VM::WITH-CYCLE-COUNTER for x86 and x86-64 * For potential future exporting from SB-SYS. * Thanks to Paul Khuong and the CMUCL team. --- diff --git a/src/code/unix.lisp b/src/code/unix.lisp index a26a951..f5f28ca 100644 --- a/src/code/unix.lisp +++ b/src/code/unix.lisp @@ -113,9 +113,9 @@ SYSCALL-FORM. Repeat evaluation of SYSCALL-FORM if it is interrupted." #!-win32 (define-alien-routine ("getenv" posix-getenv) c-string - "Return the \"value\" part of the environment string \"name=value\" which - corresponds to NAME, or NIL if there is none." - (name c-string)) + "Return the \"value\" part of the environment string \"name=value\" which +corresponds to NAME, or NIL if there is none." + (name c-string)) ;;; from stdio.h diff --git a/src/compiler/x86-64/insts.lisp b/src/compiler/x86-64/insts.lisp index 3f0c6f0..75cf555 100644 --- a/src/compiler/x86-64/insts.lisp +++ b/src/compiler/x86-64/insts.lisp @@ -542,6 +542,10 @@ (accum :type 'accum) (imm)) +(sb!disassem:define-instruction-format (two-bytes 16 + :default-printer '(:name)) + (op :fields (list (byte 8 0) (byte 8 8)))) + ;;; A one-byte instruction with a #x66 prefix, used to indicate an ;;; operand size of :word. 
(sb!disassem:define-instruction-format (x66-byte 16 @@ -2967,3 +2971,17 @@ (emit-byte segment #x0f) (emit-byte segment #xae) (emit-ea segment dst 3))) + +;;;; Miscellany + +(define-instruction cpuid (segment) + (:printer two-bytes ((op '(#b00001111 #b10100010)))) + (:emitter + (emit-byte segment #b00001111) + (emit-byte segment #b10100010))) + +(define-instruction rdtsc (segment) + (:printer two-bytes ((op '(#b00001111 #b00110001)))) + (:emitter + (emit-byte segment #b00001111) + (emit-byte segment #b00110001))) diff --git a/src/compiler/x86-64/system.lisp b/src/compiler/x86-64/system.lisp index 86d80df..88f9997 100644 --- a/src/compiler/x86-64/system.lisp +++ b/src/compiler/x86-64/system.lisp @@ -290,7 +290,70 @@ (note-next-instruction vop :internal-error) (inst wait))) -;;;; dynamic vop count collection support +;;;; Miscellany + +;;; the RDTSC instruction (present on Pentium processors and +;;; successors) allows you to access the time-stamp counter, a 64-bit +;;; model-specific register that counts executed cycles. The +;;; instruction returns the low cycle count in EAX and high cycle +;;; count in EDX. +;;; +;;; In order to obtain more significant results on out-of-order +;;; processors (such as the Pentium II and later), we issue a +;;; serializing CPUID instruction before and after reading the cycle +;;; counter. This instruction is used for its side effect of emptying +;;; the processor pipeline, to ensure that the RDTSC instruction is +;;; executed once all pending instructions have been completed and +;;; before any others. CPUID writes to EBX and ECX in addition to EAX +;;; and EDX, so they need to be added as temporaries. +;;; +;;; Note that cache effects mean that the cycle count can vary for +;;; different executions of the same code (it counts cycles, not +;;; retired instructions). Furthermore, the results are per-processor +;;; and not per-process, so are unreliable on multiprocessor machines +;;; where processes can migrate between processors. 
+;;; +;;; This method of obtaining a cycle count has the advantage of being +;;; very fast (around 20 cycles), and of not requiring a system call. +;;; However, you need to know your processor's clock speed to translate +;;; this into real execution time. +;;; +;;; FIXME: Think about the WITH-CYCLE-COUNTER interface a bit, and then +;;; perhaps export it from SB-SYS. + +(defknown %read-cycle-counter () (values (unsigned-byte 32) (unsigned-byte 32)) ()) + +(define-vop (%read-cycle-counter) + (:policy :fast-safe) + (:translate %read-cycle-counter) + (:temporary (:sc unsigned-reg :offset eax-offset :target lo) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :target hi) edx) + (:temporary (:sc unsigned-reg :offset ebx-offset) ebx) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) + (:ignore ebx ecx) + (:results (hi :scs (unsigned-reg)) + (lo :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:generator 5 + (zeroize eax) + (inst cpuid) + (inst rdtsc) + (inst push edx) + (inst push eax) + (zeroize eax) + (inst cpuid) + (inst pop lo) + (inst pop hi))) + +(defmacro with-cycle-counter (&body body) + "Returns the primary value of BODY as the primary value, and the +number of CPU cycles elapsed as secondary value. EXPERIMENTAL."
+ (with-unique-names (hi0 hi1 lo0 lo1) + `(multiple-value-bind (,hi0 ,lo0) (%read-cycle-counter) + (values (locally ,@body) + (multiple-value-bind (,hi1 ,lo1) (%read-cycle-counter) + (+ (ash (- ,hi1 ,hi0) 32) + (- ,lo1 ,lo0))))))) #!+sb-dyncount (define-vop (count-me) diff --git a/src/compiler/x86/insts.lisp b/src/compiler/x86/insts.lisp index ce271e6..c51745c 100644 --- a/src/compiler/x86/insts.lisp +++ b/src/compiler/x86/insts.lisp @@ -378,6 +378,10 @@ (accum :type 'accum) (imm)) +(sb!disassem:define-instruction-format (two-bytes 16 + :default-printer '(:name)) + (op :fields (list (byte 8 0) (byte 8 8)))) + ;;; Same as simple, but with direction bit (sb!disassem:define-instruction-format (simple-dir 8 :include 'simple) (op :field (byte 6 2)) @@ -2731,3 +2735,17 @@ (:emitter (emit-byte segment #b11011001) (emit-byte segment #b11101101))) + +;;;; Miscellany + +(define-instruction cpuid (segment) + (:printer two-bytes ((op '(#b00001111 #b10100010)))) + (:emitter + (emit-byte segment #b00001111) + (emit-byte segment #b10100010))) + +(define-instruction rdtsc (segment) + (:printer two-bytes ((op '(#b00001111 #b00110001)))) + (:emitter + (emit-byte segment #b00001111) + (emit-byte segment #b00110001))) diff --git a/src/compiler/x86/system.lisp b/src/compiler/x86/system.lisp index 597e3c2..ca691a8 100644 --- a/src/compiler/x86/system.lisp +++ b/src/compiler/x86/system.lisp @@ -284,7 +284,70 @@ (note-next-instruction vop :internal-error) (inst wait))) -;;;; dynamic vop count collection support +;;;; Miscellany + +;;; the RDTSC instruction (present on Pentium processors and +;;; successors) allows you to access the time-stamp counter, a 64-bit +;;; model-specific register that counts executed cycles. The +;;; instruction returns the low cycle count in EAX and high cycle +;;; count in EDX. 
+;;; +;;; In order to obtain more significant results on out-of-order +;;; processors (such as the Pentium II and later), we issue a +;;; serializing CPUID instruction before and after reading the cycle +;;; counter. This instruction is used for its side effect of emptying +;;; the processor pipeline, to ensure that the RDTSC instruction is +;;; executed once all pending instructions have been completed and +;;; before any others. CPUID writes to EBX and ECX in addition to EAX +;;; and EDX, so they need to be added as temporaries. +;;; +;;; Note that cache effects mean that the cycle count can vary for +;;; different executions of the same code (it counts cycles, not +;;; retired instructions). Furthermore, the results are per-processor +;;; and not per-process, so are unreliable on multiprocessor machines +;;; where processes can migrate between processors. +;;; +;;; This method of obtaining a cycle count has the advantage of being +;;; very fast (around 20 cycles), and of not requiring a system call. +;;; However, you need to know your processor's clock speed to translate +;;; this into real execution time. +;;; +;;; FIXME: Think about the WITH-CYCLE-COUNTER interface a bit, and then +;;; perhaps export it from SB-SYS.
+ +(defknown %read-cycle-counter () (values (unsigned-byte 32) (unsigned-byte 32)) ()) + +(define-vop (%read-cycle-counter) + (:policy :fast-safe) + (:translate %read-cycle-counter) + (:temporary (:sc unsigned-reg :offset eax-offset :target lo) eax) + (:temporary (:sc unsigned-reg :offset edx-offset :target hi) edx) + (:temporary (:sc unsigned-reg :offset ebx-offset) ebx) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) + (:ignore ebx ecx) + (:results (hi :scs (unsigned-reg)) + (lo :scs (unsigned-reg))) + (:result-types unsigned-num unsigned-num) + (:generator 5 + (inst xor eax eax) + (inst cpuid) + (inst rdtsc) + (inst push edx) + (inst push eax) + (inst xor eax eax) + (inst cpuid) + (inst pop lo) + (inst pop hi))) + +(defmacro with-cycle-counter (&body body) + "Returns the primary value of BODY as the primary value, and the +number of CPU cycles elapsed as secondary value. EXPERIMENTAL." + (with-unique-names (hi0 hi1 lo0 lo1) + `(multiple-value-bind (,hi0 ,lo0) (%read-cycle-counter) + (values (locally ,@body) + (multiple-value-bind (,hi1 ,lo1) (%read-cycle-counter) + (+ (ash (- ,hi1 ,hi0) 32) + (- ,lo1 ,lo0))))))) #!+sb-dyncount (define-vop (count-me) diff --git a/version.lisp-expr b/version.lisp-expr index 7e0d118..c23cce7 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.15.32" +"1.0.15.33"