* For potential future exporting from SB-SYS.
* Thanks to Paul Khuong and the CMUCL team.
#!-win32
(define-alien-routine ("getenv" posix-getenv) c-string
- "Return the \"value\" part of the environment string \"name=value\" which
- corresponds to NAME, or NIL if there is none."
- (name c-string))
+ "Return the \"value\" part of the environment string \"name=value\" which
+corresponds to NAME, or NIL if there is none."
+ (name c-string))
\f
;;; from stdio.h
(accum :type 'accum)
(imm))
+;;; A 16-bit (two-byte) instruction format whose opcode is described
+;;; by two 8-bit fields, (BYTE 8 0) and (BYTE 8 8). The default
+;;; printer shows only the instruction name.
+(sb!disassem:define-instruction-format (two-bytes 16
+ :default-printer '(:name))
+ (op :fields (list (byte 8 0) (byte 8 8))))
+
;;; A one-byte instruction with a #x66 prefix, used to indicate an
;;; operand size of :word.
(sb!disassem:define-instruction-format (x66-byte 16
(emit-byte segment #x0f)
(emit-byte segment #xae)
(emit-ea segment dst 3)))
+
+;;;; Miscellany
+
+;;; CPUID takes no operands; it is emitted as the two opcode bytes
+;;; #x0F #xA2 and disassembled through the TWO-BYTES format.
+(define-instruction cpuid (segment)
+  (:printer two-bytes ((op '(#x0f #xa2))))
+  (:emitter
+   (emit-byte segment #x0f)
+   (emit-byte segment #xa2)))
+
+;;; RDTSC takes no operands; it is emitted as the two opcode bytes
+;;; #x0F #x31 and disassembled through the TWO-BYTES format.
+(define-instruction rdtsc (segment)
+  (:printer two-bytes ((op '(#x0f #x31))))
+  (:emitter
+   (emit-byte segment #x0f)
+   (emit-byte segment #x31)))
(note-next-instruction vop :internal-error)
(inst wait)))
\f
-;;;; dynamic vop count collection support
+;;;; Miscellany
+
+;;; the RDTSC instruction (present on Pentium processors and
+;;; successors) allows you to access the time-stamp counter, a 64-bit
+;;; model-specific register that counts executed cycles. The
+;;; instruction returns the low cycle count in EAX and high cycle
+;;; count in EDX.
+;;;
+;;; In order to obtain more significant results on out-of-order
+;;; processors (such as the Pentium II and later), we issue a
+;;; serializing CPUID instruction before and after reading the cycle
+;;; counter. This instruction is used for its side effect of emptying
+;;; the processor pipeline, to ensure that the RDTSC instruction is
+;;; executed once all pending instructions have been completed and
+;;; before any others. CPUID writes to EBX and ECX in addition to EAX
+;;; and EDX, so they need to be added as temporaries.
+;;;
+;;; Note that cache effects mean that the cycle count can vary for
+;;; different executions of the same code (it counts cycles, not
+;;; retired instructions). Furthermore, the results are per-processor
+;;; and not per-process, so are unreliable on multiprocessor machines
+;;; where processes can migrate between processors.
+;;;
+;;; This method of obtaining a cycle count has the advantage of being
+;;; very fast (around 20 cycles), and of not requiring a system call.
+;;; However, you need to know your processor's clock speed to translate
+;;; this into real execution time.
+;;;
+;;; FIXME: Think about the WITH-CYCLE-COUNTER interface a bit, and then
+;;; perhaps export it from SB-SYS.
+
+;;; Declare %READ-CYCLE-COUNTER to the compiler: it takes no arguments
+;;; and returns two (UNSIGNED-BYTE 32) values; the attribute list is
+;;; empty.
+(defknown %read-cycle-counter ()
+    (values (unsigned-byte 32) (unsigned-byte 32))
+  ())
+
+;;; VOP translating %READ-CYCLE-COUNTER: runs RDTSC bracketed by CPUID
+;;; instructions and returns the counter as two unsigned values, HI
+;;; first and LO second.
+(define-vop (%read-cycle-counter)
+ (:policy :fast-safe)
+ (:translate %read-cycle-counter)
+ ;; EAX and EDX are pinned by offset; they are the registers that are
+ ;; pushed after RDTSC and later popped into LO and HI.
+ (:temporary (:sc unsigned-reg :offset eax-offset :target lo) eax)
+ (:temporary (:sc unsigned-reg :offset edx-offset :target hi) edx)
+ ;; EBX and ECX are never referenced by the generator (hence the
+ ;; :IGNORE); they are reserved because CPUID overwrites them.
+ (:temporary (:sc unsigned-reg :offset ebx-offset) ebx)
+ (:temporary (:sc unsigned-reg :offset ecx-offset) ecx)
+ (:ignore ebx ecx)
+ (:results (hi :scs (unsigned-reg))
+ (lo :scs (unsigned-reg)))
+ (:result-types unsigned-num unsigned-num)
+ (:generator 5
+ ;; ZEROIZE is defined elsewhere; presumably it clears EAX so that
+ ;; CPUID gets a known input -- TODO confirm.
+ (zeroize eax)
+ (inst cpuid)
+ (inst rdtsc)
+ ;; Save the counter on the stack, because the second CPUID below
+ ;; overwrites EAX and EDX again.
+ (inst push edx)
+ (inst push eax)
+ (zeroize eax)
+ (inst cpuid)
+ ;; Pop in reverse push order: the saved EAX goes to LO, the saved
+ ;; EDX goes to HI.
+ (inst pop lo)
+ (inst pop hi)))
+
+;;; Expands into: read the counter, run BODY, read the counter again,
+;;; and only then do the subtraction, so the arithmetic itself is not
+;;; part of the measured interval.
+(defmacro with-cycle-counter (&body body)
+  "Returns the primary value of BODY as the primary value, and the
+number of CPU cycles elapsed as secondary value. EXPERIMENTAL."
+  (with-unique-names (start-hi start-lo end-hi end-lo)
+    `(multiple-value-bind (,start-hi ,start-lo) (%read-cycle-counter)
+       (values (locally ,@body)
+               (multiple-value-bind (,end-hi ,end-lo) (%read-cycle-counter)
+                 ;; 64-bit difference assembled from the two 32-bit halves.
+                 (+ (- ,end-lo ,start-lo)
+                    (ash (- ,end-hi ,start-hi) 32)))))))
#!+sb-dyncount
(define-vop (count-me)
(accum :type 'accum)
(imm))
+;;; A 16-bit (two-byte) instruction format whose opcode is described
+;;; by two 8-bit fields, (BYTE 8 0) and (BYTE 8 8). The default
+;;; printer shows only the instruction name.
+(sb!disassem:define-instruction-format (two-bytes 16
+ :default-printer '(:name))
+ (op :fields (list (byte 8 0) (byte 8 8))))
+
;;; Same as simple, but with direction bit
(sb!disassem:define-instruction-format (simple-dir 8 :include 'simple)
(op :field (byte 6 2))
(:emitter
(emit-byte segment #b11011001)
(emit-byte segment #b11101101)))
+
+;;;; Miscellany
+
+;;; CPUID takes no operands; it is emitted as the two opcode bytes
+;;; #x0F #xA2 and disassembled through the TWO-BYTES format.
+(define-instruction cpuid (segment)
+  (:printer two-bytes ((op '(#x0f #xa2))))
+  (:emitter
+   (emit-byte segment #x0f)
+   (emit-byte segment #xa2)))
+
+;;; RDTSC takes no operands; it is emitted as the two opcode bytes
+;;; #x0F #x31 and disassembled through the TWO-BYTES format.
+(define-instruction rdtsc (segment)
+  (:printer two-bytes ((op '(#x0f #x31))))
+  (:emitter
+   (emit-byte segment #x0f)
+   (emit-byte segment #x31)))
(note-next-instruction vop :internal-error)
(inst wait)))
\f
-;;;; dynamic vop count collection support
+;;;; Miscellany
+
+;;; the RDTSC instruction (present on Pentium processors and
+;;; successors) allows you to access the time-stamp counter, a 64-bit
+;;; model-specific register that counts executed cycles. The
+;;; instruction returns the low cycle count in EAX and high cycle
+;;; count in EDX.
+;;;
+;;; In order to obtain more significant results on out-of-order
+;;; processors (such as the Pentium II and later), we issue a
+;;; serializing CPUID instruction before and after reading the cycle
+;;; counter. This instruction is used for its side effect of emptying
+;;; the processor pipeline, to ensure that the RDTSC instruction is
+;;; executed once all pending instructions have been completed and
+;;; before any others. CPUID writes to EBX and ECX in addition to EAX
+;;; and EDX, so they need to be added as temporaries.
+;;;
+;;; Note that cache effects mean that the cycle count can vary for
+;;; different executions of the same code (it counts cycles, not
+;;; retired instructions). Furthermore, the results are per-processor
+;;; and not per-process, so are unreliable on multiprocessor machines
+;;; where processes can migrate between processors.
+;;;
+;;; This method of obtaining a cycle count has the advantage of being
+;;; very fast (around 20 cycles), and of not requiring a system call.
+;;; However, you need to know your processor's clock speed to translate
+;;; this into real execution time.
+;;;
+;;; FIXME: Think about the WITH-CYCLE-COUNTER interface a bit, and then
+;;; perhaps export it from SB-SYS.
+
+;;; Declare %READ-CYCLE-COUNTER to the compiler: it takes no arguments
+;;; and returns two (UNSIGNED-BYTE 32) values; the attribute list is
+;;; empty.
+(defknown %read-cycle-counter ()
+    (values (unsigned-byte 32) (unsigned-byte 32))
+  ())
+
+;;; VOP translating %READ-CYCLE-COUNTER: runs RDTSC bracketed by CPUID
+;;; instructions and returns the counter as two unsigned values, HI
+;;; first and LO second.
+(define-vop (%read-cycle-counter)
+ (:policy :fast-safe)
+ (:translate %read-cycle-counter)
+ ;; EAX and EDX are pinned by offset; they are the registers that are
+ ;; pushed after RDTSC and later popped into LO and HI.
+ (:temporary (:sc unsigned-reg :offset eax-offset :target lo) eax)
+ (:temporary (:sc unsigned-reg :offset edx-offset :target hi) edx)
+ ;; EBX and ECX are never referenced by the generator (hence the
+ ;; :IGNORE); they are reserved because CPUID overwrites them.
+ (:temporary (:sc unsigned-reg :offset ebx-offset) ebx)
+ (:temporary (:sc unsigned-reg :offset ecx-offset) ecx)
+ (:ignore ebx ecx)
+ (:results (hi :scs (unsigned-reg))
+ (lo :scs (unsigned-reg)))
+ (:result-types unsigned-num unsigned-num)
+ (:generator 5
+ ;; Clear EAX before CPUID so that it runs with a known input.
+ (inst xor eax eax)
+ (inst cpuid)
+ (inst rdtsc)
+ ;; Save the counter on the stack, because the second CPUID below
+ ;; overwrites EAX and EDX again.
+ (inst push edx)
+ (inst push eax)
+ (inst xor eax eax)
+ (inst cpuid)
+ ;; Pop in reverse push order: the saved EAX goes to LO, the saved
+ ;; EDX goes to HI.
+ (inst pop lo)
+ (inst pop hi)))
+
+;;; Expands into: read the counter, run BODY, read the counter again,
+;;; and only then do the subtraction, so the arithmetic itself is not
+;;; part of the measured interval.
+(defmacro with-cycle-counter (&body body)
+  "Returns the primary value of BODY as the primary value, and the
+number of CPU cycles elapsed as secondary value. EXPERIMENTAL."
+  (with-unique-names (start-hi start-lo end-hi end-lo)
+    `(multiple-value-bind (,start-hi ,start-lo) (%read-cycle-counter)
+       (values (locally ,@body)
+               (multiple-value-bind (,end-hi ,end-lo) (%read-cycle-counter)
+                 ;; 64-bit difference assembled from the two 32-bit halves.
+                 (+ (- ,end-lo ,start-lo)
+                    (ash (- ,end-hi ,start-hi) 32)))))))
#!+sb-dyncount
(define-vop (count-me)
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.15.32"
+"1.0.15.33"