From: William Harold Newman Date: Wed, 28 Mar 2007 17:59:16 +0000 (+0000) Subject: 1.0.4.8: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=1cfea5d9f744f2d10bcf69c3eeb2d99a3c5ad8bc;p=sbcl.git 1.0.4.8: "What's old is new again" or "what was he thinking?" At least in the wisdom of hindsight I can see that my new RANDOM is not ready for release, so I have reverted it. This version should be essentially 1.0.4.5 with a new version number. (It also has an extra, orphaned file in src/compiler/integer-tran.lisp. When I revisit the problem later, that should be cleaned up one way or another, meanwhile I don't feel like deleting it and then likely recreating it later.) Sorry about the confusion.:-( --- diff --git a/NEWS b/NEWS index 99c8f40..bbe8edc 100644 --- a/NEWS +++ b/NEWS @@ -6,19 +6,13 @@ changes in sbcl-1.0.5 relative to sbcl-1.0.4: * minor incompatible change: changed experimental JOIN-THREAD interface * documentation: the manual now lists reader and writer methods in class slot documentation sections. (thanks to Richard M Kreuter) - * documentation: unwinding from asynchronous events has been + * documentation: unwinding from asyncronous events has been documented as unsafe. * documentation: SB-SYS:WITHOUT-GCING has been documented as unsafe in multithreaded application code. * bug fix: GC deadlocks from asynchronous interrupts has been fixed by disabling interrupts for the duration of any SB-SYS:WITHOUT-GCING section. - * bug fix: RANDOM of large INTEGER is no longer slightly biased. (It - used to be biased by as much as about 1/2000; many algorithms aren't - sensitive to that, but Metropolis Monte Carlo algorithms are.) This - involved a sizable rewrite, so if you think there's a new bug or - performance problem in the code, you're probably right, please - report it. changes in sbcl-1.0.4 relative to sbcl-1.0.3: * new platform: experimental support for x86-64/darwin (MacOS). diff --git a/build-order.lisp-expr b/build-order.lisp-expr index 29cf8ca..9987d27 100644 --- a/build-order.lisp-expr +++ b/build-order.lisp-expr @@ -514,7 +514,6 @@ ("src/compiler/typetran") ("src/compiler/generic/vm-typetran") ("src/compiler/float-tran") - ("src/compiler/integer-tran") ("src/compiler/saptran") ("src/compiler/srctran") ("src/compiler/generic/vm-tran") diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 5f49beb..fe1ab00 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1541,13 +1541,10 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "STANDARD-CLASSOID" "CLASSOID-OF" "MAKE-STANDARD-CLASSOID" "CLASSOID-CELL-TYPEP" "FIND-CLASSOID-CELL" "EXTRACT-FUN-TYPE" - "%RANDOM-WORD" "%RANDOM32" - "%INCLUSIVE-RANDOM-FIXNUM" "%INCLUSIVE-RANDOM-INTEGER" - "%INCLUSIVE-RANDOM-INTEGER-ACCEPT-REJECT" "%RANDOM-BITS" "%RANDOM-DOUBLE-FLOAT" #!+long-float "%RANDOM-LONG-FLOAT" "%RANDOM-SINGLE-FLOAT" "STATIC-CLASSOID" - "%FUNCALLABLE-INSTANCE-INFO" + "%FUNCALLABLE-INSTANCE-INFO" "RANDOM-CHUNK" "BIG-RANDOM-CHUNK" "LAYOUT-CLOS-HASH-MAX" "CLASSOID-CELL-NAME" "BUILT-IN-CLASSOID-DIRECT-SUPERCLASSES" "BUILT-IN-CLASSOID-TRANSLATION" "RANDOM-LAYOUT-CLOS-HASH" @@ -1556,7 +1553,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "%SET-FUNCALLABLE-INSTANCE-LAYOUT" "BASIC-STRUCTURE-CLASSOID" "CLASSOID-CELL-CLASSOID" "REGISTER-LAYOUT" - "FUNCALLABLE-INSTANCE" + "FUNCALLABLE-INSTANCE" "RANDOM-FIXNUM-MAX" "MAKE-STATIC-CLASSOID" "INSTANCE-LAMBDA" "%MAKE-SYMBOL" "%FUNCALLABLE-INSTANCE-FUNCTION" "SYMBOL-HASH" diff --git a/src/code/random.lisp b/src/code/random.lisp index 2ff7336..373a3b2 100644 --- a/src/code/random.lisp +++ b/src/code/random.lisp @@ -9,9 +9,21 @@ (in-package "SB!KERNEL") -;;; our implementation of the RANDOM-STATE type specified by ANSI CL +;;; the size of the chunks returned by RANDOM-CHUNK +(def!constant random-chunk-length 32) + +;;; the amount that we overlap chunks by when building a large integer +;;; to make up for the loss of randomness in the low bits +(def!constant random-integer-overlap 3) + +;;; extra bits of randomness that we generate before taking the value MOD the +;;; limit, to avoid loss of randomness near the limit +(def!constant random-integer-extra-bits 10) + +;;; the largest fixnum we can compute from one chunk of bits +(def!constant random-fixnum-max + (1- (ash 1 (- random-chunk-length random-integer-extra-bits)))) + (sb!xc:defstruct (random-state (:constructor %make-random-state) - ;; Shallow copy would be wrong: we must - ;; effectively COPY-SEQ the STATE slot. - (:copier nil)) + (:copier nil)) ; since shallow copy is wrong (state (init-random-state) :type (simple-array (unsigned-byte 32) (627)))) diff --git a/src/code/target-random.lisp b/src/code/target-random.lisp index 21668a4..f0b740f 100644 --- a/src/code/target-random.lisp +++ b/src/code/target-random.lisp @@ -69,7 +69,7 @@ (defun make-random-state (&optional state) #!+sb-doc - "Make a RANDOM-STATE object. If STATE is not supplied, return a copy + "Make a random state object. If STATE is not supplied, return a copy of the default random state. If STATE is a random state, then return a copy of it. If STATE is T then return a random state generated from the universal time." @@ -102,8 +102,7 @@ ;;; This function generates a 32bit integer between 0 and #xffffffff ;;; inclusive. -(defconstant n-random-mt19937-bits 32) -(declaim (inline %random32)) +#!-sb-fluid (declaim (inline random-chunk)) ;;; portable implementation (defconstant mt19937-n 624) (defconstant mt19937-m 397) @@ -139,7 +138,7 @@ (ash y -1) (aref state (logand y 1))))) (values)) #!-x86 -(defun %random32 (state) +(defun random-chunk (state) (declare (type random-state state)) (let* ((state (random-state-state state)) (k (aref state 2))) @@ -162,29 +161,16 @@ ;;; My inclination is to get rid of the nonportable implementation ;;; unless the performance difference is just enormous. #!+x86 -(defun %random32 (state) +(defun random-chunk (state) (declare (type random-state state)) (sb!vm::random-mt19937 (random-state-state state))) -(declaim (inline %random-word)) -(defun %random-word (state) - ;; KLUDGE: This #.(ECASE ...) is not the most flexible and elegant - ;; construct one could imagine. It is intended as a quick fix to stand - ;; in for The Right Thing which can't be coded so quickly. - ;; - ;; The Right Thing: The Mersenne Twister has been generalized to 64 - ;; bits, and seems likely to be generalized to any future common CPU - ;; word width as well. Thus, it should be straightforward to - ;; implement this as "Return one sample from the MT variant - ;; corresponding to SB!VM:N-WORD-BITS." - ;; - ;; Meanwhile: Mock that up by pasting together as many samples from - ;; the 32-bit Mersenne Twister as necessary. - #.(ecase sb!vm:n-word-bits - (32 '(%random32 state)) - (64 '(logior - (%random32 state) - (ash (%random32 state) 32))))) +#!-sb-fluid (declaim (inline big-random-chunk)) +(defun big-random-chunk (state) + (declare (type random-state state)) + (logand (1- (expt 2 64)) + (logior (ash (random-chunk state) 32) + (random-chunk state)))) ;;; Handle the single or double float case of RANDOM. We generate a ;;; float between 0.0 and 1.0 by clobbering the significand of 1.0 @@ -197,16 +183,10 @@ (defun %random-single-float (arg state) (declare (type (single-float (0f0)) arg) (type random-state state)) - ;; KLUDGE: The hardwired-to-32 hackery here could be replaced by a - ;; call to %RANDOM-BITS if there were sufficiently smart - ;; DEFTRANSFORMs for it, but in sbcl-1.0.4.epsilon it looks as - ;; though it would be a performance disaster. - (aver (<= sb!vm:single-float-digits 32)) (* arg (- (make-single-float - (dpb (ash - (%random32 state) - (- sb!vm:single-float-digits 32)) + (dpb (ash (random-chunk state) + (- sb!vm:single-float-digits random-chunk-length)) sb!vm:single-float-significand-byte (single-float-bits 1.0))) 1.0))) @@ -214,21 +194,25 @@ (double-float 0d0)) %random-double-float)) +;;; 32-bit version +#!+nil +(defun %random-double-float (arg state) + (declare (type (double-float (0d0)) arg) + (type random-state state)) + (* (float (random-chunk state) 1d0) (/ 1d0 (expt 2 32)))) + ;;; 53-bit version #!-x86 (defun %random-double-float (arg state) (declare (type (double-float (0d0)) arg) (type random-state state)) - ;; KLUDGE: As in %RANDOM-SIMNGLE-FLOAT, as of sbcl-1.0.4.epsilon - ;; calling %RANDOM-BITS doesn't look reasonable, so we bang bits. - (aver (<= sb!vm:single-float-digits 32)) (* arg (- (sb!impl::make-double-float - (dpb (ash (%random32 state) - (- sb!vm:double-float-digits 64)) + (dpb (ash (random-chunk state) + (- sb!vm:double-float-digits random-chunk-length 32)) sb!vm:double-float-significand-byte (sb!impl::double-float-high-bits 1d0)) - (%random32 state)) + (random-chunk state)) 1d0))) ;;; using a faster inline VOP @@ -240,140 +224,34 @@ (* arg (- (sb!impl::make-double-float (dpb (ash (sb!vm::random-mt19937 state-vector) - (- sb!vm:double-float-digits - n-random-mt19937-bits + (- sb!vm:double-float-digits random-chunk-length sb!vm:n-word-bits)) sb!vm:double-float-significand-byte (sb!impl::double-float-high-bits 1d0)) (sb!vm::random-mt19937 state-vector)) 1d0)))) + ;;;; random integers -;;; a bitmask M wide enough that (= (LOGAND INCLUSIVE-LIMIT M) INCLUSIVE-LIMIT) -(declaim (inline %inclusive-random-integer-mask)) -(defun %inclusive-random-integer-mask (inclusive-limit) - (1- (ash 1 (integer-length inclusive-limit)))) - -;;; Transform a uniform sample from an at-least-as-large range into a -;;; random sample in [0,INCLUSIVE-LIMIT] range by throwing away -;;; too-big samples. -;;; -;;; Up through sbcl-1.0.4, the (RANDOM INTEGER) worked by taking (MOD -;;; RAW-MERSENNE-OUTPUT INTEGER). That introduces enough bias that it -;;; is unsuitable for some calculations (like the Metropolis Monte -;;; Carlo simulations that I, WHN, worked on in grad school): in the -;;; sbcl-1.0.4, the expectation value of a sample was (in the worst -;;; part of the range) more than 1.0001 times the ideal expectation -;;; value. Perhaps that was even ANSI-conformant, because ANSI says only -;;; An approximately uniform choice distribution is used. If -;;; LIMIT is an integer, each of the possible results occurs -;;; with (approximate) probability 1/LIMIT. -;;; But I (WHN again) said "yuck," so these days we try to get a -;;; truly uniform distribution by translating RAW-MERSENNE-OUTPUT to -;;; our output using the second method recommended by -;;; : -;;; In the rare case that the rounding-off error becomes a problem, -;;; obtain minimum integer n such that N<=2^n, generate integer -;;; random numbers, take the most significant n bits, and discard -;;; those more than or equal to N. -;;; (The first method recommended there differs from the sbcl-1.0.4 -;;; method: the recommended method gives slightly different weights -;;; to different integers, but distributes the overweighted integers -;;; evenly across the range of outputs so that any skew would be -;;; of order (/ MOST-POSITIVE-MACHINE-WORD), rather than the (/ 10000) -;;; or so achieved by sbcl-1.0.4. That skew would probably be -;;; acceptable --- it seems to be exactly the kind of deviation that -;;; might have been anticipated in the ANSI CL standard. However, that -;;; recommended calculation involves multiplication and division of -;;; two-machine-word bignums, which is hard for us to do efficiently -;;; here. Without special low-level hacking to support such short -;;; bignums without consing, the accept-reject solution is not only -;;; more exact, but also likely more efficient.) -(defmacro %inclusive-random-integer-accept-reject (raw-mersenne-output-expr - inclusive-limit) - (with-unique-names (raw-mersenne-output inclusive-limit-once) - `(loop - with ,inclusive-limit-once = ,inclusive-limit - for ,raw-mersenne-output = ,raw-mersenne-output-expr - until (<= ,raw-mersenne-output ,inclusive-limit-once) - finally (return ,raw-mersenne-output)))) - -;;; an UNSIGNED-BYTE of N-WORDS words sampled from the Mersenne twister -(declaim (inline %random-words)) -(defun %random-words (n-words state) - ;; KLUDGE: This algorithm will cons O(N^2) words when constructing - ;; an N-word result. To do better should be fundamentally easy, we - ;; just need to do some low-level hack preallocating the bignum and - ;; writing its words one by one. - ;; - ;; Note: The old sbcl-1.0.4 code did its analogous operation using a - ;; mysterious RANDOM-INTEGER-OVERLAP parameter, "the amount that we - ;; overlap chunks by when building a large integer to make up for - ;; the loss of randomness in the low bits." No such thing is called - ;; for on - ;; : - ;; they say there that it's OK just to concatenate words of twister - ;; output with no overlap. Thus, crossing our fingers and hoping that - ;; the previous RNG author didn't have some subtle reason to need - ;; RANDOM-INTEGER-OVERLAP that we know not of, we just concatenate - ;; chunks. - (loop repeat n-words - for result = 0 then (logior (ash result sb!vm:n-word-bits) - (%random-word state)) - finally (return result))) - -;;; an UNSIGNED-BYTE of N-BITS bits sampled from the Mersenne twister -(declaim (inline %random-bits)) -(defun %random-bits (n-bits state) - (multiple-value-bind (n-full-words n-extra-bits) - (floor n-bits sb!vm:n-word-bits) - (let ((full-chunks (%random-words n-full-words state))) - (if (zerop n-extra-bits) - full-chunks - (logior full-chunks - (ash (logand (%random-word state) - (1- (ash 1 n-extra-bits))) - (* n-full-words - sb!vm:n-word-bits))))))) - -;;; the guts of (RANDOM (1+ INCLUSIVE-LIMIT)) -(defun %inclusive-random-integer (inclusive-limit state) - (declare (optimize speed (space 1))) ; to ensure DEFTRANSFORM is enabled - (%inclusive-random-integer inclusive-limit state)) - -;;; the guts of RANDOM for the known-to-return-FIXNUM case -;;; -;;; We use INCLUSIVE-LIMIT instead of the (exclusive) LIMIT of RANDOM, -;;; because we want it to be a FIXNUM even for the possibly-common -;;; case of (RANDOM (1+ MOST-POSITIVE-FIXNUM)). (That case is what -;;; one might use for generating random hash values, e.g.) -;;; It also turns out to be just what's needed for INTEGER-LENGTH. -(declaim (maybe-inline %inclusive-random-fixnum)) -(defun %inclusive-random-fixnum (inclusive-limit state) - (declare (type (and fixnum unsigned-byte) inclusive-limit)) - (let (;; If this calculation needs to be optimized further, a good - ;; start might be a DEFTRANSFORM which picks off the case of - ;; constant LIMIT and precomputes the MASK at compile time. - (mask (%inclusive-random-integer-mask inclusive-limit))) - (%inclusive-random-integer-accept-reject (logand (%random-word state) mask) - inclusive-limit))) - -;;;; outer, dynamically-typed interface +(defun %random-integer (arg state) + (declare (type (integer 1) arg) (type random-state state)) + (let ((shift (- random-chunk-length random-integer-overlap))) + (do ((bits (random-chunk state) + (logxor (ash bits shift) (random-chunk state))) + (count (+ (integer-length arg) + (- random-integer-extra-bits shift)) + (- count shift))) + ((minusp count) + (rem bits arg)) + (declare (fixnum count))))) (defun random (arg &optional (state *random-state*)) - (declare (inline %random-single-float - %random-double-float + (declare (inline %random-single-float %random-double-float #!+long-float %random-long-float)) (cond - ((and (fixnump arg) (plusp arg)) - (locally - ;; The choice to inline this very common case of - ;; %INCLUSIVE-RANDOM-FIXNUM and not the less-common call - ;; below was just a guess (by WHN 2007-03-27), not based on - ;; benchmarking or anything. - (declare (inline %inclusive-random-fixnum)) - (%inclusive-random-fixnum (1- arg) state))) + ((and (fixnump arg) (<= arg random-fixnum-max) (> arg 0)) + (rem (random-chunk state) arg)) ((and (typep arg 'single-float) (> arg 0.0f0)) (%random-single-float arg state)) ((and (typep arg 'double-float) (> arg 0.0d0)) @@ -381,10 +259,8 @@ #!+long-float ((and (typep arg 'long-float) (> arg 0.0l0)) (%random-long-float arg state)) - ((and (integerp arg) (plusp arg)) - (if (= arg (1+ most-positive-fixnum)) - (%inclusive-random-fixnum most-positive-fixnum state) - (%inclusive-random-integer (1- arg) state))) + ((and (integerp arg) (> arg 0)) + (%random-integer arg state)) (t (error 'simple-type-error :expected-type '(or (integer 1) (float (0))) :datum arg diff --git a/src/compiler/float-tran.lisp b/src/compiler/float-tran.lisp index 1cb39f6..a4672e5 100644 --- a/src/compiler/float-tran.lisp +++ b/src/compiler/float-tran.lisp @@ -43,6 +43,59 @@ '(,fun num (or state *random-state*))))) (frob %random-single-float single-float) (frob %random-double-float double-float)) + +;;; Mersenne Twister RNG +;;; +;;; FIXME: It's unpleasant to have RANDOM functionality scattered +;;; through the code this way. It would be nice to move this into the +;;; same file as the other RANDOM definitions. +(deftransform random ((num &optional state) + ((integer 1 #.(expt 2 sb!vm::n-word-bits)) &optional *)) + ;; FIXME: I almost conditionalized this as #!+sb-doc. Find some way + ;; of automatically finding #!+sb-doc in proximity to DEFTRANSFORM + ;; to let me scan for places that I made this mistake and didn't + ;; catch myself. + "use inline (UNSIGNED-BYTE 32) operations" + (let ((type (lvar-type num)) + (limit (expt 2 sb!vm::n-word-bits)) + (random-chunk (ecase sb!vm::n-word-bits + (32 'random-chunk) + (64 'sb!kernel::big-random-chunk)))) + (if (numeric-type-p type) + (let ((num-high (numeric-type-high (lvar-type num)))) + (aver num-high) + (cond ((constant-lvar-p num) + ;; Check the worst case sum absolute error for the + ;; random number expectations. + (let ((rem (rem limit num-high))) + (unless (< (/ (* 2 rem (- num-high rem)) + num-high limit) + (expt 2 (- sb!kernel::random-integer-extra-bits))) + (give-up-ir1-transform + "The random number expectations are inaccurate.")) + (if (= num-high limit) + `(,random-chunk (or state *random-state*)) + #!-(or x86 x86-64) + `(rem (,random-chunk (or state *random-state*)) num) + #!+(or x86 x86-64) + ;; Use multiplication, which is faster. + `(values (sb!bignum::%multiply + (,random-chunk (or state *random-state*)) + num))))) + ((> num-high random-fixnum-max) + (give-up-ir1-transform + "The range is too large to ensure an accurate result.")) + #!+(or x86 x86-64) + ((< num-high limit) + `(values (sb!bignum::%multiply + (,random-chunk (or state *random-state*)) + num))) + (t + `(rem (,random-chunk (or state *random-state*)) num)))) + ;; KLUDGE: a relatively conservative treatment, but better + ;; than a bug (reported by PFD sbcl-devel towards the end of + ;; 2004-11. + '(rem (random-chunk (or state *random-state*)) num)))) ;;;; float accessors diff --git a/src/compiler/fndb.lisp b/src/compiler/fndb.lisp index fb05a7c..249ff05 100644 --- a/src/compiler/fndb.lisp +++ b/src/compiler/fndb.lisp @@ -381,8 +381,6 @@ (movable foldable flushable)) (defknown random ((or (float (0.0)) (integer 1)) &optional random-state) (or (float 0.0) (integer 0)) ()) -(defknown %inclusive-random-integer (unsigned-byte random-state) unsigned-byte - ()) (defknown make-random-state (&optional (or (member nil t) random-state)) random-state (flushable)) (defknown random-state-p (t) boolean (movable foldable flushable)) diff --git a/version.lisp-expr b/version.lisp-expr index 185f78c..70717c9 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.4.7" +"1.0.4.8"