diff --git a/src/main/java/org/apache/commons/math/random/AbstractWell.java b/src/main/java/org/apache/commons/math/random/AbstractWell.java index f85fe6f19..65e5a0acd 100644 --- a/src/main/java/org/apache/commons/math/random/AbstractWell.java +++ b/src/main/java/org/apache/commons/math/random/AbstractWell.java @@ -26,7 +26,8 @@ import java.io.Serializable; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -36,23 +37,27 @@ import java.io.Serializable; public abstract class AbstractWell extends BitsStreamGenerator implements Serializable { /** Serializable version identifier. */ - private static final long serialVersionUID = -8068371019303673353L; - - /** Bit mask preserving the first w - p bits in a w bits block. */ - protected final int mp; - - /** Bit mask preserving the last p bits in a w bits block. */ - protected final int mpTilde; + private static final long serialVersionUID = -817701723016583596L; /** Current index in the bytes pool. */ protected int index; /** Bytes pool. */ protected final int[] v; + + /** Index indirection table giving for each index its predecessor taking table size into account. */ protected final int[] iRm1; + + /** Index indirection table giving for each index its second predecessor taking table size into account. */ protected final int[] iRm2; + + /** Index indirection table giving for each index the value index + m1 taking table size into account. */ protected final int[] i1; + + /** Index indirection table giving for each index the value index + m2 taking table size into account. */ protected final int[] i2; + + /** Index indirection table giving for each index the value index + m3 taking table size into account. */ protected final int[] i3; /** Creates a new random number generator. @@ -93,15 +98,11 @@ public abstract class AbstractWell extends BitsStreamGenerator implements Serial // and p is the number of unused bits in the last block final int w = 32; final int r = (k + w - 1) / w; - final int p = r * w - k; + this.v = new int[r]; + this.index = 0; - // set up generator parameters - this.mp = (-1) << p; - this.mpTilde = ~mp; - this.v = new int[r]; - this.index = 0; - - // set up indirection indices + // precompute indirection index tables. These tables are used for optimizing access + // they allow saving computations like "(j + r - 2) % r" with costly modulo operations iRm1 = new int[r]; iRm2 = new int[r]; i1 = new int[r]; diff --git a/src/main/java/org/apache/commons/math/random/Well1024a.java b/src/main/java/org/apache/commons/math/random/Well1024a.java index a0eb2c8ba..250e79e03 100644 --- a/src/main/java/org/apache/commons/math/random/Well1024a.java +++ b/src/main/java/org/apache/commons/math/random/Well1024a.java @@ -24,7 +24,8 @@ package org.apache.commons.math.random; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -82,7 +83,6 @@ public class Well1024a extends AbstractWell { protected int next(final int bits) { final int indexRm1 = iRm1[index]; - final int indexRm2 = iRm2[index]; final int v0 = v[index]; final int vM1 = v[i1[index]]; @@ -97,7 +97,6 @@ public class Well1024a extends AbstractWell { v[index] = z3; v[indexRm1] = z4; - v[indexRm2] &= mp; index = indexRm1; return z4 >>> (32 - bits); diff --git a/src/main/java/org/apache/commons/math/random/Well19937a.java b/src/main/java/org/apache/commons/math/random/Well19937a.java index 55a17ea62..c5c32f606 100644 --- a/src/main/java/org/apache/commons/math/random/Well19937a.java +++ b/src/main/java/org/apache/commons/math/random/Well19937a.java @@ -24,7 +24,8 @@ package org.apache.commons.math.random; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -97,7 +98,7 @@ public class Well19937a extends AbstractWell { v[index] = z3; v[indexRm1] = z4; - v[indexRm2] &= mp; + v[indexRm2] &= 0x80000000; index = indexRm1; return z4 >>> (32 - bits); diff --git a/src/main/java/org/apache/commons/math/random/Well19937c.java b/src/main/java/org/apache/commons/math/random/Well19937c.java index 5475549ff..7731c1f13 100644 --- a/src/main/java/org/apache/commons/math/random/Well19937c.java +++ b/src/main/java/org/apache/commons/math/random/Well19937c.java @@ -24,7 +24,8 @@ package org.apache.commons.math.random; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -97,7 +98,7 @@ public class Well19937c extends AbstractWell { v[index] = z3; v[indexRm1] = z4; - v[indexRm2] &= mp; + v[indexRm2] &= 0x80000000; index = indexRm1; diff --git a/src/main/java/org/apache/commons/math/random/Well44497a.java b/src/main/java/org/apache/commons/math/random/Well44497a.java index 4aa55905d..dfe75de4f 100644 --- a/src/main/java/org/apache/commons/math/random/Well44497a.java +++ b/src/main/java/org/apache/commons/math/random/Well44497a.java @@ -24,7 +24,8 @@ package org.apache.commons.math.random; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -89,6 +90,7 @@ public class Well44497a extends AbstractWell { final int vM2 = v[i2[index]]; final int vM3 = v[i3[index]]; + // the values below include the errata of the original article final int z0 = (0xFFFF8000 & v[indexRm1]) ^ (0x00007FFF & v[indexRm2]); final int z1 = (v0 ^ (v0 << 24)) ^ (vM1 ^ (vM1 >>> 30)); final int z2 = (vM2 ^ (vM2 << 10)) ^ (vM3 << 26); @@ -99,7 +101,7 @@ public class Well44497a extends AbstractWell { v[index] = z3; v[indexRm1] = z4; - v[indexRm2] &= mp; + v[indexRm2] &= 0xFFFF8000; index = indexRm1; return z4 >>> (32 - bits); diff --git a/src/main/java/org/apache/commons/math/random/Well44497b.java b/src/main/java/org/apache/commons/math/random/Well44497b.java index 2ffea8cc4..a1456dbd1 100644 --- a/src/main/java/org/apache/commons/math/random/Well44497b.java +++ b/src/main/java/org/apache/commons/math/random/Well44497b.java @@ -24,7 +24,8 @@ package org.apache.commons.math.random; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -91,6 +92,7 @@ public class Well44497b extends AbstractWell { final int vM2 = v[i2[index]]; final int vM3 = v[i3[index]]; + // the values below include the errata of the original article final int z0 = (0xFFFF8000 & v[indexRm1]) ^ (0x00007FFF & v[indexRm2]); final int z1 = (v0 ^ (v0 << 24)) ^ (vM1 ^ (vM1 >>> 30)); final int z2 = (vM2 ^ (vM2 << 10)) ^ (vM3 << 26); @@ -101,7 +103,7 @@ public class Well44497b extends AbstractWell { v[index] = z3; v[indexRm1] = z4; - v[indexRm2] &= mp; + v[indexRm2] &= 0xFFFF8000; index = indexRm1; // add Matsumoto-Kurita tempering diff --git a/src/main/java/org/apache/commons/math/random/Well512a.java b/src/main/java/org/apache/commons/math/random/Well512a.java index 99c4a02be..651b9158d 100644 --- a/src/main/java/org/apache/commons/math/random/Well512a.java +++ b/src/main/java/org/apache/commons/math/random/Well512a.java @@ -24,7 +24,8 @@ package org.apache.commons.math.random; * Pierre L'Ecuyer and Makoto Matsumoto Improved * Long-Period Generators Based on Linear Recurrences Modulo 2 ACM - * Transactions on Mathematical Software, 32, 1 (2006).

+ * Transactions on Mathematical Software, 32, 1 (2006). The errata for the paper + * are in wellrng-errata.txt.

* @see WELL Random number generator * @version $Revision$ $Date$ @@ -87,8 +88,8 @@ public class Well512a extends AbstractWell { final int vi1 = v[i1[index]]; final int vi2 = v[i2[index]]; final int z0 = v[indexRm1]; - // m3: x ^ ((t >= 0) ? (x >>> t) : (x << -t)); + // the values below include the errata of the original article final int z1 = (vi ^ (vi << 16)) ^ (vi1 ^ (vi1 << 15)); final int z2 = vi2 ^ (vi2 >>> 11); final int z3 = z1 ^ z2; diff --git a/src/main/java/org/apache/commons/math/random/package.html b/src/main/java/org/apache/commons/math/random/package.html index e86a4f4c2..ec58f1591 100644 --- a/src/main/java/org/apache/commons/math/random/package.html +++ b/src/main/java/org/apache/commons/math/random/package.html @@ -16,5 +16,117 @@ limitations under the License. --> - Random number and random data generators. + +

Random number and random data generators.

+

Commons-math provides a few pseudo random number generators. The top level interface is RandomGenerator. + It is implemented by three classes: +

+

+ +

+ The JDK provided generator is a simple one that can be used only for very simple needs. + The Mersenne Twister is a fast generator with very good properties well suited for + Monte-Carlo simulation. It is equidistributed for generating vectors up to dimension 623 + and has a huge period: 219937 - 1 (which is a Mersenne prime). This generator + is described in a paper by Makoto Matsumoto and Takuji Nishimura in 1998: Mersenne Twister: + A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number Generator, ACM + Transactions on Modeling and Computer Simulation, Vol. 8, No. 1, January 1998, pp 3--30. + The WELL generators are a family of generators with period ranging from 2512 - 1 + to 244497 - 1 (this last one is also a Mersenne prime) with even better properties + than Mersenne Twister. These generators are described in a paper by François Panneton, + Pierre L'Ecuyer and Makoto Matsumoto Improved Long-Period + Generators Based on Linear Recurrences Modulo 2 ACM Transactions on Mathematical Software, + 32, 1 (2006). The errata for the paper are in wellrng-errata.txt. +

+ +

+ For simple sampling, any of these generators is sufficient. For Monte-Carlo simulations the + JDK generator does not have any of the good mathematical properties of the other generators, + so it should be avoided. The Mersenne twister and WELL generators have equidistribution properties + proven according to their bits pool size which is directly linked to their period (all of them + have maximal period, i.e. a generator with size n pool has a period 2n-1). They also + have equidistribution properties for 32 bits blocks up to s/32 dimension where s is their pool size. + So WELL19937c for exemple is equidistributed up to dimension 623 (19937/32). This means a Monte-Carlo + simulation generating a vector of n variables at each iteration has some guarantees on the properties + of the vector as long as its dimension does not exceed the limit. However, since we use bits from two + successive 32 bits generated integers to create one double, this limit is smaller when the variables are + of type double. so for Monte-Carlo simulation where less the 16 doubles are generated at each round, + WELL1024 may be sufficient. If a larger number of doubles are needed a generator with a larger pool + would be useful. +

+ +

+ The WELL generators are more modern then MersenneTwister (the paper describing than has been published + in 2006 instead of 1998) and fix some of its (few) drawbacks. If initialization array contains many + zero bits, MersenneTwister may take a very long time (several hundreds of thousands of iterations to + reach a steady state with a balanced number of zero and one in its bits pool). So the WELL generators + are better to escape zeroland as explained by the WELL generators creators. The Well19937a and + Well44497a generator are not maximally equidistributed (i.e. there are some dimensions or bits blocks + size for which they are not equidistributed). The Well512a, Well1024a, Well19937c and Well44497b are + maximally equidistributed for blocks size up to 32 bits (they should behave correctly also for double + based on more than 32 bits blocks, but equidistribution is not proven at these blocks sizes). +

+ +

+ The MersenneTwister generator uses a 624 elements integer array, so it consumes less than 2.5 kilobytes. + The WELL generators use 6 integer arrays with a size equal to the pool size, so for example the + WELL44497b generator uses about 33 kilobytes. This may be important if a very large number of + generator instances were used at the same time. +

+ +

+ All generators are quite fast. As an example, here are some comparisons, obtained on a 64 bits JVM on a + linux computer with a 2008 processor (AMD phenom Quad 9550 at 2.2 GHz). The generation rate for + MersenneTwister was about 27 millions doubles per second (remember we generate two 32 bits integers for + each double). Generation rates for other PRNG, relative to MersenneTwister: +

+ +

+ + + + + + + + + + + +
Example of performances
Namegeneration rate (relative to MersenneTwister)
{@link org.apache.commons.math.random.MersenneTwister MersenneTwister}1
{@link org.apache.commons.math.random.JDKRandomGenerator JDKRandomGenerator}between 0.96 and 1.16
{@link org.apache.commons.math.random.Well512a Well512a}between 0.85 and 0.88
{@link org.apache.commons.math.random.Well1024a Well1024a}between 0.63 and 0.73
{@link org.apache.commons.math.random.Well19937a Well19937a}between 0.70 and 0.71
{@link org.apache.commons.math.random.Well19937c Well19937c}between 0.57 and 0.71
{@link org.apache.commons.math.random.Well44497a Well44497a}between 0.69 and 0.71
{@link org.apache.commons.math.random.Well44497b Well44497b}between 0.65 and 0.71
+

+ +

+ So for most simulation problems, the better generators like {@link + org.apache.commons.math.random.Well19937c Well19937c} and {@link + org.apache.commons.math.random.Well44497b Well44497b} are probably very good choices. +

+ +

+ Note that none of these generators are suitable for cryptography. They are devoted + to simulation, and to generate very long series with strong properties on the series as a whole + (equidistribution, no correlation ...). They do not attempt to create small series but with + very strong properties of unpredictability as needed in cryptography. +

+ + diff --git a/src/site/xdoc/changes.xml b/src/site/xdoc/changes.xml index f23d64b7d..aaea722b4 100644 --- a/src/site/xdoc/changes.xml +++ b/src/site/xdoc/changes.xml @@ -71,7 +71,7 @@ The type attribute can be add,update,fix,remove. - + Added new random number generators from the Well Equidistributed Long-period Linear (WELL). diff --git a/src/site/xdoc/userguide/random.xml b/src/site/xdoc/userguide/random.xml index b6c601e50..2eadede5d 100644 --- a/src/site/xdoc/userguide/random.xml +++ b/src/site/xdoc/userguide/random.xml @@ -317,6 +317,120 @@ for (int i = 0; i < 1000; i++) { java.util.Random and wraps and delegates calls to a RandomGenerator instance.

+ +

Commons-math provides by itself several implementations of the + RandomGenerator interface: +

+

+ +

+ The JDK provided generator is a simple one that can be used only for very simple needs. + The Mersenne Twister is a fast generator with very good properties well suited for + Monte-Carlo simulation. It is equidistributed for generating vectors up to dimension 623 + and has a huge period: 219937 - 1 (which is a Mersenne prime). This generator + is described in a paper by Makoto Matsumoto and Takuji Nishimura in 1998: Mersenne Twister: + A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number Generator, ACM + Transactions on Modeling and Computer Simulation, Vol. 8, No. 1, January 1998, pp 3--30. + The WELL generators are a family of generators with period ranging from 2512 - 1 + to 244497 - 1 (this last one is also a Mersenne prime) with even better properties + than Mersenne Twister. These generators are described in a paper by François Panneton, + Pierre L'Ecuyer and Makoto Matsumoto Improved Long-Period + Generators Based on Linear Recurrences Modulo 2 ACM Transactions on Mathematical Software, + 32, 1 (2006). The errata for the paper are in wellrng-errata.txt. +

+ +

+ For simple sampling, any of these generators is sufficient. For Monte-Carlo simulations the + JDK generator does not have any of the good mathematical properties of the other generators, + so it should be avoided. The Mersenne twister and WELL generators have equidistribution properties + proven according to their bits pool size which is directly linked to their period (all of them + have maximal period, i.e. a generator with size n pool has a period 2n-1). They also + have equidistribution properties for 32 bits blocks up to s/32 dimension where s is their pool size. + So WELL19937c for exemple is equidistributed up to dimension 623 (19937/32). This means a Monte-Carlo + simulation generating a vector of n variables at each iteration has some guarantees on the properties + of the vector as long as its dimension does not exceed the limit. However, since we use bits from two + successive 32 bits generated integers to create one double, this limit is smaller when the variables are + of type double. so for Monte-Carlo simulation where less the 16 doubles are generated at each round, + WELL1024 may be sufficient. If a larger number of doubles are needed a generator with a larger pool + would be useful. +

+ +

+ The WELL generators are more modern then MersenneTwister (the paper describing than has been published + in 2006 instead of 1998) and fix some of its (few) drawbacks. If initialization array contains many + zero bits, MersenneTwister may take a very long time (several hundreds of thousands of iterations to + reach a steady state with a balanced number of zero and one in its bits pool). So the WELL generators + are better to escape zeroland as explained by the WELL generators creators. The Well19937a and + Well44497a generator are not maximally equidistributed (i.e. there are some dimensions or bits blocks + size for which they are not equidistributed). The Well512a, Well1024a, Well19937c and Well44497b are + maximally equidistributed for blocks size up to 32 bits (they should behave correctly also for double + based on more than 32 bits blocks, but equidistribution is not proven at these blocks sizes). +

+ +

+ The MersenneTwister generator uses a 624 elements integer array, so it consumes less than 2.5 kilobytes. + The WELL generators use 6 integer arrays with a size equal to the pool size, so for example the + WELL44497b generator uses about 33 kilobytes. This may be important if a very large number of + generator instances were used at the same time. +

+ +

+ All generators are quite fast. As an example, here are some comparisons, obtained on a 64 bits JVM on a + linux computer with a 2008 processor (AMD phenom Quad 9550 at 2.2 GHz). The generation rate for + MersenneTwister was between 25 and 27 millions doubles per second (remember we generate two 32 bits integers for + each double). Generation rates for other PRNG, relative to MersenneTwister: +

+ +

+ + + + + + + + + + + +
Example of performances
Namegeneration rate (relative to MersenneTwister)
MersenneTwister1
JDKRandomGeneratorbetween 0.96 and 1.16
Well512abetween 0.85 and 0.88
Well1024abetween 0.63 and 0.73
Well19937abetween 0.70 and 0.71
Well19937cbetween 0.57 and 0.71
Well44497abetween 0.69 and 0.71
Well44497bbetween 0.65 and 0.71
+

+ +

+ So for most simulation problems, the better generators like Well19937c and Well44497b are probably very good choices. +

+ +

+ Note that none of these generators are suitable for cryptography. They are devoted + to simulation, and to generate very long series with strong properties on the series as a whole + (equidistribution, no correlation ...). They do not attempt to create small series but with + very strong properties of unpredictability as needed in cryptography. +

+

Examples: