Adds the following two capabilities to RandomData: generating random permutations
of integers, and generating random samples (returned as Object arrays) from Collections. Tests validate the expected sample distribution using chi-square tests. PR: Issue 20303 Obtained from: Bugzilla Submitted by: Phil Steitz Reviewed by: Tim O'Brien git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140865 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dda7833110
commit
8a13741f2d
|
@ -53,11 +53,12 @@
|
|||
*/
|
||||
|
||||
package org.apache.commons.math;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Random data generation utilities
|
||||
* @author Phil Steitz
|
||||
* @version $Revision: 1.1 $ $Date: 2003/05/18 00:58:51 $
|
||||
* @version $Revision: 1.2 $ $Date: 2003/05/29 19:45:35 $
|
||||
*/
|
||||
public interface RandomData {
|
||||
/**
|
||||
|
@ -195,7 +196,7 @@ public interface RandomData {
|
|||
public double nextExponential(double mean);
|
||||
|
||||
/**
|
||||
* Generates a uniformly distributed random value from the opem interval
|
||||
* Generates a uniformly distributed random value from the open interval
|
||||
* (<code>lower</code>,<code>upper</code>) (i.e., endpoints excluded)
|
||||
* <strong>Definition</strong>:
|
||||
* <a href=http://www.itl.nist.gov/div898/handbook/eda/section3/eda3662.htm>
|
||||
|
@ -211,4 +212,42 @@ public interface RandomData {
|
|||
* <code>lower</code> is not strictly less than <code>upper</code>.
|
||||
*/
|
||||
public double nextUniform(double lower, double upper);
|
||||
|
||||
/**
|
||||
* Generates an integer array of length <code>k</code> whose entries
|
||||
* are selected randomly, without repetition, from the integers
|
||||
* {0, ... , n-1} -- i.e., generated arrays represent permutations
|
||||
* of <code>n</code> taken <code>k</code> at a time. <p>
|
||||
*
|
||||
* <strong>Preconditions:</strong><ul>
|
||||
* <li> k must be less than or equal to n </li>
|
||||
* <li> n must be positive (i.e. greater than 0) </li>
|
||||
* </ul>
|
||||
*
|
||||
* @param n domain of the permutation
|
||||
* @param k size of the permutation
|
||||
* @return random k-permutation of n
|
||||
*/
|
||||
public int[] nextPermutation(int n, int k);
|
||||
|
||||
/**
|
||||
* Returns an array of <code>k</code> objects selected randomly
|
||||
* from the Collection <code>c</code>. Sampling from <code>c</code>
|
||||
* is without replacement; but if <code>c</code> contains identical
|
||||
* objects, the sample may include repeats. If all elements of <code>
|
||||
* c</code> are distinct, the resulting object array represents a
|
||||
* <a href=http://rkb.home.cern.ch/rkb/AN16pp/node250.html#SECTION0002500000000000000000>
|
||||
* Simple Random Sample</a> of size
|
||||
* <code>k</code> from the elements of <code>c</code>.<p>
|
||||
*
|
||||
* <strong>Preconditions:</strong><ul>
|
||||
* <li> k must be less than or equal to the size of c </li>
|
||||
* <li> c must not be empty </li>
|
||||
* </ul>
|
||||
*
|
||||
* @param c collection to be sampled
|
||||
* @param k size of the sample
|
||||
* @return random sample of k elements from c
|
||||
*/
|
||||
public Object[] nextSample(Collection c, int k);
|
||||
}
|
||||
|
|
|
@ -59,6 +59,7 @@ import java.security.SecureRandom;
|
|||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.NoSuchProviderException;
|
||||
import java.util.Random;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Implements the <code>RandomData</code> interface using
|
||||
|
@ -96,7 +97,7 @@ import java.util.Random;
|
|||
*</p>
|
||||
*
|
||||
* @author Phil Steitz
|
||||
* @version $Revision: 1.1 $ $Date: 2003/05/18 00:58:51 $
|
||||
* @version $Revision: 1.2 $ $Date: 2003/05/29 19:45:35 $
|
||||
*/
|
||||
public class RandomDataImpl implements RandomData{
|
||||
|
||||
|
@ -158,7 +159,7 @@ public class RandomDataImpl implements RandomData{
|
|||
public int nextInt(int lower, int upper) {
|
||||
if (lower >= upper) {
|
||||
throw new IllegalArgumentException
|
||||
("incorrect bounds for rendomInt");
|
||||
("upper bound must be > lower bound");
|
||||
}
|
||||
Random rand = getRan();
|
||||
return lower + (int)(Math.random() * (upper-lower+1));
|
||||
|
@ -167,7 +168,7 @@ public class RandomDataImpl implements RandomData{
|
|||
public long nextLong(long lower, long upper) {
|
||||
if (lower >= upper) {
|
||||
throw new IllegalArgumentException
|
||||
("upper bound must be >= lower bound");
|
||||
("upper bound must be > lower bound");
|
||||
}
|
||||
Random rand = getRan();
|
||||
return lower + (long)(rand.nextDouble() * (upper-lower+1));
|
||||
|
@ -237,7 +238,7 @@ public class RandomDataImpl implements RandomData{
|
|||
public int nextSecureInt(int lower, int upper) {
|
||||
if (lower >= upper) {
|
||||
throw new IllegalArgumentException
|
||||
("lower bound must be <= upper bound");
|
||||
("lower bound must be < upper bound");
|
||||
}
|
||||
SecureRandom sec = getSecRan();
|
||||
return lower + (int)(sec.nextDouble() * (upper-lower+1));
|
||||
|
@ -247,7 +248,7 @@ public class RandomDataImpl implements RandomData{
|
|||
public long nextSecureLong(long lower, long upper) {
|
||||
if (lower >= upper) {
|
||||
throw new IllegalArgumentException
|
||||
("lower bound must be <= upper bound");
|
||||
("lower bound must be < upper bound");
|
||||
}
|
||||
SecureRandom sec = getSecRan();
|
||||
return lower + (long)(sec.nextDouble() * (upper-lower+1));
|
||||
|
@ -442,4 +443,96 @@ public class RandomDataImpl implements RandomData{
|
|||
secRand = SecureRandom.getInstance(algorithm,provider);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses a 2-cycle permutation shuffle, as described
|
||||
* <a href=http://www.maths.abdn.ac.uk/~igc/tch/mx4002/notes/node83.html>
|
||||
* here</a>
|
||||
*
|
||||
*/
|
||||
public int[] nextPermutation(int n, int k) {
|
||||
if (k > n) {
|
||||
throw new IllegalArgumentException
|
||||
("permutation k exceeds n");
|
||||
}
|
||||
if (k == 0) {
|
||||
throw new IllegalArgumentException
|
||||
("permutation k must be > 0");
|
||||
}
|
||||
|
||||
int[] index = getNatural(n);
|
||||
shuffle(index,n-k);
|
||||
int[] result = new int[k];
|
||||
for (int i = 0; i < k; i++) {
|
||||
result[i] = index[n-i-1];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses a 2-cycle permutation shuffle to generate a random
|
||||
* permutation of <code>c.size()</code> and then returns the
|
||||
* elements whose indexes correspond to the elements of the
|
||||
* generated permutation. This technique is described, and
|
||||
* proven to generate random samples,
|
||||
* <a href=http://www.maths.abdn.ac.uk/~igc/tch/mx4002/notes/node83.html>
|
||||
* here</a>
|
||||
*/
|
||||
public Object[] nextSample(Collection c, int k) {
|
||||
int len = c.size();
|
||||
if (k > len) {
|
||||
throw new IllegalArgumentException
|
||||
("sample size exceeds collection size");
|
||||
}
|
||||
if (k == 0) {
|
||||
throw new IllegalArgumentException
|
||||
("sample size must be > 0");
|
||||
}
|
||||
|
||||
Object[] objects = c.toArray();
|
||||
int[] index = nextPermutation(len,k);
|
||||
Object[] result = new Object[k];
|
||||
for (int i = 0; i < k; i ++) {
|
||||
result[i] = objects[index[i]];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
//------------------------Private methods----------------------------------
|
||||
|
||||
/**
|
||||
* Uses a 2-cycle permutation shuffle to randomly re-order the last
|
||||
* end elements of list
|
||||
*
|
||||
* @param list list to be shuffled
|
||||
* @end element past which shuffling begins
|
||||
*/
|
||||
private void shuffle(int[] list, int end) {
|
||||
int target = 0;
|
||||
for (int i = list.length-1 ; i >= end; i--) {
|
||||
if (i == 0) {
|
||||
target = 0;
|
||||
} else {
|
||||
target = nextInt(0,i);
|
||||
}
|
||||
int temp = list[target];
|
||||
list[target] = list[i];
|
||||
list[i] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array representing n
|
||||
*
|
||||
* @param n the natural number to represent
|
||||
* @return array with entries = elements of n
|
||||
*/
|
||||
private int[] getNatural(int n) {
|
||||
int[] natural = new int[n];
|
||||
for (int i = 0; i < n; i++) {
|
||||
natural[i] = i;
|
||||
}
|
||||
return natural;
|
||||
}
|
||||
|
||||
}
|
|
@ -59,11 +59,13 @@ import junit.framework.TestSuite;
|
|||
import junit.framework.AssertionFailedError;
|
||||
import java.security.NoSuchProviderException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
/**
|
||||
* Test cases for the RandomData class.
|
||||
*
|
||||
* @author Phil Steitz
|
||||
* @version $Revision: 1.2 $ $Date: 2003/05/22 15:31:38 $
|
||||
* @version $Revision: 1.3 $ $Date: 2003/05/29 19:45:35 $
|
||||
*/
|
||||
|
||||
public final class RandomDataTest extends TestCase {
|
||||
|
@ -476,5 +478,144 @@ public final class RandomDataTest extends TestCase {
|
|||
;
|
||||
}
|
||||
}
|
||||
|
||||
/** tests for nextSample() sampling from Collection */
|
||||
public void testNextSample() {
|
||||
Object[][] c = {{"0","1"},{"0","2"},{"0","3"},{"0","4"},{"1","2"},
|
||||
{"1","3"},{"1","4"},{"2","3"},{"2","4"},{"3","4"}};
|
||||
double[] observed = {0,0,0,0,0,0,0,0,0,0};
|
||||
double[] expected = {100,100,100,100,100,100,100,100,100,100};
|
||||
|
||||
HashSet cPop = new HashSet(); //{0,1,2,3,4}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
cPop.add(Integer.toString(i));
|
||||
}
|
||||
|
||||
Object[] sets = new Object[10]; // 2-sets from 5
|
||||
for (int i = 0; i < 10; i ++) {
|
||||
HashSet hs = new HashSet();
|
||||
hs.add(c[i][0]);
|
||||
hs.add(c[i][1]);
|
||||
sets[i] = hs;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 1000; i ++) {
|
||||
Object[] cSamp = randomData.nextSample(cPop,2);
|
||||
observed[findSample(sets,cSamp)]++;
|
||||
}
|
||||
|
||||
/* Use ChiSquare dist with df = 10-1 = 9, alpha = .001
|
||||
* Change to 21.67 for alpha = .01
|
||||
*/
|
||||
assertTrue("chi-square test -- will fail about 1 in 1000 times",
|
||||
testStatistic.chiSquare(expected,observed) < 27.88);
|
||||
|
||||
// Make sure sample of size = size of collection returns same collection
|
||||
HashSet hs = new HashSet();
|
||||
hs.add("one");
|
||||
Object[] one = randomData.nextSample(hs,1);
|
||||
String oneString = (String) one[0];
|
||||
if ((one.length != 1) || !oneString.equals("one")){
|
||||
fail("bad sample for set size = 1, sample size = 1");
|
||||
}
|
||||
|
||||
// Make sure we fail for sample size > collection size
|
||||
try {
|
||||
one = randomData.nextSample(hs,2);
|
||||
fail("sample size > set size, expecting IllegalArgumentException");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
;
|
||||
}
|
||||
|
||||
// Make sure we fail for empty collection
|
||||
try {
|
||||
hs = new HashSet();
|
||||
one = randomData.nextSample(hs,0);
|
||||
fail("n = k = 0, expecting IllegalArgumentException");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
private int findSample(Object[] u, Object[] samp) {
|
||||
int result = -1;
|
||||
for (int i = 0; i < u.length; i++) {
|
||||
HashSet set = (HashSet) u[i];
|
||||
HashSet sampSet = new HashSet();
|
||||
for (int j = 0; j < samp.length; j++) {
|
||||
sampSet.add(samp[j]);
|
||||
}
|
||||
if (set.equals(sampSet)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
fail("sample not found:{" + samp[0] + "," + samp[1] + "}");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/** tests for nextPermutation */
|
||||
public void testNextPermutation() {
|
||||
int[][] p = {{0,1,2},{0,2,1},{1,0,2},{1,2,0},{2,0,1},{2,1,0}};
|
||||
double[] observed = {0,0,0,0,0,0,};
|
||||
double[] expected = {100,100,100,100,100,100};
|
||||
|
||||
for (int i = 0; i < 600; i++) {
|
||||
int[] perm = randomData.nextPermutation(3,3);
|
||||
observed[findPerm(p,perm)]++;
|
||||
}
|
||||
|
||||
/* Use ChiSquare dist with df = 6-1 = 5, alpha = .001
|
||||
* Change to 15.09 for alpha = .01
|
||||
*/
|
||||
assertTrue("chi-square test -- will fail about 1 in 1000 times",
|
||||
testStatistic.chiSquare(expected,observed) < 20.52);
|
||||
|
||||
// Check size = 1 boundary case
|
||||
int[] perm = randomData.nextPermutation(1,1);
|
||||
if ((perm.length != 1) || (perm[0] != 0)){
|
||||
fail("bad permutation for n = 1, sample k = 1");
|
||||
|
||||
// Make sure we fail for k size > n
|
||||
try {
|
||||
perm = randomData.nextPermutation(2,3);
|
||||
fail("permutation k > n, expecting IllegalArgumentException");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
;
|
||||
}
|
||||
|
||||
// Make sure we fail for n = 0
|
||||
try {
|
||||
perm = randomData.nextPermutation(0,0);
|
||||
fail("permutation k = n = 0, expecting IllegalArgumentException");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private int findPerm(int[][] p, int[] samp) {
|
||||
int result = -1;
|
||||
for (int i = 0; i < p.length; i++) {
|
||||
boolean good = true;
|
||||
for (int j = 0; j < samp.length; j++) {
|
||||
if (samp[j] != p[i][j]) {
|
||||
good = false;
|
||||
}
|
||||
}
|
||||
if (good) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
fail("permutation not found");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue