Adds the following two capabilities to RandomData: generating random permutations
of integers, and generating random samples (returned as Object arrays) from Collections.
Tests validate the expected sample distribution using chi-square tests.

PR: Issue 20303
Obtained from: Bugzilla
Submitted by: Phil Steitz
Reviewed by: Tim O'Brien


git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140865 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim O'Brien 2003-05-29 19:45:35 +00:00
parent dda7833110
commit 8a13741f2d
3 changed files with 282 additions and 9 deletions

View File

@ -53,11 +53,12 @@
*/
package org.apache.commons.math;
import java.util.Collection;
/**
* Random data generation utilities
* @author Phil Steitz
* @version $Revision: 1.1 $ $Date: 2003/05/18 00:58:51 $
* @version $Revision: 1.2 $ $Date: 2003/05/29 19:45:35 $
*/
public interface RandomData {
/**
@ -195,7 +196,7 @@ public interface RandomData {
public double nextExponential(double mean);
/**
* Generates a uniformly distributed random value from the opem interval
* Generates a uniformly distributed random value from the open interval
* (<code>lower</code>,<code>upper</code>) (i.e., endpoints excluded)
* <strong>Definition</strong>:
* <a href=http://www.itl.nist.gov/div898/handbook/eda/section3/eda3662.htm>
@ -211,4 +212,42 @@ public interface RandomData {
* <code>lower</code> is not strictly less than <code>upper</code>.
*/
public double nextUniform(double lower, double upper);
/**
* Generates an integer array of length <code>k</code> whose entries
* are selected randomly, without repetition, from the integers
* {0, ... , n-1} -- i.e., generated arrays represent permutations
* of <code>n</code> taken <code>k</code> at a time. <p>
*
* <strong>Preconditions:</strong><ul>
* <li> k must be less than or equal to n </li>
* <li> n must be positive (i.e. greater than 0) </li>
* </ul>
*
* NOTE(review): this contract does not say how a violated precondition
* is reported. The <code>RandomDataImpl</code> implementation throws
* <code>IllegalArgumentException</code> when <code>k</code> exceeds
* <code>n</code> or when <code>k</code> is 0 -- confirm whether that
* should be part of the interface contract.
*
* @param n domain of the permutation; the entries of the result are
* drawn from {0, ... , n-1}
* @param k size of the permutation, i.e. the length of the returned array
* @return random k-permutation of n
*/
public int[] nextPermutation(int n, int k);
/**
* Returns an array of <code>k</code> objects selected randomly
* from the Collection <code>c</code>. Sampling from <code>c</code>
* is without replacement; but if <code>c</code> contains identical
* objects, the sample may include repeats. If all elements of
* <code>c</code> are distinct, the resulting object array represents a
* <a href="http://rkb.home.cern.ch/rkb/AN16pp/node250.html#SECTION0002500000000000000000">
* Simple Random Sample</a> of size
* <code>k</code> from the elements of <code>c</code>.<p>
*
* <strong>Preconditions:</strong><ul>
* <li> k must be less than or equal to the size of c </li>
* <li> c must not be empty </li>
* </ul>
*
* NOTE(review): this contract does not say how a violated precondition
* is reported. The <code>RandomDataImpl</code> implementation throws
* <code>IllegalArgumentException</code> when <code>k</code> exceeds the
* size of <code>c</code> or when <code>k</code> is 0 -- confirm whether
* that should be part of the interface contract.
*
* @param c collection to be sampled
* @param k size of the sample, i.e. the length of the returned array
* @return random sample of k elements from c
*/
public Object[] nextSample(Collection c, int k);
}

View File

@ -59,6 +59,7 @@ import java.security.SecureRandom;
import java.security.NoSuchAlgorithmException;
import java.security.NoSuchProviderException;
import java.util.Random;
import java.util.Collection;
/**
* Implements the <code>RandomData</code> interface using
@ -96,7 +97,7 @@ import java.util.Random;
*</p>
*
* @author Phil Steitz
* @version $Revision: 1.1 $ $Date: 2003/05/18 00:58:51 $
* @version $Revision: 1.2 $ $Date: 2003/05/29 19:45:35 $
*/
public class RandomDataImpl implements RandomData{
@ -158,7 +159,7 @@ public class RandomDataImpl implements RandomData{
public int nextInt(int lower, int upper) {
if (lower >= upper) {
throw new IllegalArgumentException
("incorrect bounds for rendomInt");
("upper bound must be > lower bound");
}
Random rand = getRan();
return lower + (int)(Math.random() * (upper-lower+1));
@ -167,7 +168,7 @@ public class RandomDataImpl implements RandomData{
public long nextLong(long lower, long upper) {
if (lower >= upper) {
throw new IllegalArgumentException
("upper bound must be >= lower bound");
("upper bound must be > lower bound");
}
Random rand = getRan();
return lower + (long)(rand.nextDouble() * (upper-lower+1));
@ -237,7 +238,7 @@ public class RandomDataImpl implements RandomData{
public int nextSecureInt(int lower, int upper) {
if (lower >= upper) {
throw new IllegalArgumentException
("lower bound must be <= upper bound");
("lower bound must be < upper bound");
}
SecureRandom sec = getSecRan();
return lower + (int)(sec.nextDouble() * (upper-lower+1));
@ -247,7 +248,7 @@ public class RandomDataImpl implements RandomData{
public long nextSecureLong(long lower, long upper) {
if (lower >= upper) {
throw new IllegalArgumentException
("lower bound must be <= upper bound");
("lower bound must be < upper bound");
}
SecureRandom sec = getSecRan();
return lower + (long)(sec.nextDouble() * (upper-lower+1));
@ -442,4 +443,96 @@ public class RandomDataImpl implements RandomData{
secRand = SecureRandom.getInstance(algorithm,provider);
}
/**
 * Generates a random <code>k</code>-permutation of the integers
 * {0, ... , n-1}.
 * <p>
 * Uses a 2-cycle permutation shuffle, as described
 * <a href="http://www.maths.abdn.ac.uk/~igc/tch/mx4002/notes/node83.html">
 * here</a>: the array {0, ... , n-1} is built, its last <code>k</code>
 * positions are filled by random swaps, and those positions are copied
 * out starting from the end of the array.
 *
 * @param n domain of the permutation
 * @param k size of the permutation
 * @return array of length <code>k</code> holding distinct values from
 * {0, ... , n-1}
 * @throws IllegalArgumentException if <code>k</code> exceeds
 * <code>n</code> or <code>k</code> is not strictly positive
 */
public int[] nextPermutation(int n, int k) {
    if (k > n) {
        throw new IllegalArgumentException
            ("permutation k exceeds n");
    }
    // Reject negative k as well as 0: together with the k <= n check above
    // this also guarantees n > 0, so no negative array size can be requested.
    if (k <= 0) {
        throw new IllegalArgumentException
            ("permutation k must be > 0");
    }
    int[] index = getNatural(n);
    // Only positions n-1 down to n-k need to be randomized.
    shuffle(index, n - k);
    int[] result = new int[k];
    for (int i = 0; i < k; i++) {
        result[i] = index[n - i - 1];
    }
    return result;
}
/**
 * Returns <code>k</code> elements of <code>c</code> chosen at random
 * without replacement.
 * <p>
 * Uses a 2-cycle permutation shuffle to generate a random
 * <code>k</code>-permutation of <code>c.size()</code> and then returns the
 * elements of <code>c.toArray()</code> whose indexes correspond to the
 * elements of the generated permutation. This technique is described, and
 * proven to generate random samples,
 * <a href="http://www.maths.abdn.ac.uk/~igc/tch/mx4002/notes/node83.html">
 * here</a>
 *
 * @param c collection to be sampled
 * @param k size of the sample
 * @return array of length <code>k</code> holding the sampled elements
 * @throws IllegalArgumentException if <code>k</code> exceeds the size of
 * <code>c</code> or <code>k</code> is not strictly positive
 */
public Object[] nextSample(Collection c, int k) {
    int len = c.size();
    if (k > len) {
        throw new IllegalArgumentException
            ("sample size exceeds collection size");
    }
    // Reject negative k as well as 0; an empty collection is rejected here
    // too, because k <= len forces k <= 0 in that case.
    if (k <= 0) {
        throw new IllegalArgumentException
            ("sample size must be > 0");
    }
    Object[] objects = c.toArray();
    int[] index = nextPermutation(len, k);
    Object[] result = new Object[k];
    for (int i = 0; i < k; i++) {
        result[i] = objects[index[i]];
    }
    return result;
}
//------------------------Private methods----------------------------------
/**
 * Randomizes, in place, the contents of positions <code>end</code>
 * through <code>list.length - 1</code> of <code>list</code> using a
 * 2-cycle permutation shuffle: walking from the last position down to
 * <code>end</code>, each position is swapped with a randomly chosen
 * position at or below it.
 *
 * @param list list to be shuffled
 * @param end lowest position that takes part as a swap origin
 */
private void shuffle(int[] list, int end) {
    for (int pos = list.length - 1; pos >= end; pos--) {
        // nextInt rejects lower >= upper, so position 0 can only swap with itself.
        int swapWith = (pos == 0) ? 0 : nextInt(0, pos);
        int held = list[swapWith];
        list[swapWith] = list[pos];
        list[pos] = held;
    }
}
/**
 * Builds the array {0, 1, ... , n-1}, i.e. an array of length
 * <code>n</code> in which every entry equals its own index.
 *
 * @param n length of the array to build
 * @return array whose i-th entry is i
 */
private int[] getNatural(int n) {
    int[] identity = new int[n];
    int next = 0;
    while (next < n) {
        identity[next] = next;
        next++;
    }
    return identity;
}
}

View File

@ -59,11 +59,13 @@ import junit.framework.TestSuite;
import junit.framework.AssertionFailedError;
import java.security.NoSuchProviderException;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.HashSet;
/**
* Test cases for the RandomData class.
*
* @author Phil Steitz
* @version $Revision: 1.2 $ $Date: 2003/05/22 15:31:38 $
* @version $Revision: 1.3 $ $Date: 2003/05/29 19:45:35 $
*/
public final class RandomDataTest extends TestCase {
@ -476,5 +478,144 @@ public final class RandomDataTest extends TestCase {
;
}
}
/**
 * Tests nextSample(): draws 1000 samples of size 2 from a 5-element set
 * and checks with a chi-square test that the ten possible 2-subsets occur
 * about equally often, then checks the size-1 boundary case and the
 * IllegalArgumentException cases.
 */
public void testNextSample() {
    // The ten 2-element subsets of {"0",...,"4"}; each one is a chi-square bin.
    Object[][] pairs = {{"0","1"},{"0","2"},{"0","3"},{"0","4"},{"1","2"},
        {"1","3"},{"1","4"},{"2","3"},{"2","4"},{"3","4"}};
    Object[] sets = new Object[10];
    for (int bin = 0; bin < 10; bin++) {
        HashSet pair = new HashSet();
        pair.add(pairs[bin][0]);
        pair.add(pairs[bin][1]);
        sets[bin] = pair;
    }

    // Population to sample from: {"0","1","2","3","4"}
    HashSet population = new HashSet();
    for (int value = 0; value < 5; value++) {
        population.add(Integer.toString(value));
    }

    // 1000 draws over 10 equally likely bins => 100 expected per bin.
    double[] observed = new double[10];
    double[] expected = new double[10];
    for (int bin = 0; bin < 10; bin++) {
        expected[bin] = 100;
    }
    for (int draw = 0; draw < 1000; draw++) {
        Object[] drawn = randomData.nextSample(population, 2);
        int bin = findSample(sets, drawn);
        observed[bin]++;
    }

    /* Use ChiSquare dist with df = 10-1 = 9, alpha = .001
     * Change to 21.67 for alpha = .01
     */
    double statistic = testStatistic.chiSquare(expected, observed);
    assertTrue("chi-square test -- will fail about 1 in 1000 times",
        statistic < 27.88);

    // A sample as large as the collection must return the collection's element
    HashSet single = new HashSet();
    single.add("one");
    Object[] one = randomData.nextSample(single, 1);
    String oneString = (String) one[0];
    if (!((one.length == 1) && oneString.equals("one"))) {
        fail("bad sample for set size = 1, sample size = 1");
    }

    // Sample size larger than the collection must be rejected
    try {
        randomData.nextSample(single, 2);
        fail("sample size > set size, expecting IllegalArgumentException");
    } catch (IllegalArgumentException ex) {
        ; // expected
    }

    // Empty collection must be rejected
    try {
        HashSet empty = new HashSet();
        randomData.nextSample(empty, 0);
        fail("n = k = 0, expecting IllegalArgumentException");
    } catch (IllegalArgumentException ex) {
        ; // expected
    }
}
/**
 * Finds the entry of <code>u</code> that, viewed as a set, holds exactly
 * the elements of <code>samp</code> (element order in <code>samp</code>
 * does not matter). Fails the running test if no entry matches.
 *
 * @param u array of <code>HashSet</code> instances to search
 * @param samp sample to look up
 * @return index in <code>u</code> of the matching set
 */
private int findSample(Object[] u, Object[] samp) {
    // The sample's set does not depend on the candidate, so build it once.
    HashSet sampSet = new HashSet();
    for (int j = 0; j < samp.length; j++) {
        sampSet.add(samp[j]);
    }
    for (int i = 0; i < u.length; i++) {
        HashSet set = (HashSet) u[i];
        if (set.equals(sampSet)) {
            return i;
        }
    }
    // Report every element of the sample, whatever its length, so that an
    // unexpected sample size shows up as a test failure rather than as an
    // ArrayIndexOutOfBoundsException raised while building the message.
    StringBuffer msg = new StringBuffer("sample not found:{");
    for (int j = 0; j < samp.length; j++) {
        if (j > 0) {
            msg.append(",");
        }
        msg.append(samp[j]);
    }
    msg.append("}");
    fail(msg.toString());
    return -1; // not reached: fail() throws
}
/**
 * Tests nextPermutation(): generates 600 permutations of {0,1,2} and
 * checks with a chi-square test that the six possible orderings occur
 * about equally often, then checks the n = k = 1 boundary case and the
 * IllegalArgumentException cases (k &gt; n, and n = k = 0).
 */
public void testNextPermutation() {
    int[][] p = {{0,1,2},{0,2,1},{1,0,2},{1,2,0},{2,0,1},{2,1,0}};
    double[] observed = {0,0,0,0,0,0};
    double[] expected = {100,100,100,100,100,100};
    for (int i = 0; i < 600; i++) {
        int[] perm = randomData.nextPermutation(3,3);
        observed[findPerm(p,perm)]++;
    }
    /* Use ChiSquare dist with df = 6-1 = 5, alpha = .001
     * Change to 15.09 for alpha = .01
     */
    assertTrue("chi-square test -- will fail about 1 in 1000 times",
        testStatistic.chiSquare(expected,observed) < 20.52);

    // Check size = 1 boundary case
    int[] perm = randomData.nextPermutation(1,1);
    if ((perm.length != 1) || (perm[0] != 0)) {
        fail("bad permutation for n = 1, sample k = 1");
    }
    // The checks below must sit outside the if-block above: nested inside
    // it they would only be reached after fail() had already thrown.

    // Make sure we fail for k size > n
    try {
        perm = randomData.nextPermutation(2,3);
        fail("permutation k > n, expecting IllegalArgumentException");
    } catch (IllegalArgumentException ex) {
        ; // expected
    }

    // Make sure we fail for n = 0
    try {
        perm = randomData.nextPermutation(0,0);
        fail("permutation k = n = 0, expecting IllegalArgumentException");
    } catch (IllegalArgumentException ex) {
        ; // expected
    }
}
/**
 * Finds the row of <code>p</code> that is element-for-element equal to
 * <code>samp</code>. Fails the running test if no row matches.
 *
 * @param p candidate permutations, one per row
 * @param samp permutation to look up
 * @return index of the row of <code>p</code> equal to <code>samp</code>
 */
private int findPerm(int[][] p, int[] samp) {
    for (int i = 0; i < p.length; i++) {
        // Arrays.equals also compares lengths, so a sample that is shorter
        // or longer than the candidate row is a mismatch instead of a
        // prefix match or an ArrayIndexOutOfBoundsException.
        if (java.util.Arrays.equals(p[i], samp)) {
            return i;
        }
    }
    fail("permutation not found");
    return -1; // not reached: fail() throws
}
}