MATH-1341
Replacement for "o.a.c.m.random.RandomDataGenerator".
Features:
 * No trivial syntactic sugar for sampling from distributions
 * No duplicate code (secure vs non-secure data generation share the same code)
 * No lazy initialization
New class also obsoletes class "RandomGeneratorFactory".
This commit is contained in:
parent
f695c9ce35
commit
363be2fea6
|
@ -0,0 +1,492 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math4.random;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import org.apache.commons.math4.exception.MathInternalError;
|
||||
import org.apache.commons.math4.exception.NotANumberException;
|
||||
import org.apache.commons.math4.exception.NotFiniteNumberException;
|
||||
import org.apache.commons.math4.exception.NotStrictlyPositiveException;
|
||||
import org.apache.commons.math4.exception.NumberIsTooLargeException;
|
||||
import org.apache.commons.math4.exception.util.LocalizedFormats;
|
||||
import org.apache.commons.math4.rng.UniformRandomProvider;
|
||||
import org.apache.commons.math4.util.MathArrays;
|
||||
|
||||
/**
|
||||
* Factory for creating generators of miscellaneous data.
|
||||
*
|
||||
* @since 4.0
|
||||
*/
|
||||
public class RandomUtils {
|
||||
/**
 * Hidden constructor: this class only exposes static members and is
 * not meant to be instantiated.
 */
private RandomUtils() {
    // Nothing to initialize.
}
|
||||
|
||||
/**
|
||||
* @param rng Underlying generator. Reference is copied so the RNG
|
||||
* is shared with the caller.
|
||||
* @return a {@link DataGenerator data generator}.
|
||||
*/
|
||||
public static DataGenerator createDataGenerator(final UniformRandomProvider rng) {
|
||||
return new DataGenerator(rng);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps an instance of the JDK's {@link Random} class.
|
||||
* The actual generation of random numbers will be delegated to that
|
||||
* instance.
|
||||
* <p>
|
||||
* If cryptographically secure data is required, one can use this
|
||||
* factory method, with an instance of the {@link java.security.SecureRandom}
|
||||
* class as the argument.
|
||||
* Note that data generation will be much slower in this case.
|
||||
* </p>
|
||||
*
|
||||
* @param rng Underlying generator. Reference is copied so the RNG
|
||||
* is shared with the caller.
|
||||
* @return a {@link DataGenerator data generator}.
|
||||
*/
|
||||
public static DataGenerator createDataGenerator(final Random rng) {
|
||||
return createDataGenerator(asUniformRandomProvider(rng));
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a {@link Random} instance.
|
||||
*
|
||||
* @param rng JDK {@link Random} instance to which the random number
|
||||
* generation is delegated. Reference is copied so the RNG is shared
|
||||
* with the caller.
|
||||
* @return a {@link UniformRandomProvider} instance.
|
||||
*/
|
||||
public static UniformRandomProvider asUniformRandomProvider(final Random rng) {
|
||||
return new UniformRandomProvider() {
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public void nextBytes(byte[] bytes) {
|
||||
rng.nextBytes(bytes);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public void nextBytes(byte[] bytes,
|
||||
int start,
|
||||
int len) {
|
||||
final byte[] reduced = new byte[len];
|
||||
rng.nextBytes(reduced);
|
||||
System.arraycopy(reduced, 0, bytes, start, len);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public int nextInt() {
|
||||
return rng.nextInt();
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public int nextInt(int n) {
|
||||
if (n <= 0) {
|
||||
throw new NotStrictlyPositiveException(n);
|
||||
}
|
||||
return rng.nextInt(n);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public long nextLong() {
|
||||
return rng.nextLong();
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public long nextLong(long n) {
|
||||
// Code copied from "o.a.c.m.rng.internal.BaseProvider".
|
||||
|
||||
if (n > 0) {
|
||||
long bits;
|
||||
long val;
|
||||
do {
|
||||
bits = rng.nextLong() >>> 1;
|
||||
val = bits % n;
|
||||
} while (bits - val + (n - 1) < 0);
|
||||
return val;
|
||||
}
|
||||
|
||||
throw new NotStrictlyPositiveException(n);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public boolean nextBoolean() {
|
||||
return rng.nextBoolean();
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public float nextFloat() {
|
||||
return rng.nextFloat();
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public double nextDouble() {
|
||||
return rng.nextDouble();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Various random data generation routines.
|
||||
*/
|
||||
public static class DataGenerator {
|
||||
/** Source of randomness used by all the generation routines. */
private final UniformRandomProvider rng;

/**
 * Creates a data generator; the given reference is stored without
 * copying.
 *
 * @param rng Underlying generator.
 */
DataGenerator(final UniformRandomProvider rng) {
    this.rng = rng;
}
|
||||
|
||||
/**
|
||||
* Generates a random string of hex characters of length {@code len}.
|
||||
*
|
||||
* <strong>Algorithm Description:</strong> how hexadecimal strings are
|
||||
* generated depends on the value of the {@code useSha1} argument.
|
||||
*
|
||||
* <ul>
|
||||
* <li>If {@code useSha1 == false}, a 2-step process is used:
|
||||
* <ol>
|
||||
* <li>
|
||||
* {@code len / 2 + 1} binary bytes are generated using the underlying
|
||||
* generator.
|
||||
* </li>
|
||||
* <li>
|
||||
* Each binary byte is translated into 2 hex digits.
|
||||
* </li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* <li>
|
||||
* If {@code useSha1 == true}, hex strings are generated in 40-byte
|
||||
* segments using a 3-step process:
|
||||
* <ol>
|
||||
* <li>
|
||||
* 20 random bytes are generated using the underlying generator.
|
||||
* </li>
|
||||
* <li>
|
||||
* SHA-1 hash is applied to yield a 20-byte binary digest.
|
||||
* </li>
|
||||
* <li>
|
||||
* Each byte of the binary digest is converted to 2 hex digits.
|
||||
* </li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* @param len Length of the generated string.
|
||||
* @param useSha1 Whether to use a digest.
|
||||
* If {@code true} (resp. {@code false}), the 3-step (resp. 2-step)
|
||||
* process will be used.
|
||||
* @return the random string.
|
||||
* @throws NotStrictlyPositiveException if {@code len <= 0}.
|
||||
*/
|
||||
public String nextHexString(int len,
|
||||
boolean useSha1) {
|
||||
if (len <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.LENGTH, len);
|
||||
}
|
||||
|
||||
// Initialize output buffer.
|
||||
final StringBuilder outBuffer = new StringBuilder();
|
||||
|
||||
if (!useSha1) {
|
||||
// Generate int(len/2)+1 random bytes.
|
||||
final byte[] randomBytes = new byte[(len / 2) + 1];
|
||||
rng.nextBytes(randomBytes);
|
||||
|
||||
// Convert each byte to 2 hex digits.
|
||||
for (int i = 0; i < randomBytes.length; i++) {
|
||||
final Integer c = Integer.valueOf(randomBytes[i]);
|
||||
|
||||
// Add 128 to byte value to make interval 0-255 before
|
||||
// conversion to hex.
|
||||
// This guarantees <= 2 hex digits from "toHexString".
|
||||
// "toHexString" would otherwise add 2^32 to negative arguments.
|
||||
String hex = Integer.toHexString(c.intValue() + 128);
|
||||
|
||||
// Make sure we add 2 hex digits for each byte.
|
||||
if (hex.length() == 1) {
|
||||
hex = "0" + hex;
|
||||
}
|
||||
outBuffer.append(hex);
|
||||
}
|
||||
} else {
|
||||
MessageDigest alg = null;
|
||||
try {
|
||||
alg = MessageDigest.getInstance("SHA-1");
|
||||
} catch (NoSuchAlgorithmException ex) {
|
||||
// Should never happen.
|
||||
throw new MathInternalError(ex);
|
||||
}
|
||||
alg.reset();
|
||||
|
||||
// Compute number of iterations required (40 bytes each).
|
||||
final int numIter = (len / 40) + 1;
|
||||
|
||||
for (int iter = 1; iter < numIter + 1; iter++) {
|
||||
final byte[] randomBytes = new byte[40];
|
||||
rng.nextBytes(randomBytes);
|
||||
alg.update(randomBytes);
|
||||
|
||||
// Create 20-byte binary hash.
|
||||
final byte[] hash = alg.digest();
|
||||
|
||||
// Loop over the hash, converting each byte to 2 hex digits
|
||||
for (int i = 0; i < hash.length; i++) {
|
||||
final Integer c = Integer.valueOf(hash[i]);
|
||||
|
||||
// Add 128 to byte value to make interval 0-255.
|
||||
// This guarantees <= 2 hex digits from "toHexString".
|
||||
// "toHexString" would otherwise add 2^32 to negative arguments.
|
||||
String hex = Integer.toHexString(c.intValue() + 128);
|
||||
|
||||
// Keep strings uniform length: guarantees 40 bytes.
|
||||
if (hex.length() == 1) {
|
||||
hex = "0" + hex;
|
||||
}
|
||||
outBuffer.append(hex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return outBuffer.toString().substring(0, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a uniformly distributed random long integer between {@code lower}
|
||||
* and {@code upper} (endpoints included).
|
||||
*
|
||||
* @param lower Lower bound for generated long integer.
|
||||
* @param upper Upper bound for generated long integer.
|
||||
* @return a random long integer greater than or equal to {@code lower}
|
||||
* and less than or equal to {@code upper}
|
||||
* @throws NumberIsTooLargeException if {@code lower >= upper}
|
||||
*/
|
||||
public long nextLong(final long lower,
|
||||
final long upper) {
|
||||
if (lower >= upper) {
|
||||
throw new NumberIsTooLargeException(LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND,
|
||||
lower, upper, false);
|
||||
}
|
||||
final long max = (upper - lower) + 1;
|
||||
if (max <= 0) {
|
||||
// Range is too wide to fit in a positive long (larger than 2^63);
|
||||
// as it covers more than half the long range, we use directly a
|
||||
// simple rejection method.
|
||||
while (true) {
|
||||
final long r = rng.nextLong();
|
||||
if (r >= lower && r <= upper) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
} else if (max < Integer.MAX_VALUE){
|
||||
// We can shift the range and generate directly a positive int.
|
||||
return lower + rng.nextInt((int) max);
|
||||
} else {
|
||||
// We can shift the range and generate directly a positive long.
|
||||
return lower + rng.nextLong(max);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a uniformly distributed random value from the open interval
|
||||
* {@code (lower, upper)} (i.e., endpoints excluded).
|
||||
* <p>
|
||||
* <strong>Definition</strong>:
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3662.htm">
|
||||
* Uniform Distribution</a> {@code lower} and {@code upper - lower} are the
|
||||
* <a href = "http://www.itl.nist.gov/div898/handbook/eda/section3/eda364.htm">
|
||||
* location and scale parameters</a>, respectively.</p>
|
||||
* <p>
|
||||
* <strong>Algorithm Description</strong>: scales the output of
|
||||
* Random.nextDouble(), but rejects 0 values (i.e., will generate another
|
||||
* random double if Random.nextDouble() returns 0). This is necessary to
|
||||
* provide a symmetric output interval (both endpoints excluded).
|
||||
* </p>
|
||||
*
|
||||
* @param lower Lower bound of the support (excluded).
|
||||
* @param upper Upper bound of the support (excluded).
|
||||
* @return a uniformly distributed random value between lower and upper
|
||||
* (both excluded).
|
||||
* @throws NumberIsTooLargeException if {@code lower >= upper}.
|
||||
* @throws NotFiniteNumberException if one of the bounds is infinite.
|
||||
* @throws NotANumberException if one of the bounds is NaN.
|
||||
*/
|
||||
public double nextUniform(double lower, double upper) {
|
||||
return nextUniform(lower, upper, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a uniformly distributed random value from the interval
|
||||
* {@code (lower, upper)} or the interval {@code [lower, upper)}. The lower
|
||||
* bound is thus optionally included, while the upper bound is always
|
||||
* excluded.
|
||||
* <p>
|
||||
* <strong>Definition</strong>:
|
||||
* <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda3662.htm">
|
||||
* Uniform Distribution</a> {@code lower} and {@code upper - lower} are the
|
||||
* <a href = "http://www.itl.nist.gov/div898/handbook/eda/section3/eda364.htm">
|
||||
* location and scale parameters</a>, respectively.</p>
|
||||
* <p>
|
||||
* <strong>Algorithm Description</strong>: if the lower bound is excluded,
|
||||
* scales the output of "nextDouble()", but rejects 0 values (i.e. it
|
||||
* will generate another random double if "nextDouble()" returns 0).
|
||||
* This is necessary to provide a symmetric output interval (both
|
||||
* endpoints excluded).
|
||||
* </p>
|
||||
*
|
||||
* @param lower Lower bound of the support.
|
||||
* @param upper Exclusive upper bound of the support.
|
||||
* @param lowerInclusive {@code true} if the lower bound is inclusive.
|
||||
* @return a uniformly distributed random value in the {@code (lower, upper)}
|
||||
* interval, if {@code lowerInclusive} is {@code false}, or in the
|
||||
* {@code [lower, upper)} interval, if {@code lowerInclusive} is
|
||||
* {@code true}.
|
||||
* @throws NumberIsTooLargeException if {@code lower >= upper}.
|
||||
* @throws NotFiniteNumberException if one of the bounds is infinite.
|
||||
* @throws NotANumberException if one of the bounds is NaN.
|
||||
*/
|
||||
public double nextUniform(double lower,
|
||||
double upper,
|
||||
boolean lowerInclusive) {
|
||||
if (lower >= upper) {
|
||||
throw new NumberIsTooLargeException(LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND,
|
||||
lower, upper, false);
|
||||
}
|
||||
if (Double.isInfinite(lower)) {
|
||||
throw new NotFiniteNumberException(LocalizedFormats.INFINITE_BOUND, lower);
|
||||
}
|
||||
if (Double.isInfinite(upper)) {
|
||||
throw new NotFiniteNumberException(LocalizedFormats.INFINITE_BOUND, upper);
|
||||
}
|
||||
if (Double.isNaN(lower) || Double.isNaN(upper)) {
|
||||
throw new NotANumberException();
|
||||
}
|
||||
|
||||
// Ensure nextDouble() isn't 0.0
|
||||
double u = rng.nextDouble();
|
||||
while (!lowerInclusive && u <= 0.0) {
|
||||
u = rng.nextDouble();
|
||||
}
|
||||
|
||||
return u * upper + (1.0 - u) * lower;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an integer array of length {@code k} whose entries are selected
|
||||
* randomly, without repetition, from the integers {@code 0, ..., n - 1}
|
||||
* (inclusive).
|
||||
* <p>
|
||||
* Generated arrays represent permutations of {@code n} taken {@code k} at a
|
||||
* time.
|
||||
* </p>
|
||||
* <p>
|
||||
* This method calls {@link MathArrays#shuffle(int[],UniformRandomProvider)
|
||||
* MathArrays.shuffle} in order to create a random shuffle of the set
|
||||
* of natural numbers {@code { 0, 1, ..., n - 1 }}.
|
||||
* </p>
|
||||
*
|
||||
* @param n Domain of the permutation.
|
||||
* @param k Size of the permutation.
|
||||
* @return a random {@code k}-permutation of {@code n}, as an array of
|
||||
* integers.
|
||||
* @throws NumberIsTooLargeException if {@code k > n}.
|
||||
* @throws NotStrictlyPositiveException if {@code k <= 0}.
|
||||
*/
|
||||
public int[] nextPermutation(int n,
|
||||
int k)
|
||||
throws NumberIsTooLargeException, NotStrictlyPositiveException {
|
||||
if (k > n) {
|
||||
throw new NumberIsTooLargeException(LocalizedFormats.PERMUTATION_EXCEEDS_N,
|
||||
k, n, true);
|
||||
}
|
||||
if (k <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.PERMUTATION_SIZE,
|
||||
k);
|
||||
}
|
||||
|
||||
final int[] index = MathArrays.natural(n);
|
||||
MathArrays.shuffle(index, rng);
|
||||
|
||||
// Return a new array containing the first "k" entries of "index".
|
||||
return MathArrays.copyOf(index, k);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of {@code k} objects selected randomly from the
|
||||
* given {@code collection}.
|
||||
*
|
||||
* <p>
|
||||
* Sampling is without replacement; but if {@code collection} contains
|
||||
* identical objects, the sample may include repeats. If all elements
|
||||
* are distinct, the resulting object array represents a
|
||||
* <a href="http://rkb.home.cern.ch/rkb/AN16pp/node250.html#SECTION0002500000000000000000">
|
||||
* Simple Random Sample</a> of size {@code k} from the elements of
|
||||
* the {@code collection}.
|
||||
* </p>
|
||||
* <p>
|
||||
* This method calls {@link #nextPermutation(int,int) nextPermutation(c.size(), k)}
|
||||
* in order to sample the collection.
|
||||
* </p>
|
||||
*
|
||||
* @param <T> Type of objects held in the {@code collection}.
|
||||
* @param collection Collection to be sampled.
|
||||
* @param k Size of the sample.
|
||||
* @return a random sample of {@code k} elements from the {@code collection}.
|
||||
* @throws NumberIsTooLargeException if {@code k > collection.size()}.
|
||||
* @throws NotStrictlyPositiveException if {@code k <= 0}.
|
||||
*/
|
||||
public <T> List<T> nextSample(Collection<T> collection,
|
||||
int k) {
|
||||
final int len = collection.size();
|
||||
if (k > len) {
|
||||
throw new NumberIsTooLargeException(LocalizedFormats.SAMPLE_SIZE_EXCEEDS_COLLECTION_SIZE,
|
||||
k, len, true);
|
||||
}
|
||||
if (k <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, k);
|
||||
}
|
||||
|
||||
final T[] objects = (T[]) collection.toArray(new Object[len]);
|
||||
final int[] index = nextPermutation(len, k);
|
||||
final List<T> result = new ArrayList<T>(k);
|
||||
for (int i = 0; i < k; i++) {
|
||||
result.add(objects[index[i]]);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue