Constrained EmpiricalDistribution sample/getNextValue methods to return

values within the range of the data; correctly linked RandomGenerator to
superclass so that RealDistribution reseedRandomGenerator method works.

JIRA: MATH-984



git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1604639 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2014-06-22 18:53:27 +00:00
parent e31a222cef
commit 71fd124ddb
5 changed files with 256 additions and 95 deletions

View File

@ -73,6 +73,11 @@ Users are encouraged to upgrade to this version as this release not
2. A few methods in the FastMath class are in fact slower than their
counterparts in either Math or StrictMath (cf. MATH-740 and MATH-901).
">
<action dev="psteitz" type="fix" issue="MATH-984">
Constrained EmpiricalDistribution sample/getNextValue methods to return
values within the range of the data; correctly linked RandomGenerator to
superclass so that RealDistribution reseedRandomGenerator method works.
</action>
<action dev="luc" type="add" issue="MATH-1120" due-to="Venkatesha Murthy">
Added several different estimation types and NaN handling strategies for Percentile.
</action>

View File

@ -0,0 +1,122 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.distribution;
import org.apache.commons.math3.exception.OutOfRangeException;
/**
 * A degenerate real distribution concentrated on one fixed value: every
 * sample, the mean and both support bounds are equal to that value.
 *
 * @version $Id$
 * @since 3.4
 */
public class ConstantRealDistribution extends AbstractRealDistribution {

    /** Serialization ID */
    private static final long serialVersionUID = -4157745166772046273L;

    /** The single value taken by this distribution */
    private final double value;

    /**
     * Builds the distribution that always yields {@code value}.
     *
     * @param value the constant value of this distribution
     */
    public ConstantRealDistribution(double value) {
        // No random generator is handed to the superclass: sampling is deterministic.
        super(null);
        this.value = value;
    }

    /**
     * {@inheritDoc}
     * <p>Returns 1 when {@code x} equals the constant value, 0 otherwise.</p>
     */
    public double density(double x) {
        if (x == value) {
            return 1;
        }
        return 0;
    }

    /**
     * {@inheritDoc}
     * <p>Returns 0 when {@code x} is strictly below the constant value, 1 otherwise.</p>
     */
    public double cumulativeProbability(double x) {
        if (x < value) {
            return 0;
        }
        return 1;
    }

    /**
     * {@inheritDoc}
     * <p>Every probability in [0, 1] maps to the constant value.</p>
     */
    @Override
    public double inverseCumulativeProbability(final double p)
        throws OutOfRangeException {
        final boolean outsideUnitInterval = p < 0.0 || p > 1.0;
        if (outsideUnitInterval) {
            throw new OutOfRangeException(p, 0, 1);
        }
        return value;
    }

    /**
     * {@inheritDoc}
     * <p>The mean is the constant value itself.</p>
     */
    public double getNumericalMean() {
        return value;
    }

    /**
     * {@inheritDoc}
     * <p>The variance is always 0.</p>
     */
    public double getNumericalVariance() {
        return 0;
    }

    /**
     * {@inheritDoc}
     * <p>The lower bound of the support is the constant value.</p>
     */
    public double getSupportLowerBound() {
        return value;
    }

    /**
     * {@inheritDoc}
     * <p>The upper bound of the support is the constant value.</p>
     */
    public double getSupportUpperBound() {
        return value;
    }

    /** {@inheritDoc} */
    public boolean isSupportLowerBoundInclusive() {
        return true;
    }

    /** {@inheritDoc} */
    public boolean isSupportUpperBoundInclusive() {
        return true;
    }

    /** {@inheritDoc} */
    public boolean isSupportConnected() {
        return true;
    }

    /**
     * {@inheritDoc}
     * <p>Always returns the constant value.</p>
     */
    @Override
    public double sample() {
        return value;
    }

    /**
     * Override with no-op (there is no generator).
     *
     * @param seed (ignored)
     */
    @Override
    public void reseedRandomGenerator(long seed) {
        // Intentionally empty: this distribution holds no generator to reseed.
    }
}

View File

@ -29,6 +29,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.commons.math3.distribution.ConstantRealDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.RealDistribution;
import org.apache.commons.math3.exception.MathIllegalStateException;
@ -212,7 +213,7 @@ public class EmpiricalDistribution extends AbstractRealDistribution {
*/
private EmpiricalDistribution(int binCount,
RandomDataGenerator randomData) {
super(null);
super(randomData.getRandomGenerator());
this.binCount = binCount;
this.randomData = randomData;
binStats = new ArrayList<SummaryStatistics>();
@ -478,23 +479,7 @@ public class EmpiricalDistribution extends AbstractRealDistribution {
throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED);
}
// Start with a uniformly distributed random number in (0,1)
final double x = randomData.nextUniform(0,1);
// Use this to select the bin and generate a Gaussian within the bin
for (int i = 0; i < binCount; i++) {
if (x <= upperBounds[i]) {
SummaryStatistics stats = binStats.get(i);
if (stats.getN() > 0) {
if (stats.getStandardDeviation() > 0) { // more than one obs
return getKernel(stats).sample();
} else {
return stats.getMean(); // only one obs in bin
}
}
}
}
throw new MathIllegalStateException(LocalizedFormats.NO_BIN_SELECTED);
return sample();
}
/**
@ -767,15 +752,6 @@ public class EmpiricalDistribution extends AbstractRealDistribution {
return true;
}
/**
* {@inheritDoc}
* @since 3.1
*/
@Override
public double sample() {
return getNextValue();
}
/**
* {@inheritDoc}
* @since 3.1
@ -843,15 +819,20 @@ public class EmpiricalDistribution extends AbstractRealDistribution {
}
/**
* The within-bin smoothing kernel.
* The within-bin smoothing kernel. Returns a Gaussian distribution
* parameterized by {@code bStats}, unless the bin contains only one
* observation, in which case a constant distribution is returned.
*
* @param bStats summary statistics for the bin
* @return within-bin kernel parameterized by bStats
*/
protected RealDistribution getKernel(SummaryStatistics bStats) {
// Default to Gaussian
return new NormalDistribution(randomData.getRandomGenerator(),
if (bStats.getN() == 1) {
return new ConstantRealDistribution(bStats.getMean());
} else {
return new NormalDistribution(randomData.getRandomGenerator(),
bStats.getMean(), bStats.getStandardDeviation(),
NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
}
}

View File

@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.distribution;
import org.junit.Assert;
import org.junit.Test;
/**
 * Test cases for ConstantRealDistribution.
 */
public class ConstantRealDistributionTest extends RealDistributionAbstractTest {

    // --- Override tolerance -------------------------------------------------

    /** Runs the inherited setup, then sets the comparison tolerance to zero. */
    @Override
    public void setUp() {
        super.setUp();
        setTolerance(0);
    }

    //--- Implementations for abstract methods --------------------------------

    /** Creates the default constant real distribution instance (value 1) to use in tests. */
    @Override
    public ConstantRealDistribution makeDistribution() {
        return new ConstantRealDistribution(1);
    }

    /** Creates the default cumulative probability distribution test input values */
    @Override
    public double[] makeCumulativeTestPoints() {
        return new double[] {0, 0.5, 1};
    }

    /** Creates the default cumulative probability distribution test expected values */
    @Override
    public double[] makeCumulativeTestValues() {
        return new double[] {0, 0, 1};
    }

    /** Creates the default probability density test expected values */
    @Override
    public double[] makeDensityTestValues() {
        return new double[] {0, 0, 1};
    }

    /** Override default test, verifying that inverse cum is constant */
    @Override
    @Test
    public void testInverseCumulativeProbabilities() {
        RealDistribution dist = getDistribution();
        // The cumulative test values double as probabilities; each must map to the constant 1.
        for (double p : getCumulativeTestValues()) {
            Assert.assertEquals(1, dist.inverseCumulativeProbability(p), 0);
        }
    }

    //--- Additional test cases -----------------------------------------------

    /** Mean must equal the constant value and variance must be zero. */
    @Test
    public void testMeanVariance() {
        final ConstantRealDistribution dist = new ConstantRealDistribution(-1);
        // JUnit argument order is (expected, actual, delta).
        Assert.assertEquals(-1, dist.getNumericalMean(), 0d);
        Assert.assertEquals(0, dist.getNumericalVariance(), 0d);
    }

    /** Every sample must equal the constant value. */
    @Test
    public void testSampling() {
        ConstantRealDistribution dist = new ConstantRealDistribution(0);
        for (int i = 0; i < 10; i++) {
            Assert.assertEquals(0, dist.sample(), 0);
        }
    }
}

View File

@ -29,6 +29,7 @@ import org.apache.commons.math3.analysis.UnivariateFunction;
import org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator;
import org.apache.commons.math3.analysis.integration.IterativeLegendreGaussIntegrator;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.commons.math3.distribution.ConstantRealDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.RealDistribution;
import org.apache.commons.math3.distribution.RealDistributionAbstractTest;
@ -409,6 +410,31 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
}
}
/**
 * MATH-984
 * Verify that sampled values do not go outside of the range of the data.
 * Loads 100 data points into a 10-bin EmpiricalDistribution, reseeds its
 * generator with a fixed seed, and checks that each of 1000 samples lies
 * strictly between 0 and 1.
 */
@Test
public void testSampleValuesRange() {
// Concentrate values near the endpoints of (0, 1).
// Unconstrained Gaussian kernel would generate values outside the interval.
final double[] data = new double[100];
// Indices 0..49 receive 1, 1/2, ..., 1/50.
for (int i = 0; i < 50; i++) {
data[i] = 1 / ((double) i + 1);
}
// Indices 51..99 receive 1 - 1/51, ..., 1 - 1/3.
// NOTE(review): index 50 is never assigned and keeps the array default 0.0,
// so the data contains both 0.0 and 1.0 (index 0) while the assertions below
// are strict. This looks like an off-by-one in the start index; confirm
// before changing, since the seeded sample sequence depends on the data.
for (int i = 51; i < 100; i++) {
data[i] = 1 - 1 / (100 - (double) i + 2);
}
EmpiricalDistribution dist = new EmpiricalDistribution(10);
dist.load(data);
// Fixed seed, presumably so that the sampled sequence is reproducible.
dist.reseedRandomGenerator(1000);
for (int i = 0; i < 1000; i++) {
final double dev = dist.sample();
Assert.assertTrue(dev < 1);
Assert.assertTrue(dev > 0);
}
}
/**
* Find the bin that x belongs (relative to {@link #makeDistribution()}).
*/
@ -503,7 +529,7 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
// Use constant distribution equal to bin mean within bin
@Override
protected RealDistribution getKernel(SummaryStatistics bStats) {
return new ConstantDistribution(bStats.getMean());
return new ConstantRealDistribution(bStats.getMean());
}
}
@ -521,68 +547,4 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
UniformRealDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
}
/**
 * Distribution that takes just one value.
 * Private test helper: every sample, the mean and both support bounds are
 * equal to the value passed to the constructor.
 */
private class ConstantDistribution extends AbstractRealDistribution {
private static final long serialVersionUID = 1L;
/** Singleton value in the sample space */
private final double c;
/**
 * @param c the single value this distribution takes
 */
public ConstantDistribution(double c) {
this.c = c;
}
/** Always returns 0, for every x (including x equal to c). */
public double density(double x) {
return 0;
}
/** Returns 0 for x strictly below c, 1 otherwise. */
public double cumulativeProbability(double x) {
return x < c ? 0 : 1;
}
/**
 * Returns c for any p in [0, 1].
 * @throws OutOfRangeException if p is below 0 or above 1
 */
@Override
public double inverseCumulativeProbability(double p) {
if (p < 0.0 || p > 1.0) {
throw new OutOfRangeException(p, 0, 1);
}
return c;
}
/** The mean is the constant c. */
public double getNumericalMean() {
return c;
}
/** The variance is always 0. */
public double getNumericalVariance() {
return 0;
}
/** The lower bound of the support is c. */
public double getSupportLowerBound() {
return c;
}
/** The upper bound of the support is c. */
public double getSupportUpperBound() {
return c;
}
// NOTE(review): the lower bound is reported as exclusive while the upper bound
// is inclusive, although both bounds are the same value c; confirm whether
// this asymmetry is intended.
public boolean isSupportLowerBoundInclusive() {
return false;
}
public boolean isSupportUpperBoundInclusive() {
return true;
}
public boolean isSupportConnected() {
return true;
}
/** Always returns c. */
@Override
public double sample() {
return c;
}
}
}