MATH-1431: Avoid NaN in case of empty bin.

Thanks to Artem Onuchin.

Closes #79.
This commit is contained in:
Gilles Sadowski 2021-05-31 03:37:22 +02:00
parent 55e7cf0a51
commit 9d4d838638
3 changed files with 27 additions and 2 deletions

View File

@ -743,7 +743,8 @@ public class EmpiricalDistribution extends AbstractRealDistribution
* @return within-bin kernel parameterized by bStats * @return within-bin kernel parameterized by bStats
*/ */
protected ContinuousDistribution getKernel(SummaryStatistics bStats) { protected ContinuousDistribution getKernel(SummaryStatistics bStats) {
if (bStats.getN() == 1 || bStats.getVariance() == 0) { if (bStats.getN() <= 1 ||
bStats.getVariance() == 0) {
return new ConstantContinuousDistribution(bStats.getMean()); return new ConstantContinuousDistribution(bStats.getMean());
} else { } else {
return new NormalDistribution(bStats.getMean(), bStats.getStandardDeviation()); return new NormalDistribution(bStats.getMean(), bStats.getStandardDeviation());

View File

@ -24,10 +24,13 @@ import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.statistics.distribution.ContinuousDistribution; import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.statistics.distribution.ConstantContinuousDistribution; import org.apache.commons.statistics.distribution.ConstantContinuousDistribution;
import org.apache.commons.statistics.distribution.UniformContinuousDistribution; import org.apache.commons.statistics.distribution.UniformContinuousDistribution;
import org.apache.commons.statistics.distribution.NormalDistribution; import org.apache.commons.statistics.distribution.NormalDistribution;
import org.apache.commons.statistics.distribution.ExponentialDistribution;
import org.apache.commons.math4.legacy.TestUtils; import org.apache.commons.math4.legacy.TestUtils;
import org.apache.commons.math4.legacy.analysis.UnivariateFunction; import org.apache.commons.math4.legacy.analysis.UnivariateFunction;
import org.apache.commons.math4.legacy.analysis.integration.BaseAbstractUnivariateIntegrator; import org.apache.commons.math4.legacy.analysis.integration.BaseAbstractUnivariateIntegrator;
@ -35,7 +38,6 @@ import org.apache.commons.math4.legacy.analysis.integration.IterativeLegendreGau
import org.apache.commons.math4.legacy.exception.MathIllegalStateException; import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
import org.apache.commons.math4.legacy.exception.NullArgumentException; import org.apache.commons.math4.legacy.exception.NullArgumentException;
import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException; import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics; import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics;
import org.apache.commons.math4.legacy.util.FastMath; import org.apache.commons.math4.legacy.util.FastMath;
import org.junit.Assert; import org.junit.Assert;
@ -657,6 +659,25 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
Assert.assertEquals(9.0, dist.inverseCumulativeProbability(0.6), tol); Assert.assertEquals(9.0, dist.inverseCumulativeProbability(0.6), tol);
} }
/**
 * Regression test for MATH-1431: checks that
 * {@code cumulativeProbability} never evaluates to NaN for an
 * {@code EmpiricalDistribution} loaded with exponentially distributed samples.
 */
@Test
public void testMath1431() {
// Fixed seed (1000) so that the generated data set is reproducible.
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
final ContinuousDistribution.Sampler exponentialDistributionSampler
= new ExponentialDistribution(0.05).createSampler(rng);
// Draw 3000 data points from the exponential sampler.
final double[] empiricalDataPoints = new double[3000];
for (int i = 0; i < empiricalDataPoints.length; i++) {
empiricalDataPoints[i] = exponentialDistributionSampler.sample();
}
// NOTE(review): the constructor argument (100) is presumably the bin count;
// with skewed data some bins are expected to stay empty -- confirm.
final EmpiricalDistribution testDistribution = new EmpiricalDistribution(100);
testDistribution.load(empiricalDataPoints);
// Query the CDF at 1000 points taken from the same generator; the failure
// message reports the offending point.
for (int i = 0; i < 1000; i++) {
final double point = rng.nextDouble();
final double cdf = testDistribution.cumulativeProbability(point);
Assert.assertFalse("point: " + point, Double.isNaN(cdf));
}
}
/** /**
* Empirical distribution using a constant smoothing kernel. * Empirical distribution using a constant smoothing kernel.

View File

@ -86,6 +86,9 @@ Caveat:
nightmare was one of the main reasons for creating more focused nightmare was one of the main reasons for creating more focused
components.] components.]
"> ">
<action dev="erans" type="fix" issue="MATH-1431" due-to="Artem Onuchin">
"EmpiricalDistribution" handles empty bin.
</action>
<action dev="erans" type="update" issue="MATH-1582"> <action dev="erans" type="update" issue="MATH-1582">
Transforms codes moved into a dedicated maven module. Transforms codes moved into a dedicated maven module.
</action> </action>