MATH-1007 Add mode function to StatUtils class

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1504495 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sebastian Bazley 2013-07-18 14:59:48 +00:00
parent d4aa5c6037
commit a6d222c066
3 changed files with 128 additions and 0 deletions

View File

@ -51,6 +51,9 @@ If the output is not quite correct, check for invisible trailing spaces!
</properties>
<body>
<release version="x.y" date="TBD" description="TBD">
<action dev="sebb" type="add" issue="MATH-1007">
Add mode function to StatUtils class
</action>
<action dev="psteitz" type="update" issue="MATH-1006">
Enabled LaTeX expressions in javadoc via MathJax.
</action>

View File

@ -16,7 +16,11 @@
*/
package org.apache.commons.math3.stat;
import java.util.List;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.NotPositiveException;
import org.apache.commons.math3.exception.NullArgumentException;
import org.apache.commons.math3.exception.NumberIsTooSmallException;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.NoDataException;
@ -791,4 +795,88 @@ public final class StatUtils {
}
return standardizedSample;
}
/**
 * Computes the mode(s) of the whole sample, i.e. the value(s) that occur
 * most often. When a single value has the highest frequency, the result is
 * a one-element array holding it. When several values tie for the highest
 * frequency, all of them are returned in increasing order. For example,
 * for the sample {0, 12, 5, 6, 0, 13, 5, 17} the result has length two,
 * with 0 first and 5 second.
 *
 * <p>NaN entries are skipped, so a NaN never appears in the result. A sample
 * that is empty or consists solely of NaNs yields an empty array.</p>
 *
 * @param sample input data
 * @return array of the most frequently occurring element(s), sorted in ascending order.
 * @throws MathIllegalArgumentException if the array is null
 */
public static double[] mode(double[] sample) throws MathIllegalArgumentException {
    if (sample != null) {
        // The whole array is used, so the window [0, sample.length) is always in range.
        return getMode(sample, 0, sample.length);
    }
    throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
}
/**
 * Returns the mode(s) of a portion of the sample. The mode is the most
 * frequently occurring value among the {@code length} elements starting at
 * index {@code begin}. If there is a unique value with maximum frequency,
 * this value is returned as the only element of the output array. Otherwise,
 * the returned array contains the maximum frequency elements in increasing
 * order. For example, if the selected elements are {0, 12, 5, 6, 0, 13, 5, 17},
 * the returned array will have length two, with 0 in the first element and
 * 5 in the second.
 *
 * <p>NaN values are ignored when computing the mode - i.e., NaNs will never
 * appear in the output array. If the selected portion includes only NaNs or
 * has length 0, an empty array is returned.</p>
 *
 * @param sample input data
 * @param begin index (0-based) of the first array element to include
 * @param length the number of elements to include
 *
 * @return array of the most frequently occurring element(s), sorted in ascending order.
 * @throws MathIllegalArgumentException if the array is null, if {@code begin}
 * or {@code length} is negative, or if the selected portion extends past the
 * end of the array
 */
public static double[] mode(double[] sample, final int begin, final int length) {
    if (sample == null) {
        throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
    }
    if (begin < 0) {
        throw new NotPositiveException(LocalizedFormats.START_POSITION, Integer.valueOf(begin));
    }
    if (length < 0) {
        throw new NotPositiveException(LocalizedFormats.LENGTH, Integer.valueOf(length));
    }
    // Reject windows that run past the end of the array so callers get the
    // documented exception instead of an ArrayIndexOutOfBoundsException.
    // Comparing against the remaining capacity avoids computing begin + length
    // in int arithmetic, which could overflow for large arguments.
    if (length > sample.length - begin) {
        throw new MathIllegalArgumentException(LocalizedFormats.SUBARRAY_ENDS_AFTER_ARRAY_END,
                Long.valueOf((long) begin + length), Integer.valueOf(sample.length));
    }
    return getMode(sample, begin, length);
}
/*
 * Private helper shared by the public mode methods.
 * Tallies the non-NaN values in [begin, begin + length) and unboxes the
 * most frequent ones into a primitive array.
 * Assumes parameters have been validated.
 */
private static double[] getMode(double[] values, final int begin, final int length) {
    // Tally every non-NaN value of the requested window
    final Frequency counts = new Frequency();
    final int end = begin + length;
    for (int idx = begin; idx < end; idx++) {
        final double candidate = values[idx];
        if (Double.isNaN(candidate)) {
            continue;
        }
        counts.addValue(Double.valueOf(candidate));
    }
    final List<Comparable<?>> mostFrequent = counts.getMode();
    // Only Double instances were added above, so the cast below is safe
    final double[] result = new double[mostFrequent.size()];
    int pos = 0;
    for (final Comparable<?> entry : mostFrequent) {
        result[pos++] = ((Double) entry).doubleValue();
    }
    return result;
}
}

View File

@ -507,5 +507,42 @@ public final class StatUtilsTest {
Assert.assertEquals(1.0, stats.getStandardDeviation(), distance);
}
/**
 * Exercises StatUtils.mode(double[]) on samples with a unique mode, tied
 * modes, NaN entries, infinite entries, and no usable data.
 * Array assertions are used so that a result of the wrong length fails with
 * an assertion message rather than an ArrayIndexOutOfBoundsException.
 */
@Test
public void testMode() {
    // A single value (0) has the highest frequency
    final double[] singleMode = {0, 1, 0, 2, 7, 11, 12};
    final double[] modeSingle = StatUtils.mode(singleMode);
    Assert.assertArrayEquals(new double[] {0}, modeSingle, Double.MIN_VALUE);

    // Two values tie; both are expected, smallest first
    final double[] twoMode = {0, 1, 2, 0, 2, 3, 7, 11};
    final double[] modeDouble = StatUtils.mode(twoMode);
    Assert.assertArrayEquals(new double[] {0, 2}, modeDouble, Double.MIN_VALUE);

    // NaN is the most common entry but must not be reported
    final double[] nanInfested = {0, 0, 0, Double.NaN, Double.NaN, Double.NaN, Double.NaN, 2, 2, 2, 3, 5};
    final double[] modeNan = StatUtils.mode(nanInfested);
    Assert.assertArrayEquals(new double[] {0, 2}, modeNan, Double.MIN_VALUE);

    // Infinite values take part in the count like any other value
    final double[] infInfested = {0, 0, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY,
        Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY, 2, 2, 3, 5};
    final double[] modeInf = StatUtils.mode(infInfested);
    Assert.assertArrayEquals(
        new double[] {Double.NEGATIVE_INFINITY, 0, 2, Double.POSITIVE_INFINITY},
        modeInf, Double.MIN_VALUE);

    // Empty input yields an empty result
    final double[] noData = {};
    final double[] modeNodata = StatUtils.mode(noData);
    Assert.assertEquals(0, modeNodata.length);

    // A sample made only of NaNs yields an empty result
    final double[] nansOnly = {Double.NaN, Double.NaN};
    final double[] modeNansOnly = StatUtils.mode(nansOnly);
    Assert.assertEquals(0, modeNansOnly.length);
}
}