From a6d222c0664c50ba87abb96a659272a3b3052ab6 Mon Sep 17 00:00:00 2001 From: Sebastian Bazley Date: Thu, 18 Jul 2013 14:59:48 +0000 Subject: [PATCH] MATH-1007 Add mode function to StatUtils class git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1504495 13f79535-47bb-0310-9956-ffa450edef68 --- src/changes/changes.xml | 3 + .../apache/commons/math3/stat/StatUtils.java | 88 +++++++++++++++++++ .../commons/math3/stat/StatUtilsTest.java | 37 ++++++++ 3 files changed, 128 insertions(+) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 2cb18f0e5..4801ec1fe 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,9 @@ If the output is not quite correct, check for invisible trailing spaces! + + Add mode function to StatUtils class + Enabled LaTeX expressions in javadoc via MathJax. diff --git a/src/main/java/org/apache/commons/math3/stat/StatUtils.java b/src/main/java/org/apache/commons/math3/stat/StatUtils.java index 523541e28..30f210d2a 100644 --- a/src/main/java/org/apache/commons/math3/stat/StatUtils.java +++ b/src/main/java/org/apache/commons/math3/stat/StatUtils.java @@ -16,7 +16,11 @@ */ package org.apache.commons.math3.stat; +import java.util.List; + import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.NullArgumentException; import org.apache.commons.math3.exception.NumberIsTooSmallException; import org.apache.commons.math3.exception.DimensionMismatchException; import org.apache.commons.math3.exception.NoDataException; @@ -791,4 +795,88 @@ public final class StatUtils { } return standardizedSample; } + + /** + * Returns the sample mode(s). The mode is the most frequently occurring + * value in the sample. If there is a unique value with maximum frequency, + * this value is returned as the only element of the output array. 
Otherwise, + * the returned array contains the maximum frequency elements in increasing + * order. For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17}, + * the returned array will have length two, with 0 in the first element and + * 5 in the second. + * + *

<p>NaN values are ignored when computing the mode - i.e., NaNs will never + * appear in the output array. If the sample includes only NaNs or has + * length 0, an empty array is returned.</p>

+ * + * @param sample input data + * @return array of the most frequently occurring element(s) sorted in ascending order. + * @throws MathIllegalArgumentException if the array is null + */ + public static double[] mode(double[] sample) throws MathIllegalArgumentException { + if (sample == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + return getMode(sample, 0, sample.length); + } + + /** + * Returns the sample mode(s). The mode is the most frequently occurring + * value in the sample. If there is a unique value with maximum frequency, + * this value is returned as the only element of the output array. Otherwise, + * the returned array contains the maximum frequency elements in increasing + * order. For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17}, + * the returned array will have length two, with 0 in the first element and + * 5 in the second. + * + * 

<p>NaN values are ignored when computing the mode - i.e., NaNs will never + * appear in the output array. If the sample includes only NaNs or has + * length 0, an empty array is returned.</p>

+ * + * @param sample input data + * @param begin index (0-based) of the first array element to include + * @param length the number of elements to include + * + * @return array of the most frequently occurring element(s) sorted in ascending order. + * @throws MathIllegalArgumentException if begin or length is negative or the array is null (the end of the range, begin + length, is not checked against the array length) + */ + public static double[] mode(double[] sample, final int begin, final int length) { + if (sample == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + + if (begin < 0) { + throw new NotPositiveException(LocalizedFormats.START_POSITION, Integer.valueOf(begin)); + } + + if (length < 0) { + throw new NotPositiveException(LocalizedFormats.LENGTH, Integer.valueOf(length)); + } + + return getMode(sample, begin, length); + } + + /* + * Private helper method. + * Assumes parameters have been validated. + */ + private static double[] getMode(double[] values, final int begin, final int length) { + // Add the values to the frequency table + Frequency freq = new Frequency(); + for (int i = begin; i < begin + length; i++) { + final double value = values[i]; + if (!Double.isNaN(value)) { + freq.addValue(Double.valueOf(value)); + } + } + List<Comparable<?>> list = freq.getMode(); + // Convert the list to an array of primitive double + double[] modes = new double[list.size()]; + int i = 0; + for(Comparable<?> c : list) { + modes[i++] = ((Double) c).doubleValue(); + } + return modes; + } + + } diff --git a/src/test/java/org/apache/commons/math3/stat/StatUtilsTest.java b/src/test/java/org/apache/commons/math3/stat/StatUtilsTest.java index 49cc5e83e..0ef432239 100644 --- a/src/test/java/org/apache/commons/math3/stat/StatUtilsTest.java +++ b/src/test/java/org/apache/commons/math3/stat/StatUtilsTest.java @@ -507,5 +507,42 @@ public final class StatUtilsTest { Assert.assertEquals(1.0, stats.getStandardDeviation(), distance); } + + @Test + public void testMode() { + final double[] singleMode = {0, 1, 0, 2, 7, 11, 12}; + final double[] 
modeSingle = StatUtils.mode(singleMode); + Assert.assertEquals(0, modeSingle[0], Double.MIN_VALUE); + Assert.assertEquals(1, modeSingle.length); + + final double[] twoMode = {0, 1, 2, 0, 2, 3, 7, 11}; + final double[] modeDouble = StatUtils.mode(twoMode); + Assert.assertEquals(0, modeDouble[0], Double.MIN_VALUE); + Assert.assertEquals(2, modeDouble[1], Double.MIN_VALUE); + Assert.assertEquals(2, modeDouble.length); + + final double[] nanInfested = {0, 0, 0, Double.NaN, Double.NaN, Double.NaN, Double.NaN, 2, 2, 2, 3, 5}; + final double[] modeNan = StatUtils.mode(nanInfested); + Assert.assertEquals(0, modeNan[0], Double.MIN_VALUE); + Assert.assertEquals(2, modeNan[1], Double.MIN_VALUE); + Assert.assertEquals(2, modeNan.length); + + final double[] infInfested = {0, 0, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, + Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY, 2, 2, 3, 5}; + final double[] modeInf = StatUtils.mode(infInfested); + Assert.assertEquals(Double.NEGATIVE_INFINITY, modeInf[0], Double.MIN_VALUE); + Assert.assertEquals(0, modeInf[1], Double.MIN_VALUE); + Assert.assertEquals(2, modeInf[2], Double.MIN_VALUE); + Assert.assertEquals(Double.POSITIVE_INFINITY, modeInf[3], Double.MIN_VALUE); + Assert.assertEquals(4, modeInf.length); + + final double[] noData = {}; + final double[] modeNodata = StatUtils.mode(noData); + Assert.assertEquals(0, modeNodata.length); + + final double[] nansOnly = {Double.NaN, Double.NaN}; + final double[] modeNansOnly = StatUtils.mode(nansOnly); + Assert.assertEquals(0, modeNansOnly.length); + } }