From fed1f17d619c9d9be69a18b7f4dc59b1211cc6e7 Mon Sep 17 00:00:00 2001 From: "Mark R. Diggory" Date: Tue, 24 Jun 2003 14:03:32 +0000 Subject: [PATCH] Additon of begin and length controls to StatUtils. Addition on test for array conditions. git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/math/trunk@140945 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/commons/math/stat/StatUtils.java | 246 +++++++++++++++--- .../commons/math/stat/StatUtilsTest.java | 10 +- 2 files changed, 221 insertions(+), 35 deletions(-) diff --git a/src/java/org/apache/commons/math/stat/StatUtils.java b/src/java/org/apache/commons/math/stat/StatUtils.java index d34535af3..75cc5a43d 100644 --- a/src/java/org/apache/commons/math/stat/StatUtils.java +++ b/src/java/org/apache/commons/math/stat/StatUtils.java @@ -68,8 +68,20 @@ public class StatUtils { * @return the sum of the values or Double.NaN if the array is empty */ public static double sum(double[] values) { + return sum(values, 0, values.length); + } + + /** + * The sum of the values that have been added to Univariate. + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the sum of the values or Double.NaN if the array is empty + */ + public static double sum(double[] values, int begin, int length) { + testInput(values, begin, length); double accum = 0.0; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { accum += values[i]; } return accum; @@ -81,8 +93,20 @@ public class StatUtils { * @return the sum of the squared values or Double.NaN if the array is empty */ public static double sumSq(double[] values) { + return sumSq(values, 0, values.length); + } + + /** + * Returns the sum of the squares of the available values. 
+ * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the sum of the squared values or Double.NaN if the array is empty + */ + public static double sumSq(double[] values, int begin, int length) { + testInput(values, begin, length); double accum = 0.0; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { accum += Math.pow(values[i], 2.0); } return accum; @@ -94,12 +118,21 @@ public class StatUtils { * @return the product values or Double.NaN if the array is empty */ public static double product(double[] values) { - double product = Double.NaN; - if (values.length > 0) { - product = 1.0; - for (int i = 0; i < values.length; i++) { - product *= values[i]; - } + return product(values, 0, values.length); + } + + /** + * Returns the product for this collection of values + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the product values or Double.NaN if the array is empty + */ + public static double product(double[] values, int begin, int length) { + testInput(values, begin, length); + double product = 1.0; + for (int i = begin; i < begin + length; i++) { + product *= values[i]; } return product; } @@ -110,12 +143,21 @@ public class StatUtils { * @return the sumLog value or Double.NaN if the array is empty */ public static double sumLog(double[] values) { - double sumLog = Double.NaN; - if (values.length > 0) { - sumLog = 0.0; - for (int i = 0; i < values.length; i++) { - sumLog += Math.log(values[i]); - } + return sumLog(values, 0, values.length); + } + + /** + * Returns the sum of the natural logs for this collection of values + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the 
array + * @return the sumLog value or Double.NaN if the array is empty + */ + public static double sumLog(double[] values, int begin, int length) { + testInput(values, begin, length); + double sumLog = 0.0; + for (int i = begin; i < begin + length; i++) { + sumLog += Math.log(values[i]); } return sumLog; } @@ -127,7 +169,20 @@ public class StatUtils { * any of the values are <= 0. */ public static double geometricMean(double[] values) { - return Math.exp(sumLog(values) / (double) values.length); + return geometricMean(values, 0, values.length); + } + + /** + * Returns the geometric mean for this collection of values + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the geometric mean or Double.NaN if the array is empty or + * any of the values are <= 0. + */ + public static double geometricMean(double[] values, int begin, int length) { + testInput(values, begin, length); + return Math.exp(sumLog(values, begin, length) / (double) length ); } /** @@ -140,6 +195,19 @@ public class StatUtils { return sum(values) / (double) values.length; } + /** + * Returns the + * arithmetic mean of the available values + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the mean of the values or Double.NaN if the array is empty + */ + public static double mean(double[] values, int begin, int length) { + testInput(values, begin, length); + return sum(values, begin, length) / ((double) length); + } + /** * * @param values Is a double[] containing the values @@ -147,9 +215,25 @@ public class StatUtils { * or 0.0 for a single value set. 
*/ public static double standardDeviation(double[] values) { + return standardDeviation(values, 0, values.length); + } + + /** + * + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the result, Double.NaN if no values for an empty array + * or 0.0 for a single value set. + */ + public static double standardDeviation( + double[] values, + int begin, + int length) { + testInput(values, begin, length); double stdDev = Double.NaN; if (values.length != 0) { - stdDev = Math.sqrt(variance(values)); + stdDev = Math.sqrt(variance(values, begin, length)); } return (stdDev); } @@ -168,21 +252,41 @@ public class StatUtils { * or 0.0 for a single value set. */ public static double variance(double[] values) { - double variance = Double.NaN; + return variance(values, 0, values.length); + } + /** + * Returns the variance of the available values. This uses a corrected + * two pass algorithm of the following + * + * corrected two pass formula (14.1.8), and also referenced in:

+ * "Algorithms for Computing the Sample Variance: Analysis and + * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J. + * 1983, American Statistician, vol. 37, pp. 242-247. + * + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the result, Double.NaN if no values for an empty array + * or 0.0 for a single value set. + */ + public static double variance(double[] values, int begin, int length) { + testInput(values, begin, length); + + double variance = Double.NaN; if (values.length == 1) { variance = 0; } else if (values.length > 1) { - double mean = mean(values); + double mean = mean(values, begin, length); double accum = 0.0; double accum2 = 0.0; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { accum += Math.pow((values[i] - mean), 2.0); accum2 += (values[i] - mean); } variance = - (accum - (Math.pow(accum2, 2) / (double) values.length)) - / (double) (values.length - 1); + (accum - (Math.pow(accum2, 2) / ((double)length))) + / (double) (length - 1); } return variance; } @@ -194,22 +298,36 @@ public class StatUtils { * @return the skewness of the values or Double.NaN if the array is empty */ public static double skewness(double[] values) { + return skewness(values, 0, values.length); + } + /** + * Returns the skewness of a collection of values. Skewness is a + * measure of the asymmetry of a given distribution. 
+ * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the skewness of the values or Double.NaN if the array is empty + */ + public static double skewness(double[] values, int begin, int length) { + + testInput(values, begin, length); + // Initialize the skewness double skewness = Double.NaN; // Get the mean and the standard deviation - double mean = mean(values); - double stdDev = standardDeviation(values); + double mean = mean(values, begin, length); + double stdDev = standardDeviation(values, begin, length); // Sum the cubes of the distance from the mean divided by the // standard deviation double accum = 0.0; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { accum += Math.pow((values[i] - mean) / stdDev, 3.0); } // Get N - double n = values.length; + double n = length; // Calculate skewness skewness = (n / ((n - 1) * (n - 2))) * accum; @@ -224,22 +342,37 @@ public class StatUtils { * @return the kurtosis of the values or Double.NaN if the array is empty */ public static double kurtosis(double[] values) { + return kurtosis(values, 0, values.length); + } + + /** + * Returns the kurtosis for this collection of values. Kurtosis is a + * measure of the "peakedness" of a distribution. 
+ * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the kurtosis of the values or Double.NaN if the array is empty + */ + public static double kurtosis(double[] values, int begin, int length) { + + testInput(values, begin, length); + // Initialize the kurtosis double kurtosis = Double.NaN; // Get the mean and the standard deviation - double mean = mean(values); - double stdDev = standardDeviation(values); + double mean = mean(values, begin, length); + double stdDev = standardDeviation(values, begin, length); // Sum the ^4 of the distance from the mean divided by the // standard deviation double accum = 0.0; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { accum += Math.pow((values[i] - mean) / stdDev, 4.0); } // Get N - double n = values.length; + double n = length; double coefficientOne = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)); double termTwo = ((3 * Math.pow(n - 1, 2.0)) / ((n - 2) * (n - 3))); @@ -248,19 +381,31 @@ public class StatUtils { return kurtosis; } - + /** * Returns the maximum of the available values * @param values Is a double[] containing the values * @return the maximum of the values or Double.NaN if the array is empty */ public static double max(double[] values) { + return max(values, 0, values.length); + } + + /** + * Returns the maximum of the available values + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the maximum of the values or Double.NaN if the array is empty + */ + public static double max(double[] values, int begin, int length) { + testInput(values, begin, length); double max = Double.NaN; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { if (i == 0) { max = values[i]; } else { - max = 
Math.max(max, values[i]); + max = (max > values[i]) ? max : values[i]; } } return max; @@ -272,14 +417,47 @@ public class StatUtils { * @return the minimum of the values or Double.NaN if the array is empty */ public static double min(double[] values) { + return min(values, 0, values.length); + } + + /** + * Returns the minimum of the available values + * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + * @return the minimum of the values or Double.NaN if the array is empty + */ + public static double min(double[] values, int begin, int length) { + testInput(values, begin, length); + double min = Double.NaN; - for (int i = 0; i < values.length; i++) { + for (int i = begin; i < begin + length; i++) { if (i == 0) { min = values[i]; } else { - min = Math.min(min, values[i]); + min = (min < values[i]) ? min : values[i]; } } return min; } + + /** + * Private testInput method used by all methods to verify the content + * of the array and indices are correct. 
+ * @param values Is a double[] containing the values + * @param begin processing at this point in the array + * @param length processing at this point in the array + */ + private static void testInput(double[] values, int begin, int length) { + + if (length > values.length) + throw new IllegalArgumentException("length > values.length"); + + if (begin + length > values.length) + throw new IllegalArgumentException("begin + length > values.length"); + + if (values == null) + throw new IllegalArgumentException("input value array is null"); + + } } diff --git a/src/test/org/apache/commons/math/stat/StatUtilsTest.java b/src/test/org/apache/commons/math/stat/StatUtilsTest.java index f3c428285..8f91e89a0 100644 --- a/src/test/org/apache/commons/math/stat/StatUtilsTest.java +++ b/src/test/org/apache/commons/math/stat/StatUtilsTest.java @@ -62,7 +62,7 @@ import junit.framework.TestSuite; * * @author Phil Steitz * @author Mark Diggory - * @version $Revision: 1.1 $ $Date: 2003/06/18 15:59:55 $ + * @version $Revision: 1.2 $ $Date: 2003/06/24 14:03:31 $ */ public final class StatUtilsTest extends TestCase { @@ -187,4 +187,12 @@ public final class StatUtilsTest extends TestCase { 0.00001); } + public void testArrayIndexConditions() throws Exception { + double[] values = { 1.0, 2.0, 3.0, 4.0 }; + + assertEquals("Sum not expected", 5.0, StatUtils.sum(values,1,2),Double.MIN_VALUE); + assertEquals("Sum not expected", 3.0, StatUtils.sum(values,0,2),Double.MIN_VALUE); + assertEquals("Sum not expected", 7.0, StatUtils.sum(values,2,2),Double.MIN_VALUE); + + } } \ No newline at end of file